#include "RegexExpression.h"
namespace
{
namespace
{
bool
(
const
wchar_t
*&
,
wchar_t
)
{
if
(*
==
)
{
++;
return
true
;
}
else
{
return
false
;
}
}
bool
(
const
wchar_t
*&
,
const
wchar_t
*
,
wchar_t
&
)
{
const
wchar_t
*
=::
(
, *
);
if
(
position
)
{
=*
++;
return
true
;
}
else
{
return
false
;
}
}
bool
(
const
wchar_t
*&
,
const
wchar_t
*
)
{
=
(
);
if
(
(
,
,
len
)==
0
)
{
+=
len
;
return
true
;
}
else
{
return
false
;
}
}
bool
(
const
wchar_t
*&
,
const
wchar_t
*
)
{
wchar_t
;
return
(
,
,
c
);
}
bool
(
const
wchar_t
*&
,
&
)
{
bool
=
false
;
=
0
;
while
(
L'0'
<=*
&& *
<=
L'9'
)
{
=
*
10
+(*
++)-
L'0'
;
readed
=
true
;
}
return
readed
;
}
bool
(
const
wchar_t
*&
,
&
)
{
const
wchar_t
*
=
;
if
((
L'A'
<=*
read
&& *
read
<=
L'Z'
) || (
L'a'
<=*
read
&& *
read
<=
L'z'
) || *
read
==
L'_'
)
{
read
++;
while
((
L'A'
<=*
read
&& *
read
<=
L'Z'
) || (
L'a'
<=*
read
&& *
read
<=
L'z'
) || (
L'0'
<=*
read
&& *
read
<=
L'9'
) || *
read
==
L'_'
)
{
read
++;
}
}
if
(
==
read
)
{
return
false
;
}
else
{
(
,
(
read
-
));
=
read
;
return
true
;
}
}
<
>
(
const
wchar_t
*&
)
{
=
0
;
=
0
;
if
(!*
)
{
return
0
;
}
else
if
(
(
,
L'+'
))
{
min
=
1
;
max
=-
1
;
}
else
if
(
(
,
L'*'
))
{
min
=
0
;
max
=-
1
;
}
else
if
(
(
,
L'?'
))
{
min
=
0
;
max
=
1
;
}
else
if
(
(
,
L'{'
))
{
if
(
(
,
min
))
{
if
(
(
,
L','
))
{
if
(!
(
,
max
))
{
max
=-
1
;
}
}
else
{
max
=
min
;
}
if
(!
(
,
L'}'
))
{
goto
THROW_EXCEPTION;
}
}
else
{
goto
THROW_EXCEPTION;
}
}
else
{
return
0
;
}
{
*
=
new
;
expression
->
=
min
;
expression
->
=
max
;
expression
->
=!
(
,
L'?'
);
return
expression
;
}
THROW_EXCEPTION:
throw
(
L"Regular expression syntax error: Illegal loop expression."
,
L"vl::regex_internal::ParseLoop"
,
L"input"
);
}
<
>
(
const
wchar_t
*&
)
{
if
(!*
)
{
return
0
;
}
else
if
(
(
,
L'^'
))
{
return
new
;
}
else
if
(
(
,
L'$'
))
{
return
new
;
}
else
if
(
(
,
L'\\'
) ||
(
,
L'/'
))
{
<
>
=
new
;
expression
=
false
;
switch
(*
)
{
case
L'.'
:
expression
.
(
(
1
,
65535
));
break
;
case
L'r'
:
expression
.
(
(
L'\r'
,
L'\r'
));
break
;
case
L'n'
:
expression
.
(
(
L'\n'
,
L'\n'
));
break
;
case
L't'
:
expression
.
(
(
L'\t'
,
L'\t'
));
break
;
case
L'\\'
:
case
L'/'
:
case
L'('
:
case
L')'
:
case
L'+'
:
case
L'*'
:
case
L'?'
:
case
L'|'
:
case
L'{'
:
case
L'}'
:
case
L'['
:
case
L']'
:
case
L'<'
:
case
L'>'
:
case
L'^'
:
case
L'$'
:
case
L'!'
:
case
L'='
:
expression
.
(
(*
, *
));
break
;
case
L'S'
:
expression
=
true
;
case
L's'
:
expression
.
(
(
L' '
,
L' '
));
expression
.
(
(
L'\r'
,
L'\r'
));
expression
.
(
(
L'\n'
,
L'\n'
));
expression
.
(
(
L'\t'
,
L'\t'
));
break
;
case
L'D'
:
expression
=
true
;
case
L'd'
:
expression
.
(
(
L'0'
,
L'9'
));
break
;
case
L'L'
:
expression
=
true
;
case
L'l'
:
expression
.
(
(
L'_'
,
L'_'
));
expression
.
(
(
L'A'
,
L'Z'
));
expression
.
(
(
L'a'
,
L'z'
));
break
;
case
L'W'
:
expression
=
true
;
case
L'w'
:
expression
.
(
(
L'_'
,
L'_'
));
expression
.
(
(
L'0'
,
L'9'
));
expression
.
(
(
L'A'
,
L'Z'
));
expression
.
(
(
L'a'
,
L'z'
));
break
;
default
:
throw
(
L"Regular expression syntax error: Illegal character escaping."
,
L"vl::regex_internal::ParseCharSet"
,
L"input"
);
}
++;
return
expression
;
}
else
if
(
(
,
L'['
))
{
<
>
=
new
;
if
(
(
,
L'^'
))
{
expression
=
true
;
}
else
{
expression
=
false
;
}
bool
=
false
;
wchar_t
=
L'\0'
;
wchar_t
=
L'\0'
;
while
(
true
)
{
if
(
(
,
L'\\'
) ||
(
,
L'/'
))
{
wchar_t
=
L'\0'
;
switch
(*
)
{
case
L'r'
:
c
=
L'\r'
;
break
;
case
L'n'
:
c
=
L'\n'
;
break
;
case
L't'
:
c
=
L'\t'
;
break
;
case
L'-'
:
case
L'['
:
case
L']'
:
case
L'\\'
:
case
L'/'
:
case
L'^'
:
case
L'$'
:
c
=*
;
break
;
default
:
throw
(
L"Regular expression syntax error: Illegal character escaping, only \"rnt-[]\\/\" are legal escaped characters in []."
,
L"vl::regex_internal::ParseCharSet"
,
L"input"
);
}
++;
midState
?
b
=
c
:
a
=
c
;
midState
=!
midState
;
}
else
if
(
(
,
L"-]"
))
{
goto
THROW_EXCEPTION;
}
else
if
(*
)
{
midState
?
b
=*
++:
a
=*
++;
midState
=!
midState
;
}
else
{
goto
THROW_EXCEPTION;
}
if
(
(
,
L']'
))
{
if
(
midState
)
{
b
=
a
;
}
if
(!
expression
(
(
a
,
b
)))
{
goto
THROW_EXCEPTION;
}
break
;
}
else
if
(
(
,
L'-'
))
{
if
(!
midState
)
{
goto
THROW_EXCEPTION;
}
}
else
{
if
(
midState
)
{
b
=
a
;
}
if
(
expression
(
(
a
,
b
)))
{
midState
=
false
;
}
else
{
goto
THROW_EXCEPTION;
}
}
}
return
expression
;
THROW_EXCEPTION:
throw
(
L"Regular expression syntax error: Illegal character set definition."
);
}
else
if
(
(
,
L"()+*?{}|"
))
{
--;
return
0
;
}
else
{
*
=
new
;
expression
->
=
false
;
expression
->
.
(
(*
, *
));
++;
return
expression
;
}
}
<
>
(
const
wchar_t
*&
)
{
if
(
(
,
L"(="
))
{
<
>
=
(
);
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
sub
;
return
expression
;
}
else
if
(
(
,
L"(!"
))
{
<
>
=
(
);
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
sub
;
return
expression
;
}
else
if
(
(
,
L"(<&"
))
{
;
if
(!
(
,
name
))
{
goto
NEED_NAME;
}
if
(!
(
,
L'>'
))
{
goto
NEED_GREATER;
}
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
name
;
return
expression
;
}
else
if
(
(
,
L"(<$"
))
{
;
=-
1
;
if
(
(
,
name
))
{
if
(
(
,
L';'
))
{
if
(!
(
,
index
))
{
goto
NEED_NUMBER;
}
}
}
else
if
(!
(
,
index
))
{
goto
NEED_NUMBER;
}
if
(!
(
,
L'>'
))
{
goto
NEED_GREATER;
}
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
name
;
expression
->
=
index
;
return
expression
;
}
else
if
(
(
,
L"(<"
))
{
;
if
(!
(
,
name
))
{
goto
NEED_NAME;
}
if
(!
(
,
L'>'
))
{
goto
NEED_GREATER;
}
<
>
=
(
);
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
name
;
expression
->
sub
;
return
expression
;
}
else
if
(
(
,
L"(?"
))
{
<
>
=
(
);
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
*
=
new
;
expression
->
sub
;
return
expression
;
}
else
if
(
(
,
L'('
))
{
<
>
=
(
);
if
(!
(
,
L')'
))
{
goto
NEED_RIGHT_BRACKET;
}
return
sub
;
}
else
{
return
0
;
}
NEED_RIGHT_BRACKET:
throw
(
L"Regular expression syntax error: \")\" expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
NEED_GREATER:
throw
(
L"Regular expression syntax error: \">\" expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
NEED_NAME:
throw
(
L"Regular expression syntax error: Identifier expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
NEED_NUMBER:
throw
(
L"Regular expression syntax error: Number expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
}
<
>
(
const
wchar_t
*&
)
{
<
>
=
(
);
if
(!
unit
)
{
unit
(
);
}
if
(!
unit
)
{
return
0
;
}
<
>
;
while
((
loop
(
)))
{
loop
unit
;
unit
loop
;
}
return
unit
;
}
<
>
(
const
wchar_t
*&
)
{
<
>
=
(
);
while
(
true
)
{
<
>
=
(
);
if
(
right
)
{
*
=
new
;
sequence
->
expression
;
sequence
->
right
;
expression
sequence
;
}
else
{
break
;
}
}
return
expression
;
}
<
>
(
const
wchar_t
*&
)
{
<
>
=
(
);
while
(
true
)
{
if
(
(
,
L'|'
))
{
<
>
=
(
);
if
(
right
)
{
*
=
new
;
alternate
->
expression
;
alternate
->
right
;
expression
alternate
;
}
else
{
throw
(
L"Regular expression syntax error: Expression expected."
,
L"vl::regex_internal::ParseAlt"
,
L"input"
);
}
}
else
{
break
;
}
}
return
expression
;
}
<
>
(
const
wchar_t
*&
)
{
return
(
);
}
::
(
const
&
)
{
::
=
new
;
const
wchar_t
*
=
.
();
const
wchar_t
*
=
start
;
try
{
while
(
(
input
,
L"(<#"
))
{
;
if
(!
(
input
,
name
))
{
throw
(
L"Regular expression syntax error: Identifier expected."
,
L"vl::regex_internal::ParseRegexExpression"
,
L"code"
);
}
if
(!
(
input
,
L'>'
))
{
throw
(
L"Regular expression syntax error: \">\" expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
}
<
>
=
(
input
);
if
(!
(
input
,
L')'
))
{
throw
(
L"Regular expression syntax error: \")\" expected."
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
}
if
(
regex
definitions
.
().
(
name
))
{
throw
(
L"Regular expression syntax error: Found duplicated sub expression name: \""
+
name
+
L"\". "
,
L"vl::regex_internal::ParseFunction"
,
L"input"
);
}
else
{
regex
definitions
.
(
name
,
sub
);
}
}
regex
(
input
);
if
(!
regex
)
{
throw
(
L"Regular expression syntax error: Expression expected."
,
L"vl::regex_internal::ParseUnit"
,
L"input"
);
}
if
(*
input
)
{
throw
(
L"Regular expression syntax error: Found unnecessary tokens."
,
L"vl::regex_internal::ParseUnit"
,
L"input"
);
}
return
regex
;
}
catch
(
const
&
)
{
throw
(
e
.
(),
,
input
-
start
);
}
}
(
const
&
)
{
;
for
(
=
0
;
i
<
.
();
i
++)
{
wchar_t
=
i
];
switch
(
c
)
{
case
L'\\'
:
case
L'/'
:
case
L'('
:
case
L')'
:
case
L'+'
:
case
L'*'
:
case
L'?'
:
case
L'|'
:
case
L'{'
:
case
L'}'
:
case
L'['
:
case
L']'
:
case
L'<'
:
case
L'>'
:
case
L'^'
:
case
L'$'
:
case
L'!'
:
case
L'='
:
result
+=
(
L"\\"
)+
c
;
break
;
case
L'\r'
:
result
+=
L"\\r"
;
break
;
case
L'\n'
:
result
+=
L"\\n"
;
break
;
case
L'\t'
:
result
+=
L"\\t"
;
break
;
default
:
result
c
;
}
}
return
result
;
}
(
const
&
)
{
;
for
(
=
0
;
i
<
.
();
i
++)
{
wchar_t
=
i
];
if
(
c
==
L'\\'
||
c
==
L'/'
)
{
if
(
i
<
.
()-
1
)
{
i
++;
c
=
i
];
switch
(
c
)
{
case
L'r'
:
result
+=
L"\r"
;
break
;
case
L'n'
:
result
+=
L"\n"
;
break
;
case
L't'
:
result
+=
L"\t"
;
break
;
default
:
result
c
;
}
continue
;
}
}
result
c
;
}
return
result
;
}
NormalizeEscapedTextForRegex
(
const
&
)
{
;
for
(
=
0
;
i
<
.
();
i
++)
{
wchar_t
=
i
];
if
(
c
==
L'\\'
||
c
==
L'/'
)
{
if
(
i
<
.
()-
1
)
{
i
++;
c
=
i
];
result
+=
(
L"\\"
)+
c
;
continue
;
}
}
result
c
;
}
return
result
;
}
bool
IsRegexEscapedLiteralString
(
const
&
)
{
for
(
=
0
;
i
<
.
();
i
++)
{
wchar_t
=
i
];
if
(
c
==
L'\\'
||
c
==
L'/'
)
{
i
++;
}
else
{
switch
(
c
)
{
case
L'\\'
:
case
L'/'
:
case
L'('
:
case
L')'
:
case
L'+'
:
case
L'*'
:
case
L'?'
:
case
L'|'
:
case
L'{'
:
case
L'}'
:
case
L'['
:
case
L']'
:
case
L'<'
:
case
L'>'
:
case
L'^'
:
case
L'$'
:
case
L'!'
:
case
L'='
:
return
false
;
}
}
}
return
true
;
}
}
}