; ; ; this file contains a script of tests to run through regress.exe ; ; comments start with a semicolon and proceed to the end of the line ; ; changes to regular expression compile flags start with a "-" as the first ; non-whitespace character and consist of a list of the printable names ; of the flags, for example "match_default" ; ; Other lines contain a test to perform using the current flag status ; the first token contains the expression to compile, the second the string ; to match it against. If the second string is "!" then the expression should ; not compile, that is the first string is an invalid regular expression. ; This is then followed by a list of integers that specify what should match, ; each pair represents the starting and ending positions of a subexpression ; starting with the zeroth subexpression (the whole match). ; A value of -1 indicates that the subexpression should not take part in the ; match at all, if the first value is -1 then no part of the expression should ; match the string. ; - match_default normal REG_EXTENDED ; ; try some really simple literals: a a 0 1 Z Z 0 1 Z aaa -1 -1 Z xxxxZZxxx 4 5 ; and some simple brackets: (a) zzzaazz 3 4 3 4 () zzz 0 0 0 0 () "" 0 0 0 0 ( ! ) ! (aa ! aa) ! a b -1 -1 \(\) () 0 2 \(a\) (a) 0 3 \() ! (\) ! p(a)rameter ABCparameterXYZ 3 12 4 5 [pq](a)rameter ABCparameterXYZ 3 12 4 5 ; now try escaped brackets: - match_default bk_parens REG_BASIC \(a\) zzzaazz 3 4 3 4 \(\) zzz 0 0 0 0 \(\) "" 0 0 0 0 \( ! \) ! \(aa ! aa\) ! () () 0 2 (a) (a) 0 3 (\) ! \() ! ; now move on to "." wildcards - match_default normal REG_EXTENDED REG_STARTEND . a 0 1 . \n 0 1 . \r 0 1 . \0 0 1 - match_default normal match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE . a 0 1 . \n -1 -1 . \r -1 -1 . \0 0 1 - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE . \n -1 -1 . 
\r -1 -1 ; this *WILL* produce an error from the POSIX API functions: - match_default normal match_not_dot_null match_not_dot_newline REG_EXTENDED REG_STARTEND REG_NEWLINE REG_NO_POSIX_TEST . \0 -1 -1 ; ; now move on to the repetition ops, ; starting with operator * - match_default normal REG_EXTENDED a* b 0 0 ab* a 0 1 ab* ab 0 2 ab* sssabbbbbbsss 3 10 ab*c* a 0 1 ab*c* abbb 0 4 ab*c* accc 0 4 ab*c* abbcc 0 5 *a ! \<* ! \>* ! \n* \n\n 0 2 \** ** 0 2 \* * 0 1 ; now try operator + ab+ a -1 -1 ab+ ab 0 2 ab+ sssabbbbbbsss 3 10 ab+c+ a -1 -1 ab+c+ abbb -1 -1 ab+c+ accc -1 -1 ab+c+ abbcc 0 5 +a ! \<+ ! \>+ ! \n+ \n\n 0 2 \+ + 0 1 \+ ++ 0 1 \++ ++ 0 2 - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST + + 0 1 \+ ! a\+ aa 0 2 ; now try operator ? - match_default normal REG_EXTENDED a? b 0 0 ab? a 0 1 ab? ab 0 2 ab? sssabbbbbbsss 3 5 ab?c? a 0 1 ab?c? abbb 0 2 ab?c? accc 0 2 ab?c? abcc 0 3 ?a ! \? ! \n? \n\n 0 1 \? ? 0 1 \? ?? 0 1 \?? ?? 0 1 - match_default normal bk_plus_qm REG_EXTENDED REG_NO_POSIX_TEST ? ? 0 1 \? ! a\? aa 0 1 a\? b 0 0 - match_default normal limited_ops a? a? 0 2 a+ a+ 0 2 a\? a? 0 2 a\+ a+ 0 2 ; now try operator {} - match_default normal REG_EXTENDED a{2} a -1 -1 a{2} aa 0 2 a{2} aaa 0 2 a{2,} a -1 -1 a{2,} aa 0 2 a{2,} aaaaa 0 5 a{2,4} a -1 -1 a{2,4} aa 0 2 a{2,4} aaa 0 3 a{2,4} aaaa 0 4 a{2,4} aaaaa 0 4 ; spaces are now allowed inside {} "a{ 2 , 4 }" aaaaa 0 4 a{} ! "a{ }" ! a{2 ! a} ! \{\} {} 0 2 - match_default normal bk_braces a\{2\} a -1 -1 a\{2\} aa 0 2 a\{2\} aaa 0 2 a\{2,\} a -1 -1 a\{2,\} aa 0 2 a\{2,\} aaaaa 0 5 a\{2,4\} a -1 -1 a\{2,4\} aa 0 2 a\{2,4\} aaa 0 3 a\{2,4\} aaaa 0 4 a\{2,4\} aaaaa 0 4 "a\{ 2 , 4 \}" aaaaa 0 4 {} {} 0 2 ; now test the alternation operator | - match_default normal REG_EXTENDED a|b a 0 1 a|b b 0 1 a(b|c) ab 0 2 1 2 a(b|c) ac 0 2 1 2 a(b|c) ad -1 -1 -1 -1 |c ! c| ! (|) ! (a|) ! (|a) ! 
a\| a| 0 2 - match_default normal limited_ops a| a| 0 2 a\| a| 0 2 | | 0 1 - match_default normal bk_vbar REG_NO_POSIX_TEST a| a| 0 2 a\|b a 0 1 a\|b b 0 1 ; now test the set operator [] - match_default normal REG_EXTENDED ; try some literals first [abc] a 0 1 [abc] b 0 1 [abc] c 0 1 [abc] d -1 -1 [^bcd] a 0 1 [^bcd] b -1 -1 [^bcd] d -1 -1 [^bcd] e 0 1 a[b]c abc 0 3 a[ab]c abc 0 3 a[^ab]c adc 0 3 a[]b]c a]c 0 3 a[[b]c a[c 0 3 a[-b]c a-c 0 3 a[^]b]c adc 0 3 a[^-b]c adc 0 3 a[b-]c a-c 0 3 a[b ! a[] ! ; then some ranges [b-e] a -1 -1 [b-e] b 0 1 [b-e] e 0 1 [b-e] f -1 -1 [^b-e] a 0 1 [^b-e] b -1 -1 [^b-e] e -1 -1 [^b-e] f 0 1 a[1-3]c a2c 0 3 a[3-1]c ! a[1-3-5]c ! a[1- ! ; and some classes a[[:alpha:]]c abc 0 3 a[[:unknown:]]c ! a[[: ! a[[:alpha ! a[[:alpha:] ! a[[:alpha,:] ! a[[:]:]]b ! a[[:-:]]b ! a[[:alph:]] ! a[[:alphabet:]] ! [[:alnum:]]+ -%@a0X_- 3 6 [[:alpha:]]+ -%@aX_0- 3 5 [[:blank:]]+ "a \tb" 1 4 [[:cntrl:]]+ a\n\tb 1 3 [[:digit:]]+ a019b 1 4 [[:graph:]]+ " a%b " 1 4 [[:lower:]]+ AabC 1 3 ; This test fails with STLPort, disable for now as this is a corner case anyway... 
;[[:print:]]+ "\na b\n" 1 4 [[:punct:]]+ " %-&\t" 1 4 [[:space:]]+ "a \n\t\rb" 1 5 [[:upper:]]+ aBCd 1 3 [[:xdigit:]]+ p0f3Cx 1 5 ; now test flag settings: - escape_in_lists REG_NO_POSIX_TEST [\n] \n 0 1 - REG_NO_POSIX_TEST [\n] \n -1 -1 [\n] \\ 0 1 [[:class:] : 0 1 [[:class:] [ 0 1 [[:class:] c 0 1 ; line anchors - match_default normal REG_EXTENDED ^ab ab 0 2 ^ab xxabxx -1 -1 ^ab xx\nabzz 3 5 ab$ ab 0 2 ab$ abxx -1 -1 ab$ ab\nzz 0 2 - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL ^ab ab -1 -1 ^ab xxabxx -1 -1 ^ab xx\nabzz 3 5 ab$ ab -1 -1 ab$ abxx -1 -1 ab$ ab\nzz 0 2 ; line anchors, single line mode - match_default normal match_single_line REG_NO_POSIX_TEST ^ab ab 0 2 ^ab xxabxx -1 -1 ^ab xx\nabzz -1 -1 ab$ ab 0 2 ab$ abxx -1 -1 ab$ ab\nzz -1 -1 - match_default match_not_bol match_not_eol normal REG_NO_POSIX_TEST match_single_line ^ab ab -1 -1 ^ab xxabxx -1 -1 ^ab xx\nabzz -1 -1 ab$ ab -1 -1 ab$ abxx -1 -1 ab$ ab\nzz -1 -1 ; back references - match_default normal REG_PERL a(b)\2c ! a(b\1)c ! a(b*)c\1d abbcbbd 0 7 1 3 a(b*)c\1d abbcbd -1 -1 a(b*)c\1d abbcbbbd -1 -1 ^(.)\1 abc -1 -1 a([bc])\1d abcdabbd 4 8 5 6 ; strictly speaking this is at best ambiguous, at worst wrong, this is what most ; re implementations will match though. 
a(([bc])\2)*d abbccd 0 6 3 5 3 4 a(([bc])\2)*d abbcbd -1 -1 a((b)*\2)*d abbbd 0 5 1 4 2 3 ; perl only: (ab*)[ab]*\1 ababaaa 0 4 0 2 (a)\1bcd aabcd 0 5 0 1 (a)\1bc*d aabcd 0 5 0 1 (a)\1bc*d aabd 0 4 0 1 (a)\1bc*d aabcccd 0 7 0 1 (a)\1bc*[ce]d aabcccd 0 7 0 1 ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 ; posix only: - match_default extended REG_EXTENDED (ab*)[ab]*\1 ababaaa 0 7 0 1 ; ; characters by code: - match_default normal REG_PERL REG_STARTEND \0101 A 0 1 \00 \0 0 1 \0 \0 0 1 \0172 z 0 1 ; ; word operators: \w a 0 1 \w z 0 1 \w A 0 1 \w Z 0 1 \w _ 0 1 \w } -1 -1 \w ` -1 -1 \w [ -1 -1 \w @ -1 -1 ; non-word: \W a -1 -1 \W z -1 -1 \W A -1 -1 \W Z -1 -1 \W _ -1 -1 \W } 0 1 \W ` 0 1 \W [ 0 1 \W @ 0 1 ; word start: \<abcd " abcd" 2 6 \<ab cab -1 -1 \<ab "\nab" 1 3 \<tag ::tag 2 5 ;word end: abc\> abc 0 3 abc\> abcd -1 -1 abc\> abc\n 0 3 abc\> abc:: 0 3 ; word boundary: \babcd " abcd" 2 6 \bab cab -1 -1 \bab "\nab" 1 3 \btag ::tag 2 5 abc\b abc 0 3 abc\b abcd -1 -1 abc\b abc\n 0 3 abc\b abc:: 0 3 ; within word: \B ab 1 1 a\Bb ab 0 2 a\B ab 0 1 a\B a -1 -1 a\B "a " -1 -1 ; ; buffer operators: \`abc abc 0 3 \`abc \nabc -1 -1 \`abc " abc" -1 -1 abc\' abc 0 3 abc\' abc\n -1 -1 abc\' "abc " -1 -1 ; ; extra escape sequences: \a \a 0 1 \f \f 0 1 \n \n 0 1 \r \r 0 1 \t \t 0 1 \v \v 0 1 ; ; now follows various complex expressions designed to try and bust the matcher: a(((b)))c abc 0 3 1 2 1 2 1 2 a(b|(c))d abd 0 3 1 2 -1 -1 a(b|(c))d acd 0 3 1 2 1 2 a(b*|c)d abbd 0 4 1 3 ; just gotta have one DFA-buster, of course a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 ; and an inline expansion in case somebody gets tricky a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 ; and in case somebody just slips in an NFA... 
a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 ; one really big one 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 ; fish for problems as brackets go past 8 [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 ; and as parenthesis go past 9: (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 (a)d|(b)c abc 1 3 -1 -1 1 2 "_+((www)|(ftp)|(mailto)):_*" "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 ; subtleties of matching a(b)?c\1d acd 0 3 -1 -1 a(b?c)+d accd 0 4 2 3 (wee|week)(knights|night) weeknights 0 10 0 3 3 10 .* abc 0 3 a(b|(c))d abd 0 3 1 2 -1 -1 a(b|(c))d acd 0 3 1 2 1 2 a(b*|c|e)d abbd 0 4 1 3 a(b*|c|e)d acd 0 3 1 2 a(b*|c|e)d ad 0 2 1 1 a(b?)c abc 0 3 1 2 a(b?)c ac 0 2 1 1 a(b+)c abc 0 3 1 2 a(b+)c abbbc 0 5 1 4 a(b*)c ac 0 2 1 1 (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 a([bc]?)c abc 0 3 1 2 a([bc]?)c ac 0 2 1 1 a([bc]+)c abc 0 3 1 2 a([bc]+)c abcc 0 4 1 3 a([bc]+)bc abcbc 0 5 1 3 a(bb+|b)b abb 0 3 1 2 a(bbb+|bb+|b)b abb 0 3 1 2 a(bbb+|bb+|b)b abbb 0 4 1 3 a(bbb+|bb+|b)bb abbb 0 4 1 2 (.*).* abcdef 0 6 0 6 (a*)* bc 0 0 0 0 xyx*xz xyxxxxyxxxz 5 11 ; do we get the right subexpression when it is used more than once? 
a(b|c)*d ad 0 2 -1 -1 a(b|c)*d abcd 0 4 2 3 a(b|c)+d abd 0 3 1 2 a(b|c)+d abcd 0 4 2 3 a(b|c?)+d ad 0 2 1 1 a(b|c){0,0}d ad 0 2 -1 -1 a(b|c){0,1}d ad 0 2 -1 -1 a(b|c){0,1}d abd 0 3 1 2 a(b|c){0,2}d ad 0 2 -1 -1 a(b|c){0,2}d abcd 0 4 2 3 a(b|c){0,}d ad 0 2 -1 -1 a(b|c){0,}d abcd 0 4 2 3 a(b|c){1,1}d abd 0 3 1 2 a(b|c){1,2}d abd 0 3 1 2 a(b|c){1,2}d abcd 0 4 2 3 a(b|c){1,}d abd 0 3 1 2 a(b|c){1,}d abcd 0 4 2 3 a(b|c){2,2}d acbd 0 4 2 3 a(b|c){2,2}d abcd 0 4 2 3 a(b|c){2,4}d abcd 0 4 2 3 a(b|c){2,4}d abcbd 0 5 3 4 a(b|c){2,4}d abcbcd 0 6 4 5 a(b|c){2,}d abcd 0 4 2 3 a(b|c){2,}d abcbd 0 5 3 4 ; perl only: a(b|c?)+d abcd 0 4 3 3 a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 ; posix only: - match_default extended REG_EXTENDED REG_STARTEND a(b|c?)+d abcd 0 4 2 3 a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 - match_default normal REG_EXTENDED REG_STARTEND REG_NOSPEC literal \**?/{} \\**?/{} 0 7 - match_default normal REG_PERL ; try to match C++ syntax elements: ; line comment: //[^\n]* "++i //here is a line comment\n" 4 28 ; block comment: /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 ; preprocessor directives: ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1 ; perl only: ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 ; literals: ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 
35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 ; strings: '([^\\']|\\.)*' '\\x3A' 0 6 4 5 '([^\\']|\\.)*' '\\'' 0 4 1 3 '([^\\']|\\.)*' '\\n' 0 4 1 3 ; posix only: - match_default extended escape_in_lists REG_EXTENDED REG_NO_POSIX_TEST ; we disable POSIX testing because it can't handle escapes in sets ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 28 42 ; now try and test some unicode specific characters: - match_default normal REG_PERL REG_UNICODE_ONLY [[:unicode:]]+ a\0300\0400z 1 3 [\x10-\xff] \39135\12409 -1 -1 [\01-\05]{5} \36865\36865\36865\36865\36865 -1 -1 ; finally try some case insensitive matches: - match_default normal REG_EXTENDED REG_ICASE ; upper and lower have no meaning here so they fail, however these ; may compile with other libraries... ;[[:lower:]] ! ;[[:upper:]] ! 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 ; known and suspected bugs: - match_default normal REG_EXTENDED \( ( 0 1 \) ) 0 1 \$ $ 0 1 \^ ^ 0 1 \. . 0 1 \* * 0 1 \+ + 0 1 \? ? 
0 1 \[ [ 0 1 \] ] 0 1 \| | 0 1 \\ \\ 0 1 # # 0 1 \# # 0 1 a- a- 0 2 \- - 0 1 \{ { 0 1 \} } 0 1 0 0 0 1 1 1 0 1 9 9 0 1 b b 0 1 B B 0 1 < < 0 1 > > 0 1 w w 0 1 W W 0 1 ` ` 0 1 ' ' 0 1 \n \n 0 1 , , 0 1 a a 0 1 f f 0 1 n n 0 1 r r 0 1 t t 0 1 v v 0 1 c c 0 1 x x 0 1 : : 0 1 (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 - match_default normal REG_EXTENDED REG_ICASE a A 0 1 A a 0 1 [abc]+ abcABC 0 6 [ABC]+ abcABC 0 6 [a-z]+ abcABC 0 6 [A-Z]+ abzANZ 0 6 [a-Z]+ abzABZ 0 6 [A-z]+ abzABZ 0 6 [[:lower:]]+ abyzABYZ 0 8 [[:upper:]]+ abzABZ 0 6 [[:word:]]+ abcZZZ 0 6 [[:alpha:]]+ abyzABYZ 0 8 [[:alnum:]]+ 09abyzABYZ 0 10 ; updated tests for version 2: - match_default normal REG_EXTENDED \x41 A 0 1 \xff \255 0 1 \xFF \255 0 1 - match_default normal REG_EXTENDED REG_NO_POSIX_TEST \c@ \0 0 1 - match_default normal REG_EXTENDED \cA \1 0 1 \cz \58 0 1 \c= ! \c? ! =: =: 0 2 ; word start: [[:<:]]abcd " abcd" 2 6 [[:<:]]ab cab -1 -1 [[:<:]]ab "\nab" 1 3 [[:<:]]tag ::tag 2 5 ;word end: abc[[:>:]] abc 0 3 abc[[:>:]] abcd -1 -1 abc[[:>:]] abc\n 0 3 abc[[:>:]] abc:: 0 3 ; collating elements and rewritten set code: - match_default normal REG_EXTENDED REG_STARTEND [[.zero.]] 0 0 1 [[.one.]] 1 0 1 [[.two.]] 2 0 1 [[.three.]] 3 0 1 [[.a.]] baa 1 2 [[.right-curly-bracket.]] } 0 1 [[.NUL.]] \0 0 1 [[:<:]z] ! [a[:>:]] ! [[=a=]] a 0 1 [[=right-curly-bracket=]] } 0 1 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE [[.A.]] A 0 1 [[.A.]] a 0 1 [[.A.]-b]+ AaBb 0 4 [A-[.b.]]+ AaBb 0 4 [[.a.]-B]+ AaBb 0 4 [a-[.B.]]+ AaBb 0 4 - match_default normal REG_EXTENDED REG_NO_POSIX_TEST [\x61] a 0 1 [\x61-c]+ abcd 0 3 [a-\x63]+ abcd 0 3 - match_default normal REG_EXTENDED REG_STARTEND [[.a.]-c]+ abcd 0 3 [a-[.c.]]+ abcd 0 3 [[:alpha:]-a] ! [a-[:alpha:]] ! 
; try multi-character ligatures: [[.ae.]] ae 0 2 [[.ae.]] aE -1 -1 [[.AE.]] AE 0 2 [[.Ae.]] Ae 0 2 [[.ae.]-b] a -1 -1 [[.ae.]-b] b 0 1 [[.ae.]-b] ae 0 2 [a-[.ae.]] a 0 1 [a-[.ae.]] b -1 -1 [a-[.ae.]] ae 0 2 - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE [[.ae.]] AE 0 2 [[.ae.]] Ae 0 2 [[.AE.]] Ae 0 2 [[.Ae.]] aE 0 2 [[.AE.]-B] a -1 -1 [[.Ae.]-b] b 0 1 [[.Ae.]-b] B 0 1 [[.ae.]-b] AE 0 2 - match_default normal REG_EXTENDED REG_STARTEND ;extended perl style escape sequences: \e \27 0 1 \x1b \27 0 1 \x{1b} \27 0 1 \x{} ! \x{ ! \x} ! \x ! \x{yy ! \x{1b ! - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST \l+ ABabcAB 2 5 [\l]+ ABabcAB 2 5 [a-\l] ! [\l-a] ! [\L] ! \L+ abABCab 2 5 \u+ abABCab 2 5 [\u]+ abABCab 2 5 [\U] ! \U+ ABabcAB 2 5 \d+ ab012ab 2 5 [\d]+ ab012ab 2 5 [\D] ! \D+ 01abc01 2 5 \s+ "ab ab" 2 5 [\s]+ "ab ab" 2 5 [\S] ! \S+ " abc " 2 5 - match_default normal REG_EXTENDED REG_STARTEND \Qabc ! \Qabc\E abcd 0 3 \Qabc\Ed abcde 0 4 \Q+*?\\E +*?\\ 0 4 \C+ abcde 0 5 \X+ abcde 0 5 - match_default normal REG_EXTENDED REG_STARTEND REG_UNICODE_ONLY \X+ a\768\769 0 3 \X+ \2309\2307 0 2 ;DEVANAGARI script \X+ \2489\2494 0 2 ;BENGALI script - match_default normal REG_EXTENDED REG_STARTEND \Aabc abc 0 3 \Aabc aabc -1 -1 abc\z abc 0 3 abc\z abcd -1 -1 abc\Z abc\n\n 0 3 abc\Z abc 0 3 \Gabc abc 0 3 \Gabc dabcd -1 -1 a\Gbc abc -1 -1 a\Aab abc -1 -1 ; ; now test grep, ; basically check all our restart types - line, word, etc ; checking each one for null and non-null matches. 
; - match_default normal REG_EXTENDED REG_STARTEND REG_GREP a " a a a aa" 1 2 3 4 5 6 7 8 8 9 a+b+ "aabaabbb ab" 0 3 3 8 9 11 a(b*|c|e)d adabbdacd 0 2 2 6 6 9 a "\na\na\na\naa" 1 2 3 4 5 6 7 8 8 9 ^ " \n\n \n\n\n" 0 0 4 4 5 5 8 8 9 9 10 10 ^ab "ab \nab ab\n" 0 2 5 7 ^[^\n]*\n " \n \n\n \n" 0 4 4 7 7 8 8 11 \ <123><><><> [[:digit:]]* 123ab1 <$0> <123><><><1> ; and now escapes: a+ "...aaa,,," $x "$x" a+ "...aaa,,," \a "\a" a+ "...aaa,,," \f "\f" a+ "...aaa,,," \n "\n" a+ "...aaa,,," \r "\r" a+ "...aaa,,," \t "\t" a+ "...aaa,,," \v "\v" a+ "...aaa,,," \x21 "!" a+ "...aaa,,," \x{21} "!" a+ "...aaa,,," \c@ \0 a+ "...aaa,,," \e \27 a+ "...aaa,,," \0101 A - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed format_no_copy (a+)(b+) ...aabb,, \0 aabb (a+)(b+) ...aabb,, \1 aa (a+)(b+) ...aabb,, \2 bb (a+)(b+) ...aabb,, & aabb (a+)(b+) ...aabb,, $ $ (a+)(b+) ...aabb,, $1 $1 (a+)(b+) ...aabb,, ()?: ()?: (a+)(b+) ...aabb,, \\ \\ (a+)(b+) ...aabb,, \& & - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_perl format_no_copy (a+)(b+) ...aabb,, $0 aabb (a+)(b+) ...aabb,, $1 aa (a+)(b+) ...aabb,, $2 bb (a+)(b+) ...aabb,, $& aabb (a+)(b+) ...aabb,, & & (a+)(b+) ...aabb,, \0 \0 (a+)(b+) ...aabb,, ()?: ()?: a+ "...aaa,,," \0101 A - match_default format_all normal REG_EXTENDED REG_STARTEND REG_MERGE ; move to copying unmatched data: a+ "...aaa,,," bbb "...bbb,,," a+(b+) "...aaabb,,," $1 "...bb,,," a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,b*bbb?" (a+)|(b+) "...aaabb,,,ab*abbb?" (?1A)(?2B) "...AB,,,AB*AB?" (a+)|(b+) "...aaabb,,,ab*abbb?" ?1A:B "...AB,,,AB*AB?" (a+)|(b+) "...aaabb,,,ab*abbb?" (?1A:B)C "...ACBC,,,ACBC*ACBC?" (a+)|(b+) "...aaabb,,,ab*abbb?" ?1:B "...B,,,B*B?" - match_default format_all normal REG_EXTENDED REG_STARTEND REG_MERGE format_first_only ; move to copying unmatched data, but replace first occurrence only: a+ "...aaa,,," bbb "...bbb,,," a+(b+) "...aaabb,,," $1 "...bb,,," a+(b+) "...aaabb,,,ab*abbb?" $1 "...bb,,,ab*abbb?" 
(a+)|(b+) "...aaabb,,,ab*abbb?" (?1A)(?2B) "...Abb,,,ab*abbb?" ; ; changes to newline handling with 2.11: ; - match_default normal REG_EXTENDED REG_STARTEND REG_GREP ^. " \n \r\n " 0 1 3 4 7 8 .$ " \n \r\n " 1 2 4 5 8 9 - match_default normal REG_EXTENDED REG_STARTEND REG_GREP REG_UNICODE_ONLY ^. " \8232 \8233 " 0 1 3 4 5 6 .$ " \8232 \8233 " 1 2 3 4 6 7 ; ; non-greedy repeats added 21/04/00 - match_default normal REG_EXTENDED REG_PERL a** ! a*? aa 0 0 ^a*?$ aa 0 2 ^.*?$ aa 0 2 ^(?:a)*?$ aa 0 2 ^[ab]*?$ aa 0 2 a?? aa 0 0 a++ ! a+? aa 0 1 a{1,3}{1} ! a{1,3}? aaa 0 1 \w+?w ...ccccccwcccccw 3 10 \W+\w+?w ...ccccccwcccccw 0 10 abc|\w+? abd 0 1 abc|\w+? abcd 0 3 <\s*tag[^>]*>(.*?)<\s*/tag\s*> " here is some text " 1 29 6 23 <\s*tag[^>]*>(.*?)<\s*/tag\s*> " < tag attr=\"something\">here is some text< /tag > " 1 49 24 41 ; ; non-marking parenthesis added 25/04/00 - match_default normal REG_EXTENDED REG_PERL (?:abc)+ xxabcabcxx 2 8 (?:a+)(b+) xaaabbbx 1 7 4 7 (a+)(?:b+) xaaabbba 1 7 1 4 (?:(a+)b+) xaaabbba 1 7 1 4 (?:a+(b+)) xaaabbba 1 7 4 7 a+(?#b+)b+ xaaabbba 1 7 (a)(?:b|$) ab 0 2 0 1 (a)(?:b|$) a 0 1 0 1 ; ; try some partial matches: - match_partial match_default normal REG_EXTENDED REG_NO_POSIX_TEST REG_PARTIAL_MATCH REG_PERL (xyz)(.*)abc xyzaaab 0 7 (xyz)(.*)abc xyz 0 3 (xyz)(.*)abc xy 0 2 (xyz)(.*)abc x 0 1 (xyz)(.*)abc "" -1 -1 (xyz)(.*)abc aaaa -1 -1 .abc aaab 1 4 a[_] xxa 2 3 ; ; forward lookahead asserts added 21/01/02 - match_default normal REG_EXTENDED REG_NO_POSIX_TEST REG_PERL ((?:(?!a|b)\w)+)(\w+) " xxxabaxxx " 2 11 2 5 5 11 /\*(?:(?!\*/).)*\*/ " /**/ " 2 6 /\*(?:(?!\*/).)*\*/ " /***/ " 2 7 /\*(?:(?!\*/).)*\*/ " /********/ " 2 12 /\*(?:(?!\*/).)*\*/ " /* comment */ " 2 15 <\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " here " 1 24 16 20 <\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)<\s*/\s*a\s*> " here< / a > " 1 28 16 20 <\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " here " 1 20 16 20 <\s*a[^>]*>((?:(?!<\s*/\s*a\s*>).)*)(?=<\s*/\s*a\s*>) " here< / a > " 
1 20 16 20 ; filename matching: ^(?!^(?:PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(?:\..+)?$)[^\x00-\x1f\\?*:\"|/]+$ command.com 0 11 ^(?!^(?:PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(?:\..+)?$)[^\x00-\x1f\\?*:\"|/]+$ PRN -1 -1 ^(?!^(?:PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(?:\..+)?$)[^\x00-\x1f\\?*:\"|/]+$ COM2 -1 -1 ; password checking: ^(?=.*\d).{4,8}$ abc3 0 4 ^(?=.*\d).{4,8}$ abc3def4 0 8 ^(?=.*\d).{4,8}$ ab2 -1 -1 ^(?=.*\d).{4,8}$ abcdefg -1 -1 ^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$ abc3 -1 -1 ^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$ abC3 0 4 ^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$ ABCD3 -1 -1 ; extended repeat checking to exercise new algorithms: ab.*xy abxy_ 0 4 ab.*xy ab_xy_ 0 5 ab.*xy abxy 0 4 ab.*xy ab_xy 0 5 ab.* ab 0 2 ab.* ab__ 0 4 ab.{2,5}xy ab__xy_ 0 6 ab.{2,5}xy ab____xy_ 0 8 ab.{2,5}xy ab_____xy_ 0 9 ab.{2,5}xy ab__xy 0 6 ab.{2,5}xy ab_____xy 0 9 ab.{2,5} ab__ 0 4 ab.{2,5} ab_______ 0 7 ab.{2,5}xy ab______xy -1 -1 ab.{2,5}xy ab_xy -1 -1 ab.*?xy abxy_ 0 4 ab.*?xy ab_xy_ 0 5 ab.*?xy abxy 0 4 ab.*?xy ab_xy 0 5 ab.*? ab 0 2 ab.*? ab__ 0 2 ab.{2,5}?xy ab__xy_ 0 6 ab.{2,5}?xy ab____xy_ 0 8 ab.{2,5}?xy ab_____xy_ 0 9 ab.{2,5}?xy ab__xy 0 6 ab.{2,5}?xy ab_____xy 0 9 ab.{2,5}? ab__ 0 4 ab.{2,5}? ab_______ 0 4 ab.{2,5}?xy ab______xy -1 -1 ab.{2,5}xy ab_xy -1 -1 ; again but with slower algorithm variant: - match_default normal REG_PERL REG_NO_POSIX_TEST match_not_dot_newline match_not_dot_null ab.*xy abxy_ 0 4 ab.*xy ab_xy_ 0 5 ab.*xy abxy 0 4 ab.*xy ab_xy 0 5 ab.* ab 0 2 ab.* ab__ 0 4 ab.{2,5}xy ab__xy_ 0 6 ab.{2,5}xy ab____xy_ 0 8 ab.{2,5}xy ab_____xy_ 0 9 ab.{2,5}xy ab__xy 0 6 ab.{2,5}xy ab_____xy 0 9 ab.{2,5} ab__ 0 4 ab.{2,5} ab_______ 0 7 ab.{2,5}xy ab______xy -1 -1 ab.{2,5}xy ab_xy -1 -1 ab.*?xy abxy_ 0 4 ab.*?xy ab_xy_ 0 5 ab.*?xy abxy 0 4 ab.*?xy ab_xy 0 5 ab.*? ab 0 2 ab.*? ab__ 0 2 ab.{2,5}?xy ab__xy_ 0 6 ab.{2,5}?xy ab____xy_ 0 8 ab.{2,5}?xy ab_____xy_ 0 9 ab.{2,5}?xy ab__xy 0 6 ab.{2,5}?xy ab_____xy 0 9 ab.{2,5}? ab__ 0 4 ab.{2,5}? 
ab_______ 0 4 ab.{2,5}?xy ab______xy -1 -1 ab.{2,5}xy ab_xy -1 -1 (.*?).somesite \n\n555.somesite 2 14 2 5 ; now again for single character repeats: ab_*xy abxy_ 0 4 ab_*xy ab_xy_ 0 5 ab_*xy abxy 0 4 ab_*xy ab_xy 0 5 ab_* ab 0 2 ab_* ab__ 0 4 ab_{2,5}xy ab__xy_ 0 6 ab_{2,5}xy ab____xy_ 0 8 ab_{2,5}xy ab_____xy_ 0 9 ab_{2,5}xy ab__xy 0 6 ab_{2,5}xy ab_____xy 0 9 ab_{2,5} ab__ 0 4 ab_{2,5} ab_______ 0 7 ab_{2,5}xy ab______xy -1 -1 ab_{2,5}xy ab_xy -1 -1 ab_*?xy abxy_ 0 4 ab_*?xy ab_xy_ 0 5 ab_*?xy abxy 0 4 ab_*?xy ab_xy 0 5 ab_*? ab 0 2 ab_*? ab__ 0 2 ab_{2,5}?xy ab__xy_ 0 6 ab_{2,5}?xy ab____xy_ 0 8 ab_{2,5}?xy ab_____xy_ 0 9 ab_{2,5}?xy ab__xy 0 6 ab_{2,5}?xy ab_____xy 0 9 ab_{2,5}? ab__ 0 4 ab_{2,5}? ab_______ 0 4 ab_{2,5}?xy ab______xy -1 -1 ab_{2,5}xy ab_xy -1 -1 (5*?).somesite //555.somesite 2 14 2 5 ; and again for sets: ab[_,;]*xy abxy_ 0 4 ab[_,;]*xy ab_xy_ 0 5 ab[_,;]*xy abxy 0 4 ab[_,;]*xy ab_xy 0 5 ab[_,;]* ab 0 2 ab[_,;]* ab__ 0 4 ab[_,;]{2,5}xy ab__xy_ 0 6 ab[_,;]{2,5}xy ab____xy_ 0 8 ab[_,;]{2,5}xy ab_____xy_ 0 9 ab[_,;]{2,5}xy ab__xy 0 6 ab[_,;]{2,5}xy ab_____xy 0 9 ab[_,;]{2,5} ab__ 0 4 ab[_,;]{2,5} ab_______ 0 7 ab[_,;]{2,5}xy ab______xy -1 -1 ab[_,;]{2,5}xy ab_xy -1 -1 ab[_,;]*?xy abxy_ 0 4 ab[_,;]*?xy ab_xy_ 0 5 ab[_,;]*?xy abxy 0 4 ab[_,;]*?xy ab_xy 0 5 ab[_,;]*? ab 0 2 ab[_,;]*? ab__ 0 2 ab[_,;]{2,5}?xy ab__xy_ 0 6 ab[_,;]{2,5}?xy ab____xy_ 0 8 ab[_,;]{2,5}?xy ab_____xy_ 0 9 ab[_,;]{2,5}?xy ab__xy 0 6 ab[_,;]{2,5}?xy ab_____xy 0 9 ab[_,;]{2,5}? ab__ 0 4 ab[_,;]{2,5}? 
ab_______ 0 4 ab[_,;]{2,5}?xy ab______xy -1 -1 ab[_,;]{2,5}xy ab_xy -1 -1 (\d*?).somesite //555.somesite 2 14 2 5 ; and again for tricky sets with digraphs: ab[_[.ae.]]*xy abxy_ 0 4 ab[_[.ae.]]*xy ab_xy_ 0 5 ab[_[.ae.]]*xy abxy 0 4 ab[_[.ae.]]*xy ab_xy 0 5 ab[_[.ae.]]* ab 0 2 ab[_[.ae.]]* ab__ 0 4 ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 ab[_[.ae.]]{2,5}xy ab__xy 0 6 ab[_[.ae.]]{2,5}xy ab_____xy 0 9 ab[_[.ae.]]{2,5} ab__ 0 4 ab[_[.ae.]]{2,5} ab_______ 0 7 ab[_[.ae.]]{2,5}xy ab______xy -1 -1 ab[_[.ae.]]{2,5}xy ab_xy -1 -1 ab[_[.ae.]]*?xy abxy_ 0 4 ab[_[.ae.]]*?xy ab_xy_ 0 5 ab[_[.ae.]]*?xy abxy 0 4 ab[_[.ae.]]*?xy ab_xy 0 5 ab[_[.ae.]]*? ab 0 2 ab[_[.ae.]]*? ab__ 0 2 ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 ab[_[.ae.]]{2,5}?xy ab__xy 0 6 ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 ab[_[.ae.]]{2,5}? ab__ 0 4 ab[_[.ae.]]{2,5}? ab_______ 0 4 ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 ab[_[.ae.]]{2,5}xy ab_xy -1 -1 ([5[.ae.]]*?).somesite //555.somesite 2 14 2 5 ; new bugs detected in spring 2003: - normal match_continuous REG_NO_POSIX_TEST b abc -1 -1 - normal REG_PERL (?!foo)bar foobar 3 6 (?!foo)bar "??bar" 2 5 (?!foo)bar "barfoo" 0 3 (?!foo)bar "bar??" 0 3 (?!foo)bar bar 0 3 a\Z a\nb -1 -1 () abc 0 0 0 0 ^() abc 0 0 0 0 ^()+ abc 0 0 0 0 ^(){1} abc 0 0 0 0 ^(){2} abc 0 0 0 0 ^((){2}) abc 0 0 0 0 0 0 () "" 0 0 0 0 ()\1 "" 0 0 0 0 ()\1 a 0 0 0 0 a()\1b ab 0 2 1 1 a()b\1 ab 0 2 1 1 - normal match_not_dot_newline REG_NO_POSIX_TEST "(.*\r\n){3}.* abcdefghijklmnopqrstuvwxyz.*\r\n" "00001 01 \r\n00002 02 1 2 3 4 5 6 7 8 9 0\r\n00003 03 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00004 04 \r\n00005 05 \r\n00006 06 Seite: 0001\r\n00007 07 StartSeitEEnde: 0001\r\n00008 08 StartSeiTe Ende: 0001\r\n00009 09 Start seiteEnde: 0001\r\n00010 10 28.2.03\r\n00011 11 Page: 0001\r\n00012 12 Juhu die Erste: 0001\r\n00013 13 Es war einmal! 
0001\r\n00014 14 ABCDEFGHIJKLMNOPQRSTUVWXYZ0001\r\n00015 15 abcdefghijklmnopqrstuvwxyz0001\r\n00016 16 lars.schmeiser@gft.com\r\n00017 17 \r\n00018 18 \r\n00019 19 \r\n00020 20 \r\n00021 21 1 2 3 4 5 6 7 8 9 0\r\n00022 22 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00023 01 \r\n00024 02 1 2 3 4 5 6 7 8 9 0\r\n00025 03 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890\r\n00026 04 \r\n00027 05 \r\n00028 06 Seite: 0002\r\n00029 07 StartSeitEEnde: 0002\r\n00030 08 StartSeiTe Ende: 0002\r\n00031 09 Start seiteEnde: 0002\r\n00032 10 28.02.2003\r\n00033 11 Page: 0002\r\n00034 12 Juhu die Erste: 0002\r\n00035 13 Es war einmal! 0002\r\n00036 14 ABCDEFGHIJKLMNOPQRSTUVWXYZ0002\r\n00037 15 abcdefghijklmnopqrstuvwxyz0002\r\n00038 16 lars.schmeiser@194.1.12.111\r\n00039 17 \r\n00040 18 \r\n00041 19 \r\n00042 20 \r\n00043 21 1 2 3 4 5 6 7 8 9 0\r\n" 753 1076 934 1005 - normal REG_PERL ; new (?: construct ) (?>^abc) abc 0 3 (?>^abc) def\nabc 4 7 (?>^abc) defabc -1 -1 (?>.*/)foo /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ -1 -1 (?>.*/)foo /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo 0 67 (?>(\.\d\d[1-9]?))\d+ 1.230003938 1 11 1 4 (?>(\.\d\d[1-9]?))\d+ 1.875000282 1 11 1 5 (?>(\.\d\d[1-9]?))\d+ 1.235 -1 -1 ^((?>\w+)|(?>\s+))*$ "now is the time for all good men to come to the aid of the party" 0 64 59 64 ^((?>\w+)|(?>\s+))*$ "this is not a line with only words and spaces!" 
-1 -1 ((?>\d+))(\w) 12345a 0 6 0 5 5 6 ((?>\d+))(\w) 12345+ -1 -1 ((?>\d+))(\d) 12345 -1 -1 (?>a+)b aaab 0 4 ((?>a+)b) aaab 0 4 0 4 (?>(a+))b aaab 0 4 0 3 (?>b)+ aaabbbccc 3 6 (?>a+|b+|c+)*c aaabbbbccccd 0 8 ((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x 2 18 17 18 \(((?>[^()]+)|\([^()]+\))+\) (abc) 0 5 1 4 \(((?>[^()]+)|\([^()]+\))+\) (abc(def)xyz) 0 13 9 12 \(((?>[^()]+)|\([^()]+\))+\) ((()aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -1 -1 (?>a*)* a 0 1 (?>a*)* aa 0 2 (?>a*)* aaaa 0 4 (?>a*)* a 0 1 (?>a*)* aaabcde 0 3 ((?>a*))* aaaaa 0 5 5 5 ((?>a*))* aabbaa 0 2 2 2 ((?>a*?))* aaaaa 0 0 0 0 ((?>a*?))* aabbaa 0 0 0 0 "word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword" "word cat dog elephant mussel cow horse canary baboon snake shark otherword" 0 74 "word (?>(?:(?!otherword)[a-zA-Z0-9]+ ){0,30})otherword" "word cat dog elephant mussel cow horse canary baboon snake shark" -1 -1 "word (?>[a-zA-Z0-9]+ ){0,30}otherword" "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I hope" -1 -1 "word (?>[a-zA-Z0-9]+ ){0,30}otherword" "word cat dog elephant mussel cow horse canary baboon snake shark the quick brown fox and the lazy dog and several other words getting close to thirty by now I really really hope otherword" -1 -1 ((?>Z)+|A)* ZABCDEFG 0 2 1 2 ((?>)+|A)* ! 
; subtleties of matching with no sub-expressions marked - normal match_nosubs REG_NO_POSIX_TEST a(b?c)+d accd 0 4 (wee|week)(knights|night) weeknights 0 10 .* abc 0 3 a(b|(c))d abd 0 3 a(b|(c))d acd 0 3 a(b*|c|e)d abbd 0 4 a(b*|c|e)d acd 0 3 a(b*|c|e)d ad 0 2 a(b?)c abc 0 3 a(b?)c ac 0 2 a(b+)c abc 0 3 a(b+)c abbbc 0 5 a(b*)c ac 0 2 (a|ab)(bc([de]+)f|cde) abcdef 0 6 a([bc]?)c abc 0 3 a([bc]?)c ac 0 2 a([bc]+)c abc 0 3 a([bc]+)c abcc 0 4 a([bc]+)bc abcbc 0 5 a(bb+|b)b abb 0 3 a(bbb+|bb+|b)b abb 0 3 a(bbb+|bb+|b)b abbb 0 4 a(bbb+|bb+|b)bb abbb 0 4 (.*).* abcdef 0 6 (a*)* bc 0 0 - normal nosubs REG_NO_POSIX_TEST a(b?c)+d accd 0 4 (wee|week)(knights|night) weeknights 0 10 .* abc 0 3 a(b|(c))d abd 0 3 a(b|(c))d acd 0 3 a(b*|c|e)d abbd 0 4 a(b*|c|e)d acd 0 3 a(b*|c|e)d ad 0 2 a(b?)c abc 0 3 a(b?)c ac 0 2 a(b+)c abc 0 3 a(b+)c abbbc 0 5 a(b*)c ac 0 2 (a|ab)(bc([de]+)f|cde) abcdef 0 6 a([bc]?)c abc 0 3 a([bc]?)c ac 0 2 a([bc]+)c abc 0 3 a([bc]+)c abcc 0 4 a([bc]+)bc abcbc 0 5 a(bb+|b)b abb 0 3 a(bbb+|bb+|b)b abb 0 3 a(bbb+|bb+|b)b abbb 0 4 a(bbb+|bb+|b)bb abbb 0 4 (.*).* abcdef 0 6 (a*)* bc 0 0 ; new merge tests for case conversions: - match_default normal REG_PERL REG_STARTEND REG_MERGE abc "xyzabcCD" "\u$&" "xyzAbcCD" abc "xyzabcCD" "\U$&\E" "xyzABCCD" ABC "xyzABCCD" "\l$&" "xyzaBCCD" ABC "xyzABCCD" "\L$&\E" "xyzabcCD" - match_default normal REG_EXTENDED REG_STARTEND REG_MERGE format_sed abc "xyzabcCD" "\u\0" "xyzuabcCD" abc "xyzabcCD" "\U\0\E" "xyzUabcECD" ABC "xyzABCCD" "\l\0" "xyzlABCCD" ABC "xyzABCCD" "\L\0\E" "xyzLABCECD" ; and a couple of new bugs: - match_default normal REG_ICASE REG_NO_POSIX_TEST \u+ abcABC 0 6 \l+ abcABC 0 6