#!/usr/bin/env python
# -*- mode: python -*-
"""Table-driven regression tests for the ``re`` module.

``tests`` lists patterns, subject strings and expected outcomes.  Every
entry is checked with ``re.search`` when this module is executed; only the
first three fields of each entry are used by the runner at the bottom of
this file.
"""

import re

# Re test suite and benchmark suite v1.5

# The 3 possible outcomes for each pattern
[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)

# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

# benchmarks = [

#     # test common prefix
#     ('Python|Perl', 'Perl'),              # Alternation
#     ('(Python|Perl)', 'Perl'),            # Grouped alternation

#     ('Python|Perl|Tcl', 'Perl'),          # Alternation
#     ('(Python|Perl|Tcl)', 'Perl'),        # Grouped alternation

#     ('(Python)\\1', 'PythonPython'),      # Backreference
#     ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
#     ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),  # A few sets

#     ('Python', 'Python'),                 # Simple text literal
#     ('.*Python', 'Python'),               # Bad text literal
#     ('.*Python.*', 'Python'),             # Worse text literal
#     ('.*(Python)', 'Python'),             # Bad text literal with grouping

# ]

# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g99" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            also "groups", the return value of m.group() (a tuple).
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.

tests = [
    # Test ?P< and ?P= extensions
    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Begins with an illegal char

    # Same tests, for the ?P= form
    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group

    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),

    # Test octal escapes
    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # Test \0 is handled everywhere
    (r'\0', '\0', SUCCEED, 'found', '\0'),
    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    (r'[^a\0]', '\0', FAIL),

    # Test various letter escapes
    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    # NOTE: not an error under PCRE/PRE:
    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    (r'\x00f', '\017', FAIL, 'found', chr(15)),
    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),

    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),

    # Test that . only matches \n in DOTALL mode
    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
    ('a.b', 'a\nb', FAIL),
    ('a.*b', 'acc\nccb', FAIL),
    ('a.{4,5}b', 'acc\nccb', FAIL),
    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
    ('', '', SUCCEED, 'found', ''),    # Empty pattern
    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
    # NOTE: not an error under PCRE/PRE:
    # ('a[b-]', 'a-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('x\\b', 'xyz', FAIL),
    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
    ('z\\B', 'xyz', FAIL),
    ('\\Bx', 'xyz', FAIL),
    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
    ('\\By\\B', 'xy', FAIL),
    ('\\By\\B', 'yz', FAIL),
    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
    ('^(a+).\\1$', 'aaaa', FAIL),
    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
    ('([abc]*)x', 'abc', FAIL),
    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),

    # Test symbolic groups

    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),

    # Test octal escapes/memory references

    ('\\1', 'a', SYNTAX_ERROR),
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # All tests from Perl

    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab{1,}bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{4,5}bc', 'abbbbc', FAIL),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found', ''),
    ('$', 'abc', SUCCEED, 'found', ''),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-a]', '-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('*a', '-', SYNTAX_ERROR),
    ('(*)b', '-', SYNTAX_ERROR),
    ('$b', 'b', FAIL),
    ('a\\', '-', SYNTAX_ERROR),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a**', '-', SYNTAX_ERROR),
    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
    # Python does not have the same rules for \\41 so this is a syntax error
    # ('((((((((((a))))))))))\\41', 'aa', FAIL),
    # ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'XBC', FAIL),
    ('(?i)abc', 'AXC', FAIL),
    ('(?i)abc', 'ABX', FAIL),
    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab+bc', 'ABC', FAIL),
    ('(?i)ab+bc', 'ABQ', FAIL),
    ('(?i)ab{1,}bc', 'ABQ', FAIL),
    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab??bc', 'ABBBBC', FAIL),
    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABCC', FAIL),
    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'AABC', FAIL),
    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
    ('(?i)a.*c', 'AXYZD', FAIL),
    ('(?i)a[bc]d', 'ABC', FAIL),
    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
    ('(?i)a[b-d]e', 'ABD', FAIL),
    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
    ('(?i)a[]b', '-', SYNTAX_ERROR),
    ('(?i)a[', '-', SYNTAX_ERROR),
    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
    ('(?i)a[^bc]d', 'ABD', FAIL),
    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)a[^-b]c', 'A-C', FAIL),
    ('(?i)a[^]b]c', 'A]C', FAIL),
    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)*a', '-', SYNTAX_ERROR),
    ('(?i)(*)b', '-', SYNTAX_ERROR),
    ('(?i)$b', 'B', FAIL),
    ('(?i)a\\', '-', SYNTAX_ERROR),
    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
    ('(?i)abc)', '-', SYNTAX_ERROR),
    ('(?i)(abc', '-', SYNTAX_ERROR),
    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a**', '-', SYNTAX_ERROR),
    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
    ('(?i))(', '-', SYNTAX_ERROR),
    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
    ('(?i)abc', '', FAIL),
    ('(?i)a*', '', SUCCEED, 'found', ''),
    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
    ('(?i)multiple words of text', 'UH-UH', FAIL),
    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
    ('(?i)[k]', 'AB', FAIL),
    # ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
    # ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),

    # lookbehind: split by : but not if it is escaped by -.
    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
    # escaping with \ as we know it
    ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
    # terminating with ' and escaping with ? as in edifact
    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),

    # Comments using the (?#...) syntax

    ('w(?# comment', 'w', SYNTAX_ERROR),
    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),

    # Check odd placement of embedded pattern modifiers

    # not an error under PCRE/PRE:
    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
    # ('w(?i)', 'W', SYNTAX_ERROR),

    # Comments using the x embedded pattern modifier

    ("""(?x)w# comment 1
        x y
        # comment 2
        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),

    # using the m embedded pattern modifier

    ('^abc', """jkl
abc
xyz""", FAIL),
    ('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),

    ('(?m)abc$', """jkl
xyzabc
123""", SUCCEED, 'found', 'abc'),

    # using the s embedded pattern modifier

    ('a.b', 'a\nb', FAIL),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    # test \w, etc. both inside and outside character classes

    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    # not an error under PCRE/PRE:
    # ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),

    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ff', '\377', FAIL),
    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),

    #
    # post-1.5.2 additions

    # xmllib problem
    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
    # bug 110866: reference to undefined group
    (r'((.)\1+)', '', SYNTAX_ERROR),
    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
    # bug 112468: various expected syntax errors
    (r'(', '', SYNTAX_ERROR),
    (r'[\41]', '!', SUCCEED, 'found', '!'),
    # bug 114033: nothing to repeat
    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
    # bug 115040: rescan if flags are modified inside pattern
    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
    # bug 115618: negative lookahead
    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
    # bug 116251: character class bug
    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
    # bug 123769+127259: non-greedy backtracking bug
    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    # bug 127259: \Z shouldn't depend on multiline mode
    (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
    # bug 128899: uppercase literals under the ignorecase flag
    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
    # bug 130748: ^* should be an error (nothing to repeat)
    (r'^*', '', SYNTAX_ERROR),
    # bug 133283: minimizing repeat problem
    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
    # bug 477728: minimizing repeat problem
    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
    # bug 483789: minimizing repeat problem
    (r'a[^>]*?b', 'a>b', FAIL),
    # bug 490573: minimizing repeat problem
    (r'^a*?$', 'foo', FAIL),
    # bug 470582: nested groups problem
    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
    # another minimizing repeat problem (capturing groups in assertions)
    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]

# try:
#     u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
# except (SyntaxError, ValueError):
#     pass
# else:
#     tests.extend([
#     # bug 410271: \b broken under locales
#     (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
#     (r'(?u)\b.\b', u, SUCCEED, 'found', u),
#     (r'(?u)\w', u, SUCCEED, 'found', u),
#     ])

# Run every entry through re.search.  Only the first three fields (pattern,
# subject string, expected outcome) are used here; the optional expression
# and expected-value fields are not evaluated by this runner.
for test in tests:
    pattern, s, status = test[:3]
    if status == SUCCEED:
        assert re.search(pattern, s), 'expected a match: %r' % (test,)
    elif status == FAIL:
        assert not re.search(pattern, s), 'expected no match: %r' % (test,)
    elif status == SYNTAX_ERROR:
        # Record the outcome in a flag and check it outside the try block,
        # so the failure signal cannot be swallowed by the except clause.
        rejected = False
        try:
            re.search(pattern, s)
        except re.error:
            rejected = True
        if not rejected:
            raise AssertionError('expected re.error: %r' % (test,))