github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/re_tests.py (about)

     1  #!/usr/bin/env python
     2  # -*- mode: python -*-
     3  
     4  import re
     5  
     6  # Re test suite and benchmark suite v1.5
     7  
     8  # The 3 possible outcomes for each pattern; one of these is element 2 of every test tuple
     9  [SUCCEED, FAIL, SYNTAX_ERROR] = range(3)  # SUCCEED == 0, FAIL == 1, SYNTAX_ERROR == 2
    10  
    11  # Benchmark suite (needs expansion)
    12  #
    13  # The benchmark suite does not test correctness, just speed.  The
    14  # first element of each tuple is the regex pattern; the second is a
    15  # string to match it against.  The benchmarking code will embed the
    16  # second string inside several sizes of padding, to test how regex
    17  # matching performs on large strings.
    18  
    19  # benchmarks = [
    20  
    21  #     # test common prefix
    22  #     ('Python|Perl', 'Perl'),    # Alternation
    23  #     ('(Python|Perl)', 'Perl'),  # Grouped alternation
    24  
    25  #     ('Python|Perl|Tcl', 'Perl'),        # Alternation
    26  #     ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation
    27  
    28  #     ('(Python)\\1', 'PythonPython'),    # Backreference
    29  #     ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
    30  #     ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
    31  
    32  #     ('Python', 'Python'),               # Simple text literal
    33  #     ('.*Python', 'Python'),             # Bad text literal
    34  #     ('.*Python.*', 'Python'),           # Worse text literal
    35  #     ('.*(Python)', 'Python'),           # Bad text literal with grouping
    36  
    37  # ]
    38  
    39  # Test suite (for verifying correctness)
    40  #
    41  # The test suite is a list of 5- or 3-tuples.  The 5 parts of a
    42  # complete tuple are:
    43  # element 0: a string containing the pattern
    44  #         1: the string to match against the pattern
    45  #         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
    46  #         3: a string that will be eval()'ed to produce a test string.
    47  #            This is an arbitrary Python expression; the available
    48  #            variables are "found" (the whole match), and "g1", "g2", ...
    49  #            up to "g99" contain the contents of each group, or the
    50  #            string 'None' if the group wasn't given a value, or the
    51  #            string 'Error' if the group index was out of range;
    52  #            also "groups", the return value of m.group() (a tuple).
    53  #         4: The expected result of evaluating the expression.
    54  #            If the two don't match, an error is reported.
    55  #
    56  # If the regex isn't expected to work, the latter two elements can be omitted.
    57  
    58  tests = [
    59      # Test ?P< and ?P= extensions
    60      ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    61      ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    62      ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    63      ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char
    64  
    65      # Same tests, for the ?P= form
    66      ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    67      ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    68      ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    69      ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
    70  
    71      ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    72      ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
    73  
    74      # Test octal escapes
    75      ('\\1', 'a', SYNTAX_ERROR),    # Backreference
    76      ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    77      ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    78      ('\\141', 'a', SUCCEED, 'found', 'a'),
    79      ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
    80  
    81      # Test \0 is handled everywhere
    82      (r'\0', '\0', SUCCEED, 'found', '\0'),
    83      (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    84      (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    85      (r'[^a\0]', '\0', FAIL),
    86  
    87      # Test various letter escapes
    88      (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    89      (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    90      # NOTE: not an error under PCRE/PRE:
    91      # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    92      (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    93      (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    94      # new \x semantics
    95      (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    96      (r'\x00f', '\017', FAIL, 'found', chr(15)),
    97      (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    98      # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    99      # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
   100      # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
   101  
   102      (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
   103       SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
   104  
   105      # Test that . only matches \n in DOTALL mode
   106      ('a.b', 'acb', SUCCEED, 'found', 'acb'),
   107      ('a.b', 'a\nb', FAIL),
   108      ('a.*b', 'acc\nccb', FAIL),
   109      ('a.{4,5}b', 'acc\nccb', FAIL),
   110      ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
   111      ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
   112      ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
   113      ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
   114      ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
   115  
   116      (')', '', SYNTAX_ERROR),           # Unmatched right bracket
   117      ('', '', SUCCEED, 'found', ''),    # Empty pattern
   118      ('abc', 'abc', SUCCEED, 'found', 'abc'),
   119      ('abc', 'xbc', FAIL),
   120      ('abc', 'axc', FAIL),
   121      ('abc', 'abx', FAIL),
   122      ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
   123      ('abc', 'ababc', SUCCEED, 'found', 'abc'),
   124      ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
   125      ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
   126      ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
   127      ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   128      ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
   129      ('ab+bc', 'abc', FAIL),
   130      ('ab+bc', 'abq', FAIL),
   131      ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   132      ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
   133      ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
   134      ('ab?bc', 'abbbbc', FAIL),
   135      ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
   136      ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
   137      ('^abc$', 'abcc', FAIL),
   138      ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
   139      ('^abc$', 'aabc', FAIL),
   140      ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
   141      ('^', 'abc', SUCCEED, 'found+"-"', '-'),
   142      ('$', 'abc', SUCCEED, 'found+"-"', '-'),
   143      ('a.c', 'abc', SUCCEED, 'found', 'abc'),
   144      ('a.c', 'axc', SUCCEED, 'found', 'axc'),
   145      ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
   146      ('a.*c', 'axyzd', FAIL),
   147      ('a[bc]d', 'abc', FAIL),
   148      ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
   149      ('a[b-d]e', 'abd', FAIL),
   150      ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
   151      ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
   152      ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
   153      ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
   154      # NOTE: not an error under PCRE/PRE:
   155      # ('a[b-]', 'a-', SYNTAX_ERROR),
   156      ('a[]b', '-', SYNTAX_ERROR),
   157      ('a[', '-', SYNTAX_ERROR),
   158      ('a\\', '-', SYNTAX_ERROR),
   159      ('abc)', '-', SYNTAX_ERROR),
   160      ('(abc', '-', SYNTAX_ERROR),
   161      ('a]', 'a]', SUCCEED, 'found', 'a]'),
   162      ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
   163      ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
   164      ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
   165      ('a[^bc]d', 'abd', FAIL),
   166      ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
   167      ('a[^-b]c', 'a-c', FAIL),
   168      ('a[^]b]c', 'a]c', FAIL),
   169      ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
   170      ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
   171      ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
   172      ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
   173      ('\\by\\b', 'xy', FAIL),
   174      ('\\by\\b', 'yz', FAIL),
   175      ('\\by\\b', 'xyz', FAIL),
   176      ('x\\b', 'xyz', FAIL),
   177      ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
   178      ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
   179      ('z\\B', 'xyz', FAIL),
   180      ('\\Bx', 'xyz', FAIL),
   181      ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
   182      ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
   183      ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
   184      ('\\By\\B', 'xy', FAIL),
   185      ('\\By\\B', 'yz', FAIL),
   186      ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
   187      ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
   188      ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
   189      ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
   190      ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
   191      ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
   192      ('$b', 'b', FAIL),
   193      ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
   194      ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
   195      ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
   196      ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
   197      ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
   198      ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
   199      ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
   200      ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   201      ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   202      ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
   203      (')(', '-', SYNTAX_ERROR),
   204      ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
   205      ('abc', '', FAIL),
   206      ('a*', '', SUCCEED, 'found', ''),
   207      ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
   208      ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
   209      ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
   210      ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
   211      ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
   212      ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
   213      ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
   214      ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
   215      ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
   216      ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
   217      ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
   218      ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
   219      ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
   220      ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
   221      ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
   222      ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
   223      ('a[bcd]+dcdcde', 'adcdcde', FAIL),
   224      ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
   225      ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
   226      ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
   227      ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
   228      ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
   229      ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
   230      ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
   231      ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
   232      ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
   233      ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
   234      ('multiple words of text', 'uh-uh', FAIL),
   235      ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
   236      ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
   237      ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
   238      ('[k]', 'ab', FAIL),
   239      ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
   240      ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
   241      ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
   242      ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
   243      ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
   244      ('^(a+).\\1$', 'aaaa', FAIL),
   245      ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
   246      ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
   247      ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
   248      ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
   249      ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
   250      ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
   251      ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
   252      ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
   253      ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
   254      ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
   255      ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
   256      ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
   257      ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
   258      ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
   259      ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
   260      ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
   261      ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
   262      ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
   263      ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
   264      ('([abc]*)x', 'abc', FAIL),
   265      ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
   266      ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
   267  
   268      # Test symbolic groups
   269  
   270      ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
   271      ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
   272      ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
   273      ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
   274  
   275      # Test octal escapes/memory references
   276  
   277      ('\\1', 'a', SYNTAX_ERROR),
   278      ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
   279      ('\\141', 'a', SUCCEED, 'found', 'a'),
   280      ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
   281  
   282      # All tests from Perl
   283  
   284      ('abc', 'abc', SUCCEED, 'found', 'abc'),
   285      ('abc', 'xbc', FAIL),
   286      ('abc', 'axc', FAIL),
   287      ('abc', 'abx', FAIL),
   288      ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
   289      ('abc', 'ababc', SUCCEED, 'found', 'abc'),
   290      ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
   291      ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
   292      ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
   293      ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   294      ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   295      ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
   296      ('ab+bc', 'abc', FAIL),
   297      ('ab+bc', 'abq', FAIL),
   298      ('ab{1,}bc', 'abq', FAIL),
   299      ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   300      ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   301      ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   302      ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
   303      ('ab{4,5}bc', 'abbbbc', FAIL),
   304      ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
   305      ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
   306      ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
   307      ('ab?bc', 'abbbbc', FAIL),
   308      ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
   309      ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
   310      ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
   311      ('^abc$', 'abcc', FAIL),
   312      ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
   313      ('^abc$', 'aabc', FAIL),
   314      ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
   315      ('^', 'abc', SUCCEED, 'found', ''),
   316      ('$', 'abc', SUCCEED, 'found', ''),
   317      ('a.c', 'abc', SUCCEED, 'found', 'abc'),
   318      ('a.c', 'axc', SUCCEED, 'found', 'axc'),
   319      ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
   320      ('a.*c', 'axyzd', FAIL),
   321      ('a[bc]d', 'abc', FAIL),
   322      ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
   323      ('a[b-d]e', 'abd', FAIL),
   324      ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
   325      ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
   326      ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
   327      ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
   328      ('a[b-a]', '-', SYNTAX_ERROR),
   329      ('a[]b', '-', SYNTAX_ERROR),
   330      ('a[', '-', SYNTAX_ERROR),
   331      ('a]', 'a]', SUCCEED, 'found', 'a]'),
   332      ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
   333      ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
   334      ('a[^bc]d', 'abd', FAIL),
   335      ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
   336      ('a[^-b]c', 'a-c', FAIL),
   337      ('a[^]b]c', 'a]c', FAIL),
   338      ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
   339      ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
   340      ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
   341      ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
   342      ('*a', '-', SYNTAX_ERROR),
   343      ('(*)b', '-', SYNTAX_ERROR),
   344      ('$b', 'b', FAIL),
   345      ('a\\', '-', SYNTAX_ERROR),
   346      ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
   347      ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
   348      ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
   349      ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
   350      ('abc)', '-', SYNTAX_ERROR),
   351      ('(abc', '-', SYNTAX_ERROR),
   352      ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
   353      ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
   354      ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
   355      ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
   356      ('a**', '-', SYNTAX_ERROR),
   357      ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
   358      ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   359      ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   360      ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   361      ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
   362      ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
   363      ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
   364      (')(', '-', SYNTAX_ERROR),
   365      ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
   366      ('abc', '', FAIL),
   367      ('a*', '', SUCCEED, 'found', ''),
   368      ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
   369      ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
   370      ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
   371      ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
   372      ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
   373      ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
   374      ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
   375      ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
   376      ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
   377      ('^(ab|cd)e', 'abcde', FAIL),
   378      ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
   379      ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
   380      ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
   381      ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
   382      ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
   383      ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
   384      ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
   385      ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
   386      ('a[bcd]+dcdcde', 'adcdcde', FAIL),
   387      ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
   388      ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
   389      ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
   390      ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
   391      ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
   392      ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
   393      ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
   394      ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
   395      ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
   396      ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
   397      ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
   398  # Python does not have the same rules for \\41 so this is a syntax error
   399  #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
   400  #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
   401      ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
   402      ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
   403      ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
   404      ('multiple words of text', 'uh-uh', FAIL),
   405      ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
   406      ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
   407      ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
   408      ('[k]', 'ab', FAIL),
   409      ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
   410      ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
   411      ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
   412      ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
   413      ('(?i)abc', 'XBC', FAIL),
   414      ('(?i)abc', 'AXC', FAIL),
   415      ('(?i)abc', 'ABX', FAIL),
   416      ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
   417      ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
   418      ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
   419      ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
   420      ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
   421      ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   422      ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   423      ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
   424      ('(?i)ab+bc', 'ABC', FAIL),
   425      ('(?i)ab+bc', 'ABQ', FAIL),
   426      ('(?i)ab{1,}bc', 'ABQ', FAIL),
   427      ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   428      ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   429      ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   430      ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
   431      ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
   432      ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
   433      ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
   434      ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
   435      ('(?i)ab??bc', 'ABBBBC', FAIL),
   436      ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
   437      ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
   438      ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
   439      ('(?i)^abc$', 'ABCC', FAIL),
   440      ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
   441      ('(?i)^abc$', 'AABC', FAIL),
   442      ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
   443      ('(?i)^', 'ABC', SUCCEED, 'found', ''),
   444      ('(?i)$', 'ABC', SUCCEED, 'found', ''),
   445      ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
   446      ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
   447      ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
   448      ('(?i)a.*c', 'AXYZD', FAIL),
   449      ('(?i)a[bc]d', 'ABC', FAIL),
   450      ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
   451      ('(?i)a[b-d]e', 'ABD', FAIL),
   452      ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
   453      ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
   454      ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
   455      ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
   456      ('(?i)a[b-a]', '-', SYNTAX_ERROR),
   457      ('(?i)a[]b', '-', SYNTAX_ERROR),
   458      ('(?i)a[', '-', SYNTAX_ERROR),
   459      ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
   460      ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
   461      ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
   462      ('(?i)a[^bc]d', 'ABD', FAIL),
   463      ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
   464      ('(?i)a[^-b]c', 'A-C', FAIL),
   465      ('(?i)a[^]b]c', 'A]C', FAIL),
   466      ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
   467      ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
   468      ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
   469      ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
   470      ('(?i)*a', '-', SYNTAX_ERROR),
   471      ('(?i)(*)b', '-', SYNTAX_ERROR),
   472      ('(?i)$b', 'B', FAIL),
   473      ('(?i)a\\', '-', SYNTAX_ERROR),
   474      ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
   475      ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
   476      ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
   477      ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
   478      ('(?i)abc)', '-', SYNTAX_ERROR),
   479      ('(?i)(abc', '-', SYNTAX_ERROR),
   480      ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
   481      ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
   482      ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
   483      ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
   484      ('(?i)a**', '-', SYNTAX_ERROR),
   485      ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
   486      ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
   487      ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
   488      ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
   489      ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
   490      ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
   491      ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
   492      ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
   493      ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
   494      ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
   495      ('(?i))(', '-', SYNTAX_ERROR),
   496      ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
   497      ('(?i)abc', '', FAIL),
   498      ('(?i)a*', '', SUCCEED, 'found', ''),
   499      ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
   500      ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
   501      ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
   502      ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
   503      ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
   504      ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
   505      ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
   506      ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
   507      ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
   508      ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
   509      ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
   510      ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
   511      ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
   512      ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
   513      ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
   514      ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
   515      ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
   516      ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
   517      ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
   518      ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
   519      ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
   520      ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
   521      ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
   522      ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
   523      ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
   524      ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
   525      ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
   526      ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
   527      ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
   528      ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
   529      #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
   530      #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
   531      ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
   532      ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
   533      ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
   534      ('(?i)multiple words of text', 'UH-UH', FAIL),
   535      ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
   536      ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
   537      ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
   538      ('(?i)[k]', 'AB', FAIL),
   539  #    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
   540  #    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
   541      ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
   542      ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
   543      ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
   544      ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
   545      ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
   546      ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
   547      ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
   548      ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
   549      ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
   550      ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
   551      ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
   552  
   553      # lookbehind: split by : but not if it is escaped by -.
   554      ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
   555      # escaping with \ as we know it
   556      ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
   557      # terminating with ' and escaping with ? as in edifact
   558      ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
   559  
   560      # Comments using the (?#...) syntax
   561  
   562      ('w(?# comment', 'w', SYNTAX_ERROR),
   563      ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
   564  
   565      # Check odd placement of embedded pattern modifiers
   566  
   567      # not an error under PCRE/PRE:
   568      ('w(?i)', 'W', SUCCEED, 'found', 'W'),
   569      # ('w(?i)', 'W', SYNTAX_ERROR),
   570  
   571      # Comments using the x embedded pattern modifier
   572  
   573      ("""(?x)w# comment 1
   574          x y
   575          # comment 2
   576          z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
   577  
   578      # using the m embedded pattern modifier
   579  
   580      ('^abc', """jkl
   581  abc
   582  xyz""", FAIL),
   583      ('(?m)^abc', """jkl
   584  abc
   585  xyz""", SUCCEED, 'found', 'abc'),
   586  
   587      ('(?m)abc$', """jkl
   588  xyzabc
   589  123""", SUCCEED, 'found', 'abc'),
   590  
   591      # using the s embedded pattern modifier
   592  
   593      ('a.b', 'a\nb', FAIL),
   594      ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
   595  
   596      # test \w, etc. both inside and outside character classes
   597  
   598      ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
   599      ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
   600      ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
   601      ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
   602      ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
   603      # not an error under PCRE/PRE:
   604      # ('[\\d-x]', '-', SYNTAX_ERROR),
   605      (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
   606      (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
   607  
   608      (r'\xff', '\377', SUCCEED, 'found', chr(255)),
   609      # new \x semantics
   610      (r'\x00ff', '\377', FAIL),
   611      # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
   612      (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
   613      ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
   614      (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
   615      (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
   616  
   617      #
   618      # post-1.5.2 additions
   619  
   620      # xmllib problem
   621      (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
   622      # bug 110866: reference to undefined group
   623      (r'((.)\1+)', '', SYNTAX_ERROR),
   624      # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
   625      (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
   626      # bug 112468: various expected syntax errors
   627      (r'(', '', SYNTAX_ERROR),
   628      (r'[\41]', '!', SUCCEED, 'found', '!'),
   629      # bug 114033: nothing to repeat
   630      (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
   631      # bug 115040: rescan if flags are modified inside pattern
   632      (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
   633      # bug 115618: negative lookahead
   634      (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
   635      # bug 116251: character class bug
   636      (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
   637      # bug 123769+127259: non-greedy backtracking bug
   638      (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
   639      (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
   640      (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
   641      # bug 127259: \Z shouldn't depend on multiline mode
   642      (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
   643      # bug 128899: uppercase literals under the ignorecase flag
   644      (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
   645      (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
   646      (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
   647      (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
   648      # bug 130748: ^* should be an error (nothing to repeat)
   649      (r'^*', '', SYNTAX_ERROR),
   650      # bug 133283: minimizing repeat problem
   651      (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
   652      # bug 477728: minimizing repeat problem
   653      (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
   654      # bug 483789: minimizing repeat problem
   655      (r'a[^>]*?b', 'a>b', FAIL),
   656      # bug 490573: minimizing repeat problem
   657      (r'^a*?$', 'foo', FAIL),
   658      # bug 470582: nested groups problem
   659      (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
   660      # another minimizing repeat problem (capturing groups in assertions)
   661      ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
   662      ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
   663      ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
   664  ]
   665  
   666  # try:
   667  #     u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
   668  # except (SyntaxError, ValueError):
   669  #     pass
   670  # else:
   671  #     tests.extend([
   672  #     # bug 410271: \b broken under locales
   673  #     (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
   674  #     (r'(?u)\b.\b', u, SUCCEED, 'found', u),
   675  #     (r'(?u)\w', u, SUCCEED, 'found', u),
   676  #     ])
   677  
   678  for test in tests:
   679    pattern, s, status = test[:3]
   680    if status == SUCCEED:
   681      assert re.search(pattern, s)
   682    elif status == FAIL:
   683      assert not re.search(pattern, s)
   684    elif status == SYNTAX_ERROR:
   685      try:
   686        re.search(pattern, s)
   687        assert AssertionError
   688      except Exception:  # pylint: disable=broad-except
   689        pass