github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/pypy/_sre.py (about)

     1  # NOT_RPYTHON
     2  """
     3  A pure Python reimplementation of the _sre module from CPython 2.4
     4  Copyright 2005 Nik Haldimann, licensed under the MIT license
     5  
     6  This code is based on material licensed under CNRI's Python 1.6 license and
     7  copyrighted by: Copyright (c) 1997-2001 by Secret Labs AB
     8  """
     9  
    10  #import array
    11  import sys
    12  import operator
    13  
# TODO: Support "from foo import *" syntax. Until then, every name listed in
# sre_constants.__all__ is copied into this module's globals by hand.
import sre_constants
for name in sre_constants.__all__:
  globals()[name] = getattr(sre_constants, name)
    18  
# Identifying as _sre from Python 2.3 or 2.4. The version switch below is
# commented out, so the Python 2.4 magic number is always used.
#if sys.version_info[:2] >= (2, 4):
MAGIC = 20031017
#else:
#    MAGIC = 20030419

# In _sre.c this is the byte size of the code word type of the C
# implementation. There it's 2 for normal Python builds and more for wide
# unicode builds (large enough to hold a 32-bit UCS-4 encoded character).
# Since here in pure Python we only see re bytecodes as Python longs, we
# shouldn't have to care about the codesize. But sre_compile will compile some
# stuff differently depending on the codesize (e.g., charsets).
# Starting with Python 3.3, CODESIZE is 4.
CODESIZE = 2

# Version/copyright string of this module.
copyright = "_sre.py 2.4c Copyright 2005 by Nik Haldimann"
    35  
    36  def getcodesize():
    37      return CODESIZE
    38  
    39  
    40  def compile(pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
    41      """Compiles (or rather just converts) a pattern descriptor to a SRE_Pattern
    42      object. Actual compilation to opcodes happens in sre_compile."""
    43      return SRE_Pattern(pattern, flags, code, groups, groupindex, indexgroup)
    44  
    45  def getlower(char_ord, flags):
    46      if (char_ord < 128) or (flags & SRE_FLAG_UNICODE) \
    47                                or (flags & SRE_FLAG_LOCALE and char_ord < 256):
    48          # return ord(unichr(char_ord).lower())
    49          return ord(chr(char_ord).lower())
    50      else:
    51          return char_ord
    52  
    53  
    54  class SRE_Pattern(object):
    55  
    56      def __init__(self, pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
    57          self.pattern = pattern
    58          self.flags = flags
    59          self.groups = groups
    60          self.groupindex = groupindex # Maps group names to group indices
    61          self._indexgroup = indexgroup # Maps indices to group names
    62          self._code = code
    63  
    64      def match(self, string, pos=0, endpos=sys.maxint):
    65          """If zero or more characters at the beginning of string match this
    66          regular expression, return a corresponding MatchObject instance. Return
    67          None if the string does not match the pattern."""
    68          state = _State(string, pos, endpos, self.flags)
    69          if state.match(self._code):
    70              return SRE_Match(self, state)
    71          else:
    72              return None
    73  
    74      def search(self, string, pos=0, endpos=sys.maxint):
    75          """Scan through string looking for a location where this regular
    76          expression produces a match, and return a corresponding MatchObject
    77          instance. Return None if no position in the string matches the
    78          pattern."""
    79          state = _State(string, pos, endpos, self.flags)
    80          if state.search(self._code):
    81              return SRE_Match(self, state)
    82          else:
    83              return None
    84  
    85      def findall(self, string, pos=0, endpos=sys.maxint):
    86          """Return a list of all non-overlapping matches of pattern in string."""
    87          matchlist = []
    88          state = _State(string, pos, endpos, self.flags)
    89          while state.start <= state.end:
    90              state.reset()
    91              state.string_position = state.start
    92              if not state.search(self._code):
    93                  break
    94              match = SRE_Match(self, state)
    95              if self.groups == 0 or self.groups == 1:
    96                  item = match.group(self.groups)
    97              else:
    98                  item = match.groups("")
    99              matchlist.append(item)
   100              if state.string_position == state.start:
   101                  state.start += 1
   102              else:
   103                  state.start = state.string_position
   104          return matchlist
   105  
   106      def _subx(self, template, string, count=0, subn=False):
   107          filter = template
   108          if not callable(template) and "\\" in template:
   109              # handle non-literal strings ; hand it over to the template compiler
   110              raise NotImplementedError()
   111          state = _State(string, 0, sys.maxint, self.flags)
   112          sublist = []
   113  
   114          n = last_pos = 0
   115          while not count or n < count:
   116              state.reset()
   117              state.string_position = state.start
   118              if not state.search(self._code):
   119                  break
   120              if last_pos < state.start:
   121                  sublist.append(string[last_pos:state.start])
   122              if not (last_pos == state.start and
   123                                  last_pos == state.string_position and n > 0):
   124                  # the above ignores empty matches on latest position
   125                  if callable(filter):
   126                      sublist.append(filter(SRE_Match(self, state)))
   127                  else:
   128                      sublist.append(filter)
   129                  last_pos = state.string_position
   130                  n += 1
   131              if state.string_position == state.start:
   132                  state.start += 1
   133              else:
   134                  state.start = state.string_position
   135  
   136          if last_pos < state.end:
   137              sublist.append(string[last_pos:state.end])
   138          item = "".join(sublist)
   139          if subn:
   140              return item, n
   141          else:
   142              return item
   143  
   144      def sub(self, repl, string, count=0):
   145          """Return the string obtained by replacing the leftmost non-overlapping
   146          occurrences of pattern in string by the replacement repl."""
   147          return self._subx(repl, string, count, False)
   148  
   149      def subn(self, repl, string, count=0):
   150          """Return the tuple (new_string, number_of_subs_made) found by replacing
   151          the leftmost non-overlapping occurrences of pattern with the replacement
   152          repl."""
   153          return self._subx(repl, string, count, True)
   154  
   155      def split(self, string, maxsplit=0):
   156          """Split string by the occurrences of pattern."""
   157          splitlist = []
   158          state = _State(string, 0, sys.maxint, self.flags)
   159          n = 0
   160          last = state.start
   161          while not maxsplit or n < maxsplit:
   162              state.reset()
   163              state.string_position = state.start
   164              if not state.search(self._code):
   165                  break
   166              if state.start == state.string_position: # zero-width match
   167                  if last == state.end:                # or end of string
   168                      break
   169                  state.start += 1
   170                  continue
   171              splitlist.append(string[last:state.start])
   172              # add groups (if any)
   173              if self.groups:
   174                  match = SRE_Match(self, state)
   175                  # TODO: Use .extend once it is implemented.
   176                  # splitlist.extend(list(match.groups(None)))
   177                  splitlist += (list(match.groups(None)))
   178              n += 1
   179              last = state.start = state.string_position
   180          splitlist.append(string[last:state.end])
   181          return splitlist
   182  
   183      def finditer(self, string, pos=0, endpos=sys.maxint):
   184          """Return a list of all non-overlapping matches of pattern in string."""
   185          scanner = self.scanner(string, pos, endpos)
   186          return iter(scanner.search, None)
   187  
   188      def scanner(self, string, start=0, end=sys.maxint):
   189          return SRE_Scanner(self, string, start, end)
   190  
   191      def __copy__(self):
   192          raise TypeError, "cannot copy this pattern object"
   193  
   194      def __deepcopy__(self):
   195          raise TypeError, "cannot copy this pattern object"
   196  
   197  
   198  class SRE_Scanner(object):
   199      """Undocumented scanner interface of sre."""
   200  
   201      def __init__(self, pattern, string, start, end):
   202          self.pattern = pattern
   203          self._state = _State(string, start, end, self.pattern.flags)
   204  
   205      def _match_search(self, matcher):
   206          state = self._state
   207          state.reset()
   208          state.string_position = state.start
   209          match = None
   210          if matcher(self.pattern._code):
   211              match = SRE_Match(self.pattern, state)
   212          if match is None or state.string_position == state.start:
   213              state.start += 1
   214          else:
   215              state.start = state.string_position
   216          return match
   217  
   218      def match(self):
   219          return self._match_search(self._state.match)
   220  
   221      def search(self):
   222          return self._match_search(self._state.search)
   223  
   224  
   225  class SRE_Match(object):
   226  
   227      def __init__(self, pattern, state):
   228          self.re = pattern
   229          self.string = state.string
   230          self.pos = state.pos
   231          self.endpos = state.end
   232          self.lastindex = state.lastindex
   233          if self.lastindex < 0:
   234              self.lastindex = None
   235          self.regs = self._create_regs(state)
   236          if pattern._indexgroup and 0 <= self.lastindex < len(pattern._indexgroup):
   237              # The above upper-bound check should not be necessary, as the re
   238              # compiler is supposed to always provide an _indexgroup list long
   239              # enough. But the re.Scanner class seems to screw up something
   240              # there, test_scanner in test_re won't work without upper-bound
   241              # checking. XXX investigate this and report bug to CPython.
   242              self.lastgroup = pattern._indexgroup[self.lastindex]
   243          else:
   244              self.lastgroup = None
   245  
   246      def _create_regs(self, state):
   247          """Creates a tuple of index pairs representing matched groups."""
   248          regs = [(state.start, state.string_position)]
   249          for group in range(self.re.groups):
   250              mark_index = 2 * group
   251              if mark_index + 1 < len(state.marks) \
   252                                      and state.marks[mark_index] is not None \
   253                                      and state.marks[mark_index + 1] is not None:
   254                  regs.append((state.marks[mark_index], state.marks[mark_index + 1]))
   255              else:
   256                  regs.append((-1, -1))
   257          return tuple(regs)
   258  
   259      def _get_index(self, group):
   260          if isinstance(group, int):
   261              if group >= 0 and group <= self.re.groups:
   262                  return group
   263          else:
   264              if group in self.re.groupindex:
   265                  return self.re.groupindex[group]
   266          raise IndexError("no such group")
   267  
   268      def _get_slice(self, group, default):
   269          group_indices = self.regs[group]
   270          if group_indices[0] >= 0:
   271              return self.string[group_indices[0]:group_indices[1]]
   272          else:
   273              return default
   274  
   275      def start(self, group=0):
   276          """Returns the indices of the start of the substring matched by group;
   277          group defaults to zero (meaning the whole matched substring). Returns -1
   278          if group exists but did not contribute to the match."""
   279          return self.regs[self._get_index(group)][0]
   280  
   281      def end(self, group=0):
   282          """Returns the indices of the end of the substring matched by group;
   283          group defaults to zero (meaning the whole matched substring). Returns -1
   284          if group exists but did not contribute to the match."""
   285          return self.regs[self._get_index(group)][1]
   286  
   287      def span(self, group=0):
   288          """Returns the 2-tuple (m.start(group), m.end(group))."""
   289          return self.start(group), self.end(group)
   290  
   291      def expand(self, template):
   292          """Return the string obtained by doing backslash substitution and
   293          resolving group references on template."""
   294          raise NotImplementedError
   295  
   296      def groups(self, default=None):
   297          """Returns a tuple containing all the subgroups of the match. The
   298          default argument is used for groups that did not participate in the
   299          match (defaults to None)."""
   300          groups = []
   301          for indices in self.regs[1:]:
   302              if indices[0] >= 0:
   303                  groups.append(self.string[indices[0]:indices[1]])
   304              else:
   305                  groups.append(default)
   306          return tuple(groups)
   307  
   308      def groupdict(self, default=None):
   309          """Return a dictionary containing all the named subgroups of the match.
   310          The default argument is used for groups that did not participate in the
   311          match (defaults to None)."""
   312          groupdict = {}
   313          for key, value in self.re.groupindex.items():
   314              groupdict[key] = self._get_slice(value, default)
   315          return groupdict
   316  
   317      def group(self, *args):
   318          """Returns one or more subgroups of the match. Each argument is either a
   319          group index or a group name."""
   320          if len(args) == 0:
   321              args = (0,)
   322          grouplist = []
   323          for group in args:
   324              grouplist.append(self._get_slice(self._get_index(group), None))
   325          if len(grouplist) == 1:
   326              return grouplist[0]
   327          else:
   328              return tuple(grouplist)
   329  
   330      def __copy__():
   331          raise TypeError, "cannot copy this pattern object"
   332  
   333      def __deepcopy__():
   334          raise TypeError, "cannot copy this pattern object"
   335  
   336  
   337  class _State(object):
   338  
   339      def __init__(self, string, start, end, flags):
   340          self.string = string
   341          if start < 0:
   342              start = 0
   343          if end > len(string):
   344              end = len(string)
   345          self.start = start
   346          self.string_position = self.start
   347          self.end = end
   348          self.pos = start
   349          self.flags = flags
   350          self.reset()
   351  
   352      def reset(self):
   353          self.marks = []
   354          self.lastindex = -1
   355          self.marks_stack = []
   356          self.context_stack = []
   357          self.repeat = None
   358  
   359      def match(self, pattern_codes):
   360          # Optimization: Check string length. pattern_codes[3] contains the
   361          # minimum length for a string to possibly match.
   362          if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
   363              if self.end - self.string_position < pattern_codes[3]:
   364                  #_log("reject (got %d chars, need %d)"
   365                  #         % (self.end - self.string_position, pattern_codes[3]))
   366                  return False
   367  
   368          dispatcher = _OpcodeDispatcher()
   369          self.context_stack.append(_MatchContext(self, pattern_codes))
   370          has_matched = None
   371          while len(self.context_stack) > 0:
   372              context = self.context_stack[-1]
   373              has_matched = dispatcher.match(context)
   374              if has_matched is not None: # don't pop if context isn't done
   375                  # TODO: use .pop once it is implemented
   376                  # self.context_stack.pop()
   377                  self.context_stack = self.context_stack[:-1]
   378          return has_matched
   379  
   380      def search(self, pattern_codes):
   381          flags = 0
   382          if pattern_codes[0] == OPCODES["info"]:
   383              # optimization info block
   384              # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
   385              if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
   386                  return self.fast_search(pattern_codes)
   387              flags = pattern_codes[2]
   388              pattern_codes = pattern_codes[pattern_codes[1] + 1:]
   389  
   390          string_position = self.start
   391          if pattern_codes[0] == OPCODES["literal"]:
   392              # Special case: Pattern starts with a literal character. This is
   393              # used for short prefixes
   394              character = pattern_codes[1]
   395              while True:
   396                  while string_position < self.end \
   397                          and ord(self.string[string_position]) != character:
   398                      string_position += 1
   399                  if string_position >= self.end:
   400                      return False
   401                  self.start = string_position
   402                  string_position += 1
   403                  self.string_position = string_position
   404                  if flags & SRE_INFO_LITERAL:
   405                      return True
   406                  if self.match(pattern_codes[2:]):
   407                      return True
   408              return False
   409  
   410          # General case
   411          while string_position <= self.end:
   412              self.reset()
   413              self.start = self.string_position = string_position
   414              if self.match(pattern_codes):
   415                  return True
   416              string_position += 1
   417          return False
   418  
   419      def fast_search(self, pattern_codes):
   420          """Skips forward in a string as fast as possible using information from
   421          an optimization info block."""
   422          # pattern starts with a known prefix
   423          # <5=length> <6=skip> <7=prefix data> <overlap data>
   424          flags = pattern_codes[2]
   425          prefix_len = pattern_codes[5]
   426          prefix_skip = pattern_codes[6] # don't really know what this is good for
   427          prefix = pattern_codes[7:7 + prefix_len]
   428          overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
   429          pattern_codes = pattern_codes[pattern_codes[1] + 1:]
   430          i = 0
   431          string_position = self.string_position
   432          while string_position < self.end:
   433              while True:
   434                  if ord(self.string[string_position]) != prefix[i]:
   435                      if i == 0:
   436                          break
   437                      else:
   438                          i = overlap[i]
   439                  else:
   440                      i += 1
   441                      if i == prefix_len:
   442                          # found a potential match
   443                          self.start = string_position + 1 - prefix_len
   444                          self.string_position = string_position + 1 \
   445                                                       - prefix_len + prefix_skip
   446                          if flags & SRE_INFO_LITERAL:
   447                              return True # matched all of pure literal pattern
   448                          if self.match(pattern_codes[2 * prefix_skip:]):
   449                              return True
   450                          i = overlap[i]
   451                      break
   452              string_position += 1
   453          return False
   454  
   455      def set_mark(self, mark_nr, position):
   456          if mark_nr & 1:
   457              # This id marks the end of a group.
   458              self.lastindex = mark_nr / 2 + 1
   459          if mark_nr >= len(self.marks):
   460              # TODO: Use .extend once it is implemented
   461              # self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
   462              self.marks += ([None] * (mark_nr - len(self.marks) + 1))
   463          self.marks[mark_nr] = position
   464  
   465      def get_marks(self, group_index):
   466          marks_index = 2 * group_index
   467          if len(self.marks) > marks_index + 1:
   468              return self.marks[marks_index], self.marks[marks_index + 1]
   469          else:
   470              return None, None
   471  
   472      def marks_push(self):
   473          self.marks_stack.append((self.marks[:], self.lastindex))
   474  
   475      def marks_pop(self):
   476          # TODO: Use .pop once implemented
   477          # self.marks, self.lastindex = self.marks_stack.pop()
   478          self.marks, self.lastindex = self.marks_stack[-1]
   479          self.marks_stack = self.marks_stack[:-1]
   480  
   481      def marks_pop_keep(self):
   482          self.marks, self.lastindex = self.marks_stack[-1]
   483  
   484      def marks_pop_discard(self):
   485          # TODO: Use .pop once implemented
   486          self.marks_stack = self.marks_stack[:-1]
   487  
   488      def lower(self, char_ord):
   489          return getlower(char_ord, self.flags)
   490  
   491  
   492  class _MatchContext(object):
   493  
   494      def __init__(self, state, pattern_codes):
   495          self.state = state
   496          self.pattern_codes = pattern_codes
   497          self.string_position = state.string_position
   498          self.code_position = 0
   499          self.has_matched = None
   500  
   501      def push_new_context(self, pattern_offset):
   502          """Creates a new child context of this context and pushes it on the
   503          stack. pattern_offset is the offset off the current code position to
   504          start interpreting from."""
   505          child_context = _MatchContext(self.state,
   506              self.pattern_codes[self.code_position + pattern_offset:])
   507          self.state.context_stack.append(child_context)
   508          return child_context
   509  
   510      def peek_char(self, peek=0):
   511          return self.state.string[self.string_position + peek]
   512  
   513      def skip_char(self, skip_count):
   514          self.string_position += skip_count
   515  
   516      def remaining_chars(self):
   517          return self.state.end - self.string_position
   518  
   519      def peek_code(self, peek=0):
   520          return self.pattern_codes[self.code_position + peek]
   521  
   522      def skip_code(self, skip_count):
   523          self.code_position += skip_count
   524  
   525      def remaining_codes(self):
   526          return len(self.pattern_codes) - self.code_position
   527  
   528      def at_beginning(self):
   529          return self.string_position == 0
   530  
   531      def at_end(self):
   532          return self.string_position == self.state.end
   533  
   534      def at_linebreak(self):
   535          return not self.at_end() and _is_linebreak(self.peek_char())
   536  
   537      def at_boundary(self, word_checker):
   538          if self.at_beginning() and self.at_end():
   539              return False
   540          that = not self.at_beginning() and word_checker(self.peek_char(-1))
   541          this = not self.at_end() and word_checker(self.peek_char())
   542          return this != that
   543  
   544  
   545  class _RepeatContext(_MatchContext):
   546  
   547      def __init__(self, context):
   548          _MatchContext.__init__(self, context.state,
   549                              context.pattern_codes[context.code_position:])
   550          self.count = -1
   551          self.previous = context.state.repeat
   552          self.last_position = None
   553  
   554  
   555  class _Dispatcher(object):
   556  
   557      DISPATCH_TABLE = None
   558  
   559      def dispatch(self, code, context):
   560          method = self.DISPATCH_TABLE.get(code, self.__class__.unknown)
   561          return method(self, context)
   562  
   563      def unknown(self, code, ctx):
   564          raise NotImplementedError()
   565  
   566      def build_dispatch_table(cls, code_dict, method_prefix):
   567          if cls.DISPATCH_TABLE is not None:
   568              return
   569          table = {}
   570          for key, value in code_dict.items():
   571              if hasattr(cls, "%s%s" % (method_prefix, key)):
   572                  table[value] = getattr(cls, "%s%s" % (method_prefix, key))
   573          cls.DISPATCH_TABLE = table
   574  
   575      build_dispatch_table = classmethod(build_dispatch_table)
   576  
   577  
   578  class _OpcodeDispatcher(_Dispatcher):
   579  
   580      def __init__(self):
   581          self.executing_contexts = {}
   582          self.at_dispatcher = _AtcodeDispatcher()
   583          self.ch_dispatcher = _ChcodeDispatcher()
   584          self.set_dispatcher = _CharsetDispatcher()
   585  
   586      def match(self, context):
   587          """Returns True if the current context matches, False if it doesn't and
   588          None if matching is not finished, ie must be resumed after child
   589          contexts have been matched."""
   590          while context.remaining_codes() > 0 and context.has_matched is None:
   591              opcode = context.peek_code()
   592              if not self.dispatch(opcode, context):
   593                  return None
   594          if context.has_matched is None:
   595              context.has_matched = False
   596          return context.has_matched
   597  
   598      def dispatch(self, opcode, context):
   599          """Dispatches a context on a given opcode. Returns True if the context
   600          is done matching, False if it must be resumed when next encountered."""
   601          if id(context) in self.executing_contexts:
   602              generator = self.executing_contexts[id(context)]
   603              del self.executing_contexts[id(context)]
   604              has_finished = generator.next()
   605          else:
   606              method = self.DISPATCH_TABLE.get(opcode, _OpcodeDispatcher.unknown)
   607              has_finished = method(self, context)
   608              if hasattr(has_finished, "next"): # avoid using the types module
   609                  generator = has_finished
   610                  has_finished = generator.next()
   611          if not has_finished:
   612              self.executing_contexts[id(context)] = generator
   613          return has_finished
   614  
   615      def op_success(self, ctx):
   616          # end of pattern
   617          #self._log(ctx, "SUCCESS")
   618          ctx.state.string_position = ctx.string_position
   619          ctx.has_matched = True
   620          return True
   621  
   622      def op_failure(self, ctx):
   623          # immediate failure
   624          #self._log(ctx, "FAILURE")
   625          ctx.has_matched = False
   626          return True
   627  
   628      def general_op_literal(self, ctx, compare, decorate=lambda x: x):
   629          if ctx.at_end() or not compare(decorate(ord(ctx.peek_char())),
   630                                              decorate(ctx.peek_code(1))):
   631              ctx.has_matched = False
   632          ctx.skip_code(2)
   633          ctx.skip_char(1)
   634  
   635      def op_literal(self, ctx):
   636          # match literal string
   637          # <LITERAL> <code>
   638          #self._log(ctx, "LITERAL", ctx.peek_code(1))
   639          self.general_op_literal(ctx, operator.eq)
   640          return True
   641  
   642      def op_not_literal(self, ctx):
   643          # match anything that is not the given literal character
   644          # <NOT_LITERAL> <code>
   645          #self._log(ctx, "NOT_LITERAL", ctx.peek_code(1))
   646          self.general_op_literal(ctx, operator.ne)
   647          return True
   648  
   649      def op_literal_ignore(self, ctx):
   650          # match literal regardless of case
   651          # <LITERAL_IGNORE> <code>
   652          #self._log(ctx, "LITERAL_IGNORE", ctx.peek_code(1))
   653          self.general_op_literal(ctx, operator.eq, ctx.state.lower)
   654          return True
   655  
    def op_not_literal_ignore(self, ctx):
        # match anything that is not the given literal, regardless of case
        # <NOT_LITERAL_IGNORE> <code>
        #self._log(ctx, "NOT_LITERAL_IGNORE", ctx.peek_code(1))
        self.general_op_literal(ctx, operator.ne, ctx.state.lower)
        return True
   662  
   663      def op_at(self, ctx):
   664          # match at given position
   665          # <AT> <code>
   666          #self._log(ctx, "AT", ctx.peek_code(1))
   667          if not self.at_dispatcher.dispatch(ctx.peek_code(1), ctx):
   668              ctx.has_matched = False
   669              return True
   670          ctx.skip_code(2)
   671          return True
   672  
   673      def op_category(self, ctx):
   674          # match at given category
   675          # <CATEGORY> <code>
   676          #self._log(ctx, "CATEGORY", ctx.peek_code(1))
   677          if ctx.at_end() or not self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
   678              ctx.has_matched = False
   679              return True
   680          ctx.skip_code(2)
   681          ctx.skip_char(1)
   682          return True
   683  
   684      def op_any(self, ctx):
   685          # match anything (except a newline)
   686          # <ANY>
   687          #self._log(ctx, "ANY")
   688          if ctx.at_end() or ctx.at_linebreak():
   689              ctx.has_matched = False
   690              return True
   691          ctx.skip_code(1)
   692          ctx.skip_char(1)
   693          return True
   694  
   695      def op_any_all(self, ctx):
   696          # match anything
   697          # <ANY_ALL>
   698          #self._log(ctx, "ANY_ALL")
   699          if ctx.at_end():
   700              ctx.has_matched = False
   701              return True
   702          ctx.skip_code(1)
   703          ctx.skip_char(1)
   704          return True
   705  
   706      def general_op_in(self, ctx, decorate=lambda x: x):
   707          #self._log(ctx, "OP_IN")
   708          if ctx.at_end():
   709              ctx.has_matched = False
   710              return
   711          skip = ctx.peek_code(1)
   712          ctx.skip_code(2) # set op pointer to the set code
   713          if not self.check_charset(ctx, decorate(ord(ctx.peek_char()))):
   714              ctx.has_matched = False
   715              return
   716          ctx.skip_code(skip - 1)
   717          ctx.skip_char(1)
   718  
   719      def op_in(self, ctx):
   720          # match set member (or non_member)
   721          # <IN> <skip> <set>
   722          #self._log(ctx, "OP_IN")
   723          self.general_op_in(ctx)
   724          return True
   725  
   726      def op_in_ignore(self, ctx):
   727          # match set member (or non_member), disregarding case of current char
   728          # <IN_IGNORE> <skip> <set>
   729          #self._log(ctx, "OP_IN_IGNORE")
   730          self.general_op_in(ctx, ctx.state.lower)
   731          return True
   732  
   733      def op_jump(self, ctx):
   734          # jump forward
   735          # <JUMP> <offset>
   736          #self._log(ctx, "JUMP", ctx.peek_code(1))
   737          ctx.skip_code(ctx.peek_code(1) + 1)
   738          return True
   739  
    # skip info
    # <INFO> <skip>
    # INFO is handled exactly like JUMP: its single operand is the number of
    # code words to skip, so the jump handler is reused under a second name.
    op_info = op_jump
   743  
   744      def op_mark(self, ctx):
   745          # set mark
   746          # <MARK> <gid>
   747          #self._log(ctx, "OP_MARK", ctx.peek_code(1))
   748          ctx.state.set_mark(ctx.peek_code(1), ctx.string_position)
   749          ctx.skip_code(2)
   750          return True
   751  
    def op_branch(self, ctx):
        # alternation
        # <BRANCH> <0=skip> code <JUMP> ... <NULL>
        #
        # Generator-style handler. As used in this method: `yield False` is
        # issued right after a child context has been pushed, and the child's
        # has_matched is read once the generator is resumed; `yield True` is
        # issued once ctx.has_matched holds this op's outcome.
        # NOTE(review): the driver that consumes these yields is outside this
        # view - presumably it runs the child to completion before resuming
        # and never resumes after a `yield True`; confirm.
        #self._log(ctx, "BRANCH")
        ctx.state.marks_push()
        ctx.skip_code(1)
        # Each alternative starts with its own length; a zero length ends the
        # list of alternatives.
        current_branch_length = ctx.peek_code(0)
        while current_branch_length:
            # The following tries to shortcut branches starting with a
            # (unmatched) literal. _sre.c also shortcuts charsets here.
            if not (ctx.peek_code(1) == OPCODES["literal"] and \
                    (ctx.at_end() or ctx.peek_code(2) != ord(ctx.peek_char()))):
                ctx.state.string_position = ctx.string_position
                # Try this alternative in a child context starting one code
                # word ahead (just past the length word).
                child_context = ctx.push_new_context(1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                ctx.state.marks_pop_keep()
            # Move on to the next alternative.
            ctx.skip_code(current_branch_length)
            current_branch_length = ctx.peek_code(0)
        # No alternative matched.
        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        yield True
   776  
    def op_repeat_one(self, ctx):
        # match repeated sequence (maximizing).
        # this operator only works if the repeated item is exactly one character
        # wide, and we're not already collecting backtracking points.
        # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
        #
        # Generator-style handler: `yield False` follows every pushed child
        # context (whose has_matched is read after resumption); `yield True`
        # is issued once ctx.has_matched holds the final outcome.
        # NOTE(review): nothing returns after a `yield True`, so the code
        # below it would run if the generator were resumed - presumably the
        # driver (outside this view) never does that; confirm.
        mincount = ctx.peek_code(2)
        maxcount = ctx.peek_code(3)
        #self._log(ctx, "REPEAT_ONE", mincount, maxcount)

        if ctx.remaining_chars() < mincount:
            # Not enough input left to reach the minimum count.
            ctx.has_matched = False
            yield True
        ctx.state.string_position = ctx.string_position
        # Greedily take as many repetitions as possible (up to maxcount).
        count = self.count_repetitions(ctx, maxcount)
        ctx.skip_char(count)
        if count < mincount:
            ctx.has_matched = False
            yield True
        # peek_code(1) is <skip>; <skip> + 1 is the first code word of the tail.
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
            # tail is empty.  we're finished
            ctx.state.string_position = ctx.string_position
            ctx.has_matched = True
            yield True

        ctx.state.marks_push()
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["literal"]:
            # Special case: Tail starts with a literal. Skip positions where
            # the rest of the pattern cannot possibly match.
            char = ctx.peek_code(ctx.peek_code(1) + 2)
            while True:
                # Give back characters until the next one equals the literal
                # (or the minimum count would be violated).
                while count >= mincount and \
                                (ctx.at_end() or ord(ctx.peek_char()) != char):
                    ctx.skip_char(-1)
                    count -= 1
                if count < mincount:
                    break
                ctx.state.string_position = ctx.string_position
                # Try the tail from this position.
                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                # Tail failed here: give back one more character and retry.
                ctx.skip_char(-1)
                count -= 1
                ctx.state.marks_pop_keep()

        else:
            # General case: backtracking
            while count >= mincount:
                ctx.state.string_position = ctx.string_position
                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
                yield False
                if child_context.has_matched:
                    ctx.has_matched = True
                    yield True
                ctx.skip_char(-1)
                count -= 1
                ctx.state.marks_pop_keep()

        # Every candidate repetition count has been tried without success.
        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        yield True
   839  
    def op_min_repeat_one(self, ctx):
        # match repeated sequence (minimizing)
        # <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
        #
        # Generator-style handler: `yield False` follows every pushed child
        # context (whose has_matched is read after resumption); `yield True`
        # is issued once ctx.has_matched holds the final outcome.
        # NOTE(review): relies on the driver (outside this view) not resuming
        # the generator after a `yield True`; confirm.
        mincount = ctx.peek_code(2)
        maxcount = ctx.peek_code(3)
        #self._log(ctx, "MIN_REPEAT_ONE", mincount, maxcount)

        if ctx.remaining_chars() < mincount:
            # Not enough input left to reach the minimum count.
            ctx.has_matched = False
            yield True
        ctx.state.string_position = ctx.string_position
        if mincount == 0:
            count = 0
        else:
            # Consume exactly the mandatory repetitions first.
            count = self.count_repetitions(ctx, mincount)
            if count < mincount:
                ctx.has_matched = False
                yield True
            ctx.skip_char(count)
        # peek_code(1) is <skip>; <skip> + 1 is the first code word of the tail.
        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
            # tail is empty.  we're finished
            ctx.state.string_position = ctx.string_position
            ctx.has_matched = True
            yield True

        ctx.state.marks_push()
        while maxcount == MAXREPEAT or count <= maxcount:
            ctx.state.string_position = ctx.string_position
            # Try the tail first; only take another repetition if it fails.
            child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
            yield False
            if child_context.has_matched:
                ctx.has_matched = True
                yield True
            ctx.state.string_position = ctx.string_position
            # Stop when the item cannot match one more time at this position.
            if self.count_repetitions(ctx, 1) == 0:
                break
            ctx.skip_char(1)
            count += 1
            ctx.state.marks_pop_keep()

        ctx.state.marks_pop_discard()
        ctx.has_matched = False
        yield True
   883  
    def op_repeat(self, ctx):
        # create repeat context.  all the hard work is done by the UNTIL
        # operator (MAX_UNTIL, MIN_UNTIL)
        # <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail
        #
        # Generator-style handler: yields False after pushing the child
        # context, then yields True once the child's result has been copied
        # into ctx.has_matched.
        #self._log(ctx, "REPEAT", ctx.peek_code(2), ctx.peek_code(3))
        repeat = _RepeatContext(ctx)
        # Make the new repeat context the current one for the UNTIL handlers.
        ctx.state.repeat = repeat
        ctx.state.string_position = ctx.string_position
        # Continue at <skip> + 1 code words ahead.
        child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
        yield False
        # Restore the previously active repeat context.
        ctx.state.repeat = repeat.previous
        ctx.has_matched = child_context.has_matched
        yield True
   897  
    def op_max_until(self, ctx):
        # maximizing repeat
        # <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail
        #
        # Generator-style handler: `yield False` follows every pushed child
        # context (whose has_matched is read after resumption); `yield True`
        # is issued once ctx.has_matched holds the outcome.
        # NOTE(review): relies on the driver (outside this view) not resuming
        # the generator after a `yield True`; confirm.
        # Raises RuntimeError when no repeat context is active.
        repeat = ctx.state.repeat
        if repeat is None:
            raise RuntimeError("Internal re error: MAX_UNTIL without REPEAT.")
        # min/max are read from the code of the enclosing REPEAT.
        mincount = repeat.peek_code(2)
        maxcount = repeat.peek_code(3)
        ctx.state.string_position = ctx.string_position
        count = repeat.count + 1
        #self._log(ctx, "MAX_UNTIL", count)

        if count < mincount:
            # not enough matches
            repeat.count = count
            # Run the repeated item again (it starts 4 code words into the
            # REPEAT block).
            child_context = repeat.push_new_context(4)
            yield False
            ctx.has_matched = child_context.has_matched
            if not ctx.has_matched:
                # Undo the count increment and restore the position.
                repeat.count = count - 1
                ctx.state.string_position = ctx.string_position
            yield True

        if (count < maxcount or maxcount == MAXREPEAT) \
                      and ctx.state.string_position != repeat.last_position:
            # we may have enough matches, if we can match another item, do so
            repeat.count = count
            ctx.state.marks_push()
            save_last_position = repeat.last_position # zero-width match protection
            repeat.last_position = ctx.state.string_position
            child_context = repeat.push_new_context(4)
            yield False
            repeat.last_position = save_last_position
            if child_context.has_matched:
                ctx.state.marks_pop_discard()
                ctx.has_matched = True
                yield True
            # Another item did not lead to a match: roll back marks, count
            # and position before trying the tail.
            ctx.state.marks_pop()
            repeat.count = count - 1
            ctx.state.string_position = ctx.string_position

        # cannot match more repeated items here.  make sure the tail matches
        ctx.state.repeat = repeat.previous
        child_context = ctx.push_new_context(1)
        yield False
        ctx.has_matched = child_context.has_matched
        if not ctx.has_matched:
            # Tail failed: reinstate this repeat context and the position.
            ctx.state.repeat = repeat
            ctx.state.string_position = ctx.string_position
        yield True
   948  
    def op_min_until(self, ctx):
        # minimizing repeat
        # <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail
        #
        # Generator-style handler: `yield False` follows every pushed child
        # context (whose has_matched is read after resumption); `yield True`
        # is issued once ctx.has_matched holds the outcome.
        # NOTE(review): relies on the driver (outside this view) not resuming
        # the generator after a `yield True`; confirm.
        # Raises RuntimeError when no repeat context is active.
        repeat = ctx.state.repeat
        if repeat is None:
            raise RuntimeError("Internal re error: MIN_UNTIL without REPEAT.")
        # min/max are read from the code of the enclosing REPEAT.
        mincount = repeat.peek_code(2)
        maxcount = repeat.peek_code(3)
        ctx.state.string_position = ctx.string_position
        count = repeat.count + 1
        #self._log(ctx, "MIN_UNTIL", count)

        if count < mincount:
            # not enough matches
            repeat.count = count
            # Run the repeated item again (it starts 4 code words into the
            # REPEAT block).
            child_context = repeat.push_new_context(4)
            yield False
            ctx.has_matched = child_context.has_matched
            if not ctx.has_matched:
                # Undo the count increment and restore the position.
                repeat.count = count - 1
                ctx.state.string_position = ctx.string_position
            yield True

        # see if the tail matches
        ctx.state.marks_push()
        ctx.state.repeat = repeat.previous
        child_context = ctx.push_new_context(1)
        yield False
        if child_context.has_matched:
            ctx.has_matched = True
            yield True
        # Tail failed: reinstate this repeat context, the position and marks.
        ctx.state.repeat = repeat
        ctx.state.string_position = ctx.string_position
        ctx.state.marks_pop()

        # match more until tail matches
        if count >= maxcount and maxcount != MAXREPEAT:
            # The maximum repetition count has been reached.
            ctx.has_matched = False
            yield True
        repeat.count = count
        child_context = repeat.push_new_context(4)
        yield False
        ctx.has_matched = child_context.has_matched
        if not ctx.has_matched:
            repeat.count = count - 1
            ctx.state.string_position = ctx.string_position
        yield True
   996  
   997      def general_op_groupref(self, ctx, decorate=lambda x: x):
   998          group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
   999          if group_start is None or group_end is None or group_end < group_start:
  1000              ctx.has_matched = False
  1001              return True
  1002          while group_start < group_end:
  1003              if ctx.at_end() or decorate(ord(ctx.peek_char())) \
  1004                                  != decorate(ord(ctx.state.string[group_start])):
  1005                  ctx.has_matched = False
  1006                  return True
  1007              group_start += 1
  1008              ctx.skip_char(1)
  1009          ctx.skip_code(2)
  1010          return True
  1011  
  1012      def op_groupref(self, ctx):
  1013          # match backreference
  1014          # <GROUPREF> <zero-based group index>
  1015          #self._log(ctx, "GROUPREF", ctx.peek_code(1))
  1016          return self.general_op_groupref(ctx)
  1017  
  1018      def op_groupref_ignore(self, ctx):
  1019          # match backreference case-insensitive
  1020          # <GROUPREF_IGNORE> <zero-based group index>
  1021          #self._log(ctx, "GROUPREF_IGNORE", ctx.peek_code(1))
  1022          return self.general_op_groupref(ctx, ctx.state.lower)
  1023  
  1024      def op_groupref_exists(self, ctx):
  1025          # <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ...
  1026          #self._log(ctx, "GROUPREF_EXISTS", ctx.peek_code(1))
  1027          group_start, group_end = ctx.state.get_marks(ctx.peek_code(1))
  1028          if group_start is None or group_end is None or group_end < group_start:
  1029              ctx.skip_code(ctx.peek_code(2) + 1)
  1030          else:
  1031              ctx.skip_code(3)
  1032          return True
  1033  
    def op_assert(self, ctx):
        # assert subpattern
        # <ASSERT> <skip> <back> <pattern>
        #
        # Generator-style handler: yields False after pushing the child
        # context for <pattern>, and yields True once the outcome is known.
        # NOTE(review): relies on the driver (outside this view) not resuming
        # the generator after a `yield True`; confirm.
        #self._log(ctx, "ASSERT", ctx.peek_code(2))
        # Start the subpattern <back> characters before the current position.
        ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
        if ctx.state.string_position < 0:
            # Not enough text before the current position.
            ctx.has_matched = False
            yield True
        # The subpattern starts 3 code words into the ASSERT block.
        child_context = ctx.push_new_context(3)
        yield False
        if child_context.has_matched:
            # Assertion holds: continue after the whole ASSERT block.
            ctx.skip_code(ctx.peek_code(1) + 1)
        else:
            ctx.has_matched = False
        yield True
  1049  
    def op_assert_not(self, ctx):
        # assert not subpattern
        # <ASSERT_NOT> <skip> <back> <pattern>
        #
        # Generator-style handler: yields False after pushing the child
        # context for <pattern>, and yields True once the outcome is known.
        # NOTE(review): relies on the driver (outside this view) not resuming
        # the generator after a `yield True`; confirm.
        #self._log(ctx, "ASSERT_NOT", ctx.peek_code(2))
        # Start the subpattern <back> characters before the current position.
        ctx.state.string_position = ctx.string_position - ctx.peek_code(2)
        # A negative start position means the subpattern cannot match, so the
        # negative assertion succeeds without running it.
        if ctx.state.string_position >= 0:
            child_context = ctx.push_new_context(3)
            yield False
            if child_context.has_matched:
                # The subpattern matched, so the negative assertion fails.
                ctx.has_matched = False
                yield True
        # Assertion holds: continue after the whole ASSERT_NOT block.
        ctx.skip_code(ctx.peek_code(1) + 1)
        yield True
  1063  
  1064      def unknown(self, ctx):
  1065          #self._log(ctx, "UNKNOWN", ctx.peek_code())
  1066          raise RuntimeError("Internal re error. Unknown opcode: %s" % ctx.peek_code())
  1067  
  1068      def check_charset(self, ctx, char):
  1069          """Checks whether a character matches set of arbitrary length. Assumes
  1070          the code pointer is at the first member of the set."""
  1071          self.set_dispatcher.reset(char)
  1072          save_position = ctx.code_position
  1073          result = None
  1074          while result is None:
  1075              result = self.set_dispatcher.dispatch(ctx.peek_code(), ctx)
  1076          ctx.code_position = save_position
  1077          return result
  1078  
  1079      def count_repetitions(self, ctx, maxcount):
  1080          """Returns the number of repetitions of a single item, starting from the
  1081          current string position. The code pointer is expected to point to a
  1082          REPEAT_ONE operation (with the repeated 4 ahead)."""
  1083          count = 0
  1084          real_maxcount = ctx.state.end - ctx.string_position
  1085          if maxcount < real_maxcount and maxcount != MAXREPEAT:
  1086              real_maxcount = maxcount
  1087          # XXX could special case every single character pattern here, as in C.
  1088          # This is a general solution, a bit hackisch, but works and should be
  1089          # efficient.
  1090          code_position = ctx.code_position
  1091          string_position = ctx.string_position
  1092          ctx.skip_code(4)
  1093          reset_position = ctx.code_position
  1094          while count < real_maxcount:
  1095              # this works because the single character pattern is followed by
  1096              # a success opcode
  1097              ctx.code_position = reset_position
  1098              self.dispatch(ctx.peek_code(), ctx)
  1099              if ctx.has_matched is False: # could be None as well
  1100                  break
  1101              count += 1
  1102          ctx.has_matched = None
  1103          ctx.code_position = code_position
  1104          ctx.string_position = string_position
  1105          return count
  1106  
  1107      def _log(self, context, opname, *args):
  1108          arg_string = ("%s " * len(args)) % args
  1109          _log("|%s|%s|%s %s" % (context.pattern_codes,
  1110              context.string_position, opname, arg_string))
  1111  
# Build the opcode dispatch table from OPCODES and the "op_" method prefix.
# NOTE(review): build_dispatch_table is defined outside this view; presumably
# it binds every opcode to the method named "op_" + <opcode name> - confirm.
_OpcodeDispatcher.build_dispatch_table(OPCODES, "op_")
  1113  
  1114  
  1115  class _CharsetDispatcher(_Dispatcher):
  1116  
  1117      def __init__(self):
  1118          self.ch_dispatcher = _ChcodeDispatcher()
  1119  
  1120      def reset(self, char):
  1121          self.char = char
  1122          self.ok = True
  1123  
  1124      def set_failure(self, ctx):
  1125          return not self.ok
  1126      def set_literal(self, ctx):
  1127          # <LITERAL> <code>
  1128          if ctx.peek_code(1) == self.char:
  1129              return self.ok
  1130          else:
  1131              ctx.skip_code(2)
  1132      def set_category(self, ctx):
  1133          # <CATEGORY> <code>
  1134          if self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
  1135              return self.ok
  1136          else:
  1137              ctx.skip_code(2)
  1138      def set_charset(self, ctx):
  1139          # <CHARSET> <bitmap> (16 bits per code word)
  1140          char_code = self.char
  1141          ctx.skip_code(1) # point to beginning of bitmap
  1142          if CODESIZE == 2:
  1143              if char_code < 256 and ctx.peek_code(char_code >> 4) \
  1144                                              & (1 << (char_code & 15)):
  1145                  return self.ok
  1146              ctx.skip_code(16) # skip bitmap
  1147          else:
  1148              if char_code < 256 and ctx.peek_code(char_code >> 5) \
  1149                                              & (1 << (char_code & 31)):
  1150                  return self.ok
  1151              ctx.skip_code(8) # skip bitmap
  1152      def set_range(self, ctx):
  1153          # <RANGE> <lower> <upper>
  1154          if ctx.peek_code(1) <= self.char <= ctx.peek_code(2):
  1155              return self.ok
  1156          ctx.skip_code(3)
  1157      def set_negate(self, ctx):
  1158          self.ok = not self.ok
  1159          ctx.skip_code(1)
  1160      def set_bigcharset(self, ctx):
  1161          # <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
  1162          char_code = self.char
  1163          count = ctx.peek_code(1)
  1164          ctx.skip_code(2)
  1165          if char_code < 65536:
  1166              block_index = char_code >> 8
  1167              # NB: there are CODESIZE block indices per bytecode
  1168              # a = array.array("B")
  1169              a = []
  1170              # a.fromstring(array.array(CODESIZE == 2 and "H" or "I",
  1171              #         [ctx.peek_code(block_index / CODESIZE)]).tostring())
  1172              a += [ctx.peek_code(block_index // CODESIZE)]
  1173              block = a[block_index % CODESIZE]
  1174              ctx.skip_code(256 / CODESIZE) # skip block indices
  1175              block_value = ctx.peek_code(block * (32 / CODESIZE)
  1176                      + ((char_code & 255) >> (CODESIZE == 2 and 4 or 5)))
  1177              if block_value & (1 << (char_code & ((8 * CODESIZE) - 1))):
  1178                  return self.ok
  1179          else:
  1180              ctx.skip_code(256 / CODESIZE) # skip block indices
  1181          ctx.skip_code(count * (32 / CODESIZE)) # skip blocks
  1182      def unknown(self, ctx):
  1183          return False
  1184  
# Build the set-member dispatch table from OPCODES and the "set_" prefix.
# NOTE(review): build_dispatch_table is defined outside this view; presumably
# it binds every opcode to the method named "set_" + <opcode name> - confirm.
_CharsetDispatcher.build_dispatch_table(OPCODES, "set_")
  1186  
  1187  
  1188  class _AtcodeDispatcher(_Dispatcher):
  1189  
  1190      def at_beginning(self, ctx):
  1191          return ctx.at_beginning()
  1192      at_beginning_string = at_beginning
  1193      def at_beginning_line(self, ctx):
  1194          return ctx.at_beginning() or _is_linebreak(ctx.peek_char(-1))
  1195      def at_end(self, ctx):
  1196          return (ctx.remaining_chars() == 1 and ctx.at_linebreak()) or ctx.at_end()
  1197      def at_end_line(self, ctx):
  1198          return ctx.at_linebreak() or ctx.at_end()
  1199      def at_end_string(self, ctx):
  1200          return ctx.at_end()
  1201      def at_boundary(self, ctx):
  1202          return ctx.at_boundary(_is_word)
  1203      def at_non_boundary(self, ctx):
  1204          return not ctx.at_boundary(_is_word)
  1205      def at_loc_boundary(self, ctx):
  1206          return ctx.at_boundary(_is_loc_word)
  1207      def at_loc_non_boundary(self, ctx):
  1208          return not ctx.at_boundary(_is_loc_word)
  1209      def at_uni_boundary(self, ctx):
  1210          return ctx.at_boundary(_is_uni_word)
  1211      def at_uni_non_boundary(self, ctx):
  1212          return not ctx.at_boundary(_is_uni_word)
  1213      def unknown(self, ctx):
  1214          return False
  1215  
# Build the at-code dispatch table from ATCODES with an empty method prefix.
# NOTE(review): build_dispatch_table is defined outside this view; presumably
# it binds every at-code to the method carrying the code's own name - confirm.
_AtcodeDispatcher.build_dispatch_table(ATCODES, "")
  1217  
  1218  
  1219  class _ChcodeDispatcher(_Dispatcher):
  1220  
  1221      def category_digit(self, ctx):
  1222          return _is_digit(ctx.peek_char())
  1223      def category_not_digit(self, ctx):
  1224          return not _is_digit(ctx.peek_char())
  1225      def category_space(self, ctx):
  1226          return _is_space(ctx.peek_char())
  1227      def category_not_space(self, ctx):
  1228          return not _is_space(ctx.peek_char())
  1229      def category_word(self, ctx):
  1230          return _is_word(ctx.peek_char())
  1231      def category_not_word(self, ctx):
  1232          return not _is_word(ctx.peek_char())
  1233      def category_linebreak(self, ctx):
  1234          return _is_linebreak(ctx.peek_char())
  1235      def category_not_linebreak(self, ctx):
  1236          return not _is_linebreak(ctx.peek_char())
  1237      def category_loc_word(self, ctx):
  1238          return _is_loc_word(ctx.peek_char())
  1239      def category_loc_not_word(self, ctx):
  1240          return not _is_loc_word(ctx.peek_char())
  1241      def category_uni_digit(self, ctx):
  1242          return ctx.peek_char().isdigit()
  1243      def category_uni_not_digit(self, ctx):
  1244          return not ctx.peek_char().isdigit()
  1245      def category_uni_space(self, ctx):
  1246          return ctx.peek_char().isspace()
  1247      def category_uni_not_space(self, ctx):
  1248          return not ctx.peek_char().isspace()
  1249      def category_uni_word(self, ctx):
  1250          return _is_uni_word(ctx.peek_char())
  1251      def category_uni_not_word(self, ctx):
  1252          return not _is_uni_word(ctx.peek_char())
  1253      def category_uni_linebreak(self, ctx):
  1254          return ord(ctx.peek_char()) in _uni_linebreaks
  1255      def category_uni_not_linebreak(self, ctx):
  1256          return ord(ctx.peek_char()) not in _uni_linebreaks
  1257      def unknown(self, ctx):
  1258          return False
  1259  
# Build the category dispatch table from CHCODES with an empty method prefix.
# NOTE(review): build_dispatch_table is defined outside this view; presumably
# it binds every category code to the method carrying the code's own name -
# confirm.
_ChcodeDispatcher.build_dispatch_table(CHCODES, "")
  1261  
  1262  
# Flag table for the 128 ASCII code points, indexed by ord(char).
# Bits tested by the helpers below: 1 = digit (_is_digit), 2 = whitespace
# (_is_space), 16 = word character (_is_word).
# NOTE(review): bits 4 and 8 also occur in the table but are not read in this
# part of the file; presumably they mirror the linebreak / alphanumeric masks
# of _sre.c - confirm.
_ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
0, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 ]
  1270  
  1271  def _is_digit(char):
  1272      code = ord(char)
  1273      return code < 128 and _ascii_char_info[code] & 1
  1274  
  1275  def _is_space(char):
  1276      code = ord(char)
  1277      return code < 128 and _ascii_char_info[code] & 2
  1278  
  1279  def _is_word(char):
  1280      # NB: non-ASCII chars aren't words according to _sre.c
  1281      code = ord(char)
  1282      return code < 128 and _ascii_char_info[code] & 16
  1283  
  1284  def _is_loc_word(char):
  1285      return (not (ord(char) & ~255) and char.isalnum()) or char == '_'
  1286  
  1287  def _is_uni_word(char):
  1288      return unichr(ord(char)).isalnum() or char == '_'
  1289  
  1290  def _is_linebreak(char):
  1291      return char == "\n"
  1292  
# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK.
# The category_uni_linebreak / category_uni_not_linebreak handlers test
# ord(char) for membership in this list.
_uni_linebreaks = [10, 13, 28, 29, 30, 133, 8232, 8233]
  1295  
def _log(message):
    # Debug trace sink. Output is disabled by the constant-false guard below;
    # change the 0 to a true value to print every message.
    if 0:
        print message