gopkg.in/rethinkdb/rethinkdb-go.v6@v6.2.2/internal/gen_tests/parse_polyglot.py (about)

     1  #!/usr/bin/env python
     2  
     3  from __future__ import print_function
     4  
     5  import os, re, sys
     6  
     7  # == globals
     8  
# When true, parseYAML's debug() helper prints a trace of every line it handles.
# The command-line entry point sets this from the -d/--debug option.
printDebug = False
    10  
    11  try:
    12      unicode
    13  except NameError:
    14      unicode = str
    15  
    16  # ==
    17  
    18  class yamlValue(unicode):
    19      linenumber = None
    20      def __new__(cls, value, linenumber=None):
    21          if isinstance(value, unicode):
    22              real = unicode.__new__(cls, value)
    23          else:
    24              real = unicode.__new__(cls, value, "utf-8")
    25          if linenumber is not None:
    26              real.linenumber = int(linenumber)
    27          return real
    28  
    29      def __repr__(self):
    30          real = super(yamlValue, self).__repr__()
    31          return real.lstrip('u')
    32  
    33  def parseYAML(source):
    34  
    35      def debug(message):
    36          if printDebug and message:
    37              message = str(message).rstrip()
    38              if message:
    39                  print(message)
    40                  sys.stdout.flush()
    41  
    42      commentLineRegex = re.compile('^\s*#')
    43      yamlLineRegex = re.compile('^(?P<indent> *)((?P<itemMarker>- +)(?P<itemContent>.*)|((?P<key>[\w\.]+)(?P<keyExtra>: *))?(?P<content>.*))\s*$')
    44  
    45      def parseYAML_inner(source, indent):
    46          returnItem = None
    47  
    48          for linenumber, line in source:
    49              if line == '': # no newline, so EOF
    50                  break
    51  
    52              debug('line %d (%d):%s' % (linenumber, indent, line))
    53  
    54              if line.strip() == '' or commentLineRegex.match(line): # empty or comment line, ignore
    55                  debug('\tempty/comment line')
    56                  continue
    57  
    58              # - parse line
    59  
    60              parsedLine = yamlLineRegex.match(line)
    61              if not parsedLine:
    62                  raise Exception('Unparseable YAML line %d: %s' % (linenumber, line.rstrip()))
    63  
    64              lineIndent = len(parsedLine.group('indent'))
    65              lineItemMarker = parsedLine.group('itemMarker')
    66              lineKey = parsedLine.group('key') or ''
    67              lineKeyExtra = parsedLine.group('keyExtra') or ''
    68              lineContent = (parsedLine.group('content') or parsedLine.group('itemContent') or '').strip()
    69  
    70              # - handle end-of-sections
    71              if lineIndent < indent:
    72                  # we have dropped out of this item, push back the line and return what we have
    73                  source.send((linenumber, line))
    74                  debug('\tout one level')
    75                  return returnItem
    76  
    77              # - array item
    78              if lineItemMarker:
    79                  debug('\tarray item')
    80                  # item in an array
    81                  if returnItem is None:
    82                      debug('\tnew array, indent is %d' % lineIndent)
    83                      returnItem = []
    84                      indent = lineIndent
    85                  elif not isinstance(returnItem, list):
    86                      raise Exception('Bad YAML, got a list item while working on a %s on line %d: %s' % (returnItem.__class__.__name__, linenumber, line.rstrip()))
    87                  indentLevel = lineIndent + len(lineItemMarker)
    88                  source.send((linenumber, (' ' * (indentLevel) )+ lineContent))
    89                  returnItem += [parseYAML_inner(source=source, indent=indent + 1)]
    90  
    91              # - dict item
    92              elif lineKey:
    93                  debug('\tdict item')
    94                  if returnItem is None:
    95                      debug('\tnew dict, indent is %d' % lineIndent)
    96                      # new dict
    97                      returnItem = {}
    98                      indent = lineIndent
    99                  elif not isinstance(returnItem, dict):
   100                      raise Exception('Bad YAML, got a dict value while working on a %s on line %d: %s' % (returnItem.__class__.__name__, linenumber, line.rstrip()))
   101                  indentLevel = lineIndent + len(lineKey) + len(lineKeyExtra)
   102                  source.send((linenumber, (' ' * indentLevel) + lineContent))
   103                  returnItem[lineKey] = parseYAML_inner(source=source, indent=indent + 1)
   104  
   105              # - data - one or more lines of text
   106              else:
   107                  debug('\tvalue')
   108                  if returnItem is None:
   109                      returnItem = yamlValue('', linenumber)
   110                      if lineContent.strip() in ('|', '|-', '>'):
   111                          continue # yaml multiline marker
   112                  elif not isinstance(returnItem, yamlValue):
   113                      raise Exception('Bad YAML, got a value while working on a %s on line %d: %s' % (returnItem.__class__.__name__, linenumber, line.rstrip()))
   114                  if returnItem:
   115                      returnItem = yamlValue(returnItem + "\n" + lineContent, returnItem.linenumber) # str subclasses are not fun
   116                  else:
   117                      returnItem = yamlValue(lineContent, linenumber)
   118          return returnItem
   119  
   120      def parseYAML_generator(source):
   121          if hasattr(source, 'capitalize'):
   122              if os.path.isfile(source):
   123                  source = open(source, 'r')
   124              else:
   125                  source = source.splitlines(True)
   126          elif hasattr(source, 'readlines'):
   127              pass # the for loop will already work
   128  
   129          backlines = []
   130          for linenumber, line in enumerate(source):
   131              backline = None
   132              usedLine = False
   133              while usedLine is False or backlines:
   134                  if backlines:
   135                      backline = yield backlines.pop()
   136                  else:
   137                      usedLine = True
   138                      backline = yield (linenumber + 1, line)
   139                  while backline: # loops returning None for every send()
   140                      assert isinstance(backline, tuple)
   141                      assert isinstance(backline[0], int)
   142                      backlines.append(backline)
   143                      backline = yield None
   144  
   145      return parseYAML_inner(parseYAML_generator(source), indent=0)
   146  
   147  if __name__ == '__main__':
   148      import optparse, pprint
   149  
   150      parser = optparse.OptionParser()
   151      parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False, help="print debug information")
   152      (options, args) = parser.parse_args()
   153      printDebug = options.debug
   154  
   155      if len(args) < 1:
   156         parser.error('%s needs files to process' % os.path.basename(__file__))
   157  
   158      for filePath in args:
   159          if not os.path.isfile(filePath):
   160              sys.exit('target is not an existing file: %s' % os.path.basename(__file__))
   161  
   162      for filePath in args:
   163          print('=== %s' % filePath)
   164          pprint.pprint(parseYAML(filePath))