#!/usr/bin/env python
"""Small indentation-based parser for a subset of YAML.

Supported constructs are nested mappings (``key: value``), lists
(``- item``) and text values, including multi-line text introduced by one
of the block markers ``|``, ``|-`` or ``>``.  Every text value is returned
as a ``yamlValue`` that remembers the line number it started on.

Run as a script, it pretty-prints the parsed form of each file named on the
command line.
"""

from __future__ import print_function

import os
import re
import sys

# == globals

# When true, the parser prints a trace of every line it handles.
printDebug = False

try:
    unicode
except NameError:
    unicode = str

# Raw strings keep the regex escapes (\s, \w, \.) away from Python's own
# string-escape processing.
_COMMENT_LINE_RE = re.compile(r'^\s*#')
_YAML_LINE_RE = re.compile(
    r'^(?P<indent> *)'
    r'('
    r'(?P<itemMarker>- +)(?P<itemContent>.*)'
    r'|'
    r'((?P<key>[\w\.]+)(?P<keyExtra>: *))?(?P<content>.*)'
    r')\s*$'
)

# A value consisting solely of one of these starts a multi-line text block.
_MULTILINE_MARKERS = ('|', '|-', '>')

# ==


class yamlValue(unicode):
    """Text value that records the source line number it was read from."""

    # Line number of the first line of the value, or None when not supplied.
    linenumber = None

    def __new__(cls, value, linenumber=None):
        """Build the value; anything that is not already text is decoded as UTF-8."""
        if isinstance(value, unicode):
            real = unicode.__new__(cls, value)
        else:
            real = unicode.__new__(cls, value, "utf-8")
        if linenumber is not None:
            real.linenumber = int(linenumber)
        return real

    def __repr__(self):
        """Return the plain text repr without a leading ``u`` prefix."""
        real = super(yamlValue, self).__repr__()
        return real.lstrip('u')


def _debug(message):
    """Print a trace message when the module-level ``printDebug`` flag is set."""
    if printDebug and message:
        message = str(message).rstrip()
        if message:
            print(message)
            sys.stdout.flush()


def _badYAML(got, current, linenumber, line):
    """Build the exception raised when a line does not fit the item being built."""
    return Exception(
        'Bad YAML, got a %s while working on a %s on line %d: %s'
        % (got, current.__class__.__name__, linenumber, line.rstrip())
    )


def _loadLines(source):
    """Turn ``source`` into an iterable of lines.

    A string naming an existing file is read and the file is closed again
    before parsing starts; any other string is treated as the YAML text
    itself.  Everything else (open files, lists of lines, ...) is returned
    untouched and iterated as-is, so the caller keeps ownership of it.
    """
    if hasattr(source, 'capitalize'):  # duck-typed "is a string"
        if os.path.isfile(source):
            with open(source, 'r') as handle:
                return handle.readlines()
        return source.splitlines(True)
    return source


def _lineFeed(lines):
    """Yield ``(linenumber, line)`` pairs, with support for pushing lines back.

    A consumer may ``send()`` a ``(linenumber, line)`` tuple into the
    generator; that tuple is handed out again by the next ordinary
    iteration step, before any new input line.  Each ``send()`` call itself
    receives ``None``.
    """
    backlines = []
    for index, line in enumerate(lines):
        usedLine = False
        # Stay on this input line until it has been handed out and every
        # pushed-back line has been consumed again.
        while not usedLine or backlines:
            if backlines:
                backline = yield backlines.pop()
            else:
                usedLine = True
                backline = yield (index + 1, line)
            while backline:  # answer every send() with None
                assert isinstance(backline, tuple)
                assert isinstance(backline[0], int)
                backlines.append(backline)
                backline = yield None


def _parseLevel(source, indent):
    """Parse one nesting level from ``source`` and return the item found.

    ``source`` is a ``_lineFeed`` generator and ``indent`` the minimum
    indentation a line must have to belong to this level.  The result is a
    list, a dict, a ``yamlValue``, or ``None`` when the level holds no
    content.

    Raises ``Exception`` for a line that cannot be parsed or that mixes
    item kinds (for example a list item inside a mapping).
    """
    returnItem = None

    for linenumber, line in source:
        if line == '':  # no newline, so EOF
            break

        _debug('line %d (%d):%s' % (linenumber, indent, line))

        if line.strip() == '' or _COMMENT_LINE_RE.match(line):
            _debug('\tempty/comment line')
            continue

        # - parse line

        parsedLine = _YAML_LINE_RE.match(line)
        if not parsedLine:
            raise Exception('Unparseable YAML line %d: %s' % (linenumber, line.rstrip()))

        lineIndent = len(parsedLine.group('indent'))
        lineItemMarker = parsedLine.group('itemMarker')
        lineKey = parsedLine.group('key') or ''
        lineKeyExtra = parsedLine.group('keyExtra') or ''
        lineContent = (parsedLine.group('content') or parsedLine.group('itemContent') or '').strip()

        # - handle end-of-sections
        if lineIndent < indent:
            # The line belongs to an enclosing level: push it back for the
            # caller and return what has been collected here.
            source.send((linenumber, line))
            _debug('\tout one level')
            return returnItem

        # - array item
        if lineItemMarker:
            _debug('\tarray item')
            if returnItem is None:
                _debug('\tnew array, indent is %d' % lineIndent)
                returnItem = []
                indent = lineIndent
            elif not isinstance(returnItem, list):
                raise _badYAML('list item', returnItem, linenumber, line)
            # Re-queue the text after the "- " marker, padded to its original
            # column, so the recursive call parses it as the item's content.
            indentLevel = lineIndent + len(lineItemMarker)
            source.send((linenumber, (' ' * indentLevel) + lineContent))
            returnItem.append(_parseLevel(source=source, indent=indent + 1))

        # - dict item
        elif lineKey:
            _debug('\tdict item')
            if returnItem is None:
                _debug('\tnew dict, indent is %d' % lineIndent)
                returnItem = {}
                indent = lineIndent
            elif not isinstance(returnItem, dict):
                raise _badYAML('dict value', returnItem, linenumber, line)
            # Same trick as for list items: the text after "key: " is
            # re-queued at its original column and parsed recursively.
            indentLevel = lineIndent + len(lineKey) + len(lineKeyExtra)
            source.send((linenumber, (' ' * indentLevel) + lineContent))
            returnItem[lineKey] = _parseLevel(source=source, indent=indent + 1)

        # - data - one or more lines of text
        else:
            _debug('\tvalue')
            if returnItem is None:
                returnItem = yamlValue('', linenumber)
                if lineContent.strip() in _MULTILINE_MARKERS:
                    continue  # yaml multiline marker, the text follows
            elif not isinstance(returnItem, yamlValue):
                raise _badYAML('value', returnItem, linenumber, line)
            if returnItem:
                # str subclasses are immutable: rebuild, keeping the first line number
                returnItem = yamlValue(returnItem + "\n" + lineContent, returnItem.linenumber)
            else:
                returnItem = yamlValue(lineContent, linenumber)
    return returnItem


def parseYAML(source):
    """Parse ``source`` and return the resulting dict, list, yamlValue or None.

    ``source`` may be the path of an existing file, a string holding the
    YAML text, or any iterable of lines such as an open file.  ``None`` is
    returned when the input contains nothing but blank and comment lines.

    Raises ``Exception`` when a line cannot be parsed or the structure is
    inconsistent.
    """
    return _parseLevel(_lineFeed(_loadLines(source)), indent=0)


if __name__ == '__main__':
    import optparse
    import pprint

    parser = optparse.OptionParser()
    parser.add_option("-d", "--debug", dest="debug", action="store_true", default=False, help="print debug information")
    (options, args) = parser.parse_args()
    printDebug = options.debug

    if len(args) < 1:
        parser.error('%s needs files to process' % os.path.basename(__file__))

    # Validate every path up front so nothing is printed for a bad invocation.
    for filePath in args:
        if not os.path.isfile(filePath):
            # Report the path that is missing, not the name of this script.
            sys.exit('target is not an existing file: %s' % filePath)

    for filePath in args:
        print('=== %s' % filePath)
        pprint.pprint(parseYAML(filePath))