github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/sre_parse.py (about) 1 # 2 # Secret Labs' Regular Expression Engine 3 # 4 # convert re-style regular expression to sre pattern 5 # 6 # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. 7 # 8 # See the sre.py file for information on usage and redistribution. 9 # 10 11 """Internal support module for sre""" 12 13 # XXX: show string offset and offending character for all errors 14 15 import sys 16 17 # from sre_constants import * 18 import sre_constants 19 for name in sre_constants.__all__: 20 globals()[name] = getattr(sre_constants, name) 21 22 SPECIAL_CHARS = ".\\[{()*+?^$|" 23 REPEAT_CHARS = "*+?{" 24 25 DIGITS = set("0123456789") 26 27 OCTDIGITS = set("01234567") 28 HEXDIGITS = set("0123456789abcdefABCDEF") 29 30 WHITESPACE = set(" \t\n\r\v\f") 31 32 ESCAPES = { 33 r"\a": (LITERAL, ord("\a")), 34 r"\b": (LITERAL, ord("\b")), 35 r"\f": (LITERAL, ord("\f")), 36 r"\n": (LITERAL, ord("\n")), 37 r"\r": (LITERAL, ord("\r")), 38 r"\t": (LITERAL, ord("\t")), 39 r"\v": (LITERAL, ord("\v")), 40 r"\\": (LITERAL, ord("\\")) 41 } 42 43 CATEGORIES = { 44 r"\A": (AT, AT_BEGINNING_STRING), # start of string 45 r"\b": (AT, AT_BOUNDARY), 46 r"\B": (AT, AT_NON_BOUNDARY), 47 r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), 48 r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), 49 r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), 50 r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), 51 r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), 52 r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), 53 r"\Z": (AT, AT_END_STRING), # end of string 54 } 55 56 FLAGS = { 57 # standard flags 58 "i": SRE_FLAG_IGNORECASE, 59 "L": SRE_FLAG_LOCALE, 60 "m": SRE_FLAG_MULTILINE, 61 "s": SRE_FLAG_DOTALL, 62 "x": SRE_FLAG_VERBOSE, 63 # extensions 64 "t": SRE_FLAG_TEMPLATE, 65 "u": SRE_FLAG_UNICODE, 66 } 67 68 class Pattern(object): 69 # master pattern object. 
keeps track of global attributes 70 def __init__(self): 71 self.flags = 0 72 self.open = [] 73 self.groups = 1 74 self.groupdict = {} 75 self.lookbehind = 0 76 77 def opengroup(self, name=None): 78 gid = self.groups 79 self.groups = gid + 1 80 if name is not None: 81 ogid = self.groupdict.get(name, None) 82 if ogid is not None: 83 raise error, ("redefinition of group name %s as group %d; " 84 "was group %d" % (repr(name), gid, ogid)) 85 self.groupdict[name] = gid 86 self.open.append(gid) 87 return gid 88 def closegroup(self, gid): 89 # self.open.remove(gid) 90 self.open = [x for x in self.open if x != gid] 91 def checkgroup(self, gid): 92 return gid < self.groups and gid not in self.open 93 94 class SubPattern(object): 95 # a subpattern, in intermediate form 96 def __init__(self, pattern, data=None): 97 self.pattern = pattern 98 if data is None: 99 data = [] 100 self.data = data 101 self.width = None 102 def dump(self, level=0): 103 seqtypes = (tuple, list) 104 for op, av in self.data: 105 print level*" " + op, 106 if op == IN: 107 # member sublanguage 108 print 109 for op, a in av: 110 print (level+1)*" " + op, a 111 elif op == BRANCH: 112 print 113 for i, a in enumerate(av[1]): 114 if i: 115 print level*" " + "or" 116 a.dump(level+1) 117 elif op == GROUPREF_EXISTS: 118 condgroup, item_yes, item_no = av 119 print condgroup 120 item_yes.dump(level+1) 121 if item_no: 122 print level*" " + "else" 123 item_no.dump(level+1) 124 elif isinstance(av, seqtypes): 125 nl = 0 126 for a in av: 127 if isinstance(a, SubPattern): 128 if not nl: 129 print 130 a.dump(level+1) 131 nl = 1 132 else: 133 print a, 134 nl = 0 135 if not nl: 136 print 137 else: 138 print av 139 def __repr__(self): 140 return repr(self.data) 141 def __len__(self): 142 return len(self.data) 143 def __delitem__(self, index): 144 # del self.data[index] 145 self.data = self.data[:index] + self.data[index+1:] 146 def __getitem__(self, index): 147 if isinstance(index, slice): 148 return SubPattern(self.pattern, 
self.data[index]) 149 return self.data[index] 150 def __setitem__(self, index, code): 151 self.data[index] = code 152 def insert(self, index, code): 153 self.data.insert(index, code) 154 def append(self, code): 155 self.data.append(code) 156 def getwidth(self): 157 # determine the width (min, max) for this subpattern 158 if self.width: 159 return self.width 160 lo = hi = 0 161 UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY) 162 REPEATCODES = (MIN_REPEAT, MAX_REPEAT) 163 for op, av in self.data: 164 if op is BRANCH: 165 i = MAXREPEAT - 1 166 j = 0 167 for av in av[1]: 168 l, h = av.getwidth() 169 i = min(i, l) 170 j = max(j, h) 171 lo = lo + i 172 hi = hi + j 173 elif op is CALL: 174 i, j = av.getwidth() 175 lo = lo + i 176 hi = hi + j 177 elif op is SUBPATTERN: 178 i, j = av[1].getwidth() 179 lo = lo + i 180 hi = hi + j 181 elif op in REPEATCODES: 182 i, j = av[2].getwidth() 183 lo = lo + i * av[0] 184 hi = hi + j * av[1] 185 elif op in UNITCODES: 186 lo = lo + 1 187 hi = hi + 1 188 elif op == SUCCESS: 189 break 190 self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) 191 return self.width 192 193 class Tokenizer(object): 194 def __init__(self, string): 195 self.string = string 196 self.index = 0 197 self.__next() 198 def __next(self): 199 if self.index >= len(self.string): 200 self.next = None 201 return 202 char = self.string[self.index] 203 if char[0] == "\\": 204 try: 205 c = self.string[self.index + 1] 206 except IndexError: 207 raise error, "bogus escape (end of line)" 208 char = char + c 209 self.index = self.index + len(char) 210 self.next = char 211 def match(self, char, skip=1): 212 if char == self.next: 213 if skip: 214 self.__next() 215 return 1 216 return 0 217 def get(self): 218 this = self.next 219 self.__next() 220 return this 221 def tell(self): 222 return self.index, self.next 223 def seek(self, index): 224 self.index, self.next = index 225 226 def isident(char): 227 return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" 

def isdigit(char):
    # true for ASCII decimal digits
    return "0" <= char <= "9"

def isname(name):
    # check that group name is a valid string
    # (callers are expected to reject the empty string beforehand)
    if not isident(name[0]):
        return False
    for char in name[1:]:
        if not isident(char) and not isdigit(char):
            return False
    return True

def _class_escape(source, escape):
    # handle escape code inside character class
    # returns an (op, av) item; raises error for an unusable escape
    code = ESCAPES.get(escape)
    if code:
        return code
    code = CATEGORIES.get(escape)
    if code and code[0] == IN:
        # only category sets are meaningful here; position escapes
        # fall through to the generic handling below
        return code
    try:
        c = escape[1:2]
        if c == "x":
            # hexadecimal escape (exactly two digits)
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            escape = escape[2:]
            if len(escape) != 2:
                # put back the "\x" that was stripped above so that the
                # message shows the escape as it was written
                raise error("bogus escape: %s" % repr("\\x" + escape))
            return LITERAL, int(escape, 16) & 0xff
        elif c in OCTDIGITS:
            # octal escape (up to three digits)
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            escape = escape[1:]
            return LITERAL, int(escape, 8) & 0xff
        elif c in DIGITS:
            raise error("bogus escape: %s" % repr(escape))
        if len(escape) == 2:
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))

def _escape(source, escape, state):
    # handle escape code in expression
    # returns an (op, av) item; raises error for an unusable escape
    code = CATEGORIES.get(escape)
    if code:
        return code
    code = ESCAPES.get(escape)
    if code:
        return code
    try:
        c = escape[1:2]
        if c == "x":
            # hexadecimal escape
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            if len(escape) != 4:
                raise ValueError
            return LITERAL, int(escape[2:], 16) & 0xff
        elif c == "0":
            # octal escape
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return LITERAL, int(escape[1:], 8) & 0xff
        elif c in DIGITS:
            # octal escape *or* decimal group reference (sigh)
            if source.next in DIGITS:
                escape = escape + source.get()
                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
                    source.next in OCTDIGITS):
                    # got three octal digits; this is an octal escape
                    escape = escape + source.get()
                    return LITERAL, int(escape[1:], 8) & 0xff
            # not an octal escape, so this is a group reference
            group = int(escape[1:])
            if group < state.groups:
                if not state.checkgroup(group):
                    raise error("cannot refer to open group")
                # if state.lookbehind:
                #     import warnings
                #     warnings.warn('group references in lookbehind '
                #                   'assertions are not supported',
                #                   RuntimeWarning)
                return GROUPREF, group
            raise ValueError
        if len(escape) == 2:
            return LITERAL, ord(escape[1])
    except ValueError:
        # reported as a bogus escape below
        pass
    raise error("bogus escape: %s" % repr(escape))

def _parse_sub(source, state, nested=1):
    # parse an alternation: a|b|c
    # when nested is true the alternation must be followed by ")" or by
    # the end of the pattern; the ")" itself is left for the caller

    items = []
    itemsappend = items.append
    sourcematch = source.match
    while 1:
        itemsappend(_parse(source, state))
        if sourcematch("|"):
            continue
        if not nested:
            break
        if not source.next or sourcematch(")", 0):
            break
        else:
            raise error("pattern not properly closed")

    if len(items) == 1:
        return items[0]

    subpattern = SubPattern(state)
    subpatternappend = subpattern.append

    # check if all items share a common prefix
    while 1:
        prefix, common = None, False
        for item in items:
            if not item:
                break
            if prefix is None:
                prefix = item[0]
            elif item[0] != prefix:
                break
        else:
            # all subitems start with a common "prefix".
            # move it out of the branch
            # for item in items:
            #     print "del", item[0], items
            #     del item[0]
            for i in range(len(items)):
                items[i] = items[i][1:]
            subpatternappend(prefix)
            # continue # check next one
            common = True
        if common:
            continue
        break

    # check if the branch can be replaced by a character set
    for item in items:
        if len(item) != 1 or item[0][0] != LITERAL:
            break
    else:
        # we can store this as a character set instead of a
        # branch (the compiler may optimize this even more)
        set = []
        setappend = set.append
        for item in items:
            setappend(item[0])
        subpatternappend((IN, set))
        return subpattern

    subpattern.append((BRANCH, (None, items)))
    return subpattern

def _parse_sub_cond(source, state, condgroup):
    # parse the body of a conditional group: one item, optionally
    # followed by "|" and a second item.  the closing ")" is left for
    # the caller.
    item_yes = _parse(source, state)
    if source.match("|"):
        item_no = _parse(source, state)
        if source.match("|"):
            raise error("conditional backref with more than two branches")
    else:
        item_no = None
    if source.next and not source.match(")", 0):
        raise error("pattern not properly closed")
    subpattern = SubPattern(state)
    subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
    return subpattern

_PATTERNENDERS = set("|)")
_ASSERTCHARS = set("=!<")
_LOOKBEHINDASSERTCHARS = set("=!")
_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])

def _parse(source, state):
    # parse a simple pattern
    # consumes tokens up to (not including) "|" or ")", or to the end of
    # the input, and returns them as a SubPattern
    subpattern = SubPattern(state)

    # precompute constants into local variables
    subpatternappend = subpattern.append
    sourceget = source.get
    sourcematch = source.match
    _len = len
    PATTERNENDERS = _PATTERNENDERS
    ASSERTCHARS = _ASSERTCHARS
    LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS
    REPEATCODES = _REPEATCODES

    while 1:

        if source.next in PATTERNENDERS:
            break # end of subpattern
        this = sourceget()
        if this is None:
            break # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                while 1:
                    this = sourceget()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            subpatternappend((LITERAL, ord(this)))

        elif this == "[":
            # character set
            set = []
            setappend = set.append
##          if sourcematch(":"):
##              pass # handle character classes
            if sourcematch("^"):
                setappend((NEGATE, None))
            # check remaining characters
            # ("]" only closes the set once something has been added
            # after the optional "^")
            start = set[:]
            while 1:
                this = sourceget()
                if this == "]" and set != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = LITERAL, ord(this)
                else:
                    raise error("unexpected end of regular expression")
                if sourcematch("-"):
                    # potential range
                    this = sourceget()
                    if this == "]":
                        # trailing "-" is a literal dash
                        if code1[0] is IN:
                            code1 = code1[1][0]
                        setappend(code1)
                        setappend((LITERAL, ord("-")))
                        break
                    elif this:
                        if this[0] == "\\":
                            code2 = _class_escape(source, this)
                        else:
                            code2 = LITERAL, ord(this)
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise error("bad character range")
                        lo = code1[1]
                        hi = code2[1]
                        if hi < lo:
                            raise error("bad character range")
                        setappend((RANGE, (lo, hi)))
                    else:
                        raise error("unexpected end of regular expression")
                else:
                    if code1[0] is IN:
                        code1 = code1[1][0]
                    setappend(code1)

            # XXX: <fl> should move set optimization to compiler!
            if _len(set)==1 and set[0][0] is LITERAL:
                subpatternappend(set[0]) # optimization
            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
            else:
                # XXX: <fl> should add charmap optimization here
                subpatternappend((IN, set))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                min, max = 0, 1
            elif this == "*":
                min, max = 0, MAXREPEAT

            elif this == "+":
                min, max = 1, MAXREPEAT
            elif this == "{":
                if source.next == "}":
                    subpatternappend((LITERAL, ord(this)))
                    continue
                here = source.tell()
                min, max = 0, MAXREPEAT
                lo = hi = ""
                while source.next in DIGITS:
                    lo = lo + source.get()
                if sourcematch(","):
                    while source.next in DIGITS:
                        hi = hi + sourceget()
                else:
                    hi = lo
                if not sourcematch("}"):
                    # not a repeat interval after all: treat "{" as a
                    # literal and rescan what followed it
                    subpatternappend((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                if lo:
                    min = int(lo)
                    if min >= MAXREPEAT:
                        raise OverflowError("the repetition number is too large")
                if hi:
                    max = int(hi)
                    if max >= MAXREPEAT:
                        raise OverflowError("the repetition number is too large")
                    if max < min:
                        raise error("bad repeat interval")
            else:
                raise error("not supported")
            # figure out which item to repeat
            if subpattern:
                item = subpattern[-1:]
            else:
                item = None
            if not item or (_len(item) == 1 and item[0][0] == AT):
                raise error("nothing to repeat")
            if item[0][0] in REPEATCODES:
                raise error("multiple repeat")
            if sourcematch("?"):
                subpattern[-1] = (MIN_REPEAT, (min, max, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min, max, item))

        elif this == ".":
            subpatternappend((ANY, None))

        elif this == "(":
            # group: 1 = capturing, 2 = anonymous, 0 = no group contents
            group = 1
            name = None
            condgroup = None
            if sourcematch("?"):
                group = 0
                # options
                if sourcematch("P"):
                    # python extensions
                    if sourcematch("<"):
                        # named group: skip forward to end of name
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error("unterminated name")
                            if char == ">":
                                break
                            name = name + char
                        group = 1
                        if not name:
                            raise error("missing group name")
                        if not isname(name):
                            raise error("bad character in group name %r" %
                                        name)
                    elif sourcematch("="):
                        # named backreference
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error("unterminated name")
                            if char == ")":
                                break
                            name = name + char
                        if not name:
                            raise error("missing group name")
                        if not isname(name):
                            raise error("bad character in backref group name "
                                        "%r" % name)
                        gid = state.groupdict.get(name)
                        if gid is None:
                            # msg = "unknown group name: {0!r}".format(name)
                            msg = "unknown group name: %s" % (name)
                            raise error(msg)
                        # if state.lookbehind:
                        #     import warnings
                        #     warnings.warn('group references in lookbehind '
                        #                   'assertions are not supported',
                        #                   RuntimeWarning)
                        subpatternappend((GROUPREF, gid))
                        continue
                    else:
                        char = sourceget()
                        if char is None:
                            raise error("unexpected end of pattern")
                        raise error("unknown specifier: ?P%s" % char)
                elif sourcematch(":"):
                    # non-capturing group
                    group = 2
                elif sourcematch("#"):
                    # comment
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        sourceget()
                    if not sourcematch(")"):
                        raise error("unbalanced parenthesis")
                    continue
                elif source.next in ASSERTCHARS:
                    # lookahead assertions
                    char = sourceget()
                    dir = 1
                    if char == "<":
                        if source.next not in LOOKBEHINDASSERTCHARS:
                            raise error("syntax error")
                        dir = -1 # lookbehind
                        char = sourceget()
                        state.lookbehind += 1
                    p = _parse_sub(source, state)
                    if dir < 0:
                        state.lookbehind -= 1
                    if not sourcematch(")"):
                        raise error("unbalanced parenthesis")
                    if char == "=":
                        subpatternappend((ASSERT, (dir, p)))
                    else:
                        subpatternappend((ASSERT_NOT, (dir, p)))
                    continue
                elif sourcematch("("):
                    # conditional backreference group
                    condname = ""
                    while 1:
                        char = sourceget()
                        if char is None:
                            raise error("unterminated name")
                        if char == ")":
                            break
                        condname = condname + char
                    group = 2
                    if not condname:
                        raise error("missing group name")
                    if isname(condname):
                        condgroup = state.groupdict.get(condname)
                        if condgroup is None:
                            # msg = "unknown group name: {0!r}".format(condname)
                            msg = "unknown group name: %s" % (condname)
                            raise error(msg)
                    else:
                        try:
                            condgroup = int(condname)
                        except ValueError:
                            raise error("bad character in group name")
                    # if state.lookbehind:
                    #     import warnings
                    #     warnings.warn('group references in lookbehind '
                    #                   'assertions are not supported',
                    #                   RuntimeWarning)
                else:
                    # flags
                    if not source.next in FLAGS:
                        raise error("unexpected end of pattern")
                    while source.next in FLAGS:
                        state.flags = state.flags | FLAGS[sourceget()]
            if group:
                # parse group contents
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.opengroup(name)
                if condgroup:
                    p = _parse_sub_cond(source, state, condgroup)
                else:
                    p = _parse_sub(source, state)
                if not sourcematch(")"):
                    raise error("unbalanced parenthesis")
                if group is not None:
                    state.closegroup(group)
                subpatternappend((SUBPATTERN, (group, p)))
            else:
                # inline flags: the next token must be the closing ")"
                while 1:
                    char = sourceget()
                    if char is None:
                        raise error("unexpected end of pattern")
                    if char == ")":
                        break
                    raise error("unknown extension")

        elif this == "^":
            subpatternappend((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            code = _escape(source, this, state)
            subpatternappend(code)

        else:
            raise error("parser error")

    return subpattern

def parse(str, flags=0, pattern=None):
    # parse 're' pattern into list of (opcode, argument) tuples
    # pattern is the state object to use; a fresh Pattern is created
    # when it is omitted

    source = Tokenizer(str)

    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    pattern.str = str

    p = _parse_sub(source, pattern, 0)

    tail = source.get()
    if tail == ")":
        raise error("unbalanced parenthesis")
    elif tail:
        raise error("bogus characters at end of regular expression")

    if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE:
        # the VERBOSE flag was switched on inside the pattern. to be
        # on the safe side, we'll parse the whole thing again...
        return parse(str, p.pattern.flags)

    if flags & SRE_FLAG_DEBUG:
        p.dump()

    return p

def parse_template(source, pattern):
    # parse 're' replacement string into list of literals and
    # group references
    # returns (groups, literals): groups is a list of (slot, group)
    # pairs and literals is a list with None in every group slot
    s = Tokenizer(source)
    sget = s.get
    p = []
    a = p.append
    def literal(literal, p=p, pappend=a):
        # append literal text, merging it into a preceding literal
        if p and p[-1][0] is LITERAL:
            p[-1] = LITERAL, p[-1][1] + literal
        else:
            pappend((LITERAL, literal))
    sep = source[:0]
    # build characters of the same string type as the template
    if type(sep) is type(""):
        makechar = chr
    else:
        makechar = unichr
    while 1:
        this = sget()
        if this is None:
            break # end of replacement string
        if this and this[0] == "\\":
            # group
            c = this[1:2]
            if c == "g":
                name = ""
                if s.match("<"):
                    while 1:
                        char = sget()
                        if char is None:
                            raise error("unterminated group name")
                        if char == ">":
                            break
                        name = name + char
                if not name:
                    raise error("missing group name")
                try:
                    index = int(name)
                    if index < 0:
                        raise error("negative group number")
                except ValueError:
                    # not a number, so it has to be a group name
                    if not isname(name):
                        raise error("bad character in group name")
                    try:
                        index = pattern.groupindex[name]
                    except KeyError:
                        # msg = "unknown group name: {0!r}".format(name)
                        msg = "unknown group name: %s" % (name)
                        raise IndexError(msg)
                a((MARK, index))
            elif c == "0":
                # octal escape, up to two more digits
                if s.next in OCTDIGITS:
                    this = this + sget()
                    if s.next in OCTDIGITS:
                        this = this + sget()
                literal(makechar(int(this[1:], 8) & 0xff))
            elif c in DIGITS:
                # three octal digits form a character; anything else is
                # a one- or two-digit group reference
                isoctal = False
                if s.next in DIGITS:
                    this = this + sget()
                    if (c in OCTDIGITS and this[2] in OCTDIGITS and
                        s.next in OCTDIGITS):
                        this = this + sget()
                        isoctal = True
                        literal(makechar(int(this[1:], 8) & 0xff))
                if not isoctal:
                    a((MARK, int(this[1:])))
            else:
                # known escapes become their character; unknown ones
                # are kept verbatim, backslash included
                try:
                    this = makechar(ESCAPES[this][1])
                except KeyError:
                    pass
                literal(this)
        else:
            literal(this)
    # convert template to groups and literals lists
    i = 0
    groups = []
    groupsappend = groups.append
    literals = [None] * len(p)
    for c, s in p:
        if c is MARK:
            groupsappend((i, s))
            # literal[i] is already None
        else:
            literals[i] = s
        i = i + 1
    return groups, literals

def expand_template(template, match):
    # fill the group slots of a parsed template with the text of the
    # corresponding groups of match and join everything into one string
    g = match.group
    sep = match.string[:0]
    groups, literals = template
    # work on a copy so the parsed template can be reused
    literals = literals[:]
    try:
        for index, group in groups:
            literals[index] = s = g(group)
            if s is None:
                raise error("unmatched group")
    except IndexError:
        raise error("invalid group reference")
    return sep.join(literals)