# Origin: github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/string.py
"""A collection of string operations (most are no longer used).

Warning: most of the code you see here isn't normally used nowadays.
Beginning with Python 1.6, many of these functions are implemented as
methods on the standard string object. They used to be implemented by
a built-in module called strop, but strop is now obsolete itself.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# The backslash is written as an explicit escape ("\\") so the literal does
# not rely on the unrecognized escape sequence "\]"; the value is unchanged.
punctuation = """!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace

# Case conversion helpers
# Use str to convert Unicode literal in case of -U
# TODO: use map once implemented
# _idmap = str('').join(map(chr, range(256)))
# A generator expression is used (rather than a temporary list) so that no
# helper names are left behind in the module namespace.
_idmap = str('').join(chr(code) for code in range(256))

# Functions which aren't
# available as string methods.

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If the optional second argument sep is absent or None,
    runs of whitespace characters are replaced by a single space
    and leading and trailing whitespace are removed, otherwise
    sep is used to split and join the words.

    """
    return (sep or ' ').join(x.capitalize() for x in s.split(sep))


# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Raises ValueError if the two arguments differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    # Build the list form of the identity table lazily, once, and reuse it.
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity table is never mutated.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)



####################################################################
import re as _re

class _multimap(object):
    """Helper class for combining multiple mappings.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments.  Lookups try the primary mapping first and fall back to
    the secondary mapping only when the primary raises KeyError.
    """
    def __init__(self, primary, secondary):
        self._primary = primary
        self._secondary = secondary

    def __getitem__(self, key):
        try:
            return self._primary[key]
        except KeyError:
            return self._secondary[key]


class _TemplateMetaclass(type):
    """Metaclass that compiles the substitution regex for Template classes.

    A class that defines its own 'pattern' attribute has that pattern
    compiled as-is; otherwise the default pattern below is filled in with
    the class's (escaped) delimiter and its idpattern.
    """
    # The original formulation used named %-substitution:
    #
    # pattern = r"""
    # %(delim)s(?:
    #   (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
    #   (?P<named>%(id)s)      |   # delimiter and a Python identifier
    #   {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
    #   (?P<invalid>)              # Other ill-formed delimiter exprs
    # )
    # """
    #
    # The positional form below is filled with
    # (delimiter, delimiter, idpattern, idpattern), in that order.
    pattern = r"""
    %s(?:
      (?P<escaped>%s) |   # Escape sequence of two delimiters
      (?P<named>%s)      |   # delimiter and a Python identifier
      {(?P<braced>%s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        # super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        # NOTE(review): the call above is commented out in this port and the
        # bare expression below has no effect -- presumably a runtime
        # limitation; confirm before restoring the real __init__ call.
        super(_TemplateMetaclass, cls)
        if 'pattern' in dct:
            pattern = cls.pattern
        else:
            # pattern = _TemplateMetaclass.pattern % {
            #     'delim' : _re.escape(cls.delimiter),
            #     'id'    : cls.idpattern,
            #     }
            cls_delim, cls_id = _re.escape(cls.delimiter), cls.idpattern
            pattern = _TemplateMetaclass.pattern % (cls_delim, cls_delim, cls_id, cls_id)
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)


def _bind_template_call(method_name, args, kws):
    """Split a Template method's raw (*args, **kws) into (self, mapping).

    The substitute methods take *args so that "self" may also be used as a
    keyword placeholder name.  args[0] is the Template instance; at most one
    further positional argument (a mapping) is accepted.  When both a mapping
    and keywords are given, keywords take precedence.

    Raises TypeError when no instance is supplied or when more than one
    positional mapping is passed.
    """
    if not args:
        raise TypeError("descriptor '%s' of 'Template' object "
                        "needs an argument" % method_name)
    self, args = args[0], args[1:]  # allow the "self" keyword be passed
    if len(args) > 1:
        raise TypeError('Too many positional arguments')
    if not args:
        mapping = kws
    elif kws:
        mapping = _multimap(kws, args[0])
    else:
        mapping = args[0]
    return self, mapping


class Template(object):
    """A string class for supporting $-substitutions."""
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template, *arg):
        # Extra positional arguments are accepted and ignored.
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError giving the line and column of a bad placeholder."""
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column is the offset past the end of all preceding full lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional mapping argument and/or keyword
        arguments.  Raises KeyError for a missing placeholder and
        ValueError for an ill-formed one.
        """
        self, mapping = _bind_template_call('substitute', args, kws)
        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                val = mapping[named]
                # We use this idiom instead of str() because the latter will
                # fail if val is a Unicode containing non-ASCII characters.
                return '%s' % (val,)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolvable placeholders untouched.

        Missing keys and ill-formed placeholders are copied to the output
        unchanged instead of raising KeyError / ValueError.
        """
        self, mapping = _bind_template_call('safe_substitute', args, kws)
        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    # We use this idiom instead of str() because the latter
                    # will fail if val is a Unicode containing non-ASCII
                    return '%s' % (mapping[named],)
                except KeyError:
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)



####################################################################
# NOTE: Everything below here is deprecated.  Use string methods instead.
# This stuff will go away in Python 3.0.

# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.

    """
    return s.lower()

# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.

    """
    return s.upper()

# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.

    """
    return s.swapcase()

# Strip leading and trailing tabs and spaces
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.
    If chars is given and not None, remove characters in chars instead.
    If chars is unicode, S will be converted to unicode before stripping.

    """
    return s.strip(chars)

# Strip leading tabs and spaces
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return a copy of the string s with leading whitespace removed.
    If chars is given and not None, remove characters in chars instead.

    """
    return s.lstrip(chars)

# Strip trailing tabs and spaces
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return a copy of the string s with trailing whitespace removed.
    If chars is given and not None, remove characters in chars instead.

    """
    return s.rstrip(chars)


# Split a string into a list of space/tab-separated words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is given, splits at no more than
    maxsplit places (resulting in at most maxsplit+1 words).  If sep
    is not specified or is None, any whitespace string is a separator.

    (split and splitfields are synonymous)

    """
    return s.split(sep, maxsplit)
splitfields = split

# Split a string into a list of space/tab-separated words
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string, starting at the end of the string and working
    to the front.  If maxsplit is given, at most maxsplit splits are
    done. If sep is not specified or is None, any whitespace string
    is a separator.
    """
    return s.rsplit(sep, maxsplit)

# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    """
    return s.index(*args)

# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    """
    return s.rindex(*args)

# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    """
    return s.count(*args)

# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.find(*args)

# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.rfind(*args)

# for a bit of speed
_float = float
_int = int
_long = long

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    """
    return _float(s)


# Convert string to integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    """
    return _int(s, base)


# Convert string to long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    """
    return _long(s, base)


# Left-justify a string
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.  If specified the fillchar is used instead of spaces.

    """
    return s.ljust(width, *args)

# Right-justify a string
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.  If specified the fillchar is used instead of spaces.

    """
    return s.rjust(width, *args)

# Center a string
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.  If specified the fillchar is used instead of spaces.

    """
    return s.center(width, *args)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    """
    if not isinstance(x, basestring):
        # Deprecated convenience: a non-string argument is padded via repr().
        x = repr(x)
    return x.zfill(width)

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    """
    return s.expandtabs(tabsize)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletions are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.  The
    deletions argument is not allowed for Unicode strings.

    """
    if deletions or table is None:
        return s.translate(table, deletions)
    else:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        return s.translate(table + s[:0])

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.

    """
    return s.capitalize()

# Substring replacement (global)
def replace(s, old, new, maxreplace=-1):
    """replace (str, old, new[, maxreplace]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxreplace is
    given, only the first maxreplace occurrences are replaced.

    """
    return s.replace(old, new, maxreplace)


# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

# try:
#     from strop import maketrans, lowercase, uppercase, whitespace
#     letters = lowercase + uppercase
# except ImportError:
#     pass                                          # Use the original versions

########################################################################
# the Formatter class
# see PEP 3101 for details and purpose of this class

# The hard parts are reused from the C implementation.  They're exposed as "_"
# prefixed methods of str and unicode.

# The overall parser is implemented in str._formatter_parser.
# The field name parser is implemented in str._formatter_field_name_split

class Formatter(object):
    """Overridable implementation of format-string expansion (PEP 3101).

    format() is the entry point; the remaining methods are hooks that
    subclasses may override to customize parsing, lookup, conversion
    and formatting of replacement fields.
    """

    def format(*args, **kwargs):
        """Format a string: format(format_string, *args, **kwargs).

        Takes *args so that "self" and "format_string" remain usable as
        keyword field names.  Raises TypeError when the instance or the
        format string is missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        self, args = args[0], args[1:]  # allow the "self" keyword be passed
        try:
            format_string, args = args[0], args[1:] # allow the "format_string" keyword be passed
        except IndexError:
            if 'format_string' in kwargs:
                format_string = kwargs.pop('format_string')
            else:
                raise TypeError("format() missing 1 required positional "
                                "argument: 'format_string'")
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format using an explicit args sequence and kwargs mapping.

        Expands the string with a nested-field depth budget of 2, then
        hands the set of referenced arguments to check_unused_args().
        """
        used_args = set()
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Expand format_string, recording each referenced argument.

        recursion_depth is decremented for every level of nested format
        spec; ValueError is raised once it drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)


    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, otherwise kwargs[key]."""
        if isinstance(key, (int, long)):
            return args[key]
        else:
            return kwargs[key]


    def check_unused_args(self, used_args, args, kwargs):
        """Hook called after formatting; the default does nothing."""
        pass


    def format_field(self, value, format_spec):
        """Format one value by delegating to the builtin format()."""
        return format(value, format_spec)


    def convert_field(self, value, conversion):
        """Apply a conversion flag: None (no-op), 's' (str) or 'r' (repr).

        Raises ValueError for any other conversion specifier.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        # One-element tuple so that an unexpected tuple-valued specifier is
        # still reported as ValueError instead of breaking the % operator.
        raise ValueError("Unknown conversion specifier %s" % (conversion,))


    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples."""
        return format_string._formatter_parser()


    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to (object, first_key).

        first_key is the leading argument index or name, which the caller
        records as used; the remaining attribute and item accesses are
        then applied in order.
        """
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first