# Origin: github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/json/decoder.py
"""Implementation of JSONDecoder
"""
import re
import sys
import _struct as struct

# from json import scanner
import json_scanner as scanner

# try:
#     from _json import scanstring as c_scanstring
# except ImportError:
#     c_scanstring = None
c_scanstring = None

__all__ = ['JSONDecoder']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL


def _floatconstants():
    """Return the tuple ``(nan, inf, -inf)``.

    The values are obtained by unpacking two fixed big-endian 8-byte
    patterns as doubles with ``struct.unpack('>d', ...)``.
    """
    nan = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
    inf = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    # struct.unpack returns a 1-tuple; take the single element explicitly.
    nan = nan[0]
    inf = inf[0]
    return nan, inf, -inf

NaN, PosInf, NegInf = _floatconstants()


def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of offset ``pos`` in ``doc``.

    The line number is one more than the count of newlines before ``pos``.
    On the first line the column is ``pos + 1``; on later lines it is the
    distance from the last newline that precedes ``pos``.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        colno = pos + 1
    else:
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` suffixed with the location of ``pos`` inside ``doc``.

    With ``end`` left as None the suffix describes the single position
    ``pos``; otherwise it describes the range from ``pos`` to ``end``.
    Line and column numbers come from ``linecol``.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is None:
        # fmt = '{0}: line {1} column {2} (char {3})'
        # return fmt.format(msg, lineno, colno, pos)
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    # fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    # return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)


_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"


def _decode_uXXXX(s, pos):
    """Return the integer value of the four hex digits after ``s[pos]``.

    ``pos`` is expected to index the ``u`` of a ``\\uXXXX`` escape; the
    digits are read from ``s[pos + 1:pos + 5]``.

    Raises ValueError when fewer than four characters are available, when
    the second character is ``x``/``X`` (which ``int(..., 16)`` would
    otherwise accept as a ``0x`` prefix), or when the text is not valid hex.
    """
    esc = s[pos + 1:pos + 5]
    if len(esc) == 4 and esc[1] not in 'xX':
        try:
            return int(esc, 16)
        except ValueError:
            # Fall through to the shared error below.
            pass
    msg = "Invalid \\uXXXX escape"
    raise ValueError(errmsg(msg, s, pos))


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                # msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            end += 5
            # Check for surrogate pair on UCS-4 systems
            if sys.maxunicode > 65535 and \
               0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; scanning starts at ``s[end]``
    (presumably the index just past the opening ``{`` -- the caller lives
    in the scanner module and is not visible here). Keys are read with
    ``scanstring`` and values with ``scan_once``.

    The result is ``object_pairs_hook(pairs)`` when that hook is given,
    otherwise a dict, passed through ``object_hook`` when that is given.
    The returned ``end`` is the index just past the closing ``}``.

    Raises ValueError on a missing property name, ``:`` or ``,``, or when
    ``scan_once`` signals (via StopIteration) that no value is present.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input; scan_once reports the problem.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar, hence end - 1.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair; scanning starts at ``s[end]``
    (presumably the index just past the opening ``[`` -- the caller lives
    in the scanner module and is not visible here). Each element is read
    with ``scan_once``. ``values`` is a list and the returned ``end`` is
    the index just past the closing ``]``.

    Raises ValueError when an element is missing (``scan_once`` raised
    StopIteration) or when an element is followed by something other than
    ``,`` or ``]``.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past nextchar, so the offending
            # character sits at end - 1 (same convention as JSONObject).
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input; scan_once reports the problem.
            pass

    return values, end


class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders that rely on the
        order that the key and value pairs are decoded (for example,
        collections.OrderedDict will remember the order of insertion). If
        ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: make_scanner receives this instance, so the
        # attributes above are already set when it runs.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Leading and trailing whitespace is skipped; anything else left
        after the document raises ValueError("Extra data ...").

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises ValueError when no JSON value can be scanned at ``idx``.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end