# Origin: github.com/google/grumpy@v0.0.0-20171122020858-3ec87959189c/third_party/stdlib/json/decoder.py
"""Implementation of JSONDecoder
"""
import re
import sys
import _struct as struct

# from json import scanner
import json_scanner as scanner

# try:
#     from _json import scanstring as c_scanstring
# except ImportError:
#     c_scanstring = None
c_scanstring = None

__all__ = ['JSONDecoder']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL


def _floatconstants():
    """Return the tuple (nan, inf, -inf) built from raw big-endian doubles."""
    nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
    inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()


def linecol(doc, pos):
    """Return the 1-based (line, column) of character offset ``pos`` in ``doc``."""
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        colno = pos + 1
    else:
        # Distance from the last newline before ``pos``.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno


def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` with the line/column/char location of ``pos`` in ``doc``.

    When ``end`` is given, the message describes the range ``pos``..``end``
    instead of a single position.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is None:
        # fmt = '{0}: line {1} column {2} (char {3})'
        # return fmt.format(msg, lineno, colno, pos)
        fmt = '%s: line %d column %d (char %d)'
        return fmt % (msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    # fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    # return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)


_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"


def _decode_uXXXX(s, pos):
    """Return the integer value of the four hex digits after ``s[pos]``.

    ``pos`` is the index of the ``u`` in a ``\\uXXXX`` escape.  Raises
    ValueError if fewer than four characters follow or they are not valid
    hexadecimal digits.
    """
    esc = s[pos + 1:pos + 5]
    # int(..., 16) would accept a '0x'/'0X' prefix, so reject it explicitly.
    if len(esc) == 4 and esc[1] not in 'xX':
        try:
            return int(esc, 16)
        except ValueError:
            pass
    msg = "Invalid \\uXXXX escape"
    raise ValueError(errmsg(msg, s, pos))


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                # msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            end += 5
            # Check for surrogate pair on UCS-4 systems
            if sys.maxunicode > 65535 and \
               0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair where ``end`` indexes the character
    after the opening ``{``.  Keys are read with ``scanstring`` and values
    with ``scan_once``.  The result is ``object_pairs_hook(pairs)`` when that
    hook is given, otherwise a dict (passed through ``object_hook`` when
    given).  The returned ``end`` is the index after the closing ``}``.

    Raises ValueError on a missing key quote, ``:`` or ``,`` delimiter, or
    when ``scan_once`` signals that no value could be read.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input while skipping whitespace;
            # scan_once below reports the missing value.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair where ``end`` indexes the character
    after the opening ``[``.  Elements are read with ``scan_once``.  The
    returned ``end`` is the index after the closing ``]``.

    Raises ValueError when an element cannot be read or when an element is
    followed by something other than ``,`` or ``]``.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # report ``end - 1`` (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input while skipping whitespace;
            # scan_once at the top of the loop reports the missing value.
            pass

    return values, end


class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders that rely on the
        order that the key and value pairs are decoded (for example,
        collections.OrderedDict will remember the order of insertion). If
        ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # The scanner is built from the attributes assigned above, so it
        # must be created last.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Raises ValueError if anything other than whitespace follows the
        document.

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end