github.com/google/grumpy@v0.0.0-20171122020858-3ec87959189c/third_party/stdlib/json/decoder.py (about)

     1  """Implementation of JSONDecoder
     2  """
     3  import re
     4  import sys
     5  import _struct as struct
     6  
     7  # from json import scanner
     8  import json_scanner as scanner
     9  
# try:
#     from _json import scanstring as c_scanstring
# except ImportError:
#     c_scanstring = None
# The accelerator import above is commented out, so c_scanstring is always
# None here and the pure-Python py_scanstring is what `scanstring` resolves to.
c_scanstring = None

# Only the decoder class is exported by `from ... import *`.
__all__ = ['JSONDecoder']

# Flags passed to every regular expression compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    19  
    20  def _floatconstants():
    21      nan, = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
    22      inf, = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    23      return nan, inf, -inf
    24  
    25  NaN, PosInf, NegInf = _floatconstants()
    26  
    27  
    28  def linecol(doc, pos):
    29      lineno = doc.count('\n', 0, pos) + 1
    30      if lineno == 1:
    31          colno = pos + 1
    32      else:
    33          colno = pos - doc.rindex('\n', 0, pos)
    34      return lineno, colno
    35  
    36  
    37  def errmsg(msg, doc, pos, end=None):
    38      # Note that this function is called from _json
    39      lineno, colno = linecol(doc, pos)
    40      if end is None:
    41          # fmt = '{0}: line {1} column {2} (char {3})'
    42          # return fmt.format(msg, lineno, colno, pos)
    43          fmt = '%s: line %d column %d (char %d)'
    44          return fmt % (msg, lineno, colno, pos)
    45      endlineno, endcolno = linecol(doc, end)
    46      # fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    47      # return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    48      fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    49      return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
    50  
    51  
# Float values for the non-standard JSON literals. Its __getitem__ is the
# default ``parse_constant`` used by JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Lazily matches a run of ordinary characters (group 1) followed by the first
# character that needs attention (group 2): a double quote, a backslash, or a
# control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps the character following a backslash to the character it stands for.
# The ``u`` escape is not listed here; py_scanstring handles it separately.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
    65  
    66  def _decode_uXXXX(s, pos):
    67      esc = s[pos + 1:pos + 5]
    68      if len(esc) == 4 and esc[1] not in 'xX':
    69          try:
    70              return int(esc, 16)
    71          except ValueError:
    72              pass
    73      msg = "Invalid \\uXXXX escape"
    74      raise ValueError(errmsg(msg, s, pos))
    75  
    76  def py_scanstring(s, end, encoding=None, strict=True,
    77          _b=BACKSLASH, _m=STRINGCHUNK.match):
    78      """Scan the string s for a JSON string. End is the index of the
    79      character in s after the quote that started the JSON string.
    80      Unescapes all valid JSON string escape sequences and raises ValueError
    81      on attempt to decode an invalid string. If strict is False then literal
    82      control characters are allowed in the string.
    83  
    84      Returns a tuple of the decoded string and the index of the character in s
    85      after the end quote."""
    86      if encoding is None:
    87          encoding = DEFAULT_ENCODING
    88      chunks = []
    89      _append = chunks.append
    90      begin = end - 1
    91      while 1:
    92          chunk = _m(s, end)
    93          if chunk is None:
    94              raise ValueError(
    95                  errmsg("Unterminated string starting at", s, begin))
    96          end = chunk.end()
    97          content, terminator = chunk.groups()
    98          # Content is contains zero or more unescaped string characters
    99          if content:
   100              if not isinstance(content, unicode):
   101                  content = unicode(content, encoding)
   102              _append(content)
   103          # Terminator is the end of string, a literal control character,
   104          # or a backslash denoting that an escape sequence follows
   105          if terminator == '"':
   106              break
   107          elif terminator != '\\':
   108              if strict:
   109                  msg = "Invalid control character %r at" % (terminator,)
   110                  # msg = "Invalid control character {0!r} at".format(terminator)
   111                  raise ValueError(errmsg(msg, s, end))
   112              else:
   113                  _append(terminator)
   114                  continue
   115          try:
   116              esc = s[end]
   117          except IndexError:
   118              raise ValueError(
   119                  errmsg("Unterminated string starting at", s, begin))
   120          # If not a unicode escape sequence, must be in the lookup table
   121          if esc != 'u':
   122              try:
   123                  char = _b[esc]
   124              except KeyError:
   125                  msg = "Invalid \\escape: " + repr(esc)
   126                  raise ValueError(errmsg(msg, s, end))
   127              end += 1
   128          else:
   129              # Unicode escape sequence
   130              uni = _decode_uXXXX(s, end)
   131              end += 5
   132              # Check for surrogate pair on UCS-4 systems
   133              if sys.maxunicode > 65535 and \
   134                 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
   135                  uni2 = _decode_uXXXX(s, end + 1)
   136                  if 0xdc00 <= uni2 <= 0xdfff:
   137                      uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
   138                      end += 6
   139              char = unichr(uni)
   140          # Append the unescaped character
   141          _append(char)
   142      return u''.join(chunks), end
   143  
   144  
# Use speedup if available
# (c_scanstring is set to None earlier in this module, so this currently
# always resolves to py_scanstring.)
scanstring = c_scanstring or py_scanstring

# Matches a possibly empty run of JSON whitespace. Because the pattern can
# match the empty string, WHITESPACE.match() always returns a match object,
# which is why callers use ``.end()`` on the result without a None check.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same whitespace characters as a plain string, for cheap ``in`` tests.
WHITESPACE_STR = ' \t\n\r'
   150  
   151  def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
   152                 object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   153      s, end = s_and_end
   154      pairs = []
   155      pairs_append = pairs.append
   156      # Use a slice to prevent IndexError from being raised, the following
   157      # check will raise a more specific ValueError if the string is empty
   158      nextchar = s[end:end + 1]
   159      # Normally we expect nextchar == '"'
   160      if nextchar != '"':
   161          if nextchar in _ws:
   162              end = _w(s, end).end()
   163              nextchar = s[end:end + 1]
   164          # Trivial empty object
   165          if nextchar == '}':
   166              if object_pairs_hook is not None:
   167                  result = object_pairs_hook(pairs)
   168                  return result, end + 1
   169              pairs = {}
   170              if object_hook is not None:
   171                  pairs = object_hook(pairs)
   172              return pairs, end + 1
   173          elif nextchar != '"':
   174              raise ValueError(errmsg(
   175                  "Expecting property name enclosed in double quotes", s, end))
   176      end += 1
   177      while True:
   178          key, end = scanstring(s, end, encoding, strict)
   179  
   180          # To skip some function call overhead we optimize the fast paths where
   181          # the JSON key separator is ": " or just ":".
   182          if s[end:end + 1] != ':':
   183              end = _w(s, end).end()
   184              if s[end:end + 1] != ':':
   185                  raise ValueError(errmsg("Expecting ':' delimiter", s, end))
   186          end += 1
   187  
   188          try:
   189              if s[end] in _ws:
   190                  end += 1
   191                  if s[end] in _ws:
   192                      end = _w(s, end + 1).end()
   193          except IndexError:
   194              pass
   195  
   196          try:
   197              value, end = scan_once(s, end)
   198          except StopIteration:
   199              raise ValueError(errmsg("Expecting object", s, end))
   200          pairs_append((key, value))
   201  
   202          try:
   203              nextchar = s[end]
   204              if nextchar in _ws:
   205                  end = _w(s, end + 1).end()
   206                  nextchar = s[end]
   207          except IndexError:
   208              nextchar = ''
   209          end += 1
   210  
   211          if nextchar == '}':
   212              break
   213          elif nextchar != ',':
   214              raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
   215  
   216          try:
   217              nextchar = s[end]
   218              if nextchar in _ws:
   219                  end += 1
   220                  nextchar = s[end]
   221                  if nextchar in _ws:
   222                      end = _w(s, end + 1).end()
   223                      nextchar = s[end]
   224          except IndexError:
   225              nextchar = ''
   226  
   227          end += 1
   228          if nextchar != '"':
   229              raise ValueError(errmsg(
   230                  "Expecting property name enclosed in double quotes", s, end - 1))
   231      if object_pairs_hook is not None:
   232          result = object_pairs_hook(pairs)
   233          return result, end
   234      pairs = dict(pairs)
   235      if object_hook is not None:
   236          pairs = object_hook(pairs)
   237      return pairs, end
   238  
   239  def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   240      s, end = s_and_end
   241      values = []
   242      nextchar = s[end:end + 1]
   243      if nextchar in _ws:
   244          end = _w(s, end + 1).end()
   245          nextchar = s[end:end + 1]
   246      # Look-ahead for trivial empty array
   247      if nextchar == ']':
   248          return values, end + 1
   249      _append = values.append
   250      while True:
   251          try:
   252              value, end = scan_once(s, end)
   253          except StopIteration:
   254              raise ValueError(errmsg("Expecting object", s, end))
   255          _append(value)
   256          nextchar = s[end:end + 1]
   257          if nextchar in _ws:
   258              end = _w(s, end + 1).end()
   259              nextchar = s[end:end + 1]
   260          end += 1
   261          if nextchar == ']':
   262              break
   263          elif nextchar != ',':
   264              raise ValueError(errmsg("Expecting ',' delimiter", s, end))
   265          try:
   266              if s[end] in _ws:
   267                  end += 1
   268                  if s[end] in _ws:
   269                      end = _w(s, end + 1).end()
   270          except IndexError:
   271              pass
   272  
   273      return values, end
   274  
   275  class JSONDecoder(object):
   276      """Simple JSON <http://json.org> decoder
   277  
   278      Performs the following translations in decoding by default:
   279  
   280      +---------------+-------------------+
   281      | JSON          | Python            |
   282      +===============+===================+
   283      | object        | dict              |
   284      +---------------+-------------------+
   285      | array         | list              |
   286      +---------------+-------------------+
   287      | string        | unicode           |
   288      +---------------+-------------------+
   289      | number (int)  | int, long         |
   290      +---------------+-------------------+
   291      | number (real) | float             |
   292      +---------------+-------------------+
   293      | true          | True              |
   294      +---------------+-------------------+
   295      | false         | False             |
   296      +---------------+-------------------+
   297      | null          | None              |
   298      +---------------+-------------------+
   299  
   300      It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
   301      their corresponding ``float`` values, which is outside the JSON spec.
   302  
   303      """
   304  
   305      def __init__(self, encoding=None, object_hook=None, parse_float=None,
   306              parse_int=None, parse_constant=None, strict=True,
   307              object_pairs_hook=None):
   308          """``encoding`` determines the encoding used to interpret any ``str``
   309          objects decoded by this instance (utf-8 by default).  It has no
   310          effect when decoding ``unicode`` objects.
   311  
   312          Note that currently only encodings that are a superset of ASCII work,
   313          strings of other encodings should be passed in as ``unicode``.
   314  
   315          ``object_hook``, if specified, will be called with the result
   316          of every JSON object decoded and its return value will be used in
   317          place of the given ``dict``.  This can be used to provide custom
   318          deserializations (e.g. to support JSON-RPC class hinting).
   319  
   320          ``object_pairs_hook``, if specified will be called with the result of
   321          every JSON object decoded with an ordered list of pairs.  The return
   322          value of ``object_pairs_hook`` will be used instead of the ``dict``.
   323          This feature can be used to implement custom decoders that rely on the
   324          order that the key and value pairs are decoded (for example,
   325          collections.OrderedDict will remember the order of insertion). If
   326          ``object_hook`` is also defined, the ``object_pairs_hook`` takes
   327          priority.
   328  
   329          ``parse_float``, if specified, will be called with the string
   330          of every JSON float to be decoded. By default this is equivalent to
   331          float(num_str). This can be used to use another datatype or parser
   332          for JSON floats (e.g. decimal.Decimal).
   333  
   334          ``parse_int``, if specified, will be called with the string
   335          of every JSON int to be decoded. By default this is equivalent to
   336          int(num_str). This can be used to use another datatype or parser
   337          for JSON integers (e.g. float).
   338  
   339          ``parse_constant``, if specified, will be called with one of the
   340          following strings: -Infinity, Infinity, NaN.
   341          This can be used to raise an exception if invalid JSON numbers
   342          are encountered.
   343  
   344          If ``strict`` is false (true is the default), then control
   345          characters will be allowed inside strings.  Control characters in
   346          this context are those with character codes in the 0-31 range,
   347          including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
   348  
   349          """
   350          self.encoding = encoding
   351          self.object_hook = object_hook
   352          self.object_pairs_hook = object_pairs_hook
   353          self.parse_float = parse_float or float
   354          self.parse_int = parse_int or int
   355          self.parse_constant = parse_constant or _CONSTANTS.__getitem__
   356          self.strict = strict
   357          self.parse_object = JSONObject
   358          self.parse_array = JSONArray
   359          self.parse_string = scanstring
   360          self.scan_once = scanner.make_scanner(self)
   361  
   362      def decode(self, s, _w=WHITESPACE.match):
   363          """Return the Python representation of ``s`` (a ``str`` or ``unicode``
   364          instance containing a JSON document)
   365  
   366          """
   367          obj, end = self.raw_decode(s, idx=_w(s, 0).end())
   368          end = _w(s, end).end()
   369          if end != len(s):
   370              raise ValueError(errmsg("Extra data", s, end, len(s)))
   371          return obj
   372  
   373      def raw_decode(self, s, idx=0):
   374          """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
   375          beginning with a JSON document) and return a 2-tuple of the Python
   376          representation and the index in ``s`` where the document ended.
   377  
   378          This can be used to decode a JSON document from a string that may
   379          have extraneous data at the end.
   380  
   381          """
   382          try:
   383              obj, end = self.scan_once(s, idx)
   384          except StopIteration:
   385              raise ValueError("No JSON object could be decoded")
   386          return obj, end