github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/json/decoder.py (about)

     1  """Implementation of JSONDecoder
     2  """
     3  import re
     4  import sys
     5  import _struct as struct
     6  
     7  # from json import scanner
     8  import json_scanner as scanner
     9  
# The optional accelerated ``scanstring`` import is disabled in this module
# (the original lines are kept below for reference), so ``c_scanstring`` is
# unconditionally None and the pure-Python implementation is always used.
# try:
#     from _json import scanstring as c_scanstring
# except ImportError:
#     c_scanstring = None
c_scanstring = None

# Names exported by a star-import of this module.
__all__ = ['JSONDecoder']

# Regex flags shared by the patterns compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    19  
    20  def _floatconstants():
    21      nan = struct.unpack('>d', b'\x7f\xf8\x00\x00\x00\x00\x00\x00')
    22      inf = struct.unpack('>d', b'\x7f\xf0\x00\x00\x00\x00\x00\x00')
    23      nan = nan[0]
    24      inf = inf[0]
    25      return nan, inf, -inf
    26  
# Special float values, computed once when the module is imported.
NaN, PosInf, NegInf = _floatconstants()
    28  
    29  
    30  def linecol(doc, pos):
    31      lineno = doc.count('\n', 0, pos) + 1
    32      if lineno == 1:
    33          colno = pos + 1
    34      else:
    35          colno = pos - doc.rindex('\n', 0, pos)
    36      return lineno, colno
    37  
    38  
    39  def errmsg(msg, doc, pos, end=None):
    40      # Note that this function is called from _json
    41      lineno, colno = linecol(doc, pos)
    42      if end is None:
    43          # fmt = '{0}: line {1} column {2} (char {3})'
    44          # return fmt.format(msg, lineno, colno, pos)
    45          fmt = '%s: line %d column %d (char %d)'
    46          return fmt % (msg, lineno, colno, pos)
    47      endlineno, endcolno = linecol(doc, end)
    48      # fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    49      # return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    50      fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    51      return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
    52  
    53  
# Float values for the non-standard literals; JSONDecoder uses this table's
# lookup as its default ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches one chunk of a JSON string body: group 1 is a (possibly empty,
# non-greedy) run of ordinary characters, group 2 is the single character
# that ends the run -- a double quote, a backslash, or a control character
# in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the unicode character it stands for.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring uses when the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"
    67  
    68  def _decode_uXXXX(s, pos):
    69      esc = s[pos + 1:pos + 5]
    70      if len(esc) == 4 and esc[1] not in 'xX':
    71          try:
    72              return int(esc, 16)
    73          except ValueError:
    74              pass
    75      msg = "Invalid \\uXXXX escape"
    76      raise ValueError(errmsg(msg, s, pos))
    77  
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    If encoding is None, DEFAULT_ENCODING is used; chunks of s that are not
    already unicode are decoded with it.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, reported in "Unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            # No quote, backslash or control character left in the input.
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                # msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                # Non-strict mode keeps the literal control character.
                _append(terminator)
                continue
        # From here on the terminator was a backslash; s[end] is the
        # character that selects the escape.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence
            uni = _decode_uXXXX(s, end)
            # Skip the 'u' and its four hex digits.
            end += 5
            # Check for surrogate pair on UCS-4 systems
            if sys.maxunicode > 65535 and \
               0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    # Combine high and low surrogate into one code point and
                    # skip the second escape (backslash, 'u', four digits).
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
   145  
   146  
# Use speedup if available; falls back to the pure-Python implementation
# whenever c_scanstring is None.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests by JSONObject and JSONArray.
WHITESPACE_STR = ' \t\n\r'
   152  
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object out of a string.

    ``s_and_end`` is a ``(s, end)`` pair; ``end`` is the index at which the
    object's contents start (presumably just past the opening ``{`` --
    confirm against the scanner).  ``encoding`` and ``strict`` are passed
    through to ``scanstring`` for the keys.  ``scan_once(s, idx)`` is called
    for each value and is expected to return ``(value, next_idx)`` or raise
    StopIteration.

    The collected ``(key, value)`` pairs are handed to ``object_pairs_hook``
    if it is not None; otherwise they are turned into a dict, which is passed
    through ``object_hook`` if that is not None.

    Returns ``(result, end)`` with ``end`` the index just past the closing
    ``}``.  Raises ValueError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace after the colon; the regex is only used when more
        # than one whitespace character follows.  Running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        # Fetch the delimiter after the value; an empty nextchar means the
        # input ended and falls through to the "Expecting ','" error below.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past the delimiter position.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma, skip whitespace and expect the opening quote of the
        # next key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
   240  
   241  def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   242      s, end = s_and_end
   243      values = []
   244      nextchar = s[end:end + 1]
   245      if nextchar in _ws:
   246          end = _w(s, end + 1).end()
   247          nextchar = s[end:end + 1]
   248      # Look-ahead for trivial empty array
   249      if nextchar == ']':
   250          return values, end + 1
   251      _append = values.append
   252      while True:
   253          try:
   254              value, end = scan_once(s, end)
   255          except StopIteration:
   256              raise ValueError(errmsg("Expecting object", s, end))
   257          _append(value)
   258          nextchar = s[end:end + 1]
   259          if nextchar in _ws:
   260              end = _w(s, end + 1).end()
   261              nextchar = s[end:end + 1]
   262          end += 1
   263          if nextchar == ']':
   264              break
   265          elif nextchar != ',':
   266              raise ValueError(errmsg("Expecting ',' delimiter", s, end))
   267          try:
   268              if s[end] in _ws:
   269                  end += 1
   270                  if s[end] in _ws:
   271                      end = _w(s, end + 1).end()
   272          except IndexError:
   273              pass
   274  
   275      return values, end
   276  
   277  class JSONDecoder(object):
   278      """Simple JSON <http://json.org> decoder
   279  
   280      Performs the following translations in decoding by default:
   281  
   282      +---------------+-------------------+
   283      | JSON          | Python            |
   284      +===============+===================+
   285      | object        | dict              |
   286      +---------------+-------------------+
   287      | array         | list              |
   288      +---------------+-------------------+
   289      | string        | unicode           |
   290      +---------------+-------------------+
   291      | number (int)  | int, long         |
   292      +---------------+-------------------+
   293      | number (real) | float             |
   294      +---------------+-------------------+
   295      | true          | True              |
   296      +---------------+-------------------+
   297      | false         | False             |
   298      +---------------+-------------------+
   299      | null          | None              |
   300      +---------------+-------------------+
   301  
   302      It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
   303      their corresponding ``float`` values, which is outside the JSON spec.
   304  
   305      """
   306  
   307      def __init__(self, encoding=None, object_hook=None, parse_float=None,
   308              parse_int=None, parse_constant=None, strict=True,
   309              object_pairs_hook=None):
   310          """``encoding`` determines the encoding used to interpret any ``str``
   311          objects decoded by this instance (utf-8 by default).  It has no
   312          effect when decoding ``unicode`` objects.
   313  
   314          Note that currently only encodings that are a superset of ASCII work,
   315          strings of other encodings should be passed in as ``unicode``.
   316  
   317          ``object_hook``, if specified, will be called with the result
   318          of every JSON object decoded and its return value will be used in
   319          place of the given ``dict``.  This can be used to provide custom
   320          deserializations (e.g. to support JSON-RPC class hinting).
   321  
   322          ``object_pairs_hook``, if specified will be called with the result of
   323          every JSON object decoded with an ordered list of pairs.  The return
   324          value of ``object_pairs_hook`` will be used instead of the ``dict``.
   325          This feature can be used to implement custom decoders that rely on the
   326          order that the key and value pairs are decoded (for example,
   327          collections.OrderedDict will remember the order of insertion). If
   328          ``object_hook`` is also defined, the ``object_pairs_hook`` takes
   329          priority.
   330  
   331          ``parse_float``, if specified, will be called with the string
   332          of every JSON float to be decoded. By default this is equivalent to
   333          float(num_str). This can be used to use another datatype or parser
   334          for JSON floats (e.g. decimal.Decimal).
   335  
   336          ``parse_int``, if specified, will be called with the string
   337          of every JSON int to be decoded. By default this is equivalent to
   338          int(num_str). This can be used to use another datatype or parser
   339          for JSON integers (e.g. float).
   340  
   341          ``parse_constant``, if specified, will be called with one of the
   342          following strings: -Infinity, Infinity, NaN.
   343          This can be used to raise an exception if invalid JSON numbers
   344          are encountered.
   345  
   346          If ``strict`` is false (true is the default), then control
   347          characters will be allowed inside strings.  Control characters in
   348          this context are those with character codes in the 0-31 range,
   349          including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
   350  
   351          """
   352          self.encoding = encoding
   353          self.object_hook = object_hook
   354          self.object_pairs_hook = object_pairs_hook
   355          self.parse_float = parse_float or float
   356          self.parse_int = parse_int or int
   357          self.parse_constant = parse_constant or _CONSTANTS.__getitem__
   358          self.strict = strict
   359          self.parse_object = JSONObject
   360          self.parse_array = JSONArray
   361          self.parse_string = scanstring
   362          self.scan_once = scanner.make_scanner(self)
   363  
   364      def decode(self, s, _w=WHITESPACE.match):
   365          """Return the Python representation of ``s`` (a ``str`` or ``unicode``
   366          instance containing a JSON document)
   367  
   368          """
   369          obj, end = self.raw_decode(s, idx=_w(s, 0).end())
   370          end = _w(s, end).end()
   371          if end != len(s):
   372              raise ValueError(errmsg("Extra data", s, end, len(s)))
   373          return obj
   374  
   375      def raw_decode(self, s, idx=0):
   376          """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
   377          beginning with a JSON document) and return a 2-tuple of the Python
   378          representation and the index in ``s`` where the document ended.
   379  
   380          This can be used to decode a JSON document from a string that may
   381          have extraneous data at the end.
   382  
   383          """
   384          try:
   385              obj, end = self.scan_once(s, idx)
   386          except StopIteration:
   387              raise ValueError("No JSON object could be decoded")
   388          return obj, end