github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/lib/python/simplejson/decoder.py (about)

     1  """Implementation of JSONDecoder
     2  """
     3  import re
     4  import sys
     5  import struct
     6  
     7  from simplejson.scanner import make_scanner
     8  def _import_c_scanstring():
     9      try:
    10          from simplejson._speedups import scanstring
    11          return scanstring
    12      except ImportError:
    13          return None
    14  c_scanstring = _import_c_scanstring()
    15  
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Regex flags shared by the patterns compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    19  
    20  def _floatconstants():
    21      _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
    22      # The struct module in Python 2.4 would get frexp() out of range here
    23      # when an endian is specified in the format string. Fixed in Python 2.5+
    24      if sys.byteorder != 'big':
    25          _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
    26      nan, inf = struct.unpack('dd', _BYTES)
    27      return nan, inf, -inf
    28  
    29  NaN, PosInf, NegInf = _floatconstants()
    30  
    31  
    32  class JSONDecodeError(ValueError):
    33      """Subclass of ValueError with the following additional properties:
    34      
    35      msg: The unformatted error message
    36      doc: The JSON document being parsed
    37      pos: The start index of doc where parsing failed
    38      end: The end index of doc where parsing failed (may be None)
    39      lineno: The line corresponding to pos
    40      colno: The column corresponding to pos
    41      endlineno: The line corresponding to end (may be None)
    42      endcolno: The column corresponding to end (may be None)
    43      
    44      """
    45      def __init__(self, msg, doc, pos, end=None):
    46          ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
    47          self.msg = msg
    48          self.doc = doc
    49          self.pos = pos
    50          self.end = end
    51          self.lineno, self.colno = linecol(doc, pos)
    52          if end is not None:
    53              self.endlineno, self.endcolno = linecol(doc, pos)
    54          else:
    55              self.endlineno, self.endcolno = None, None
    56  
    57  
    58  def linecol(doc, pos):
    59      lineno = doc.count('\n', 0, pos) + 1
    60      if lineno == 1:
    61          colno = pos
    62      else:
    63          colno = pos - doc.rindex('\n', 0, pos)
    64      return lineno, colno
    65  
    66  
    67  def errmsg(msg, doc, pos, end=None):
    68      # Note that this function is called from _speedups
    69      lineno, colno = linecol(doc, pos)
    70      if end is None:
    71          #fmt = '{0}: line {1} column {2} (char {3})'
    72          #return fmt.format(msg, lineno, colno, pos)
    73          fmt = '%s: line %d column %d (char %d)'
    74          return fmt % (msg, lineno, colno, pos)
    75      endlineno, endcolno = linecol(doc, end)
    76      #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    77      #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    78      fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    79      return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
    80  
    81  
# Literal names for the non-finite floats, mapped to the float value each
# one stands for.  JSONDecoder uses this table's lookup as its default
# ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty) run of characters, matched non-greedily.
# Group 2: the single character that ends the run -- a double quote, a
# backslash, or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character that follows the backslash,
# mapped to the text it decodes to.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring uses when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
    95  
    96  def py_scanstring(s, end, encoding=None, strict=True,
    97          _b=BACKSLASH, _m=STRINGCHUNK.match):
    98      """Scan the string s for a JSON string. End is the index of the
    99      character in s after the quote that started the JSON string.
   100      Unescapes all valid JSON string escape sequences and raises ValueError
   101      on attempt to decode an invalid string. If strict is False then literal
   102      control characters are allowed in the string.
   103  
   104      Returns a tuple of the decoded string and the index of the character in s
   105      after the end quote."""
   106      if encoding is None:
   107          encoding = DEFAULT_ENCODING
   108      chunks = []
   109      _append = chunks.append
   110      begin = end - 1
   111      while 1:
   112          chunk = _m(s, end)
   113          if chunk is None:
   114              raise JSONDecodeError(
   115                  "Unterminated string starting at", s, begin)
   116          end = chunk.end()
   117          content, terminator = chunk.groups()
   118          # Content is contains zero or more unescaped string characters
   119          if content:
   120              if not isinstance(content, unicode):
   121                  content = unicode(content, encoding)
   122              _append(content)
   123          # Terminator is the end of string, a literal control character,
   124          # or a backslash denoting that an escape sequence follows
   125          if terminator == '"':
   126              break
   127          elif terminator != '\\':
   128              if strict:
   129                  msg = "Invalid control character %r at" % (terminator,)
   130                  #msg = "Invalid control character {0!r} at".format(terminator)
   131                  raise JSONDecodeError(msg, s, end)
   132              else:
   133                  _append(terminator)
   134                  continue
   135          try:
   136              esc = s[end]
   137          except IndexError:
   138              raise JSONDecodeError(
   139                  "Unterminated string starting at", s, begin)
   140          # If not a unicode escape sequence, must be in the lookup table
   141          if esc != 'u':
   142              try:
   143                  char = _b[esc]
   144              except KeyError:
   145                  msg = "Invalid \\escape: " + repr(esc)
   146                  raise JSONDecodeError(msg, s, end)
   147              end += 1
   148          else:
   149              # Unicode escape sequence
   150              esc = s[end + 1:end + 5]
   151              next_end = end + 5
   152              if len(esc) != 4:
   153                  msg = "Invalid \\uXXXX escape"
   154                  raise JSONDecodeError(msg, s, end)
   155              uni = int(esc, 16)
   156              # Check for surrogate pair on UCS-4 systems
   157              if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
   158                  msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
   159                  if not s[end + 5:end + 7] == '\\u':
   160                      raise JSONDecodeError(msg, s, end)
   161                  esc2 = s[end + 7:end + 11]
   162                  if len(esc2) != 4:
   163                      raise JSONDecodeError(msg, s, end)
   164                  uni2 = int(esc2, 16)
   165                  uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
   166                  next_end += 6
   167              char = unichr(uni)
   168              end = next_end
   169          # Append the unescaped character
   170          _append(char)
   171      return u''.join(chunks), end
   172  
   173  
# Use speedup if available
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage-return
# characters.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests on a single character.
WHITESPACE_STR = ' \t\n\r'
   179  
   180  def JSONObject((s, end), encoding, strict, scan_once, object_hook,
   181          object_pairs_hook, memo=None,
   182          _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   183      # Backwards compatibility
   184      if memo is None:
   185          memo = {}
   186      memo_get = memo.setdefault
   187      pairs = []
   188      # Use a slice to prevent IndexError from being raised, the following
   189      # check will raise a more specific ValueError if the string is empty
   190      nextchar = s[end:end + 1]
   191      # Normally we expect nextchar == '"'
   192      if nextchar != '"':
   193          if nextchar in _ws:
   194              end = _w(s, end).end()
   195              nextchar = s[end:end + 1]
   196          # Trivial empty object
   197          if nextchar == '}':
   198              if object_pairs_hook is not None:
   199                  result = object_pairs_hook(pairs)
   200                  return result, end
   201              pairs = {}
   202              if object_hook is not None:
   203                  pairs = object_hook(pairs)
   204              return pairs, end + 1
   205          elif nextchar != '"':
   206              raise JSONDecodeError("Expecting property name", s, end)
   207      end += 1
   208      while True:
   209          key, end = scanstring(s, end, encoding, strict)
   210          key = memo_get(key, key)
   211  
   212          # To skip some function call overhead we optimize the fast paths where
   213          # the JSON key separator is ": " or just ":".
   214          if s[end:end + 1] != ':':
   215              end = _w(s, end).end()
   216              if s[end:end + 1] != ':':
   217                  raise JSONDecodeError("Expecting : delimiter", s, end)
   218  
   219          end += 1
   220  
   221          try:
   222              if s[end] in _ws:
   223                  end += 1
   224                  if s[end] in _ws:
   225                      end = _w(s, end + 1).end()
   226          except IndexError:
   227              pass
   228  
   229          try:
   230              value, end = scan_once(s, end)
   231          except StopIteration:
   232              raise JSONDecodeError("Expecting object", s, end)
   233          pairs.append((key, value))
   234  
   235          try:
   236              nextchar = s[end]
   237              if nextchar in _ws:
   238                  end = _w(s, end + 1).end()
   239                  nextchar = s[end]
   240          except IndexError:
   241              nextchar = ''
   242          end += 1
   243  
   244          if nextchar == '}':
   245              break
   246          elif nextchar != ',':
   247              raise JSONDecodeError("Expecting , delimiter", s, end - 1)
   248  
   249          try:
   250              nextchar = s[end]
   251              if nextchar in _ws:
   252                  end += 1
   253                  nextchar = s[end]
   254                  if nextchar in _ws:
   255                      end = _w(s, end + 1).end()
   256                      nextchar = s[end]
   257          except IndexError:
   258              nextchar = ''
   259  
   260          end += 1
   261          if nextchar != '"':
   262              raise JSONDecodeError("Expecting property name", s, end - 1)
   263  
   264      if object_pairs_hook is not None:
   265          result = object_pairs_hook(pairs)
   266          return result, end
   267      pairs = dict(pairs)
   268      if object_hook is not None:
   269          pairs = object_hook(pairs)
   270      return pairs, end
   271  
   272  def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
   273      values = []
   274      nextchar = s[end:end + 1]
   275      if nextchar in _ws:
   276          end = _w(s, end + 1).end()
   277          nextchar = s[end:end + 1]
   278      # Look-ahead for trivial empty array
   279      if nextchar == ']':
   280          return values, end + 1
   281      _append = values.append
   282      while True:
   283          try:
   284              value, end = scan_once(s, end)
   285          except StopIteration:
   286              raise JSONDecodeError("Expecting object", s, end)
   287          _append(value)
   288          nextchar = s[end:end + 1]
   289          if nextchar in _ws:
   290              end = _w(s, end + 1).end()
   291              nextchar = s[end:end + 1]
   292          end += 1
   293          if nextchar == ']':
   294              break
   295          elif nextchar != ',':
   296              raise JSONDecodeError("Expecting , delimiter", s, end)
   297  
   298          try:
   299              if s[end] in _ws:
   300                  end += 1
   301                  if s[end] in _ws:
   302                      end = _w(s, end + 1).end()
   303          except IndexError:
   304              pass
   305  
   306      return values, end
   307  
   308  class JSONDecoder(object):
   309      """Simple JSON <http://json.org> decoder
   310  
   311      Performs the following translations in decoding by default:
   312  
   313      +---------------+-------------------+
   314      | JSON          | Python            |
   315      +===============+===================+
   316      | object        | dict              |
   317      +---------------+-------------------+
   318      | array         | list              |
   319      +---------------+-------------------+
   320      | string        | unicode           |
   321      +---------------+-------------------+
   322      | number (int)  | int, long         |
   323      +---------------+-------------------+
   324      | number (real) | float             |
   325      +---------------+-------------------+
   326      | true          | True              |
   327      +---------------+-------------------+
   328      | false         | False             |
   329      +---------------+-------------------+
   330      | null          | None              |
   331      +---------------+-------------------+
   332  
   333      It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
   334      their corresponding ``float`` values, which is outside the JSON spec.
   335  
   336      """
   337  
   338      def __init__(self, encoding=None, object_hook=None, parse_float=None,
   339              parse_int=None, parse_constant=None, strict=True,
   340              object_pairs_hook=None):
   341          """
   342          *encoding* determines the encoding used to interpret any
   343          :class:`str` objects decoded by this instance (``'utf-8'`` by
   344          default).  It has no effect when decoding :class:`unicode` objects.
   345  
   346          Note that currently only encodings that are a superset of ASCII work,
   347          strings of other encodings should be passed in as :class:`unicode`.
   348  
   349          *object_hook*, if specified, will be called with the result of every
   350          JSON object decoded and its return value will be used in place of the
   351          given :class:`dict`.  This can be used to provide custom
   352          deserializations (e.g. to support JSON-RPC class hinting).
   353  
   354          *object_pairs_hook* is an optional function that will be called with
   355          the result of any object literal decode with an ordered list of pairs.
   356          The return value of *object_pairs_hook* will be used instead of the
   357          :class:`dict`.  This feature can be used to implement custom decoders
   358          that rely on the order that the key and value pairs are decoded (for
   359          example, :func:`collections.OrderedDict` will remember the order of
   360          insertion). If *object_hook* is also defined, the *object_pairs_hook*
   361          takes priority.
   362  
   363          *parse_float*, if specified, will be called with the string of every
   364          JSON float to be decoded.  By default, this is equivalent to
   365          ``float(num_str)``. This can be used to use another datatype or parser
   366          for JSON floats (e.g. :class:`decimal.Decimal`).
   367  
   368          *parse_int*, if specified, will be called with the string of every
   369          JSON int to be decoded.  By default, this is equivalent to
   370          ``int(num_str)``.  This can be used to use another datatype or parser
   371          for JSON integers (e.g. :class:`float`).
   372  
   373          *parse_constant*, if specified, will be called with one of the
   374          following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
   375          can be used to raise an exception if invalid JSON numbers are
   376          encountered.
   377  
   378          *strict* controls the parser's behavior when it encounters an
   379          invalid control character in a string. The default setting of
   380          ``True`` means that unescaped control characters are parse errors, if
   381          ``False`` then control characters will be allowed in strings.
   382  
   383          """
   384          self.encoding = encoding
   385          self.object_hook = object_hook
   386          self.object_pairs_hook = object_pairs_hook
   387          self.parse_float = parse_float or float
   388          self.parse_int = parse_int or int
   389          self.parse_constant = parse_constant or _CONSTANTS.__getitem__
   390          self.strict = strict
   391          self.parse_object = JSONObject
   392          self.parse_array = JSONArray
   393          self.parse_string = scanstring
   394          self.memo = {}
   395          self.scan_once = make_scanner(self)
   396  
   397      def decode(self, s, _w=WHITESPACE.match):
   398          """Return the Python representation of ``s`` (a ``str`` or ``unicode``
   399          instance containing a JSON document)
   400  
   401          """
   402          obj, end = self.raw_decode(s, idx=_w(s, 0).end())
   403          end = _w(s, end).end()
   404          if end != len(s):
   405              raise JSONDecodeError("Extra data", s, end, len(s))
   406          return obj
   407  
   408      def raw_decode(self, s, idx=0):
   409          """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
   410          beginning with a JSON document) and return a 2-tuple of the Python
   411          representation and the index in ``s`` where the document ended.
   412  
   413          This can be used to decode a JSON document from a string that may
   414          have extraneous data at the end.
   415  
   416          """
   417          try:
   418              obj, end = self.scan_once(s, idx)
   419          except StopIteration:
   420              raise JSONDecodeError("No JSON object could be decoded", s, idx)
   421          return obj, end