github.com/slspeek/camlistore_namedsearch@v0.0.0-20140519202248-ed6f70f7721a/lib/python/simplejson/encoder.py (about)

     1  """Implementation of JSONEncoder
     2  """
     3  import re
     4  from decimal import Decimal
     5  
     6  def _import_speedups():
     7      try:
     8          from simplejson import _speedups
     9          return _speedups.encode_basestring_ascii, _speedups.make_encoder
    10      except ImportError:
    11          return None, None
    12  c_encode_basestring_ascii, c_make_encoder = _import_speedups()
    13  
    14  from simplejson.decoder import PosInf
    15  
    16  ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
    17  ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
    18  HAS_UTF8 = re.compile(r'[\x80-\xff]')
    19  ESCAPE_DCT = {
    20      '\\': '\\\\',
    21      '"': '\\"',
    22      '\b': '\\b',
    23      '\f': '\\f',
    24      '\n': '\\n',
    25      '\r': '\\r',
    26      '\t': '\\t',
    27  }
    28  for i in range(0x20):
    29      #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    30      ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
    31  
    32  FLOAT_REPR = repr
    33  
    34  def encode_basestring(s):
    35      """Return a JSON representation of a Python string
    36  
    37      """
    38      if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    39          s = s.decode('utf-8')
    40      def replace(match):
    41          return ESCAPE_DCT[match.group(0)]
    42      return u'"' + ESCAPE.sub(replace, s) + u'"'
    43  
    44  
    45  def py_encode_basestring_ascii(s):
    46      """Return an ASCII-only JSON representation of a Python string
    47  
    48      """
    49      if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    50          s = s.decode('utf-8')
    51      def replace(match):
    52          s = match.group(0)
    53          try:
    54              return ESCAPE_DCT[s]
    55          except KeyError:
    56              n = ord(s)
    57              if n < 0x10000:
    58                  #return '\\u{0:04x}'.format(n)
    59                  return '\\u%04x' % (n,)
    60              else:
    61                  # surrogate pair
    62                  n -= 0x10000
    63                  s1 = 0xd800 | ((n >> 10) & 0x3ff)
    64                  s2 = 0xdc00 | (n & 0x3ff)
    65                  #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    66                  return '\\u%04x\\u%04x' % (s1, s2)
    67      return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    68  
    69  
    70  encode_basestring_ascii = (
    71      c_encode_basestring_ascii or py_encode_basestring_ascii)
    72  
    73  class JSONEncoder(object):
    74      """Extensible JSON <http://json.org> encoder for Python data structures.
    75  
    76      Supports the following objects and types by default:
    77  
    78      +-------------------+---------------+
    79      | Python            | JSON          |
    80      +===================+===============+
    81      | dict              | object        |
    82      +-------------------+---------------+
    83      | list, tuple       | array         |
    84      +-------------------+---------------+
    85      | str, unicode      | string        |
    86      +-------------------+---------------+
    87      | int, long, float  | number        |
    88      +-------------------+---------------+
    89      | True              | true          |
    90      +-------------------+---------------+
    91      | False             | false         |
    92      +-------------------+---------------+
    93      | None              | null          |
    94      +-------------------+---------------+
    95  
    96      To extend this to recognize other objects, subclass and implement a
    97      ``.default()`` method with another method that returns a serializable
    98      object for ``o`` if possible, otherwise it should call the superclass
    99      implementation (to raise ``TypeError``).
   100  
   101      """
   102      item_separator = ', '
   103      key_separator = ': '
   104      def __init__(self, skipkeys=False, ensure_ascii=True,
   105              check_circular=True, allow_nan=True, sort_keys=False,
   106              indent=None, separators=None, encoding='utf-8', default=None,
   107              use_decimal=False):
   108          """Constructor for JSONEncoder, with sensible defaults.
   109  
   110          If skipkeys is false, then it is a TypeError to attempt
   111          encoding of keys that are not str, int, long, float or None.  If
   112          skipkeys is True, such items are simply skipped.
   113  
   114          If ensure_ascii is true, the output is guaranteed to be str
   115          objects with all incoming unicode characters escaped.  If
   116          ensure_ascii is false, the output will be unicode object.
   117  
   118          If check_circular is true, then lists, dicts, and custom encoded
   119          objects will be checked for circular references during encoding to
   120          prevent an infinite recursion (which would cause an OverflowError).
   121          Otherwise, no such check takes place.
   122  
   123          If allow_nan is true, then NaN, Infinity, and -Infinity will be
   124          encoded as such.  This behavior is not JSON specification compliant,
   125          but is consistent with most JavaScript based encoders and decoders.
   126          Otherwise, it will be a ValueError to encode such floats.
   127  
   128          If sort_keys is true, then the output of dictionaries will be
   129          sorted by key; this is useful for regression tests to ensure
   130          that JSON serializations can be compared on a day-to-day basis.
   131  
   132          If indent is a string, then JSON array elements and object members
   133          will be pretty-printed with a newline followed by that string repeated
   134          for each level of nesting. ``None`` (the default) selects the most compact
   135          representation without any newlines. For backwards compatibility with
   136          versions of simplejson earlier than 2.1.0, an integer is also accepted
   137          and is converted to a string with that many spaces.
   138  
   139          If specified, separators should be a (item_separator, key_separator)
   140          tuple.  The default is (', ', ': ').  To get the most compact JSON
   141          representation you should specify (',', ':') to eliminate whitespace.
   142  
   143          If specified, default is a function that gets called for objects
   144          that can't otherwise be serialized.  It should return a JSON encodable
   145          version of the object or raise a ``TypeError``.
   146  
   147          If encoding is not None, then all input strings will be
   148          transformed into unicode using that encoding prior to JSON-encoding.
   149          The default is UTF-8.
   150          
   151          If use_decimal is true (not the default), ``decimal.Decimal`` will
   152          be supported directly by the encoder. For the inverse, decode JSON
   153          with ``parse_float=decimal.Decimal``.
   154  
   155          """
   156  
   157          self.skipkeys = skipkeys
   158          self.ensure_ascii = ensure_ascii
   159          self.check_circular = check_circular
   160          self.allow_nan = allow_nan
   161          self.sort_keys = sort_keys
   162          self.use_decimal = use_decimal
   163          if isinstance(indent, (int, long)):
   164              indent = ' ' * indent
   165          self.indent = indent
   166          if separators is not None:
   167              self.item_separator, self.key_separator = separators
   168          if default is not None:
   169              self.default = default
   170          self.encoding = encoding
   171  
   172      def default(self, o):
   173          """Implement this method in a subclass such that it returns
   174          a serializable object for ``o``, or calls the base implementation
   175          (to raise a ``TypeError``).
   176  
   177          For example, to support arbitrary iterators, you could
   178          implement default like this::
   179  
   180              def default(self, o):
   181                  try:
   182                      iterable = iter(o)
   183                  except TypeError:
   184                      pass
   185                  else:
   186                      return list(iterable)
   187                  return JSONEncoder.default(self, o)
   188  
   189          """
   190          raise TypeError(repr(o) + " is not JSON serializable")
   191  
   192      def encode(self, o):
   193          """Return a JSON string representation of a Python data structure.
   194  
   195          >>> from simplejson import JSONEncoder
   196          >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
   197          '{"foo": ["bar", "baz"]}'
   198  
   199          """
   200          # This is for extremely simple cases and benchmarks.
   201          if isinstance(o, basestring):
   202              if isinstance(o, str):
   203                  _encoding = self.encoding
   204                  if (_encoding is not None
   205                          and not (_encoding == 'utf-8')):
   206                      o = o.decode(_encoding)
   207              if self.ensure_ascii:
   208                  return encode_basestring_ascii(o)
   209              else:
   210                  return encode_basestring(o)
   211          # This doesn't pass the iterator directly to ''.join() because the
   212          # exceptions aren't as detailed.  The list call should be roughly
   213          # equivalent to the PySequence_Fast that ''.join() would do.
   214          chunks = self.iterencode(o, _one_shot=True)
   215          if not isinstance(chunks, (list, tuple)):
   216              chunks = list(chunks)
   217          if self.ensure_ascii:
   218              return ''.join(chunks)
   219          else:
   220              return u''.join(chunks)
   221  
   222      def iterencode(self, o, _one_shot=False):
   223          """Encode the given object and yield each string
   224          representation as available.
   225  
   226          For example::
   227  
   228              for chunk in JSONEncoder().iterencode(bigobject):
   229                  mysocket.write(chunk)
   230  
   231          """
   232          if self.check_circular:
   233              markers = {}
   234          else:
   235              markers = None
   236          if self.ensure_ascii:
   237              _encoder = encode_basestring_ascii
   238          else:
   239              _encoder = encode_basestring
   240          if self.encoding != 'utf-8':
   241              def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
   242                  if isinstance(o, str):
   243                      o = o.decode(_encoding)
   244                  return _orig_encoder(o)
   245  
   246          def floatstr(o, allow_nan=self.allow_nan,
   247                  _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
   248              # Check for specials. Note that this type of test is processor
   249              # and/or platform-specific, so do tests which don't depend on
   250              # the internals.
   251  
   252              if o != o:
   253                  text = 'NaN'
   254              elif o == _inf:
   255                  text = 'Infinity'
   256              elif o == _neginf:
   257                  text = '-Infinity'
   258              else:
   259                  return _repr(o)
   260  
   261              if not allow_nan:
   262                  raise ValueError(
   263                      "Out of range float values are not JSON compliant: " +
   264                      repr(o))
   265  
   266              return text
   267  
   268  
   269          key_memo = {}
   270          if (_one_shot and c_make_encoder is not None
   271                  and not self.indent and not self.sort_keys):
   272              _iterencode = c_make_encoder(
   273                  markers, self.default, _encoder, self.indent,
   274                  self.key_separator, self.item_separator, self.sort_keys,
   275                  self.skipkeys, self.allow_nan, key_memo, self.use_decimal)
   276          else:
   277              _iterencode = _make_iterencode(
   278                  markers, self.default, _encoder, self.indent, floatstr,
   279                  self.key_separator, self.item_separator, self.sort_keys,
   280                  self.skipkeys, _one_shot, self.use_decimal)
   281          try:
   282              return _iterencode(o, 0)
   283          finally:
   284              key_memo.clear()
   285  
   286  
   287  class JSONEncoderForHTML(JSONEncoder):
   288      """An encoder that produces JSON safe to embed in HTML.
   289  
   290      To embed JSON content in, say, a script tag on a web page, the
   291      characters &, < and > should be escaped. They cannot be escaped
   292      with the usual entities (e.g. &amp;) because they are not expanded
   293      within <script> tags.
   294      """
   295  
   296      def encode(self, o):
   297          # Override JSONEncoder.encode because it has hacks for
   298          # performance that make things more complicated.
   299          chunks = self.iterencode(o, True)
   300          if self.ensure_ascii:
   301              return ''.join(chunks)
   302          else:
   303              return u''.join(chunks)
   304  
   305      def iterencode(self, o, _one_shot=False):
   306          chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
   307          for chunk in chunks:
   308              chunk = chunk.replace('&', '\\u0026')
   309              chunk = chunk.replace('<', '\\u003c')
   310              chunk = chunk.replace('>', '\\u003e')
   311              yield chunk
   312  
   313  
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function, closed over the given encoder settings.

    markers         -- dict used to detect circular references (keyed by
                       ``id()`` of the object being encoded), or None to
                       disable the check
    _default        -- callable invoked for objects of unsupported type; its
                       return value is encoded in place of the object
    _encoder        -- callable turning a string into a JSON string literal
    _indent         -- string repeated once per nesting level, or None for
                       compact output
    _floatstr       -- callable turning a float into its JSON text
    _key_separator  -- text emitted between a key and its value
    _item_separator -- text emitted between items
    _sort_keys      -- if true, dict items are emitted sorted by key
    _skipkeys       -- if true, dict keys of unsupported type are skipped
                       instead of raising TypeError
    _one_shot       -- accepted but not referenced in this function body
    _use_decimal    -- if true, ``Decimal`` values are emitted via ``str()``

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK comment in the signature) and are not meant to be passed.
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON text chunks for the list or tuple ``lst``."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Register this container; meeting it again while it is still
            # being encoded means the structure refers to itself.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that has to precede the next element: the
        # opening bracket (plus indentation) for the first element, the
        # separator for all later ones.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            # The identity tests for True/False come before the int/long
            # test: bool is a subclass of int, so they would otherwise be
            # emitted through str().
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Nested container or custom object: emit the pending
                # prefix on its own, then delegate to a sub-generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes back on the parent's indentation level.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON text chunks for the dict ``dct``."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Same circular-reference bookkeeping as in _iterencode_list.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sort on the original (not yet stringified) keys.
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Coerce the key to a string; string keys are used as they are.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                # Unsupported key type: drop the whole item silently.
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            # Value dispatch mirrors the element dispatch in
            # _iterencode_list, minus the ``buf`` prefix handling.
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing brace goes back on the parent's indentation level.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text chunks for an arbitrary object ``o``.

        Objects of unsupported type are passed to ``_default`` and the
        result is encoded recursively.
        """
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        elif _use_decimal and isinstance(o, Decimal):
            yield str(o)
        else:
            if markers is not None:
                # Guard against a _default() that keeps producing objects
                # which lead back to ``o``.
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode