1 """Implementation of JSONEncoder
3 from __future__ import absolute_import
5 from operator import itemgetter
6 from decimal import Decimal
7 from .compat import u, unichr, binary_type, string_types, integer_types, PY3
8 def _import_speedups():
10 from . import _speedups
11 return _speedups.encode_basestring_ascii, _speedups.make_encoder
14 c_encode_basestring_ascii, c_make_encoder = _import_speedups()
16 from simplejson.decoder import PosInf
18 #ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
19 # This is required because u() will mangle the string and ur'' isn't valid
21 ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
22 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
23 HAS_UTF8 = re.compile(r'[\x80-\xff]')
34 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
35 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
36 for i in [0x2028, 0x2029]:
37 ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
41 def encode_basestring(s, _PY3=PY3, _q=u('"')):
42 """Return a JSON representation of a Python string
46 if isinstance(s, binary_type):
49 if isinstance(s, str) and HAS_UTF8.search(s) is not None:
52 return ESCAPE_DCT[match.group(0)]
53 return _q + ESCAPE.sub(replace, s) + _q
56 def py_encode_basestring_ascii(s, _PY3=PY3):
57 """Return an ASCII-only JSON representation of a Python string
61 if isinstance(s, binary_type):
64 if isinstance(s, str) and HAS_UTF8.search(s) is not None:
73 #return '\\u{0:04x}'.format(n)
74 return '\\u%04x' % (n,)
78 s1 = 0xd800 | ((n >> 10) & 0x3ff)
79 s2 = 0xdc00 | (n & 0x3ff)
80 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
81 return '\\u%04x\\u%04x' % (s1, s2)
82 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
85 encode_basestring_ascii = (
86 c_encode_basestring_ascii or py_encode_basestring_ascii)
88 class JSONEncoder(object):
89 """Extensible JSON <http://json.org> encoder for Python data structures.
91 Supports the following objects and types by default:
93 +-------------------+---------------+
95 +===================+===============+
96 | dict, namedtuple | object |
97 +-------------------+---------------+
98 | list, tuple | array |
99 +-------------------+---------------+
100 | str, unicode | string |
101 +-------------------+---------------+
102 | int, long, float | number |
103 +-------------------+---------------+
105 +-------------------+---------------+
107 +-------------------+---------------+
109 +-------------------+---------------+
111 To extend this to recognize other objects, subclass and override the
112 ``.default()`` method so that it returns a serializable object for
113 ``o`` if possible; otherwise it should call the superclass
114 implementation (to raise ``TypeError``).
117 item_separator = ', '
119 def __init__(self, skipkeys=False, ensure_ascii=True,
120 check_circular=True, allow_nan=True, sort_keys=False,
121 indent=None, separators=None, encoding='utf-8', default=None,
122 use_decimal=True, namedtuple_as_object=True,
123 tuple_as_array=True, bigint_as_string=False,
124 item_sort_key=None, for_json=False, ignore_nan=False):
125 """Constructor for JSONEncoder, with sensible defaults.
127 If skipkeys is false, then it is a TypeError to attempt
128 encoding of keys that are not str, int, long, float or None. If
129 skipkeys is True, such items are simply skipped.
131 If ensure_ascii is true, the output is guaranteed to be str
132 objects with all incoming non-ASCII characters escaped. If
133 ensure_ascii is false, the output will be a unicode object.
135 If check_circular is true, then lists, dicts, and custom encoded
136 objects will be checked for circular references during encoding to
137 prevent an infinite recursion (which would cause a RuntimeError, or
138 worse). Otherwise, no such check takes place.
140 If allow_nan is true, then NaN, Infinity, and -Infinity will be
141 encoded as such. This behavior is not JSON specification compliant,
142 but is consistent with most JavaScript based encoders and decoders.
143 Otherwise, it will be a ValueError to encode such floats.
145 If sort_keys is true, then the output of dictionaries will be
146 sorted by key; this is useful for regression tests to ensure
147 that JSON serializations can be compared on a day-to-day basis.
149 If indent is a string, then JSON array elements and object members
150 will be pretty-printed with a newline followed by that string repeated
151 for each level of nesting. ``None`` (the default) selects the most compact
152 representation without any newlines. For backwards compatibility with
153 versions of simplejson earlier than 2.1.0, an integer is also accepted
154 and is converted to a string with that many spaces.
156 If specified, separators should be an (item_separator, key_separator)
157 tuple. The default is (', ', ': ') if *indent* is ``None`` and
158 (',', ': ') otherwise. To get the most compact JSON representation,
159 you should specify (',', ':') to eliminate whitespace.
161 If specified, default is a function that gets called for objects
162 that can't otherwise be serialized. It should return a JSON encodable
163 version of the object or raise a ``TypeError``.
165 If encoding is not None, then all input strings will be
166 transformed into unicode using that encoding prior to JSON-encoding.
167 The default is UTF-8.
169 If use_decimal is true (the default), ``decimal.Decimal`` will
170 be supported directly by the encoder. For the inverse, decode JSON
171 with ``parse_float=decimal.Decimal``.
173 If namedtuple_as_object is true (the default), objects with
174 ``_asdict()`` methods will be encoded as JSON objects.
176 If tuple_as_array is true (the default), tuple (and subclasses) will
177 be encoded as JSON arrays.
179 If bigint_as_string is true (not the default), ints 2**53 and higher
180 or -2**53 and lower will be encoded as strings. This is to avoid the
181 rounding that happens in JavaScript otherwise.
183 If specified, item_sort_key is a callable used to sort the items in
184 each dictionary. This is useful if you want to sort items other than
185 in alphabetical order by key.
187 If for_json is true (not the default), objects with a ``for_json()``
188 method will use the return value of that method for encoding as JSON
189 instead of the object.
191 If *ignore_nan* is true (default: ``False``), then out of range
192 :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
193 as ``null`` in compliance with the ECMA-262 specification. If true,
194 this will override *allow_nan*.
198 self.skipkeys = skipkeys
199 self.ensure_ascii = ensure_ascii
200 self.check_circular = check_circular
201 self.allow_nan = allow_nan
202 self.sort_keys = sort_keys
203 self.use_decimal = use_decimal
204 self.namedtuple_as_object = namedtuple_as_object
205 self.tuple_as_array = tuple_as_array
206 self.bigint_as_string = bigint_as_string
207 self.item_sort_key = item_sort_key
208 self.for_json = for_json
209 self.ignore_nan = ignore_nan
210 if indent is not None and not isinstance(indent, string_types):
211 indent = indent * ' '
213 if separators is not None:
214 self.item_separator, self.key_separator = separators
215 elif indent is not None:
216 self.item_separator = ','
217 if default is not None:
218 self.default = default
219 self.encoding = encoding
221 def default(self, o):
222 """Implement this method in a subclass such that it returns
223 a serializable object for ``o``, or calls the base implementation
224 (to raise a ``TypeError``).
226 For example, to support arbitrary iterators, you could
227 implement default like this::
229 def default(self, o):
235 return list(iterable)
236 return JSONEncoder.default(self, o)
239 raise TypeError(repr(o) + " is not JSON serializable")
242 """Return a JSON string representation of a Python data structure.
244 >>> from simplejson import JSONEncoder
245 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
246 '{"foo": ["bar", "baz"]}'
249 # This is for extremely simple cases and benchmarks.
250 if isinstance(o, binary_type):
251 _encoding = self.encoding
252 if (_encoding is not None and not (_encoding == 'utf-8')):
253 o = o.decode(_encoding)
254 if isinstance(o, string_types):
255 if self.ensure_ascii:
256 return encode_basestring_ascii(o)
258 return encode_basestring(o)
259 # This doesn't pass the iterator directly to ''.join() because the
260 # exceptions aren't as detailed. The list call should be roughly
261 # equivalent to the PySequence_Fast that ''.join() would do.
262 chunks = self.iterencode(o, _one_shot=True)
263 if not isinstance(chunks, (list, tuple)):
264 chunks = list(chunks)
265 if self.ensure_ascii:
266 return ''.join(chunks)
268 return u''.join(chunks)
270 def iterencode(self, o, _one_shot=False):
271 """Encode the given object and yield each string
272 representation as available.
276 for chunk in JSONEncoder().iterencode(bigobject):
277 mysocket.write(chunk)
280 if self.check_circular:
284 if self.ensure_ascii:
285 _encoder = encode_basestring_ascii
287 _encoder = encode_basestring
288 if self.encoding != 'utf-8':
289 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
290 if isinstance(o, binary_type):
291 o = o.decode(_encoding)
292 return _orig_encoder(o)
294 def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
295 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
296 # Check for specials. Note that this type of test is processor
297 # and/or platform-specific, so do tests which don't depend on
313 "Out of range float values are not JSON compliant: " +
320 if (_one_shot and c_make_encoder is not None
321 and self.indent is None):
322 _iterencode = c_make_encoder(
323 markers, self.default, _encoder, self.indent,
324 self.key_separator, self.item_separator, self.sort_keys,
325 self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
326 self.namedtuple_as_object, self.tuple_as_array,
327 self.bigint_as_string, self.item_sort_key,
328 self.encoding, self.for_json, self.ignore_nan,
331 _iterencode = _make_iterencode(
332 markers, self.default, _encoder, self.indent, floatstr,
333 self.key_separator, self.item_separator, self.sort_keys,
334 self.skipkeys, _one_shot, self.use_decimal,
335 self.namedtuple_as_object, self.tuple_as_array,
336 self.bigint_as_string, self.item_sort_key,
337 self.encoding, self.for_json,
340 return _iterencode(o, 0)
345 class JSONEncoderForHTML(JSONEncoder):
346 """An encoder that produces JSON safe to embed in HTML.
348 To embed JSON content in, say, a script tag on a web page, the
349 characters &, < and > should be escaped. They cannot be escaped
350 with the usual entities (e.g. &amp;) because they are not expanded
351 within <script> tags.
355 # Override JSONEncoder.encode because it has hacks for
356 # performance that make things more complicated.
357 chunks = self.iterencode(o, True)
358 if self.ensure_ascii:
359 return ''.join(chunks)
361 return u''.join(chunks)
363 def iterencode(self, o, _one_shot=False):
364 chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
366 chunk = chunk.replace('&', '\\u0026')
367 chunk = chunk.replace('<', '\\u003c')
368 chunk = chunk.replace('>', '\\u003e')
372 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
373 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
374 _use_decimal, _namedtuple_as_object, _tuple_as_array,
375 _bigint_as_string, _item_sort_key, _encoding, _for_json,
376 ## HACK: hand-optimized bytecode; turn globals into locals
378 ValueError=ValueError,
379 string_types=string_types,
384 integer_types=integer_types,
385 isinstance=isinstance,
390 if _item_sort_key and not callable(_item_sort_key):
391 raise TypeError("item_sort_key must be None or callable")
392 elif _sort_keys and not _item_sort_key:
393 _item_sort_key = itemgetter(0)
395 def _iterencode_list(lst, _current_indent_level):
399 if markers is not None:
401 if markerid in markers:
402 raise ValueError("Circular reference detected")
403 markers[markerid] = lst
405 if _indent is not None:
406 _current_indent_level += 1
407 newline_indent = '\n' + (_indent * _current_indent_level)
408 separator = _item_separator + newline_indent
409 buf += newline_indent
411 newline_indent = None
412 separator = _item_separator
419 if (isinstance(value, string_types) or
420 (_PY3 and isinstance(value, binary_type))):
421 yield buf + _encoder(value)
428 elif isinstance(value, integer_types):
429 yield ((buf + str(value))
430 if (not _bigint_as_string or
431 (-1 << 53) < value < (1 << 53))
432 else (buf + '"' + str(value) + '"'))
433 elif isinstance(value, float):
434 yield buf + _floatstr(value)
435 elif _use_decimal and isinstance(value, Decimal):
436 yield buf + str(value)
439 for_json = _for_json and getattr(value, 'for_json', None)
440 if for_json and callable(for_json):
441 chunks = _iterencode(for_json(), _current_indent_level)
442 elif isinstance(value, list):
443 chunks = _iterencode_list(value, _current_indent_level)
445 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
446 if _asdict and callable(_asdict):
447 chunks = _iterencode_dict(_asdict(),
448 _current_indent_level)
449 elif _tuple_as_array and isinstance(value, tuple):
450 chunks = _iterencode_list(value, _current_indent_level)
451 elif isinstance(value, dict):
452 chunks = _iterencode_dict(value, _current_indent_level)
454 chunks = _iterencode(value, _current_indent_level)
457 if newline_indent is not None:
458 _current_indent_level -= 1
459 yield '\n' + (_indent * _current_indent_level)
461 if markers is not None:
462 del markers[markerid]
464 def _stringify_key(key):
465 if isinstance(key, string_types): # pragma: no cover
467 elif isinstance(key, binary_type):
468 key = key.decode(_encoding)
469 elif isinstance(key, float):
477 elif isinstance(key, integer_types):
479 elif _use_decimal and isinstance(key, Decimal):
484 raise TypeError("key " + repr(key) + " is not a string")
487 def _iterencode_dict(dct, _current_indent_level):
491 if markers is not None:
493 if markerid in markers:
494 raise ValueError("Circular reference detected")
495 markers[markerid] = dct
497 if _indent is not None:
498 _current_indent_level += 1
499 newline_indent = '\n' + (_indent * _current_indent_level)
500 item_separator = _item_separator + newline_indent
503 newline_indent = None
504 item_separator = _item_separator
507 iteritems = dct.items()
509 iteritems = dct.iteritems()
512 for k, v in dct.items():
513 if not isinstance(k, string_types):
514 k = _stringify_key(k)
518 items.sort(key=_item_sort_key)
521 for key, value in items:
522 if not (_item_sort_key or isinstance(key, string_types)):
523 key = _stringify_key(key)
525 # _skipkeys must be True
533 if (isinstance(value, string_types) or
534 (_PY3 and isinstance(value, binary_type))):
535 yield _encoder(value)
542 elif isinstance(value, integer_types):
544 if (not _bigint_as_string or
545 (-1 << 53) < value < (1 << 53))
546 else ('"' + str(value) + '"'))
547 elif isinstance(value, float):
548 yield _floatstr(value)
549 elif _use_decimal and isinstance(value, Decimal):
552 for_json = _for_json and getattr(value, 'for_json', None)
553 if for_json and callable(for_json):
554 chunks = _iterencode(for_json(), _current_indent_level)
555 elif isinstance(value, list):
556 chunks = _iterencode_list(value, _current_indent_level)
558 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
559 if _asdict and callable(_asdict):
560 chunks = _iterencode_dict(_asdict(),
561 _current_indent_level)
562 elif _tuple_as_array and isinstance(value, tuple):
563 chunks = _iterencode_list(value, _current_indent_level)
564 elif isinstance(value, dict):
565 chunks = _iterencode_dict(value, _current_indent_level)
567 chunks = _iterencode(value, _current_indent_level)
570 if newline_indent is not None:
571 _current_indent_level -= 1
572 yield '\n' + (_indent * _current_indent_level)
574 if markers is not None:
575 del markers[markerid]
577 def _iterencode(o, _current_indent_level):
578 if (isinstance(o, string_types) or
579 (_PY3 and isinstance(o, binary_type))):
587 elif isinstance(o, integer_types):
589 if (not _bigint_as_string or
590 (-1 << 53) < o < (1 << 53))
591 else ('"' + str(o) + '"'))
592 elif isinstance(o, float):
595 for_json = _for_json and getattr(o, 'for_json', None)
596 if for_json and callable(for_json):
597 for chunk in _iterencode(for_json(), _current_indent_level):
599 elif isinstance(o, list):
600 for chunk in _iterencode_list(o, _current_indent_level):
603 _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
604 if _asdict and callable(_asdict):
605 for chunk in _iterencode_dict(_asdict(),
606 _current_indent_level):
608 elif (_tuple_as_array and isinstance(o, tuple)):
609 for chunk in _iterencode_list(o, _current_indent_level):
611 elif isinstance(o, dict):
612 for chunk in _iterencode_dict(o, _current_indent_level):
614 elif _use_decimal and isinstance(o, Decimal):
617 if markers is not None:
619 if markerid in markers:
620 raise ValueError("Circular reference detected")
621 markers[markerid] = o
623 for chunk in _iterencode(o, _current_indent_level):
625 if markers is not None:
626 del markers[markerid]