1 """Implementation of JSONDecoder
3 from __future__ import absolute_import
7 from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
8 from .scanner import make_scanner, JSONDecodeError
10 def _import_c_scanstring():
12 from ._speedups import scanstring
16 c_scanstring = _import_c_scanstring()
18 # NOTE (3.1.0): JSONDecodeError may still be imported from this module for
19 # compatibility, but it was never in the __all__
20 __all__ = ['JSONDecoder']
22 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
24 def _floatconstants():
25 _BYTES = fromhex('7FF80000000000007FF0000000000000')
26 # The struct module in Python 2.4 would get frexp() out of range here
27 # when an endian is specified in the format string. Fixed in Python 2.5+
28 if sys.byteorder != 'big':
29 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
30 nan, inf = struct.unpack('dd', _BYTES)
33 NaN, PosInf, NegInf = _floatconstants()
41 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
43 '"': u('"'), '\\': u('\u005c'), '/': u('/'),
44 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
47 DEFAULT_ENCODING = "utf-8"
49 def py_scanstring(s, end, encoding=None, strict=True,
50 _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
51 _PY3=PY3, _maxunicode=sys.maxunicode):
52 """Scan the string s for a JSON string. End is the index of the
53 character in s after the quote that started the JSON string.
54 Unescapes all valid JSON string escape sequences and raises ValueError
55 on attempt to decode an invalid string. If strict is False then literal
56 control characters are allowed in the string.
58 Returns a tuple of the decoded string and the index of the character in s
59 after the end quote."""
61 encoding = DEFAULT_ENCODING
63 _append = chunks.append
68 raise JSONDecodeError(
69 "Unterminated string starting at", s, begin)
71 content, terminator = chunk.groups()
72 # Content contains zero or more unescaped string characters
74 if not _PY3 and not isinstance(content, text_type):
75 content = text_type(content, encoding)
77 # Terminator is the end of string, a literal control character,
78 # or a backslash denoting that an escape sequence follows
81 elif terminator != '\\':
83 msg = "Invalid control character %r at"
84 raise JSONDecodeError(msg, s, end)
91 raise JSONDecodeError(
92 "Unterminated string starting at", s, begin)
93 # If not a unicode escape sequence, must be in the lookup table
98 msg = "Invalid \\X escape sequence %r"
99 raise JSONDecodeError(msg, s, end)
102 # Unicode escape sequence
103 msg = "Invalid \\uXXXX escape sequence"
104 esc = s[end + 1:end + 5]
106 if len(esc) != 4 or escX == 'x' or escX == 'X':
107 raise JSONDecodeError(msg, s, end - 1)
111 raise JSONDecodeError(msg, s, end - 1)
113 # Check for surrogate pair on UCS-4 systems
114 # Note that this will join high/low surrogate pairs
115 # but will also pass unpaired surrogates through
116 if (_maxunicode > 65535 and
117 uni & 0xfc00 == 0xd800 and
118 s[end:end + 2] == '\\u'):
119 esc2 = s[end + 2:end + 6]
121 if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
125 raise JSONDecodeError(msg, s, end)
126 if uni2 & 0xfc00 == 0xdc00:
127 uni = 0x10000 + (((uni - 0xd800) << 10) |
131 # Append the unescaped character
133 return _join(chunks), end
136 # Use speedup if available
137 scanstring = c_scanstring or py_scanstring
139 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
140 WHITESPACE_STR = ' \t\n\r'
142 def JSONObject(state, encoding, strict, scan_once, object_hook,
143 object_pairs_hook, memo=None,
144 _w=WHITESPACE.match, _ws=WHITESPACE_STR):
146 # Backwards compatibility
149 memo_get = memo.setdefault
151 # Use a slice to prevent IndexError from being raised; the following
152 # check will raise a more specific ValueError if the string is empty.
153 nextchar = s[end:end + 1]
154 # Normally we expect nextchar == '"'
157 end = _w(s, end).end()
158 nextchar = s[end:end + 1]
159 # Trivial empty object
161 if object_pairs_hook is not None:
162 result = object_pairs_hook(pairs)
163 return result, end + 1
165 if object_hook is not None:
166 pairs = object_hook(pairs)
167 return pairs, end + 1
168 elif nextchar != '"':
169 raise JSONDecodeError(
170 "Expecting property name enclosed in double quotes",
174 key, end = scanstring(s, end, encoding, strict)
175 key = memo_get(key, key)
177 # To skip some function call overhead we optimize the fast paths where
178 # the JSON key separator is ": " or just ":".
179 if s[end:end + 1] != ':':
180 end = _w(s, end).end()
181 if s[end:end + 1] != ':':
182 raise JSONDecodeError("Expecting ':' delimiter", s, end)
190 end = _w(s, end + 1).end()
194 value, end = scan_once(s, end)
195 pairs.append((key, value))
200 end = _w(s, end + 1).end()
208 elif nextchar != ',':
209 raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
217 end = _w(s, end + 1).end()
224 raise JSONDecodeError(
225 "Expecting property name enclosed in double quotes",
228 if object_pairs_hook is not None:
229 result = object_pairs_hook(pairs)
232 if object_hook is not None:
233 pairs = object_hook(pairs)
236 def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
239 nextchar = s[end:end + 1]
241 end = _w(s, end + 1).end()
242 nextchar = s[end:end + 1]
243 # Look-ahead for trivial empty array
245 return values, end + 1
247 raise JSONDecodeError("Expecting value or ']'", s, end)
248 _append = values.append
250 value, end = scan_once(s, end)
252 nextchar = s[end:end + 1]
254 end = _w(s, end + 1).end()
255 nextchar = s[end:end + 1]
259 elif nextchar != ',':
260 raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
266 end = _w(s, end + 1).end()
272 class JSONDecoder(object):
273 """Simple JSON <http://json.org> decoder
275 Performs the following translations in decoding by default:
277 +---------------+-------------------+
279 +===============+===================+
281 +---------------+-------------------+
283 +---------------+-------------------+
284 | string | str, unicode |
285 +---------------+-------------------+
286 | number (int) | int, long |
287 +---------------+-------------------+
288 | number (real) | float |
289 +---------------+-------------------+
291 +---------------+-------------------+
293 +---------------+-------------------+
295 +---------------+-------------------+
297 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
298 their corresponding ``float`` values, which is outside the JSON spec.
302 def __init__(self, encoding=None, object_hook=None, parse_float=None,
303 parse_int=None, parse_constant=None, strict=True,
304 object_pairs_hook=None):
306 *encoding* determines the encoding used to interpret any
307 :class:`str` objects decoded by this instance (``'utf-8'`` by
308 default). It has no effect when decoding :class:`unicode` objects.
310 Note that currently only encodings that are a superset of ASCII work;
311 strings of other encodings should be passed in as :class:`unicode`.
313 *object_hook*, if specified, will be called with the result of every
314 JSON object decoded and its return value will be used in place of the
315 given :class:`dict`. This can be used to provide custom
316 deserializations (e.g. to support JSON-RPC class hinting).
318 *object_pairs_hook* is an optional function that will be called with
319 the result of any object literal decode with an ordered list of pairs.
320 The return value of *object_pairs_hook* will be used instead of the
321 :class:`dict`. This feature can be used to implement custom decoders
322 that rely on the order that the key and value pairs are decoded (for
323 example, :func:`collections.OrderedDict` will remember the order of
324 insertion). If *object_hook* is also defined, the *object_pairs_hook*
327 *parse_float*, if specified, will be called with the string of every
328 JSON float to be decoded. By default, this is equivalent to
329 ``float(num_str)``. This can be used to use another datatype or parser
330 for JSON floats (e.g. :class:`decimal.Decimal`).
332 *parse_int*, if specified, will be called with the string of every
333 JSON int to be decoded. By default, this is equivalent to
334 ``int(num_str)``. This can be used to use another datatype or parser
335 for JSON integers (e.g. :class:`float`).
337 *parse_constant*, if specified, will be called with one of the
338 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
339 can be used to raise an exception if invalid JSON numbers are
342 *strict* controls the parser's behavior when it encounters an
343 invalid control character in a string. The default setting of
344 ``True`` means that unescaped control characters are parse errors; if
345 ``False``, control characters will be allowed in strings.
349 encoding = DEFAULT_ENCODING
350 self.encoding = encoding
351 self.object_hook = object_hook
352 self.object_pairs_hook = object_pairs_hook
353 self.parse_float = parse_float or float
354 self.parse_int = parse_int or int
355 self.parse_constant = parse_constant or _CONSTANTS.__getitem__
357 self.parse_object = JSONObject
358 self.parse_array = JSONArray
359 self.parse_string = scanstring
361 self.scan_once = make_scanner(self)
363 def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
364 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
365 instance containing a JSON document)
368 if _PY3 and isinstance(s, binary_type):
369 s = s.decode(self.encoding)
370 obj, end = self.raw_decode(s)
371 end = _w(s, end).end()
373 raise JSONDecodeError("Extra data", s, end, len(s))
376 def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
377 """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
378 beginning with a JSON document) and return a 2-tuple of the Python
379 representation and the index in ``s`` where the document ended.
380 Optionally, ``idx`` can be used to specify an offset in ``s`` where
381 the JSON document begins.
383 This can be used to decode a JSON document from a string that may
384 have extraneous data at the end.
387 if _PY3 and not isinstance(s, text_type):
388 raise TypeError("Input string must be text, not bytes")
389 return self.scan_once(s, idx=_w(s, idx).end())