1 /* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
3 #include "structmember.h"
5 #if PY_MAJOR_VERSION >= 3
6 #define PyInt_FromSsize_t PyLong_FromSsize_t
7 #define PyInt_AsSsize_t PyLong_AsSsize_t
8 #define PyString_Check PyBytes_Check
9 #define PyString_GET_SIZE PyBytes_GET_SIZE
10 #define PyString_AS_STRING PyBytes_AS_STRING
11 #define PyString_FromStringAndSize PyBytes_FromStringAndSize
12 #define PyInt_Check(obj) 0
13 #define JSON_UNICHR Py_UCS4
14 #define JSON_InternFromString PyUnicode_InternFromString
15 #define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE
16 #define JSON_ASCII_Check PyUnicode_Check
17 #define JSON_ASCII_AS_STRING PyUnicode_AsUTF8
18 #define PyInt_Type PyLong_Type
19 #define PyInt_FromString PyLong_FromString
21 #define PY3_UNUSED UNUSED
22 #define JSON_NewEmptyUnicode() PyUnicode_New(0, 127)
23 #else /* PY_MAJOR_VERSION >= 3 */
24 #define PY2_UNUSED UNUSED
26 #define PyUnicode_READY(obj) 0
27 #define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
28 #define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
29 #define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
30 #define PyUnicode_GetLength PyUnicode_GET_SIZE
31 #define JSON_UNICHR Py_UNICODE
32 #define JSON_ASCII_Check PyString_Check
33 #define JSON_ASCII_AS_STRING PyString_AS_STRING
34 #define JSON_InternFromString PyString_InternFromString
35 #define JSON_Intern_GET_SIZE PyString_GET_SIZE
36 #define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0)
37 #endif /* PY_MAJOR_VERSION < 3 */
39 #if PY_VERSION_HEX < 0x02070000
40 #if !defined(PyOS_string_to_double)
41 #define PyOS_string_to_double json_PyOS_string_to_double
43 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
45 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
48 assert(endptr == NULL);
49 assert(overflow_exception == NULL);
50 PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
51 x = PyOS_ascii_atof(s);
56 #endif /* PY_VERSION_HEX < 0x02070000 */
58 #if PY_VERSION_HEX < 0x02060000
60 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
63 #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
65 #if !defined(PyVarObject_HEAD_INIT)
66 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
68 #endif /* PY_VERSION_HEX < 0x02060000 */
70 #if PY_VERSION_HEX < 0x02050000
71 #if !defined(PY_SSIZE_T_MIN)
72 typedef int Py_ssize_t;
73 #define PY_SSIZE_T_MAX INT_MAX
74 #define PY_SSIZE_T_MIN INT_MIN
75 #define PyInt_FromSsize_t PyInt_FromLong
76 #define PyInt_AsSsize_t PyInt_AsLong
78 #if !defined(Py_IS_FINITE)
79 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
81 #endif /* PY_VERSION_HEX < 0x02050000 */
84 #define UNUSED __attribute__((__unused__))
89 #define DEFAULT_ENCODING "utf-8"
91 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
92 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
93 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
94 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
96 #define JSON_ALLOW_NAN 1
97 #define JSON_IGNORE_NAN 2
99 static PyTypeObject PyScannerType;
100 static PyTypeObject PyEncoderType;
103 PyObject *large_strings; /* A list of previously accumulated large strings */
104 PyObject *small_strings; /* Pending small strings */
108 JSON_Accu_Init(JSON_Accu *acc);
110 JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
112 JSON_Accu_FinishAsList(JSON_Accu *acc);
114 JSON_Accu_Destroy(JSON_Accu *acc);
116 #define ERR_EXPECTING_VALUE "Expecting value"
117 #define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'"
118 #define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'"
119 #define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'"
120 #define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes"
121 #define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'"
122 #define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
123 #define ERR_STRING_UNTERMINATED "Unterminated string starting at"
124 #define ERR_STRING_CONTROL "Invalid control character %r at"
125 #define ERR_STRING_ESC1 "Invalid \\X escape sequence %r"
126 #define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence"
128 typedef struct _PyScannerObject {
132 PyObject *object_hook;
133 PyObject *pairs_hook;
134 PyObject *parse_float;
136 PyObject *parse_constant;
140 static PyMemberDef scanner_members[] = {
141 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
142 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
143 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
144 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
145 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
146 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
147 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
151 typedef struct _PyEncoderObject {
157 PyObject *key_separator;
158 PyObject *item_separator;
163 PyObject *skipkeys_bool;
166 /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
167 int allow_or_ignore_nan;
169 int namedtuple_as_object;
171 int bigint_as_string;
172 PyObject *item_sort_key;
173 PyObject *item_sort_kw;
177 static PyMemberDef encoder_members[] = {
178 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
179 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
180 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
181 {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoding"},
182 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
183 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
184 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
185 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
186 /* Python 2.5 does not support T_BOOl */
187 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
188 {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
189 {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
194 join_list_unicode(PyObject *lst);
196 JSON_ParseEncoding(PyObject *encoding);
198 JSON_UnicodeFromChar(JSON_UNICHR c);
200 maybe_quote_bigint(PyObject *encoded, PyObject *obj);
202 ascii_char_size(JSON_UNICHR c);
204 ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
206 ascii_escape_unicode(PyObject *pystr);
208 ascii_escape_str(PyObject *pystr);
210 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
211 #if PY_MAJOR_VERSION < 3
213 join_list_string(PyObject *lst);
215 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
217 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
219 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
222 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
224 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
226 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
228 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
230 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
232 scanner_dealloc(PyObject *self);
234 scanner_clear(PyObject *self);
236 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
238 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
240 encoder_dealloc(PyObject *self);
242 encoder_clear(PyObject *self);
244 encoder_stringify_key(PyEncoderObject *s, PyObject *key);
246 encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
248 encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
250 encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level);
252 _encoded_const(PyObject *obj);
254 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
256 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
258 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
260 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
262 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
264 _is_namedtuple(PyObject *obj);
266 _has_for_json_hook(PyObject *obj);
/* True when c is a printable ASCII character (' ' through '~') other than
 * backslash or double quote. Every use of the parameter is parenthesized so
 * that expression arguments (e.g. a ternary) expand with the intended
 * precedence. c is still evaluated more than once, so it must not have
 * side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
271 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
273 #define MIN_EXPANSION 6
276 JSON_Accu_Init(JSON_Accu *acc)
278 /* Lazily allocated */
279 acc->large_strings = NULL;
280 acc->small_strings = PyList_New(0);
281 if (acc->small_strings == NULL)
287 flush_accumulator(JSON_Accu *acc)
289 Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings);
293 if (acc->large_strings == NULL) {
294 acc->large_strings = PyList_New(0);
295 if (acc->large_strings == NULL)
298 #if PY_MAJOR_VERSION >= 3
299 joined = join_list_unicode(acc->small_strings);
300 #else /* PY_MAJOR_VERSION >= 3 */
301 joined = join_list_string(acc->small_strings);
302 #endif /* PY_MAJOR_VERSION < 3 */
305 if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) {
309 ret = PyList_Append(acc->large_strings, joined);
317 JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
320 #if PY_MAJOR_VERSION >= 3
321 assert(PyUnicode_Check(unicode));
322 #else /* PY_MAJOR_VERSION >= 3 */
323 assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode));
324 #endif /* PY_MAJOR_VERSION < 3 */
326 if (PyList_Append(acc->small_strings, unicode))
328 nsmall = PyList_GET_SIZE(acc->small_strings);
329 /* Each item in a list of unicode objects has an overhead (in 64-bit
331 * - 8 bytes for the list slot
332 * - 56 bytes for the header of the unicode object
333 * that is, 64 bytes. 100000 such objects waste more than 6MB
334 * compared to a single concatenated string.
338 return flush_accumulator(acc);
342 JSON_Accu_FinishAsList(JSON_Accu *acc)
347 ret = flush_accumulator(acc);
348 Py_CLEAR(acc->small_strings);
350 Py_CLEAR(acc->large_strings);
353 res = acc->large_strings;
354 acc->large_strings = NULL;
356 return PyList_New(0);
361 JSON_Accu_Destroy(JSON_Accu *acc)
363 Py_CLEAR(acc->small_strings);
364 Py_CLEAR(acc->large_strings);
368 IS_DIGIT(JSON_UNICHR c)
370 return c >= '0' && c <= '9';
374 JSON_UnicodeFromChar(JSON_UNICHR c)
376 #if PY_MAJOR_VERSION >= 3
377 PyObject *rval = PyUnicode_New(1, c);
379 PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c);
381 #else /* PY_MAJOR_VERSION >= 3 */
382 return PyUnicode_FromUnicode(&c, 1);
383 #endif /* PY_MAJOR_VERSION < 3 */
387 maybe_quote_bigint(PyObject *encoded, PyObject *obj)
389 static PyObject *big_long = NULL;
390 static PyObject *small_long = NULL;
391 if (big_long == NULL) {
392 big_long = PyLong_FromLongLong(1LL << 53);
393 if (big_long == NULL) {
398 if (small_long == NULL) {
399 small_long = PyLong_FromLongLong(-1LL << 53);
400 if (small_long == NULL) {
405 if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
406 PyObject_RichCompareBool(obj, small_long, Py_LE)) {
407 #if PY_MAJOR_VERSION >= 3
408 PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
410 PyObject* quoted = PyString_FromFormat("\"%s\"",
411 PyString_AsString(encoded));
420 _is_namedtuple(PyObject *obj)
423 PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
424 if (_asdict == NULL) {
428 rval = PyCallable_Check(_asdict);
434 _has_for_json_hook(PyObject *obj)
437 PyObject *for_json = PyObject_GetAttrString(obj, "for_json");
438 if (for_json == NULL) {
442 rval = PyCallable_Check(for_json);
448 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
450 /* PyObject to Py_ssize_t converter */
451 *size_ptr = PyInt_AsSsize_t(o);
452 if (*size_ptr == -1 && PyErr_Occurred())
458 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
460 /* Py_ssize_t to PyObject converter */
461 return PyInt_FromSsize_t(*size_ptr);
465 ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
467 /* Escape unicode code point c to ASCII escape sequences
468 in char *output. output must have at least 12 bytes unused to
469 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
471 output[chars++] = (char)c;
474 output[chars++] = '\\';
476 case '\\': output[chars++] = (char)c; break;
477 case '"': output[chars++] = (char)c; break;
478 case '\b': output[chars++] = 'b'; break;
479 case '\f': output[chars++] = 'f'; break;
480 case '\n': output[chars++] = 'n'; break;
481 case '\r': output[chars++] = 'r'; break;
482 case '\t': output[chars++] = 't'; break;
484 #if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
486 /* UTF-16 surrogate pair */
487 JSON_UNICHR v = c - 0x10000;
488 c = 0xd800 | ((v >> 10) & 0x3ff);
489 output[chars++] = 'u';
490 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
491 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
492 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
493 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
494 c = 0xdc00 | (v & 0x3ff);
495 output[chars++] = '\\';
498 output[chars++] = 'u';
499 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
500 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
501 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
502 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
509 ascii_char_size(JSON_UNICHR c)
514 else if (c == '\\' ||
523 #if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
524 else if (c >= 0x10000U) {
525 return 2 * MIN_EXPANSION;
529 return MIN_EXPANSION;
534 ascii_escape_unicode(PyObject *pystr)
536 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
538 Py_ssize_t input_chars;
539 Py_ssize_t output_size;
546 if (PyUnicode_READY(pystr))
549 kind = PyUnicode_KIND(pystr);
550 data = PyUnicode_DATA(pystr);
551 input_chars = PyUnicode_GetLength(pystr);
553 for (i = 0; i < input_chars; i++) {
554 output_size += ascii_char_size(PyUnicode_READ(kind, data, i));
556 #if PY_MAJOR_VERSION >= 3
557 rval = PyUnicode_New(output_size, 127);
561 assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND);
562 output = (char *)PyUnicode_DATA(rval);
564 rval = PyString_FromStringAndSize(NULL, output_size);
568 output = PyString_AS_STRING(rval);
571 output[chars++] = '"';
572 for (i = 0; i < input_chars; i++) {
573 chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars);
575 output[chars++] = '"';
576 assert(chars == output_size);
580 #if PY_MAJOR_VERSION >= 3
583 ascii_escape_str(PyObject *pystr)
586 PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL);
589 rval = ascii_escape_unicode(input);
594 #else /* PY_MAJOR_VERSION >= 3 */
597 ascii_escape_str(PyObject *pystr)
599 /* Take a PyString pystr and return a new ASCII-only escaped PyString */
601 Py_ssize_t input_chars;
602 Py_ssize_t output_size;
608 input_chars = PyString_GET_SIZE(pystr);
609 input_str = PyString_AS_STRING(pystr);
612 /* Fast path for a string that's already ASCII */
613 for (i = 0; i < input_chars; i++) {
614 JSON_UNICHR c = (JSON_UNICHR)input_str[i];
616 /* We hit a non-ASCII character, bail to unicode mode */
618 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
622 rval = ascii_escape_unicode(uni);
626 output_size += ascii_char_size(c);
629 rval = PyString_FromStringAndSize(NULL, output_size);
634 output = PyString_AS_STRING(rval);
635 output[chars++] = '"';
636 for (i = 0; i < input_chars; i++) {
637 chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars);
639 output[chars++] = '"';
640 assert(chars == output_size);
643 #endif /* PY_MAJOR_VERSION < 3 */
646 encoder_stringify_key(PyEncoderObject *s, PyObject *key)
648 if (PyUnicode_Check(key)) {
652 else if (PyString_Check(key)) {
653 #if PY_MAJOR_VERSION >= 3
654 return PyUnicode_Decode(
655 PyString_AS_STRING(key),
656 PyString_GET_SIZE(key),
657 JSON_ASCII_AS_STRING(s->encoding),
659 #else /* PY_MAJOR_VERSION >= 3 */
662 #endif /* PY_MAJOR_VERSION < 3 */
664 else if (PyFloat_Check(key)) {
665 return encoder_encode_float(s, key);
667 else if (key == Py_True || key == Py_False || key == Py_None) {
668 /* This must come before the PyInt_Check because
669 True and False are also 1 and 0.*/
670 return _encoded_const(key);
672 else if (PyInt_Check(key) || PyLong_Check(key)) {
673 return PyObject_Str(key);
675 else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
676 return PyObject_Str(key);
678 else if (s->skipkeys) {
682 PyErr_SetString(PyExc_TypeError, "keys must be a string");
687 encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct)
690 PyObject *iter = NULL;
691 PyObject *lst = NULL;
692 PyObject *item = NULL;
693 PyObject *kstr = NULL;
694 static PyObject *sortfun = NULL;
695 static PyObject *sortargs = NULL;
697 if (sortargs == NULL) {
698 sortargs = PyTuple_New(0);
699 if (sortargs == NULL)
703 if (PyDict_CheckExact(dct))
704 items = PyDict_Items(dct);
706 items = PyMapping_Items(dct);
709 iter = PyObject_GetIter(items);
713 if (s->item_sort_kw == Py_None)
718 while ((item = PyIter_Next(iter))) {
719 PyObject *key, *value;
720 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
721 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
724 key = PyTuple_GET_ITEM(item, 0);
727 #if PY_MAJOR_VERSION < 3
728 else if (PyString_Check(key)) {
729 /* item can be added as-is */
731 #endif /* PY_MAJOR_VERSION < 3 */
732 else if (PyUnicode_Check(key)) {
733 /* item can be added as-is */
737 kstr = encoder_stringify_key(s, key);
740 else if (kstr == Py_None) {
745 value = PyTuple_GET_ITEM(item, 1);
748 tpl = PyTuple_Pack(2, kstr, value);
755 if (PyList_Append(lst, item))
760 if (PyErr_Occurred())
762 sortfun = PyObject_GetAttrString(lst, "sort");
765 if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw))
768 iter = PyObject_GetIter(lst);
781 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
783 /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
784 static PyObject *JSONDecodeError = NULL;
786 if (JSONDecodeError == NULL) {
787 PyObject *scanner = PyImport_ImportModule("simplejson.scanner");
790 JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError");
792 if (JSONDecodeError == NULL)
795 exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
797 PyErr_SetObject(JSONDecodeError, exc);
803 join_list_unicode(PyObject *lst)
805 /* return u''.join(lst) */
806 static PyObject *joinfn = NULL;
807 if (joinfn == NULL) {
808 PyObject *ustr = JSON_NewEmptyUnicode();
812 joinfn = PyObject_GetAttrString(ustr, "join");
817 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
820 #if PY_MAJOR_VERSION >= 3
821 #define join_list_string join_list_unicode
822 #else /* PY_MAJOR_VERSION >= 3 */
824 join_list_string(PyObject *lst)
826 /* return ''.join(lst) */
827 static PyObject *joinfn = NULL;
828 if (joinfn == NULL) {
829 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
833 joinfn = PyObject_GetAttrString(ustr, "join");
838 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
840 #endif /* PY_MAJOR_VERSION < 3 */
843 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
845 /* return (rval, idx) tuple, stealing reference to rval */
849 steal a reference to rval, returns (rval, idx)
852 assert(PyErr_Occurred());
855 pyidx = PyInt_FromSsize_t(idx);
860 tpl = PyTuple_New(2);
866 PyTuple_SET_ITEM(tpl, 0, rval);
867 PyTuple_SET_ITEM(tpl, 1, pyidx);
871 #define APPEND_OLD_CHUNK \
872 if (chunk != NULL) { \
873 if (chunks == NULL) { \
874 chunks = PyList_New(0); \
875 if (chunks == NULL) { \
879 if (PyList_Append(chunks, chunk)) { \
885 #if PY_MAJOR_VERSION < 3
887 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
889 /* Read the JSON string from PyString pystr.
890 end is the index of the first character after the quote.
891 encoding is the encoding of pystr (must be an ASCII superset)
892 if strict is zero then literal control characters are allowed
893 *next_end_ptr is a return-by-reference index of the character
896 Return value is a new PyString (if ASCII-only) or PyUnicode
899 Py_ssize_t len = PyString_GET_SIZE(pystr);
900 Py_ssize_t begin = end - 1;
901 Py_ssize_t next = begin;
903 char *buf = PyString_AS_STRING(pystr);
904 PyObject *chunks = NULL;
905 PyObject *chunk = NULL;
906 PyObject *strchunk = NULL;
909 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
912 else if (end < 0 || len < end) {
913 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
917 /* Find the end of the string or the next escape */
919 for (next = end; next < len; next++) {
920 c = (unsigned char)buf[next];
921 if (c == '"' || c == '\\') {
924 else if (strict && c <= 0x1f) {
925 raise_errmsg(ERR_STRING_CONTROL, pystr, next);
932 if (!(c == '"' || c == '\\')) {
933 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
936 /* Pick up this chunk if it's not zero length */
939 #if PY_MAJOR_VERSION >= 3
941 chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL);
944 chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL);
949 #else /* PY_MAJOR_VERSION >= 3 */
950 strchunk = PyString_FromStringAndSize(&buf[end], next - end);
951 if (strchunk == NULL) {
955 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
964 #endif /* PY_MAJOR_VERSION < 3 */
972 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
977 /* Non-unicode backslash escapes */
983 case 'b': c = '\b'; break;
984 case 'f': c = '\f'; break;
985 case 'n': c = '\n'; break;
986 case 'r': c = '\r'; break;
987 case 't': c = '\t'; break;
991 raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
1000 raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
1003 /* Decode 4 hex digits */
1004 for (; next < end; next++) {
1005 JSON_UNICHR digit = (JSON_UNICHR)buf[next];
1008 case '0': case '1': case '2': case '3': case '4':
1009 case '5': case '6': case '7': case '8': case '9':
1010 c |= (digit - '0'); break;
1011 case 'a': case 'b': case 'c': case 'd': case 'e':
1013 c |= (digit - 'a' + 10); break;
1014 case 'A': case 'B': case 'C': case 'D': case 'E':
1016 c |= (digit - 'A' + 10); break;
1018 raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1022 #if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE))
1023 /* Surrogate pair */
1024 if ((c & 0xfc00) == 0xd800) {
1025 if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
1028 /* Decode 4 hex digits */
1029 for (next += 2; next < end; next++) {
1031 JSON_UNICHR digit = buf[next];
1033 case '0': case '1': case '2': case '3': case '4':
1034 case '5': case '6': case '7': case '8': case '9':
1035 c2 |= (digit - '0'); break;
1036 case 'a': case 'b': case 'c': case 'd': case 'e':
1038 c2 |= (digit - 'a' + 10); break;
1039 case 'A': case 'B': case 'C': case 'D': case 'E':
1041 c2 |= (digit - 'A' + 10); break;
1043 raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1047 if ((c2 & 0xfc00) != 0xdc00) {
1048 /* not a low surrogate, rewind */
1053 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
1057 #endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
1063 #if PY_MAJOR_VERSION >= 3
1064 chunk = JSON_UnicodeFromChar(c);
1065 if (chunk == NULL) {
1068 #else /* PY_MAJOR_VERSION >= 3 */
1070 chunk = JSON_UnicodeFromChar(c);
1071 if (chunk == NULL) {
1076 char c_char = Py_CHARMASK(c);
1077 chunk = PyString_FromStringAndSize(&c_char, 1);
1078 if (chunk == NULL) {
1085 if (chunks == NULL) {
1089 rval = JSON_NewEmptyUnicode();
1093 rval = join_list_string(chunks);
1100 *next_end_ptr = end;
1108 #endif /* PY_MAJOR_VERSION < 3 */
1111 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
1113 /* Read the JSON string from PyUnicode pystr.
1114 end is the index of the first character after the quote.
1115 if strict is zero then literal control characters are allowed
1116 *next_end_ptr is a return-by-reference index of the character
1119 Return value is a new PyUnicode
1122 Py_ssize_t begin = end - 1;
1123 Py_ssize_t next = begin;
1124 PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1125 Py_ssize_t len = PyUnicode_GetLength(pystr);
1126 void *buf = PyUnicode_DATA(pystr);
1127 PyObject *chunks = NULL;
1128 PyObject *chunk = NULL;
1131 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1134 else if (end < 0 || len < end) {
1135 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
1139 /* Find the end of the string or the next escape */
1141 for (next = end; next < len; next++) {
1142 c = PyUnicode_READ(kind, buf, next);
1143 if (c == '"' || c == '\\') {
1146 else if (strict && c <= 0x1f) {
1147 raise_errmsg(ERR_STRING_CONTROL, pystr, next);
1151 if (!(c == '"' || c == '\\')) {
1152 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1155 /* Pick up this chunk if it's not zero length */
1158 #if PY_MAJOR_VERSION < 3
1159 chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
1161 chunk = PyUnicode_Substring(pystr, end, next);
1163 if (chunk == NULL) {
1173 raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
1176 c = PyUnicode_READ(kind, buf, next);
1178 /* Non-unicode backslash escapes */
1184 case 'b': c = '\b'; break;
1185 case 'f': c = '\f'; break;
1186 case 'n': c = '\n'; break;
1187 case 'r': c = '\r'; break;
1188 case 't': c = '\t'; break;
1192 raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
1201 raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
1204 /* Decode 4 hex digits */
1205 for (; next < end; next++) {
1206 JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
1209 case '0': case '1': case '2': case '3': case '4':
1210 case '5': case '6': case '7': case '8': case '9':
1211 c |= (digit - '0'); break;
1212 case 'a': case 'b': case 'c': case 'd': case 'e':
1214 c |= (digit - 'a' + 10); break;
1215 case 'A': case 'B': case 'C': case 'D': case 'E':
1217 c |= (digit - 'A' + 10); break;
1219 raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1223 #if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
1224 /* Surrogate pair */
1225 if ((c & 0xfc00) == 0xd800) {
1227 if (end + 6 < len &&
1228 PyUnicode_READ(kind, buf, next) == '\\' &&
1229 PyUnicode_READ(kind, buf, next + 1) == 'u') {
1231 /* Decode 4 hex digits */
1232 for (next += 2; next < end; next++) {
1233 JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
1236 case '0': case '1': case '2': case '3': case '4':
1237 case '5': case '6': case '7': case '8': case '9':
1238 c2 |= (digit - '0'); break;
1239 case 'a': case 'b': case 'c': case 'd': case 'e':
1241 c2 |= (digit - 'a' + 10); break;
1242 case 'A': case 'B': case 'C': case 'D': case 'E':
1244 c2 |= (digit - 'A' + 10); break;
1246 raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
1250 if ((c2 & 0xfc00) != 0xdc00) {
1251 /* not a low surrogate, rewind */
1256 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
1263 chunk = JSON_UnicodeFromChar(c);
1264 if (chunk == NULL) {
1269 if (chunks == NULL) {
1273 rval = JSON_NewEmptyUnicode();
1277 rval = join_list_unicode(chunks);
1283 *next_end_ptr = end;
1292 PyDoc_STRVAR(pydoc_scanstring,
1293 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
1295 "Scan the string s for a JSON string. End is the index of the\n"
1296 "character in s after the quote that started the JSON string.\n"
1297 "Unescapes all valid JSON string escape sequences and raises ValueError\n"
1298 "on attempt to decode an invalid string. If strict is False then literal\n"
1299 "control characters are allowed in the string.\n"
1301 "Returns a tuple of the decoded string and the index of the character in s\n"
1302 "after the end quote."
1306 py_scanstring(PyObject* self UNUSED, PyObject *args)
1311 Py_ssize_t next_end = -1;
1312 char *encoding = NULL;
1314 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
1317 if (encoding == NULL) {
1318 encoding = DEFAULT_ENCODING;
1320 if (PyUnicode_Check(pystr)) {
1321 rval = scanstring_unicode(pystr, end, strict, &next_end);
1323 #if PY_MAJOR_VERSION < 3
1324 /* Using a bytes input is unsupported for scanning in Python 3.
1325 It is coerced to str in the decoder before it gets here. */
1326 else if (PyString_Check(pystr)) {
1327 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
1331 PyErr_Format(PyExc_TypeError,
1332 "first argument must be a string, not %.80s",
1333 Py_TYPE(pystr)->tp_name);
1336 return _build_rval_index_tuple(rval, next_end);
1339 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
1340 "encode_basestring_ascii(basestring) -> str\n"
1342 "Return an ASCII-only JSON representation of a Python string"
1346 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
1348 /* Return an ASCII-only JSON representation of a Python string */
1350 if (PyString_Check(pystr)) {
1351 return ascii_escape_str(pystr);
1353 else if (PyUnicode_Check(pystr)) {
1354 return ascii_escape_unicode(pystr);
1357 PyErr_Format(PyExc_TypeError,
1358 "first argument must be a string, not %.80s",
1359 Py_TYPE(pystr)->tp_name);
1365 scanner_dealloc(PyObject *self)
1367 /* Deallocate scanner object */
1368 scanner_clear(self);
1369 Py_TYPE(self)->tp_free(self);
1373 scanner_traverse(PyObject *self, visitproc visit, void *arg)
1376 assert(PyScanner_Check(self));
1377 s = (PyScannerObject *)self;
1378 Py_VISIT(s->encoding);
1379 Py_VISIT(s->strict);
1380 Py_VISIT(s->object_hook);
1381 Py_VISIT(s->pairs_hook);
1382 Py_VISIT(s->parse_float);
1383 Py_VISIT(s->parse_int);
1384 Py_VISIT(s->parse_constant);
1390 scanner_clear(PyObject *self)
1393 assert(PyScanner_Check(self));
1394 s = (PyScannerObject *)self;
1395 Py_CLEAR(s->encoding);
1396 Py_CLEAR(s->strict);
1397 Py_CLEAR(s->object_hook);
1398 Py_CLEAR(s->pairs_hook);
1399 Py_CLEAR(s->parse_float);
1400 Py_CLEAR(s->parse_int);
1401 Py_CLEAR(s->parse_constant);
/* _parse_object_str (compiled only when PY_MAJOR_VERSION < 3).
 *
 * Parses a JSON object from the byte string pystr, starting at idx, the
 * position just after the opening curly brace.  Each key is read with
 * scanstring_str and looked up in / stored into s->memo; each value is read
 * with scan_once_str.  When s->pairs_hook is not None the (key, value)
 * tuples are appended to a list, otherwise they are stored in a dict.
 * After the closing curly brace the result is handed to s->pairs_hook or
 * s->object_hook when the respective hook is not None, and *next_idx_ptr
 * is set to idx + 1.  Syntax problems are reported through raise_errmsg.
 *
 * NOTE(review): the value of PyObject_IsTrue(s->strict) is used as the
 * strict flag; no check for a negative (error) result is visible here.
 */
1406 #if PY_MAJOR_VERSION < 3
1408 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1410 /* Read a JSON object from PyString pystr.
1411 idx is the index of the first character after the opening curly brace.
1412 *next_idx_ptr is a return-by-reference index to the first character after
1413 the closing curly brace.
1415 Returns a new PyObject (usually a dict, but object_hook or
1416 object_pairs_hook can change that)
1418 char *str = PyString_AS_STRING(pystr);
1419 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1420 PyObject *rval = NULL;
1421 PyObject *pairs = NULL;
1423 PyObject *key = NULL;
1424 PyObject *val = NULL;
1425 char *encoding = JSON_ASCII_AS_STRING(s->encoding);
1426 int strict = PyObject_IsTrue(s->strict);
1427 int has_pairs_hook = (s->pairs_hook != Py_None);
1429 Py_ssize_t next_idx;
1430 if (has_pairs_hook) {
1431 pairs = PyList_New(0);
1436 rval = PyDict_New();
1441 /* skip whitespace after { */
1442 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1444 /* only loop if the object is non-empty */
1445 if (idx <= end_idx && str[idx] != '}') {
1446 int trailing_delimiter = 0;
1447 while (idx <= end_idx) {
1449 trailing_delimiter = 0;
1452 if (str[idx] != '"') {
1453 raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1456 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1459 memokey = PyDict_GetItem(s->memo, key);
1460 if (memokey != NULL) {
1466 if (PyDict_SetItem(s->memo, key, key) < 0)
1471 /* skip whitespace between key and : delimiter, read :, skip whitespace */
1472 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1473 if (idx > end_idx || str[idx] != ':') {
1474 raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
1478 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1480 /* read any JSON data type */
1481 val = scan_once_str(s, pystr, idx, &next_idx);
1485 if (has_pairs_hook) {
1486 item = PyTuple_Pack(2, key, val);
1491 if (PyList_Append(pairs, item) == -1) {
1498 if (PyDict_SetItem(rval, key, val) < 0)
1505 /* skip whitespace before } or , */
1506 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1508 /* bail if the object is closed or we didn't get the , delimiter */
1510 if (idx > end_idx) break;
1511 if (str[idx] == '}') {
1514 else if (str[idx] != ',') {
1515 raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1520 /* skip whitespace after , delimiter */
1521 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1522 trailing_delimiter = 1;
1524 if (trailing_delimiter) {
1525 raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1529 /* verify that idx < end_idx, str[idx] should be '}' */
1530 if (idx > end_idx || str[idx] != '}') {
1532 raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1534 raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
1539 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1540 if (s->pairs_hook != Py_None) {
1541 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1545 *next_idx_ptr = idx + 1;
1549 /* if object_hook is not None: rval = object_hook(rval) */
1550 if (s->object_hook != Py_None) {
1551 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1558 *next_idx_ptr = idx + 1;
1567 #endif /* PY_MAJOR_VERSION < 3 */
/* _parse_object_unicode
 *
 * Unicode counterpart of the object parser: reads characters of pystr
 * through PyUnicode_READ(kind, str, idx), starting at idx, the position
 * just after the opening curly brace.  Keys are read with
 * scanstring_unicode and looked up in / stored into s->memo; values are
 * read with scan_once_unicode.  Pairs go into a list when s->pairs_hook is
 * not None, otherwise into a dict; the result is then passed to
 * s->pairs_hook or s->object_hook when the respective hook is not None.
 * *next_idx_ptr is set to idx + 1 after the closing curly brace.  Syntax
 * problems are reported through raise_errmsg.
 *
 * NOTE(review): the value of PyObject_IsTrue(s->strict) is used as the
 * strict flag; no check for a negative (error) result is visible here.
 */
1570 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1572 /* Read a JSON object from PyUnicode pystr.
1573 idx is the index of the first character after the opening curly brace.
1574 *next_idx_ptr is a return-by-reference index to the first character after
1575 the closing curly brace.
1577 Returns a new PyObject (usually a dict, but object_hook can change that)
1579 void *str = PyUnicode_DATA(pystr);
1580 Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
1581 PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1582 PyObject *rval = NULL;
1583 PyObject *pairs = NULL;
1585 PyObject *key = NULL;
1586 PyObject *val = NULL;
1587 int strict = PyObject_IsTrue(s->strict);
1588 int has_pairs_hook = (s->pairs_hook != Py_None);
1590 Py_ssize_t next_idx;
1592 if (has_pairs_hook) {
1593 pairs = PyList_New(0);
1598 rval = PyDict_New();
1603 /* skip whitespace after { */
1604 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1606 /* only loop if the object is non-empty */
1607 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
1608 int trailing_delimiter = 0;
1609 while (idx <= end_idx) {
1611 trailing_delimiter = 0;
1614 if (PyUnicode_READ(kind, str, idx) != '"') {
1615 raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1618 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1621 memokey = PyDict_GetItem(s->memo, key);
1622 if (memokey != NULL) {
1628 if (PyDict_SetItem(s->memo, key, key) < 0)
1633 /* skip whitespace between key and : delimiter, read :, skip
1635 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1636 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
1637 raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
1641 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1643 /* read any JSON term */
1644 val = scan_once_unicode(s, pystr, idx, &next_idx);
1648 if (has_pairs_hook) {
1649 item = PyTuple_Pack(2, key, val);
1654 if (PyList_Append(pairs, item) == -1) {
1661 if (PyDict_SetItem(rval, key, val) < 0)
1668 /* skip whitespace before } or , */
1669 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1671 /* bail if the object is closed or we didn't get the ,
1674 if (idx > end_idx) break;
1675 if (PyUnicode_READ(kind, str, idx) == '}') {
1678 else if (PyUnicode_READ(kind, str, idx) != ',') {
1679 raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1684 /* skip whitespace after , delimiter */
1685 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1686 trailing_delimiter = 1;
1688 if (trailing_delimiter) {
1689 raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
1694 /* verify that idx < end_idx, str[idx] should be '}' */
1695 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
1697 raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
1699 raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
1704 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1705 if (s->pairs_hook != Py_None) {
1706 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1710 *next_idx_ptr = idx + 1;
1714 /* if object_hook is not None: rval = object_hook(rval) */
1715 if (s->object_hook != Py_None) {
1716 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1723 *next_idx_ptr = idx + 1;
/* _parse_array_str (compiled only when PY_MAJOR_VERSION < 3).
 *
 * Parses a JSON array from the byte string pystr, starting at idx, the
 * position just after the opening bracket.  Elements are read with
 * scan_once_str and appended to a list created with PyList_New(0);
 * whitespace around elements and the ',' delimiter is skipped.  A ','
 * that is not followed by another element is reported with
 * ERR_EXPECTING_VALUE; a missing ',' or ']' is reported with
 * ERR_ARRAY_DELIMITER or ERR_ARRAY_VALUE_FIRST depending on whether any
 * element was collected.  On success *next_idx_ptr is set to idx + 1.
 */
1733 #if PY_MAJOR_VERSION < 3
1735 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1737 /* Read a JSON array from PyString pystr.
1738 idx is the index of the first character after the opening brace.
1739 *next_idx_ptr is a return-by-reference index to the first character after
1742 Returns a new PyList
1744 char *str = PyString_AS_STRING(pystr);
1745 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1746 PyObject *val = NULL;
1747 PyObject *rval = PyList_New(0);
1748 Py_ssize_t next_idx;
1752 /* skip whitespace after [ */
1753 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1755 /* only loop if the array is non-empty */
1756 if (idx <= end_idx && str[idx] != ']') {
1757 int trailing_delimiter = 0;
1758 while (idx <= end_idx) {
1759 trailing_delimiter = 0;
1760 /* read any JSON term and de-tuplefy the (rval, idx) */
1761 val = scan_once_str(s, pystr, idx, &next_idx);
1766 if (PyList_Append(rval, val) == -1)
1772 /* skip whitespace between term and , */
1773 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1775 /* bail if the array is closed or we didn't get the , delimiter */
1776 if (idx > end_idx) break;
1777 if (str[idx] == ']') {
1780 else if (str[idx] != ',') {
1781 raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1786 /* skip whitespace after , */
1787 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1788 trailing_delimiter = 1;
1790 if (trailing_delimiter) {
1791 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1796 /* verify that idx < end_idx, str[idx] should be ']' */
1797 if (idx > end_idx || str[idx] != ']') {
1798 if (PyList_GET_SIZE(rval)) {
1799 raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1801 raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
1805 *next_idx_ptr = idx + 1;
1812 #endif /* PY_MAJOR_VERSION < 3 */
/* _parse_array_unicode
 *
 * Unicode counterpart of the array parser: reads characters of pystr
 * through PyUnicode_READ(kind, str, idx), starting at idx, the position
 * just after the opening bracket.  Elements are read with
 * scan_once_unicode and appended to a list created with PyList_New(0).
 * A ',' that is not followed by another element is reported with
 * ERR_EXPECTING_VALUE; a missing ',' or ']' is reported with
 * ERR_ARRAY_DELIMITER or ERR_ARRAY_VALUE_FIRST depending on whether any
 * element was collected.  On success *next_idx_ptr is set to idx + 1.
 *
 * NOTE(review): the inline comment below says "PyString"; the code reads
 * pystr with the PyUnicode_* accessors.
 */
1815 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1817 /* Read a JSON array from PyString pystr.
1818 idx is the index of the first character after the opening brace.
1819 *next_idx_ptr is a return-by-reference index to the first character after
1822 Returns a new PyList
1824 PY2_UNUSED int kind = PyUnicode_KIND(pystr);
1825 void *str = PyUnicode_DATA(pystr);
1826 Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
1827 PyObject *val = NULL;
1828 PyObject *rval = PyList_New(0);
1829 Py_ssize_t next_idx;
1833 /* skip whitespace after [ */
1834 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1836 /* only loop if the array is non-empty */
1837 if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
1838 int trailing_delimiter = 0;
1839 while (idx <= end_idx) {
1840 trailing_delimiter = 0;
1841 /* read any JSON term */
1842 val = scan_once_unicode(s, pystr, idx, &next_idx);
1847 if (PyList_Append(rval, val) == -1)
1853 /* skip whitespace between term and , */
1854 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1856 /* bail if the array is closed or we didn't get the , delimiter */
1857 if (idx > end_idx) break;
1858 if (PyUnicode_READ(kind, str, idx) == ']') {
1861 else if (PyUnicode_READ(kind, str, idx) != ',') {
1862 raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1867 /* skip whitespace after , */
1868 while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
1869 trailing_delimiter = 1;
1871 if (trailing_delimiter) {
1872 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1877 /* verify that idx < end_idx, str[idx] should be ']' */
1878 if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
1879 if (PyList_GET_SIZE(rval)) {
1880 raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
1882 raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
1886 *next_idx_ptr = idx + 1;
1895 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1897 /* Read a JSON constant from PyString pystr.
1898 constant is the constant string that was found
1899 ("NaN", "Infinity", "-Infinity").
1900 idx is the index of the first character of the constant
1901 *next_idx_ptr is a return-by-reference index to the first character after
1904 Returns the result of parse_constant
1908 /* constant is "NaN", "Infinity", or "-Infinity" */
1909 cstr = JSON_InternFromString(constant);
1913 /* rval = parse_constant(constant) */
1914 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1915 idx += JSON_Intern_GET_SIZE(cstr);
1917 *next_idx_ptr = idx;
1921 #if PY_MAJOR_VERSION < 3
1923 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
1925 /* Read a JSON number from PyString pystr.
1926 idx is the index of the first character of the number
1927 *next_idx_ptr is a return-by-reference index to the first character after
1930 Returns a new PyObject representation of that number:
1931 PyInt, PyLong, or PyFloat.
1932 May return other types if parse_int or parse_float are set
1934 char *str = PyString_AS_STRING(pystr);
1935 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1936 Py_ssize_t idx = start;
1941 /* read a sign if it's there, make sure it's not the end of the string */
1942 if (str[idx] == '-') {
1943 if (idx >= end_idx) {
1944 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1950 /* read as many integer digits as we find as long as it doesn't start with 0 */
1951 if (str[idx] >= '1' && str[idx] <= '9') {
1953 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1955 /* if it starts with 0 we only expect one integer digit */
1956 else if (str[idx] == '0') {
1959 /* no integer digits, error */
1961 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
1965 /* if the next char is '.' followed by a digit then read all float digits */
1966 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1969 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1972 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1973 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1975 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1976 Py_ssize_t e_start = idx;
1979 /* read an exponent sign if present */
1980 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1982 /* read all digits */
1983 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1985 /* if we got a digit, then parse as float. if not, backtrack */
1986 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1994 /* copy the section we determined to be a number */
1995 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1999 /* parse as a float using a fast path if available, otherwise call user defined method */
2000 if (s->parse_float != (PyObject *)&PyFloat_Type) {
2001 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
2004 /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
2005 double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
2007 if (d == -1.0 && PyErr_Occurred())
2009 rval = PyFloat_FromDouble(d);
2013 /* parse as an int using a fast path if available, otherwise call user defined method */
2014 if (s->parse_int != (PyObject *)&PyInt_Type) {
2015 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
2018 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
2022 *next_idx_ptr = idx;
2025 #endif /* PY_MAJOR_VERSION < 3 */
/* _match_number_unicode
 *
 * Scans a JSON number in pystr beginning at start, reading characters
 * through PyUnicode_READ(kind, str, idx): an optional '-', the integer
 * digits, an optional '.' followed by digits, and an optional exponent
 * introduced by 'e' or 'E' (the exponent is only accepted when at least
 * one digit follows it; e_start records where to backtrack to).  The
 * matched slice is copied into numstr (PyUnicode_Substring on Python 3,
 * PyUnicode_FromUnicode otherwise).  Floats are converted with
 * PyFloat_FromString unless s->parse_float is something other than
 * PyFloat_Type, in which case s->parse_float is called; integers are
 * always converted by calling s->parse_int.  *next_idx_ptr is set to the
 * index where scanning stopped.  Missing digits are reported with
 * ERR_EXPECTING_VALUE.
 */
2028 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
2030 /* Read a JSON number from PyUnicode pystr.
2031 idx is the index of the first character of the number
2032 *next_idx_ptr is a return-by-reference index to the first character after
2035 Returns a new PyObject representation of that number:
2036 PyInt, PyLong, or PyFloat.
2037 May return other types if parse_int or parse_float are set
2039 PY2_UNUSED int kind = PyUnicode_KIND(pystr);
2040 void *str = PyUnicode_DATA(pystr);
2041 Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1;
2042 Py_ssize_t idx = start;
2048 /* read a sign if it's there, make sure it's not the end of the string */
2049 if (PyUnicode_READ(kind, str, idx) == '-') {
2050 if (idx >= end_idx) {
2051 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2057 /* read as many integer digits as we find as long as it doesn't start with 0 */
2058 c = PyUnicode_READ(kind, str, idx);
2060 /* if it starts with 0 we only expect one integer digit */
2063 else if (IS_DIGIT(c)) {
2065 while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) {
2070 /* no integer digits, error */
2071 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2075 /* if the next char is '.' followed by a digit then read all float digits */
2076 if (idx < end_idx &&
2077 PyUnicode_READ(kind, str, idx) == '.' &&
2078 IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) {
2081 while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
2084 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
2085 if (idx < end_idx &&
2086 (PyUnicode_READ(kind, str, idx) == 'e' ||
2087 PyUnicode_READ(kind, str, idx) == 'E')) {
2088 Py_ssize_t e_start = idx;
2091 /* read an exponent sign if present */
2092 if (idx < end_idx &&
2093 (PyUnicode_READ(kind, str, idx) == '-' ||
2094 PyUnicode_READ(kind, str, idx) == '+')) idx++;
2096 /* read all digits */
2097 while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
2099 /* if we got a digit, then parse as float. if not, backtrack */
2100 if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) {
2108 /* copy the section we determined to be a number */
2109 #if PY_MAJOR_VERSION >= 3
2110 numstr = PyUnicode_Substring(pystr, start, idx);
2112 numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start);
2117 /* parse as a float using a fast path if available, otherwise call user defined method */
2118 if (s->parse_float != (PyObject *)&PyFloat_Type) {
2119 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
2122 #if PY_MAJOR_VERSION >= 3
2123 rval = PyFloat_FromString(numstr);
2125 rval = PyFloat_FromString(numstr, NULL);
2130 /* no fast path for unicode -> int, just call */
2131 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
2134 *next_idx_ptr = idx;
/* scan_once_str (compiled only when PY_MAJOR_VERSION < 3).
 *
 * Reads one JSON term from the byte string pystr at idx and dispatches on
 * its first character: strings go to scanstring_str, objects to
 * _parse_object_str and arrays to _parse_array_str (both wrapped in
 * Py_EnterRecursiveCall / Py_LeaveRecursiveCall), the literals null, true
 * and false are matched character by character, and NaN, Infinity and
 * -Infinity are forwarded to _parse_constant.  Anything else is tried as a
 * number through _match_number_str.  *next_idx_ptr receives the index just
 * past the term.
 *
 * NOTE(review): the only bounds test on idx visible here is
 * "idx >= length"; a negative idx is not rejected before str[idx] is
 * read - confirm that callers never pass one.
 */
2138 #if PY_MAJOR_VERSION < 3
2140 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
2142 /* Read one JSON term (of any kind) from PyString pystr.
2143 idx is the index of the first character of the term
2144 *next_idx_ptr is a return-by-reference index to the first character after
2147 Returns a new PyObject representation of the term.
2149 char *str = PyString_AS_STRING(pystr);
2150 Py_ssize_t length = PyString_GET_SIZE(pystr);
2151 PyObject *rval = NULL;
2152 int fallthrough = 0;
2153 if (idx >= length) {
2154 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2160 rval = scanstring_str(pystr, idx + 1,
2161 JSON_ASCII_AS_STRING(s->encoding),
2162 PyObject_IsTrue(s->strict),
2167 if (Py_EnterRecursiveCall(" while decoding a JSON object "
2170 rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
2171 Py_LeaveRecursiveCall();
2175 if (Py_EnterRecursiveCall(" while decoding a JSON array "
2178 rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
2179 Py_LeaveRecursiveCall();
2183 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
2185 *next_idx_ptr = idx + 4;
2193 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
2195 *next_idx_ptr = idx + 4;
2203 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
2204 Py_INCREF(Py_False);
2205 *next_idx_ptr = idx + 5;
2213 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
2214 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
2221 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
2222 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
2229 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
2230 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
2238 /* Didn't find a string, object, array, or named constant. Look for a number. */
2240 rval = _match_number_str(s, pystr, idx, next_idx_ptr);
2243 #endif /* PY_MAJOR_VERSION < 3 */
/* scan_once_unicode
 *
 * Reads one JSON term from the unicode string pystr at idx and dispatches
 * on PyUnicode_READ(kind, str, idx): strings go to scanstring_unicode,
 * objects to _parse_object_unicode and arrays to _parse_array_unicode
 * (both wrapped in Py_EnterRecursiveCall / Py_LeaveRecursiveCall), the
 * literals null, true and false are matched character by character, and
 * NaN, Infinity and -Infinity are forwarded to _parse_constant.  Anything
 * else is tried as a number through _match_number_unicode.
 * *next_idx_ptr receives the index just past the term.
 *
 * NOTE(review): the only bounds test on idx visible here is
 * "idx >= length"; a negative idx is not rejected before the first
 * PyUnicode_READ - confirm that callers never pass one.
 */
2247 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
2249 /* Read one JSON term (of any kind) from PyUnicode pystr.
2250 idx is the index of the first character of the term
2251 *next_idx_ptr is a return-by-reference index to the first character after
2254 Returns a new PyObject representation of the term.
2256 PY2_UNUSED int kind = PyUnicode_KIND(pystr);
2257 void *str = PyUnicode_DATA(pystr);
2258 Py_ssize_t length = PyUnicode_GetLength(pystr);
2259 PyObject *rval = NULL;
2260 int fallthrough = 0;
2261 if (idx >= length) {
2262 raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
2265 switch (PyUnicode_READ(kind, str, idx)) {
2268 rval = scanstring_unicode(pystr, idx + 1,
2269 PyObject_IsTrue(s->strict),
2274 if (Py_EnterRecursiveCall(" while decoding a JSON object "
2275 "from a unicode string"))
2277 rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
2278 Py_LeaveRecursiveCall();
2282 if (Py_EnterRecursiveCall(" while decoding a JSON array "
2283 "from a unicode string"))
2285 rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
2286 Py_LeaveRecursiveCall();
2290 if ((idx + 3 < length) &&
2291 PyUnicode_READ(kind, str, idx + 1) == 'u' &&
2292 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
2293 PyUnicode_READ(kind, str, idx + 3) == 'l') {
2295 *next_idx_ptr = idx + 4;
2303 if ((idx + 3 < length) &&
2304 PyUnicode_READ(kind, str, idx + 1) == 'r' &&
2305 PyUnicode_READ(kind, str, idx + 2) == 'u' &&
2306 PyUnicode_READ(kind, str, idx + 3) == 'e') {
2308 *next_idx_ptr = idx + 4;
2316 if ((idx + 4 < length) &&
2317 PyUnicode_READ(kind, str, idx + 1) == 'a' &&
2318 PyUnicode_READ(kind, str, idx + 2) == 'l' &&
2319 PyUnicode_READ(kind, str, idx + 3) == 's' &&
2320 PyUnicode_READ(kind, str, idx + 4) == 'e') {
2321 Py_INCREF(Py_False);
2322 *next_idx_ptr = idx + 5;
2330 if ((idx + 2 < length) &&
2331 PyUnicode_READ(kind, str, idx + 1) == 'a' &&
2332 PyUnicode_READ(kind, str, idx + 2) == 'N') {
2333 rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
2340 if ((idx + 7 < length) &&
2341 PyUnicode_READ(kind, str, idx + 1) == 'n' &&
2342 PyUnicode_READ(kind, str, idx + 2) == 'f' &&
2343 PyUnicode_READ(kind, str, idx + 3) == 'i' &&
2344 PyUnicode_READ(kind, str, idx + 4) == 'n' &&
2345 PyUnicode_READ(kind, str, idx + 5) == 'i' &&
2346 PyUnicode_READ(kind, str, idx + 6) == 't' &&
2347 PyUnicode_READ(kind, str, idx + 7) == 'y') {
2348 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
2355 if ((idx + 8 < length) &&
2356 PyUnicode_READ(kind, str, idx + 1) == 'I' &&
2357 PyUnicode_READ(kind, str, idx + 2) == 'n' &&
2358 PyUnicode_READ(kind, str, idx + 3) == 'f' &&
2359 PyUnicode_READ(kind, str, idx + 4) == 'i' &&
2360 PyUnicode_READ(kind, str, idx + 5) == 'n' &&
2361 PyUnicode_READ(kind, str, idx + 6) == 'i' &&
2362 PyUnicode_READ(kind, str, idx + 7) == 't' &&
2363 PyUnicode_READ(kind, str, idx + 8) == 'y') {
2364 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
2372 /* Didn't find a string, object, array, or named constant. Look for a number. */
2374 rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
2379 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
2381 /* Python callable interface to scan_once_{str,unicode} */
2385 Py_ssize_t next_idx = -1;
2386 static char *kwlist[] = {"string", "idx", NULL};
2388 assert(PyScanner_Check(self));
2389 s = (PyScannerObject *)self;
2390 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
2393 if (PyUnicode_Check(pystr)) {
2394 rval = scan_once_unicode(s, pystr, idx, &next_idx);
2396 #if PY_MAJOR_VERSION < 3
2397 else if (PyString_Check(pystr)) {
2398 rval = scan_once_str(s, pystr, idx, &next_idx);
2400 #endif /* PY_MAJOR_VERSION < 3 */
2402 PyErr_Format(PyExc_TypeError,
2403 "first argument must be a string, not %.80s",
2404 Py_TYPE(pystr)->tp_name);
2407 PyDict_Clear(s->memo);
2408 return _build_rval_index_tuple(rval, next_idx);
/* scanner_new
 *
 * Allocates a PyScannerObject through type->tp_alloc and resets the
 * object-reference fields visible below to NULL, then returns the object
 * cast to PyObject *.
 */
2412 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2415 s = (PyScannerObject *)type->tp_alloc(type, 0);
2419 s->object_hook = NULL;
2420 s->pairs_hook = NULL;
2421 s->parse_float = NULL;
2422 s->parse_int = NULL;
2423 s->parse_constant = NULL;
2425 return (PyObject *)s;
2429 JSON_ParseEncoding(PyObject *encoding)
2431 if (encoding == NULL)
2433 if (encoding == Py_None)
2434 return JSON_InternFromString(DEFAULT_ENCODING);
2435 #if PY_MAJOR_VERSION < 3
2436 if (PyUnicode_Check(encoding))
2437 return PyUnicode_AsEncodedString(encoding, NULL, NULL);
2439 if (JSON_ASCII_Check(encoding)) {
2440 Py_INCREF(encoding);
2443 PyErr_SetString(PyExc_TypeError, "encoding must be a string");
/* scanner_init
 *
 * tp_init for the scanner type.  Parses a single "context" argument,
 * creates s->memo with PyDict_New when it is still NULL, and then copies
 * configuration from the context object: "encoding" is fetched and
 * normalised through JSON_ParseEncoding, while "strict", "object_hook",
 * "object_pairs_hook", "parse_float", "parse_int" and "parse_constant"
 * are fetched with PyObject_GetAttrString and stored as-is.  The trailing
 * run of Py_CLEAR calls releases those fields again.
 */
2448 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
2450 /* Initialize Scanner object */
2452 static char *kwlist[] = {"context", NULL};
2456 assert(PyScanner_Check(self));
2457 s = (PyScannerObject *)self;
2459 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
2462 if (s->memo == NULL) {
2463 s->memo = PyDict_New();
2464 if (s->memo == NULL)
2468 /* JSON_ASCII_AS_STRING is used on encoding */
2469 encoding = PyObject_GetAttrString(ctx, "encoding");
2470 s->encoding = JSON_ParseEncoding(encoding);
2471 Py_XDECREF(encoding);
2472 if (s->encoding == NULL)
2475 /* All of these will fail "gracefully" so we don't need to verify them */
2476 s->strict = PyObject_GetAttrString(ctx, "strict");
2477 if (s->strict == NULL)
2479 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
2480 if (s->object_hook == NULL)
2482 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
2483 if (s->pairs_hook == NULL)
2485 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
2486 if (s->parse_float == NULL)
2488 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
2489 if (s->parse_int == NULL)
2491 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
2492 if (s->parse_constant == NULL)
2498 Py_CLEAR(s->encoding);
2499 Py_CLEAR(s->strict);
2500 Py_CLEAR(s->object_hook);
2501 Py_CLEAR(s->pairs_hook);
2502 Py_CLEAR(s->parse_float);
2503 Py_CLEAR(s->parse_int);
2504 Py_CLEAR(s->parse_constant);
/* Docstring and static type object for the scanner, registered under the
 * name "simplejson._speedups.Scanner".  The slots filled in below wire up
 * scanner_dealloc, scanner_call, scanner_traverse / scanner_clear (the
 * type sets Py_TPFLAGS_HAVE_GC), scanner_members, scanner_init and
 * scanner_new; tp_getattro, tp_setattro, tp_alloc and tp_free are left 0
 * here, with the intended values named in the adjacent comments.
 */
2508 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
2511 PyTypeObject PyScannerType = {
2512 PyVarObject_HEAD_INIT(NULL, 0)
2513 "simplejson._speedups.Scanner", /* tp_name */
2514 sizeof(PyScannerObject), /* tp_basicsize */
2515 0, /* tp_itemsize */
2516 scanner_dealloc, /* tp_dealloc */
2522 0, /* tp_as_number */
2523 0, /* tp_as_sequence */
2524 0, /* tp_as_mapping */
2526 scanner_call, /* tp_call */
2528 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
2529 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
2530 0, /* tp_as_buffer */
2531 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2532 scanner_doc, /* tp_doc */
2533 scanner_traverse, /* tp_traverse */
2534 scanner_clear, /* tp_clear */
2535 0, /* tp_richcompare */
2536 0, /* tp_weaklistoffset */
2538 0, /* tp_iternext */
2540 scanner_members, /* tp_members */
2544 0, /* tp_descr_get */
2545 0, /* tp_descr_set */
2546 0, /* tp_dictoffset */
2547 scanner_init, /* tp_init */
2548 0,/* PyType_GenericAlloc, */ /* tp_alloc */
2549 scanner_new, /* tp_new */
2550 0,/* PyObject_GC_Del, */ /* tp_free */
/* encoder_new
 *
 * Allocates a PyEncoderObject through type->tp_alloc and resets the
 * object-reference fields visible below to NULL, then returns the object
 * cast to PyObject *.
 */
2554 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2557 s = (PyEncoderObject *)type->tp_alloc(type, 0);
2560 s->defaultfn = NULL;
2564 s->key_separator = NULL;
2565 s->item_separator = NULL;
2567 s->sort_keys = NULL;
2568 s->item_sort_key = NULL;
2569 s->item_sort_kw = NULL;
2572 return (PyObject *)s;
2576 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
2578 /* initialize Encoder object */
2579 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", NULL};
2582 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2583 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
2584 PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
2585 PyObject *bigint_as_string, *item_sort_key, *encoding, *for_json;
2586 PyObject *ignore_nan, *Decimal;
2588 assert(PyEncoder_Check(self));
2589 s = (PyEncoderObject *)self;
2591 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist,
2592 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2593 &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
2594 &namedtuple_as_object, &tuple_as_array, &bigint_as_string,
2595 &item_sort_key, &encoding, &for_json, &ignore_nan, &Decimal))
2598 s->markers = markers;
2599 s->defaultfn = defaultfn;
2600 s->encoder = encoder;
2601 s->encoding = JSON_ParseEncoding(encoding);
2602 if (s->encoding == NULL)
2605 s->key_separator = key_separator;
2606 s->item_separator = item_separator;
2607 s->skipkeys_bool = skipkeys;
2608 s->skipkeys = PyObject_IsTrue(skipkeys);
2609 s->key_memo = key_memo;
2610 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2611 s->allow_or_ignore_nan = (
2612 (PyObject_IsTrue(ignore_nan) ? JSON_IGNORE_NAN : 0) |
2613 (PyObject_IsTrue(allow_nan) ? JSON_ALLOW_NAN : 0));
2614 s->use_decimal = PyObject_IsTrue(use_decimal);
2615 s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
2616 s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
2617 s->bigint_as_string = PyObject_IsTrue(bigint_as_string);
2618 if (item_sort_key != Py_None) {
2619 if (!PyCallable_Check(item_sort_key))
2620 PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
2622 else if (PyObject_IsTrue(sort_keys)) {
2623 static PyObject *itemgetter0 = NULL;
2625 PyObject *operator = PyImport_ImportModule("operator");
2628 itemgetter0 = PyObject_CallMethod(operator, "itemgetter", "i", 0);
2629 Py_DECREF(operator);
2631 item_sort_key = itemgetter0;
2635 if (item_sort_key == Py_None) {
2637 s->item_sort_kw = Py_None;
2640 s->item_sort_kw = PyDict_New();
2641 if (s->item_sort_kw == NULL)
2643 if (PyDict_SetItemString(s->item_sort_kw, "key", item_sort_key))
2646 s->sort_keys = sort_keys;
2647 s->item_sort_key = item_sort_key;
2648 s->Decimal = Decimal;
2649 s->for_json = PyObject_IsTrue(for_json);
2651 Py_INCREF(s->markers);
2652 Py_INCREF(s->defaultfn);
2653 Py_INCREF(s->encoder);
2654 Py_INCREF(s->indent);
2655 Py_INCREF(s->key_separator);
2656 Py_INCREF(s->item_separator);
2657 Py_INCREF(s->key_memo);
2658 Py_INCREF(s->skipkeys_bool);
2659 Py_INCREF(s->sort_keys);
2660 Py_INCREF(s->item_sort_key);
2661 Py_INCREF(s->Decimal);
2666 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2668 /* Python callable interface to encode_listencode_obj */
2669 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2671 Py_ssize_t indent_level;
2674 assert(PyEncoder_Check(self));
2675 s = (PyEncoderObject *)self;
2676 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2677 &obj, _convertPyInt_AsSsize_t, &indent_level))
2679 if (JSON_Accu_Init(&rval))
2681 if (encoder_listencode_obj(s, &rval, obj, indent_level)) {
2682 JSON_Accu_Destroy(&rval);
2685 return JSON_Accu_FinishAsList(&rval);
2689 _encoded_const(PyObject *obj)
2691 /* Return the JSON string representation of None, True, False */
2692 if (obj == Py_None) {
2693 static PyObject *s_null = NULL;
2694 if (s_null == NULL) {
2695 s_null = JSON_InternFromString("null");
2700 else if (obj == Py_True) {
2701 static PyObject *s_true = NULL;
2702 if (s_true == NULL) {
2703 s_true = JSON_InternFromString("true");
2708 else if (obj == Py_False) {
2709 static PyObject *s_false = NULL;
2710 if (s_false == NULL) {
2711 s_false = JSON_InternFromString("false");
2717 PyErr_SetString(PyExc_ValueError, "not a const");
2723 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2725 /* Return the JSON representation of a PyFloat */
2726 double i = PyFloat_AS_DOUBLE(obj);
2727 if (!Py_IS_FINITE(i)) {
2728 if (!s->allow_or_ignore_nan) {
2729 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2732 if (s->allow_or_ignore_nan & JSON_IGNORE_NAN) {
2733 return _encoded_const(Py_None);
2735 /* JSON_ALLOW_NAN is set */
2737 static PyObject *sInfinity = NULL;
2738 if (sInfinity == NULL)
2739 sInfinity = JSON_InternFromString("Infinity");
2741 Py_INCREF(sInfinity);
2745 static PyObject *sNegInfinity = NULL;
2746 if (sNegInfinity == NULL)
2747 sNegInfinity = JSON_InternFromString("-Infinity");
2749 Py_INCREF(sNegInfinity);
2750 return sNegInfinity;
2753 static PyObject *sNaN = NULL;
2755 sNaN = JSON_InternFromString("NaN");
2761 /* Use a better float format here? */
2762 return PyObject_Repr(obj);
2766 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2768 /* Return the JSON representation of a string */
2770 return py_encode_basestring_ascii(NULL, obj);
2772 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2776 _steal_accumulate(JSON_Accu *accu, PyObject *stolen)
2778 /* Append stolen and then decrement its reference count */
2779 int rval = JSON_Accu_Accumulate(accu, stolen);
/* encoder_listencode_obj
 *
 * Encodes one Python object into the accumulator rval, choosing the
 * first branch below that matches:
 *   - None / True / False        -> _encoded_const
 *   - str / unicode              -> encoder_encode_string
 *   - int / long                 -> PyObject_Str, passed through
 *                                   maybe_quote_bigint when
 *                                   s->bigint_as_string is set
 *   - float                      -> encoder_encode_float
 *   - object with a for_json hook (when s->for_json is set)
 *                                -> re-encode the result of obj.for_json()
 *   - namedtuple (when s->namedtuple_as_object is set)
 *                                -> encode obj._asdict() as a dict
 *   - list, or tuple when s->tuple_as_array is set
 *                                -> encoder_listencode_list
 *   - dict                       -> encoder_listencode_dict
 *   - instance of s->Decimal (when s->use_decimal is set)
 *                                -> PyObject_Str
 *   - anything else              -> call s->defaultfn(obj) and re-encode
 *                                   the result; when s->markers is not
 *                                   None the object's address is tracked
 *                                   there and a repeat raises
 *                                   "Circular reference detected".
 * Recursive branches are bracketed by Py_EnterRecursiveCall /
 * Py_LeaveRecursiveCall.
 */
2785 encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
2787 /* Encode Python object obj to a JSON term, rval is a PyList */
2790 if (obj == Py_None || obj == Py_True || obj == Py_False) {
2791 PyObject *cstr = _encoded_const(obj);
2793 rv = _steal_accumulate(rval, cstr);
2795 else if (PyString_Check(obj) || PyUnicode_Check(obj))
2797 PyObject *encoded = encoder_encode_string(s, obj);
2798 if (encoded != NULL)
2799 rv = _steal_accumulate(rval, encoded);
2801 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2802 PyObject *encoded = PyObject_Str(obj);
2803 if (encoded != NULL) {
2804 if (s->bigint_as_string) {
2805 encoded = maybe_quote_bigint(encoded, obj);
2806 if (encoded == NULL)
2809 rv = _steal_accumulate(rval, encoded);
2812 else if (PyFloat_Check(obj)) {
2813 PyObject *encoded = encoder_encode_float(s, obj);
2814 if (encoded != NULL)
2815 rv = _steal_accumulate(rval, encoded);
2817 else if (s->for_json && _has_for_json_hook(obj)) {
2819 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2821 newobj = PyObject_CallMethod(obj, "for_json", NULL);
2822 if (newobj != NULL) {
2823 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2826 Py_LeaveRecursiveCall();
2828 else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
2830 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2832 newobj = PyObject_CallMethod(obj, "_asdict", NULL);
2833 if (newobj != NULL) {
2834 rv = encoder_listencode_dict(s, rval, newobj, indent_level);
2837 Py_LeaveRecursiveCall();
2839 else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
2840 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2842 rv = encoder_listencode_list(s, rval, obj, indent_level);
2843 Py_LeaveRecursiveCall();
2845 else if (PyDict_Check(obj)) {
2846 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2848 rv = encoder_listencode_dict(s, rval, obj, indent_level);
2849 Py_LeaveRecursiveCall();
2851 else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
2852 PyObject *encoded = PyObject_Str(obj);
2853 if (encoded != NULL)
2854 rv = _steal_accumulate(rval, encoded);
2857 PyObject *ident = NULL;
2859 if (s->markers != Py_None) {
2861 ident = PyLong_FromVoidPtr(obj);
2864 has_key = PyDict_Contains(s->markers, ident);
2867 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2871 if (PyDict_SetItem(s->markers, ident, obj)) {
2876 if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2878 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2879 if (newobj == NULL) {
2881 Py_LeaveRecursiveCall();
2884 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2885 Py_LeaveRecursiveCall();
2891 else if (ident != NULL) {
2892 if (PyDict_DelItem(s->markers, ident)) {
/* Encode a mapping as a JSON object, appending the fragments to rval.
 *
 * Output shape: "{" key key_separator value [item_separator key ...] "}",
 * or the single fragment "{}" for an empty dict.  Keys are turned into
 * strings with encoder_stringify_key() and their encoded form is cached in
 * s->key_memo.  Values are encoded through encoder_listencode_obj().
 * When s->markers is in use, dct is registered in it while its items are
 * being encoded so that self-referencing structures raise ValueError. */
2904 encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
2906 /* Encode Python dict dct to a JSON term */
/* Punctuation fragments are interned on first use and kept in function-level
 * statics so later calls reuse the same objects. */
2907 static PyObject *open_dict = NULL;
2908 static PyObject *close_dict = NULL;
2909 static PyObject *empty_dict = NULL;
2910 PyObject *kstr = NULL;
2911 PyObject *ident = NULL;
2912 PyObject *iter = NULL;
2913 PyObject *item = NULL;
2914 PyObject *items = NULL;
2915 PyObject *encoded = NULL;
2918 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2919 open_dict = JSON_InternFromString("{");
2920 close_dict = JSON_InternFromString("}");
2921 empty_dict = JSON_InternFromString("{}");
2922 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
/* Empty mapping: emit "{}" and return before any marker bookkeeping. */
2925 if (PyDict_Size(dct) == 0)
2926 return JSON_Accu_Accumulate(rval, empty_dict);
/* Circular-reference guard, keyed by the address of dct. */
2928 if (s->markers != Py_None) {
2930 ident = PyLong_FromVoidPtr(dct);
2933 has_key = PyDict_Contains(s->markers, ident);
2936 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2939 if (PyDict_SetItem(s->markers, ident, dct)) {
2944 if (JSON_Accu_Accumulate(rval, open_dict))
/* Indentation is not implemented here: the block below is marked TODO and
 * only carries Python pseudo-code describing the intended behaviour. */
2947 if (s->indent != Py_None) {
2948 /* TODO: DOES NOT RUN */
2951 newline_indent = '\n' + (_indent * _current_indent_level)
2952 separator = _item_separator + newline_indent
2953 buf += newline_indent
2957 iter = encoder_dict_iteritems(s, dct);
/* Each value produced by the iterator must be a (key, value) 2-tuple. */
2962 while ((item = PyIter_Next(iter))) {
/* NOTE(review): this `encoded` shadows the function-scope `encoded`
 * declared above; confirm which of the two the Py_XDECREF(encoded) at the
 * end of the function releases, and whether error exits from inside this
 * loop can leave the loop-local reference unreleased. */
2963 PyObject *encoded, *key, *value;
2964 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
2965 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
/* PyTuple_GET_ITEM() yields borrowed references owned by item. */
2968 key = PyTuple_GET_ITEM(item, 0);
2971 value = PyTuple_GET_ITEM(item, 1);
/* Look for a previously encoded form of this key; PyDict_GetItem() returns
 * a borrowed reference, or NULL when the key has not been cached. */
2975 encoded = PyDict_GetItem(s->key_memo, key);
2976 if (encoded != NULL) {
/* Cache miss: convert the key to a string first. */
2979 kstr = encoder_stringify_key(s, key);
/* NOTE(review): a Py_None result from encoder_stringify_key() presumably
 * means "skip this item" -- confirm against that helper. */
2982 else if (kstr == Py_None) {
2990 if (JSON_Accu_Accumulate(rval, s->item_separator))
2993 if (encoded == NULL) {
2994 encoded = encoder_encode_string(s, kstr);
2996 if (encoded == NULL)
/* Remember the encoded key so later occurrences skip the conversion. */
2998 if (PyDict_SetItem(s->key_memo, key, encoded))
3001 if (JSON_Accu_Accumulate(rval, encoded)) {
3005 if (JSON_Accu_Accumulate(rval, s->key_separator))
3007 if (encoder_listencode_obj(s, rval, value, indent_level))
/* PyIter_Next() returns NULL both at exhaustion and on error; checking
 * PyErr_Occurred() tells the two apart. */
3013 if (PyErr_Occurred())
/* Done with this container: drop its circular-reference marker. */
3015 if (ident != NULL) {
3016 if (PyDict_DelItem(s->markers, ident))
3020 if (s->indent != Py_None) {
3021 /* TODO: DOES NOT RUN */
3024 yield '\n' + (_indent * _current_indent_level)
3027 if (JSON_Accu_Accumulate(rval, close_dict))
3032 Py_XDECREF(encoded);
/* Encode an iterable as a JSON array, appending the fragments to rval.
 *
 * Output shape: "[" element [item_separator element ...] "]", or the single
 * fragment "[]" when seq is falsy.  Elements are encoded through
 * encoder_listencode_obj().  When s->markers is in use, seq is registered
 * in it while its elements are being encoded so that self-referencing
 * structures raise ValueError. */
3042 encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
3044 /* Encode Python list seq to a JSON term */
/* Punctuation fragments are interned on first use and kept in function-level
 * statics so later calls reuse the same objects. */
3045 static PyObject *open_array = NULL;
3046 static PyObject *close_array = NULL;
3047 static PyObject *empty_array = NULL;
3048 PyObject *ident = NULL;
3049 PyObject *iter = NULL;
3050 PyObject *obj = NULL;
3054 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
3055 open_array = JSON_InternFromString("[");
3056 close_array = JSON_InternFromString("]");
3057 empty_array = JSON_InternFromString("[]");
3058 if (open_array == NULL || close_array == NULL || empty_array == NULL)
/* Emptiness is decided by truthiness rather than by asking for a length;
 * a falsy sequence is emitted as "[]" before any marker bookkeeping. */
3062 is_true = PyObject_IsTrue(seq);
3065 else if (is_true == 0)
3066 return JSON_Accu_Accumulate(rval, empty_array);
/* Circular-reference guard, keyed by the address of seq. */
3068 if (s->markers != Py_None) {
3070 ident = PyLong_FromVoidPtr(seq);
3073 has_key = PyDict_Contains(s->markers, ident);
3076 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
3079 if (PyDict_SetItem(s->markers, ident, seq)) {
3084 iter = PyObject_GetIter(seq);
3088 if (JSON_Accu_Accumulate(rval, open_array))
/* Indentation is not implemented here: the block below is marked TODO and
 * only carries Python pseudo-code describing the intended behaviour. */
3090 if (s->indent != Py_None) {
3091 /* TODO: DOES NOT RUN */
3094 newline_indent = '\n' + (_indent * _current_indent_level)
3095 separator = _item_separator + newline_indent
3096 buf += newline_indent
3099 while ((obj = PyIter_Next(iter))) {
/* One pass per element: item separator where required, then the element. */
3101 if (JSON_Accu_Accumulate(rval, s->item_separator))
3104 if (encoder_listencode_obj(s, rval, obj, indent_level))
/* PyIter_Next() returns NULL both at exhaustion and on error; checking
 * PyErr_Occurred() tells the two apart. */
3110 if (PyErr_Occurred())
/* Done with this container: drop its circular-reference marker. */
3112 if (ident != NULL) {
3113 if (PyDict_DelItem(s->markers, ident))
3117 if (s->indent != Py_None) {
3118 /* TODO: DOES NOT RUN */
3121 yield '\n' + (_indent * _current_indent_level)
3124 if (JSON_Accu_Accumulate(rval, close_array))
/* tp_dealloc slot of PyEncoderType: releases every reference the encoder
 * holds by delegating to encoder_clear(), then returns the object's memory
 * through the type's tp_free. */
3136 encoder_dealloc(PyObject *self)
3138 /* Deallocate Encoder */
/* Drop the member references first; tp_free must be the last step. */
3139 encoder_clear(self);
3140 Py_TYPE(self)->tp_free(self);
/* tp_traverse slot of PyEncoderType: reports the object references held by
 * the encoder to the cyclic garbage collector through Py_VISIT().
 *
 * Keep this list in step with encoder_clear().
 * NOTE(review): encoder_clear() also releases s->skipkeys_bool, which is
 * not visited here -- confirm whether that omission is intentional. */
3144 encoder_traverse(PyObject *self, visitproc visit, void *arg)
3147 assert(PyEncoder_Check(self));
3148 s = (PyEncoderObject *)self;
3149 Py_VISIT(s->markers);
3150 Py_VISIT(s->defaultfn);
3151 Py_VISIT(s->encoder);
3152 Py_VISIT(s->encoding);
3153 Py_VISIT(s->indent);
3154 Py_VISIT(s->key_separator);
3155 Py_VISIT(s->item_separator);
3156 Py_VISIT(s->key_memo);
3157 Py_VISIT(s->sort_keys);
3158 Py_VISIT(s->item_sort_kw);
3159 Py_VISIT(s->item_sort_key);
3160 Py_VISIT(s->Decimal);
/* tp_clear slot of PyEncoderType, also called from encoder_dealloc().
 * Py_CLEAR() sets each member to NULL before dropping its reference, so
 * running this function more than once on the same object is harmless. */
3165 encoder_clear(PyObject *self)
3167 /* Release every object reference held by the encoder (does not free the encoder itself) */
3169 assert(PyEncoder_Check(self));
3170 s = (PyEncoderObject *)self;
3171 Py_CLEAR(s->markers);
3172 Py_CLEAR(s->defaultfn);
3173 Py_CLEAR(s->encoder);
3174 Py_CLEAR(s->encoding);
3175 Py_CLEAR(s->indent);
3176 Py_CLEAR(s->key_separator);
3177 Py_CLEAR(s->item_separator);
3178 Py_CLEAR(s->key_memo);
3179 Py_CLEAR(s->skipkeys_bool);
3180 Py_CLEAR(s->sort_keys);
3181 Py_CLEAR(s->item_sort_kw);
3182 Py_CLEAR(s->item_sort_key);
3183 Py_CLEAR(s->Decimal);
/* Docstring for encoder objects; installed in the tp_doc slot of
 * PyEncoderType. */
3187 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
/* Type object for the C encoder, exported from the module under the name
 * "make_encoder".  Instances are callable (tp_call) and take part in cyclic
 * garbage collection (Py_TPFLAGS_HAVE_GC with tp_traverse / tp_clear).
 * NOTE(review): module initialisation assigns PyType_GenericNew to tp_new
 * before calling PyType_Ready(), which replaces the encoder_new entry
 * given below -- confirm which constructor is meant to be in effect. */
3190 PyTypeObject PyEncoderType = {
3191 PyVarObject_HEAD_INIT(NULL, 0)
3192 "simplejson._speedups.Encoder", /* tp_name */
3193 sizeof(PyEncoderObject), /* tp_basicsize */
3194 0, /* tp_itemsize */
3195 encoder_dealloc, /* tp_dealloc */
3201 0, /* tp_as_number */
3202 0, /* tp_as_sequence */
3203 0, /* tp_as_mapping */
3205 encoder_call, /* tp_call */
3207 0, /* tp_getattro */
3208 0, /* tp_setattro */
3209 0, /* tp_as_buffer */
3210 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
3211 encoder_doc, /* tp_doc */
3212 encoder_traverse, /* tp_traverse */
3213 encoder_clear, /* tp_clear */
3214 0, /* tp_richcompare */
3215 0, /* tp_weaklistoffset */
3217 0, /* tp_iternext */
3219 encoder_members, /* tp_members */
3223 0, /* tp_descr_get */
3224 0, /* tp_descr_set */
3225 0, /* tp_dictoffset */
3226 encoder_init, /* tp_init */
3228 encoder_new, /* tp_new */
/* Module-level function table.  It is handed to Py_InitModule3() on
 * Python 2 and referenced from moduledef (m_methods) on Python 3. */
3232 static PyMethodDef speedups_methods[] = {
3233 {"encode_basestring_ascii",
3234 (PyCFunction)py_encode_basestring_ascii,
3236 pydoc_encode_basestring_ascii},
3238 (PyCFunction)py_scanstring,
/* All-NULL sentinel entry marking the end of the table. */
3241 {NULL, NULL, 0, NULL}
/* Module docstring: passed to Py_InitModule3() on Python 2 and stored in
 * moduledef (m_doc) on Python 3. */
3244 PyDoc_STRVAR(module_doc,
3245 "simplejson speedups\n");
/* Python 3 only: module definition consumed by PyModule_Create() during
 * module initialisation.  Python 2 builds use Py_InitModule3() instead and
 * do not compile this structure. */
3247 #if PY_MAJOR_VERSION >= 3
3248 static struct PyModuleDef moduledef = {
3249 PyModuleDef_HEAD_INIT,
3250 "_speedups", /* m_name */
3251 module_doc, /* m_doc */
3253 speedups_methods, /* m_methods */
3254 NULL, /* m_reload */
3255 NULL, /* m_traverse */
/* Shared module set-up: finalise the two extension types, create the module
 * object with the API appropriate for the Python major version, and publish
 * the types as "make_scanner" and "make_encoder". */
/* NOTE(review): these tp_new assignments run before PyType_Ready() and
 * replace whatever tp_new the static type initialisers specify (the
 * PyEncoderType initialiser names encoder_new) -- confirm this is intended. */
3265 PyScannerType.tp_new = PyType_GenericNew;
3266 if (PyType_Ready(&PyScannerType) < 0)
3268 PyEncoderType.tp_new = PyType_GenericNew;
3269 if (PyType_Ready(&PyEncoderType) < 0)
3272 #if PY_MAJOR_VERSION >= 3
3273 m = PyModule_Create(&moduledef);
3275 m = Py_InitModule3("_speedups", speedups_methods, module_doc);
/* PyModule_AddObject() takes over a reference on success, hence the
 * Py_INCREF() before each call on these statically allocated types.
 * NOTE(review): no NULL check of m is visible before it is used, and the
 * PyModule_AddObject() results are not examined -- consider handling both
 * failure cases. */
3277 Py_INCREF((PyObject*)&PyScannerType);
3278 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
3279 Py_INCREF((PyObject*)&PyEncoderType);
3280 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
3284 #if PY_MAJOR_VERSION >= 3
3286 PyInit__speedups(void)
3288 return moduleinit();