added the dependencies for the wikipedia exercises
authorBenjamin Mako Hill <mako@atdot.cc>
Sat, 26 Apr 2014 01:52:16 +0000 (18:52 -0700)
committerBenjamin Mako Hill <mako@atdot.cc>
Sat, 26 Apr 2014 01:52:16 +0000 (18:52 -0700)
24 files changed:
.gitignore [new file with mode: 0644]
LICENSE.md.mwclient [new file with mode: 0644]
LICENSE.txt.simplejson [new file with mode: 0644]
PKG-INF.simplejson [new file with mode: 0644]
README.rst.mwclient [new file with mode: 0644]
README.rst.simplejson [new file with mode: 0644]
mwclient/__init__.py [new file with mode: 0644]
mwclient/client.py [new file with mode: 0644]
mwclient/compatibility.py [new file with mode: 0644]
mwclient/errors.py [new file with mode: 0644]
mwclient/ex.py [new file with mode: 0644]
mwclient/http.py [new file with mode: 0644]
mwclient/listing.py [new file with mode: 0644]
mwclient/page.py [new file with mode: 0644]
mwclient/page_nowriteapi.py [new file with mode: 0644]
mwclient/upload.py [new file with mode: 0644]
simplejson/__init__.py [new file with mode: 0644]
simplejson/_speedups.c [new file with mode: 0644]
simplejson/compat.py [new file with mode: 0644]
simplejson/decoder.py [new file with mode: 0644]
simplejson/encoder.py [new file with mode: 0644]
simplejson/ordered_dict.py [new file with mode: 0644]
simplejson/scanner.py [new file with mode: 0644]
simplejson/tool.py [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..2f836aa
--- /dev/null
@@ -0,0 +1,2 @@
+*~
+*.pyc
diff --git a/LICENSE.md.mwclient b/LICENSE.md.mwclient
new file mode 100644 (file)
index 0000000..9359f20
--- /dev/null
@@ -0,0 +1,22 @@
+Copyright (c) 2006-2013 Bryan Tong Minh
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/LICENSE.txt.simplejson b/LICENSE.txt.simplejson
new file mode 100644 (file)
index 0000000..e05f49c
--- /dev/null
@@ -0,0 +1,79 @@
+simplejson is dual-licensed software. It is available under the terms
+of the MIT license, or the Academic Free License version 2.1. The full
+text of each license agreement is included below. This code is also
+licensed to the Python Software Foundation (PSF) under a Contributor
+Agreement.
+
+MIT License
+===========
+
+Copyright (c) 2006 Bob Ippolito
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Academic Free License v. 2.1
+============================
+
+Copyright (c) 2006 Bob Ippolito.  All rights reserved.
+
+This Academic Free License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following notice immediately following the copyright notice for the Original Work:
+
+Licensed under the Academic Free License version 2.1
+
+1) Grant of Copyright License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license to do the following:
+
+a) to reproduce the Original Work in copies;
+
+b) to prepare derivative works ("Derivative Works") based upon the Original Work;
+
+c) to distribute copies of the Original Work and Derivative Works to the public;
+
+d) to perform the Original Work publicly; and
+
+e) to display the Original Work publicly.
+
+2) Grant of Patent License. Licensor hereby grants You a world-wide, royalty-free, non-exclusive, perpetual, sublicenseable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, to make, use, sell and offer for sale the Original Work and Derivative Works.
+
+3) Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor hereby agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work, and by publishing the address of that information repository in a notice immediately following the copyright notice that applies to the Original Work.
+
+4) Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior written permission of the Licensor. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor except as expressly stated herein. No patent license is granted to make, use, sell or offer to sell embodiments of any patent claims other than the licensed claims defined in Section 2. No right is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any Original Work that Licensor otherwise would have a right to license.
+
+5) This section intentionally omitted.
+
+6) Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work.
+
+7) Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately proceeding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of NON-INFRINGEMENT, MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No license to Original Work is granted hereunder except under this disclaimer.
+
+8) Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to any person for any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to liability for death or personal injury resulting from Licensor's negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You.
+
+9) Acceptance and Termination. If You distribute copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. Nothing else but this License (or another written agreement between Licensor and You) grants You permission to create Derivative Works based upon the Original Work or to exercise any of the rights granted in Section 1 herein, and any attempt to do so except under the terms of this License (or another written agreement between Licensor and You) is expressly prohibited by U.S. copyright law, the equivalent laws of other countries, and by international treaty. Therefore, by exercising any of the rights granted to You in Section 1 herein, You indicate Your acceptance of this License and all of its terms and conditions.
+
+10) Termination for Patent Action. This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware.
+
+11) Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of the U.S. Copyright Act, 17 U.S.C. § 101 et seq., the equivalent laws of other countries, and international treaty. This section shall survive the termination of this License.
+
+12) Attorneys Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License.
+
+13) Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable.
+
+14) Definition of "You" in This License. "You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
+
+15) Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You.
+
+This license is Copyright (C) 2003-2004 Lawrence E. Rosen. All rights reserved. Permission is hereby granted to copy and distribute this license without modification. This license may not be modified without the express written permission of its copyright owner.
diff --git a/PKG-INF.simplejson b/PKG-INF.simplejson
new file mode 100644 (file)
index 0000000..f9288e1
--- /dev/null
@@ -0,0 +1,47 @@
+Metadata-Version: 1.1
+Name: simplejson
+Version: 3.4.0
+Summary: Simple, fast, extensible JSON encoder/decoder for Python
+Home-page: http://github.com/simplejson/simplejson
+Author: Bob Ippolito
+Author-email: bob@redivi.com
+License: MIT License
+Description: simplejson is a simple, fast, complete, correct and extensible
+        JSON <http://json.org> encoder and decoder for Python 2.5+
+        and Python 3.3+.  It is pure Python code with no dependencies,
+        but includes an optional C extension for a serious speed boost.
+        
+        The latest documentation for simplejson can be read online here:
+        http://simplejson.readthedocs.org/
+        
+        simplejson is the externally maintained development version of the
+        json library included with Python 2.6 and Python 3.0, but maintains
+        backwards compatibility with Python 2.5.
+        
+        The encoder can be specialized to provide serialization in any kind of
+        situation, without any special support by the objects to be serialized
+        (somewhat like pickle). This is best done with the ``default`` kwarg
+        to dumps.
+        
+        The decoder can handle incoming JSON strings of any specified encoding
+        (UTF-8 by default). It can also be specialized to post-process JSON
+        objects with the ``object_hook`` or ``object_pairs_hook`` kwargs. This
+        is particularly useful for implementing protocols such as JSON-RPC
+        that have a richer type system than JSON itself.
+        
+        
+Platform: any
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: License :: OSI Approved :: Academic Free License (AFL)
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.5
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
diff --git a/README.rst.mwclient b/README.rst.mwclient
new file mode 100644 (file)
index 0000000..0683c31
--- /dev/null
@@ -0,0 +1,82 @@
+mwclient
+========
+
+Mwclient is a client to the `MediaWiki API <//mediawiki.org/wiki/API>`_
+which provides access to most API functionality.
+It depends heavily on Bob Ippolito's `SimpleJSON <//github.com/simplejson/simplejson>`_,
+requires Python 2.4 and supports MediaWiki 1.11 and above.
+For functions not available in the current MediaWiki, a ``MediaWikiVersionError`` is raised.
+
+This framework was written by Bryan Tong Minh, who released the latest stable 
+`version 0.6.5 <//github.com/mwclient/mwclient/archive/REL_0_6_5.zip>`_ on 6 May 2011.
+The current `development version <//github.com/mwclient/mwclient>`_
+can be installed directly off github:
+
+.. code-block:: console
+
+    $ pip install git+git://github.com/mwclient/mwclient.git
+
+Please see the `release notes <//github.com/mwclient/mwclient/blob/master/RELEASE-NOTES.md>`_
+for a list of changes.
+
+Implementation notes
+--------------------
+
+Most properties and generators accept the same parameters as the API,
+without their two-letter prefix. Exceptions to this rule:
+
+* ``Image.imageinfo`` is the imageinfo of the latest image.
+  Earlier versions can be fetched using ``imagehistory()``
+* ``Site.all*``: parameter ``[ap]from`` renamed to ``start``
+* ``categorymembers`` is implemented as ``Category.members``
+* ``deletedrevs`` is ``deletedrevisions``
+* ``usercontribs`` is ``usercontributions``
+* First parameters of ``search`` and ``usercontributions`` are ``search`` and ``user`` 
+  respectively
+
+Properties and generators are implemented as Python generators.
+Their limit parameter is only an indication of the number of items in one chunk.
+It is not the total limit.
+Doing ``list(generator(limit = limit))`` will return ALL items of generator,
+and not be limited by the limit value.
+Default chunk size is generally the maximum chunk size.
+
+
+HTTPS
+-----
+
+To use https, specify the host as a tuple in the form of ``('https', hostname)``.
+
+
+Example
+-------
+
+For more information, see the
+`REFERENCE.md <//github.com/mwclient/mwclient/blob/master/REFERENCE.md>`_ file.
+
+.. code-block:: python
+
+       # Initialize Site object
+       import mwclient
+       site = mwclient.Site('commons.wikimedia.org')
+       site.login(username, password)  # Optional
+
+       # Edit page
+       page = site.Pages['Commons:Sandbox']
+       text = page.edit()
+       print 'Text in sandbox:', text.encode('utf-8')
+       page.save(text + u'\nExtra data', summary = 'Test edit')
+
+       # Printing imageusage
+       image = site.Images['Example.jpg']
+       print 'Image', image.name.encode('utf-8'), 'usage:'
+       for page in image.imageusage():
+               print 'Used:', page.name.encode('utf-8'), '; namespace', page.namespace
+               print 'Image info:', image.imageinfo
+
+       # Uploading a file
+       site.upload(open('file.jpg'), 'destination.jpg', 'Image description')
+
+       # Listing all categories (don't do this in reality)
+       for category in site.allcategories():
+               print category
diff --git a/README.rst.simplejson b/README.rst.simplejson
new file mode 100644 (file)
index 0000000..f2547ac
--- /dev/null
@@ -0,0 +1,23 @@
+simplejson is a simple, fast, complete, correct and extensible
+JSON <http://json.org> encoder and decoder for Python 2.5+
+and Python 3.3+.  It is pure Python code with no dependencies,
+but includes an optional C extension for a serious speed boost.
+
+The latest documentation for simplejson can be read online here:
+http://simplejson.readthedocs.org/
+
+simplejson is the externally maintained development version of the
+json library included with Python 2.6 and Python 3.0, but maintains
+backwards compatibility with Python 2.5.
+
+The encoder can be specialized to provide serialization in any kind of
+situation, without any special support by the objects to be serialized
+(somewhat like pickle). This is best done with the ``default`` kwarg
+to dumps.
+
+The decoder can handle incoming JSON strings of any specified encoding
+(UTF-8 by default). It can also be specialized to post-process JSON
+objects with the ``object_hook`` or ``object_pairs_hook`` kwargs. This
+is particularly useful for implementing protocols such as JSON-RPC
+that have a richer type system than JSON itself.
+
diff --git a/mwclient/__init__.py b/mwclient/__init__.py
new file mode 100644 (file)
index 0000000..bb32dde
--- /dev/null
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2006-2011 Bryan Tong Minh
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+from errors import *
+from client import Site, __ver__
+import ex
diff --git a/mwclient/client.py b/mwclient/client.py
new file mode 100644 (file)
index 0000000..c628662
--- /dev/null
@@ -0,0 +1,632 @@
+__ver__ = '0.6.6'
+
+import urllib
+import urlparse
+import time
+import random
+import sys
+import weakref
+import socket
+import base64
+
+try:
+    import json
+except ImportError:
+    import simplejson as json
+import http
+import upload
+
+import errors
+import listing
+import page
+import compatibility
+
+try:
+    import gzip
+except ImportError:
+    gzip = None
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+
+
+def parse_timestamp(t):  # Convert a 'YYYY-MM-DDTHH:MM:SSZ' timestamp string into a time tuple.
+    if t == '0000-00-00T00:00:00Z':
+        return (0, 0, 0, 0, 0, 0, 0, 0)  # all-zero sentinel, special-cased (presumably because strptime rejects month/day 00); NOTE: 8 fields, whereas struct_time has 9
+    return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ')
+
+
+class WaitToken(object):  # Handle for one retry sequence; instances are the keys of Site.wait_tokens.
+
+    def __init__(self):
+        self.id = '%x' % random.randint(0, sys.maxint)  # random id rendered as lowercase hex
+
+    def __hash__(self):
+        return hash(self.id)  # hash follows the id string; __eq__ is not overridden here
+
+
+class Site(object):
+    api_limit = 500
+
+    def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30,
+                 max_retries=25, wait_callback=lambda *x: None, clients_useragent=None,
+                 max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None):
+        # Store the caller's settings as member variables
+        self.host = host
+        self.path = path  # raw_call builds the request URL as path + script + ext
+        self.ext = ext
+        self.credentials = None
+        self.compress = compress  # when true (and gzip imported), raw_call sends 'Accept-Encoding: gzip'
+        self.httpauth = httpauth  # formatted with '%s:%s' for HTTP Basic auth in raw_call
+        self.retry_timeout = retry_timeout  # wait() sleeps retry_timeout * retry-count seconds
+        self.max_retries = max_retries  # -1 disables the retry cap checked in wait()
+        self.wait_callback = wait_callback  # called by wait() as (site, token, retry, args)
+        self.max_lag = str(max_lag)  # sent as the 'maxlag' parameter by raw_index
+        self.force_login = force_login
+
+        # WaitToken => (retry count, call args); weak keys, so entries do not keep tokens alive
+        self.wait_tokens = weakref.WeakKeyDictionary()
+
+        # Site properties
+        self.blocked = False    # Whether current user is blocked
+        self.hasmsg = False  # Whether current user has new messages
+        self.groups = []    # Groups current user belongs to
+        self.rights = []    # Rights current user has
+        self.tokens = {}    # Edit tokens of the current user
+        self.version = None  # set by site_init(); require() treats None as "not yet initialized"
+
+        self.namespaces = self.default_namespaces  # class-level fallback until site_init() replaces it
+        self.writeapi = False
+
+        # Setup connection: use the caller's pool if given, otherwise create one
+        if pool is None:
+            self.connection = http.HTTPPool(clients_useragent)
+        else:
+            self.connection = pool
+
+        # Page generators
+        self.pages = listing.PageList(self)
+        self.categories = listing.PageList(self, namespace=14)  # 14 is u'Category' in default_namespaces
+        self.images = listing.PageList(self, namespace=6)  # 6 is u'Image' in default_namespaces
+
+        # Compat page generators (capitalized aliases of the attributes above)
+        self.Pages = self.pages
+        self.Categories = self.categories
+        self.Images = self.images
+
+        # Initialization status
+        self.initialized = False
+
+        if do_init:
+            try:
+                self.site_init()
+            except errors.APIError, e:
+                # Private wiki, do init after login; any other API error code propagates
+                if e[0] not in (u'unknown_action', u'readapidenied'):
+                    raise
+
+    def site_init(self):  # Query site and user info, parse the MediaWiki version, then mark the site initialized.
+        meta = self.api('query', meta='siteinfo|userinfo',
+                        siprop='general|namespaces', uiprop='groups|rights')
+
+        # Extract site info
+        self.site = meta['query']['general']
+        self.namespaces = dict(((i['id'], i.get('*', '')) for i in meta['query']['namespaces'].itervalues()))  # namespace id -> name ('' when '*' is absent)
+        self.writeapi = 'writeapi' in self.site
+
+        # Determine version
+        if self.site['generator'].startswith('MediaWiki '):
+            version = self.site['generator'][10:].split('.')  # [10:] drops the 10-character 'MediaWiki ' prefix
+
+            def split_num(s):  # split leading digits from any suffix: '23wmf1' -> (23, 'wmf1'), '11' -> (11,)
+                i = 0
+                while i < len(s):
+                    if s[i] < '0' or s[i] > '9':
+                        break
+                    i += 1
+                if s[i:]:
+                    return (int(s[:i]), s[i:], )
+                else:
+                    return (int(s[:i]), )
+            self.version = sum((split_num(s) for s in version), ())  # concatenates the per-component tuples into one flat tuple
+
+            if len(self.version) < 2:
+                raise errors.MediaWikiVersionError('Unknown MediaWiki %s' % '.'.join(version))
+        else:
+            raise errors.MediaWikiVersionError('Unknown generator %s' % self.site['generator'])
+        # Require 1.11 until some compatibility issues are fixed
+        self.require(1, 11)
+
+        # User info; the second argument is True/False for "site is at least 1.12"
+        userinfo = compatibility.userinfo(meta, self.require(1, 12, raise_error=False))
+        self.username = userinfo['name']
+        self.groups = userinfo.get('groups', [])
+        self.rights = userinfo.get('rights', [])
+        self.initialized = True
+
+    default_namespaces = {0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project', 5: u'Project talk',
+                          6: u'Image', 7: u'Image talk', 8: u'MediaWiki', 9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk',
+                          12: u'Help', 13: u'Help talk', 14: u'Category', 15: u'Category talk', -1: u'Special', -2: u'Media'}  # namespace id -> name; assigned to self.namespaces in __init__ until site_init() replaces it
+
+    def __repr__(self):  # Identify the site by host and script path.
+        return "<Site object '%s%s'>" % (self.host, self.path)
+
+    def api(self, action, *args, **kwargs):
+        """ An API call. Handles errors and returns dict object. Positional args must be (key, value) pairs. """
+        kwargs.update(args)  # merge the positional (key, value) pairs into the keyword parameters
+        if action == 'query':  # piggy-back userinfo on every query so handle_api_result can refresh blocked/hasmsg
+            if 'meta' in kwargs:
+                kwargs['meta'] += '|userinfo'
+            else:
+                kwargs['meta'] = 'userinfo'
+            if 'uiprop' in kwargs:
+                kwargs['uiprop'] += '|blockinfo|hasmsg'
+            else:
+                kwargs['uiprop'] = 'blockinfo|hasmsg'
+
+        token = self.wait_token()  # one token for the whole loop so the retry count accumulates
+        while True:
+            info = self.raw_api(action, **kwargs)
+            if not info:
+                info = {}
+            res = self.handle_api_result(info, token=token)  # False means "waited, try again"; errors are raised
+            if res:
+                return info
+
+    def handle_api_result(self, info, kwargs=None, token=None):  # Update user state from an API response; True = ok, False = waited and caller should retry; raises APIError otherwise.
+        if token is None:
+            token = self.wait_token()
+
+        try:
+            userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error=None))  # raise_error=None: require() returns None while the version is still unknown
+        except KeyError:
+            userinfo = ()  # no userinfo available: the membership tests below are then all False
+        if 'blockedby' in userinfo:
+            self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u''))
+        else:
+            self.blocked = False
+        self.hasmsg = 'message' in userinfo
+        self.logged_in = 'anon' not in userinfo
+        if 'error' in info:
+            if info['error']['code'] in (u'internal_api_error_DBConnectionError', ):  # the only error code that is retried
+                self.wait(token)
+                return False
+            if '*' in info['error']:  # pass the extra '*' payload along when the response has one
+                raise errors.APIError(info['error']['code'],
+                                      info['error']['info'], info['error']['*'])
+            raise errors.APIError(info['error']['code'],
+                                  info['error']['info'], kwargs)
+        return True
+
+    @staticmethod
+    def _to_str(data):  # Return data as a byte string: unicode is UTF-8 encoded, anything else goes through str().
+        if type(data) is unicode:  # exact type check: subclasses of unicode take the str() path below
+            return data.encode('utf-8')
+        return str(data)
+
+    @staticmethod
+    def _query_string(*args, **kwargs):  # URL-encode parameters given as (key, value) pairs and/or keywords.
+        kwargs.update(args)
+        qs = urllib.urlencode([(k, Site._to_str(v)) for k, v in kwargs.iteritems()
+                               if k != 'wpEditToken'])
+        if 'wpEditToken' in kwargs:  # the token is always appended last (presumably so a truncated request lacks it; confirm)
+            qs += '&wpEditToken=' + urllib.quote(Site._to_str(kwargs['wpEditToken']))
+        return qs
+
+    def raw_call(self, script, data):  # POST data to path + script + ext and return the response stream, retrying via wait() on retryable failures.
+        url = self.path + script + self.ext
+        headers = {}
+        if not issubclass(data.__class__, upload.Upload):  # Upload objects do not get the form-urlencoded content type
+            headers['Content-Type'] = 'application/x-www-form-urlencoded'
+        if self.compress and gzip:  # gzip is None when the module failed to import
+            headers['Accept-Encoding'] = 'gzip'
+        if self.httpauth is not None:
+            credentials = base64.encodestring('%s:%s' % self.httpauth).replace('\n', '')  # strip the newlines encodestring inserts
+            headers['Authorization'] = 'Basic %s' % credentials
+        token = self.wait_token((script, data))
+        while True:
+            try:
+                stream = self.connection.post(self.host,
+                                              url, data=data, headers=headers)
+                if stream.getheader('Content-Encoding') == 'gzip':
+                    # BAD. Reads the entire body into a StringIO before wrapping it in GzipFile
+                    seekable_stream = StringIO(stream.read())
+                    stream = gzip.GzipFile(fileobj=seekable_stream)
+                return stream
+
+            except errors.HTTPStatusError, e:
+                if e[0] == 503 and e[1].getheader('X-Database-Lag'):  # database lag reported: wait at least Retry-After seconds
+                    self.wait(token, int(e[1].getheader('Retry-After')))
+                elif e[0] < 500 or e[0] > 599:  # statuses outside 5xx are not retried
+                    raise
+                else:
+                    self.wait(token)
+            except errors.HTTPRedirectError:  # re-raised here so the HTTPError handler below does not retry it
+                raise
+            except errors.HTTPError:
+                self.wait(token)
+            except ValueError:  # NOTE(review): source of this ValueError is not visible here; it is retried like an HTTP error
+                self.wait(token)
+
+    def raw_api(self, action, *args, **kwargs):
+        """Sends a call to the API and returns the decoded JSON response."""
+        kwargs['action'] = action
+        kwargs['format'] = 'json'
+        data = self._query_string(*args, **kwargs)
+        json_data = self.raw_call('api', data).read()
+        try:
+            return json.loads(json_data)
+        except ValueError:
+            if json_data.startswith('MediaWiki API is not enabled for this site.'):  # plain-text reply instead of JSON
+                raise errors.APIDisabledError
+            raise  # any other undecodable body: propagate the original ValueError
+
+    def raw_index(self, action, *args, **kwargs):
+        """Sends a call to index.php rather than the API. Returns the response body as unicode."""
+        kwargs['action'] = action
+        kwargs['maxlag'] = self.max_lag
+        data = self._query_string(*args, **kwargs)
+        return self.raw_call('index', data).read().decode('utf-8', 'ignore')  # 'ignore' drops undecodable bytes
+
+    def wait_token(self, args=None):  # Create and register a new WaitToken with a retry count of 0.
+        token = WaitToken()
+        self.wait_tokens[token] = (0, args)  # (retry count, args later passed to wait_callback / MaximumRetriesExceeded)
+        return token
+
+    def wait(self, token, min_wait=0):  # Record one more retry for token, then sleep; raises MaximumRetriesExceeded past the cap.
+        retry, args = self.wait_tokens[token]
+        self.wait_tokens[token] = (retry + 1, args)
+        if retry > self.max_retries and self.max_retries != -1:  # retry is the count before this call; -1 means unlimited
+            raise errors.MaximumRetriesExceeded(self, token, args)
+        self.wait_callback(self, token, retry, args)
+
+        timeout = self.retry_timeout * retry  # grows linearly; the first wait (retry == 0) is 0 seconds unless min_wait applies
+        if timeout < min_wait:
+            timeout = min_wait
+        time.sleep(timeout)
+        return self.wait_tokens[token]  # the updated (retry count, args) tuple
+
+    def require(self, major, minor, revision=None, raise_error=True):
+        """Check that the wiki runs at least MediaWiki ``major.minor``.
+
+        Returns True when the version is sufficient and False when it is not
+        and ``raise_error`` is false.  If the site version is still unknown
+        (``self.version is None``) the method returns None when
+        ``raise_error`` is None and raises ``RuntimeError`` otherwise.
+
+        Raises ``errors.MediaWikiVersionError`` when the version is too old
+        and ``raise_error`` is true, and ``NotImplementedError`` whenever a
+        ``revision`` is given.
+        """
+        if self.version is None:
+            if raise_error is None:
+                return
+            raise RuntimeError('Site %s has not yet been initialized' % repr(self))
+
+        if revision is not None:
+            # Revision-level comparison is not supported.
+            raise NotImplementedError
+
+        current = self.version[:2]
+        if current >= (major, minor):
+            return True
+        if not raise_error:
+            return False
+        raise errors.MediaWikiVersionError('Requires version %s.%s, current version is %s.%s'
+                                           % ((major, minor) + current))
+
+    # Actions
+    def email(self, user, text, subject, cc=False):
+        """Send an e-mail to ``user`` through Special:Emailuser.
+
+        ``cc`` adds the ``wpCCMe`` form field.  The form is submitted through
+        ``raw_index`` and the returned page is scanned for the success marker.
+
+        Raises ``errors.NoSpecifiedEmail`` when the page reports that the
+        user has no valid e-mail address, and ``errors.EmailError`` (carrying
+        the page text) for any other failure.
+        """
+        # TODO: Use api!
+        # NOTE(review): assumes self.tokens already holds an 'edit' entry.
+        postdata = {
+            'wpSubject': subject,
+            'wpText': text,
+            'wpEditToken': self.tokens['edit'],
+            'uselang': 'en',
+            'title': u'Special:Emailuser/' + user,
+        }
+        if cc:
+            postdata['wpCCMe'] = '1'
+
+        data = self.raw_index('submit', **postdata)
+        if 'var wgAction = "success";' not in data:
+            if 'This user has not specified a valid e-mail address' in data:
+                # Dirty hack: detect the failure reason from the page text.
+                # The class in errors.py is NoSpecifiedEmail (no "Error" suffix).
+                raise errors.NoSpecifiedEmail(user)
+            raise errors.EmailError(data)
+
+    def login(self, username=None, password=None, cookies=None, domain=None):
+        """Log in to the wiki and refresh the cached user information.
+
+        ``username``/``password`` (and optional ``domain``) replace the stored
+        credentials when both are given.  ``cookies`` are merged into the
+        cookie jar of this site's host.  With credentials present the API
+        ``login`` action is repeated until it succeeds, supplying the token on
+        ``NeedToken`` and waiting on ``Throttled``.
+
+        Afterwards an initialized site reloads name, groups and rights and
+        clears its token cache; an uninitialized site runs ``site_init``.
+
+        Raises ``errors.LoginError`` for any other login result.
+        """
+        if self.initialized:
+            self.require(1, 10)
+
+        if username and password:
+            self.credentials = (username, password, domain)
+        if cookies:
+            host_jars = self.conn.cookies
+            if self.host not in host_jars:
+                host_jars[self.host] = http.CookieJar()
+            host_jars[self.host].update(cookies)
+
+        if self.credentials:
+            throttle_token = self.wait_token()
+            params = {
+                'lgname': self.credentials[0],
+                'lgpassword': self.credentials[1]
+            }
+            if self.credentials[2]:
+                params['lgdomain'] = self.credentials[2]
+            while True:
+                reply = self.api('login', **params)['login']
+                outcome = reply['result']
+                if outcome == 'Success':
+                    break
+                if outcome == 'NeedToken':
+                    params['lgtoken'] = reply['token']
+                elif outcome == 'Throttled':
+                    self.wait(throttle_token, reply.get('wait', 5))
+                else:
+                    raise errors.LoginError(self, reply)
+
+        if not self.initialized:
+            self.site_init()
+            return
+
+        info = self.api('query', meta='userinfo', uiprop='groups|rights')
+        userinfo = compatibility.userinfo(info, self.require(1, 12, raise_error=False))
+        self.username = userinfo['name']
+        self.groups = userinfo.get('groups', [])
+        self.rights = userinfo.get('rights', [])
+        self.tokens = {}
+
+    def upload(self, file=None, filename=None, description='', ignore=False, file_size=None,
+               url=None, session_key=None, comment=None):
+        """Upload a file to the wiki.
+
+        ``file`` may be a string with the file contents or a seekable
+        file-like object; it may be None when ``url`` or ``session_key``
+        is supplied instead.  When ``comment`` is given it becomes the upload
+        comment and ``description`` is sent as the page text; otherwise
+        ``description`` is used as the comment.  ``ignore`` sets
+        ``ignorewarnings``.
+
+        Wikis older than 1.16 are handled by ``compatibility.old_upload``.
+
+        Returns the ``upload`` member of the API reply (``{}`` if absent).
+        Raises ``errors.InsufficientPermission`` when the upload right is
+        missing; non-5xx HTTP status errors are re-raised, other HTTP
+        failures are retried through ``self.wait``.
+        """
+        if self.version[:2] < (1, 16):
+            return compatibility.old_upload(self, file=file, filename=filename,
+                                            description=description, ignore=ignore,
+                                            file_size=file_size)
+
+        image = self.Images[filename]
+        if not image.can('upload'):
+            raise errors.InsufficientPermission(filename)
+
+        predata = {}
+
+        if comment is None:
+            predata['comment'] = description
+        else:
+            predata['comment'] = comment
+            predata['text'] = description
+
+        if ignore:
+            predata['ignorewarnings'] = 'true'
+        predata['token'] = image.get_token('edit')
+        predata['action'] = 'upload'
+        predata['format'] = 'json'
+        predata['filename'] = filename
+        if url:
+            predata['url'] = url
+        if session_key:
+            predata['session_key'] = session_key
+
+        if file is None:
+            postdata = self._query_string(predata)
+        else:
+            if isinstance(file, str):
+                file_size = len(file)
+                file = StringIO(file)
+            if file_size is None:
+                # Measure the stream by seeking to its end, then rewind.
+                file.seek(0, 2)
+                file_size = file.tell()
+                file.seek(0, 0)
+
+            postdata = upload.UploadFile('file', filename, file_size, file, predata)
+
+        wait_token = self.wait_token()
+        while True:
+            try:
+                data = self.raw_call('api', postdata).read()
+                info = json.loads(data)
+                if not info:
+                    info = {}
+                if self.handle_api_result(info, kwargs=predata):
+                    return info.get('upload', {})
+            except errors.HTTPStatusError, e:
+                if e[0] == 503 and e[1].getheader('X-Database-Lag'):
+                    self.wait(wait_token, int(e[1].getheader('Retry-After')))
+                elif e[0] < 500 or e[0] > 599:
+                    raise
+                else:
+                    self.wait(wait_token)
+            except errors.HTTPError:
+                self.wait(wait_token)
+            # Rewind the stream before retrying.  URL and session-key uploads
+            # have no file object, so there is nothing to rewind.
+            if file is not None:
+                file.seek(0, 0)
+
+    def parse(self, text=None, title=None, page=None):
+        """Run the API ``parse`` action and return its ``parse`` member.
+
+        Only the arguments that are not None are sent to the API.
+        """
+        candidates = (('text', text), ('title', title), ('page', page))
+        kwargs = dict((key, value) for key, value in candidates
+                      if value is not None)
+        return self.api('parse', **kwargs)['parse']
+
+    # def block: requires 1.12
+    # def unblock: requires 1.12
+    # def patrol: requires 1.14
+    # def import: requires 1.15
+
+    # Lists
+    def allpages(self, start=None, prefix=None, namespace='0', filterredir='all',
+                 minsize=None, maxsize=None, prtype=None, prlevel=None,
+                 limit=None, dir='ascending', filterlanglinks='all', generator=True):
+        """Retrieve all pages on the wiki as a generator.
+
+        Requires MediaWiki 1.9.  ``start`` is sent as the ``from`` parameter;
+        the remaining filters are forwarded under their own names.
+        ``generator`` is handed to ``listing.List.get_prefix`` and
+        ``listing.List.get_list``, which choose the parameter prefix and the
+        list class.  Items are returned by their ``title`` value.
+        """
+        self.require(1, 9)
+
+        pfx = listing.List.get_prefix('ap', generator)
+        params = dict(listing.List.generate_kwargs(
+            pfx, ('from', start), prefix=prefix,
+            minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel,
+            namespace=namespace, filterredir=filterredir, dir=dir,
+            filterlanglinks=filterlanglinks))
+        list_class = listing.List.get_list(generator)
+        return list_class(self, 'allpages', 'ap', limit=limit,
+                          return_values='title', **params)
+    # def allimages(self): requires 1.12
+    # TODO!
+
+    def alllinks(self, start=None, prefix=None, unique=False, prop='title',
+                 namespace='0', limit=None, generator=True):
+        """Retrieve a list of all links on the wiki as a generator.
+
+        Requires MediaWiki 1.11.  ``start`` is sent as the ``from``
+        parameter; ``unique`` adds the prefixed ``unique`` flag.  Items are
+        returned by their ``title`` value.
+        """
+        self.require(1, 11)
+
+        pfx = listing.List.get_prefix('al', generator)
+        params = dict(listing.List.generate_kwargs(
+            pfx, ('from', start), prefix=prefix, prop=prop, namespace=namespace))
+        if unique:
+            params[pfx + 'unique'] = '1'
+        list_class = listing.List.get_list(generator)
+        return list_class(self, 'alllinks', 'al', limit=limit,
+                          return_values='title', **params)
+
+    def allcategories(self, start=None, prefix=None, dir='ascending', limit=None, generator=True):
+        """Retrieve all categories on the wiki as a generator.
+
+        Requires MediaWiki 1.12.  ``start`` is sent as the ``from``
+        parameter; ``generator`` selects the parameter prefix and list class.
+        """
+        self.require(1, 12)
+
+        pfx = listing.List.get_prefix('ac', generator)
+        params = dict(listing.List.generate_kwargs(
+            pfx, ('from', start), prefix=prefix, dir=dir))
+        list_class = listing.List.get_list(generator)
+        return list_class(self, 'allcategories', 'ac', limit=limit, **params)
+
+    def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None):
+        """Retrieve all users on the wiki as a generator.
+
+        Requires MediaWiki 1.11.  ``start`` is sent as the ``from``
+        parameter; ``prefix``, ``group`` and ``prop`` are forwarded with the
+        ``au`` prefix.
+        """
+        self.require(1, 11)
+
+        params = dict(listing.List.generate_kwargs(
+            'au', ('from', start), prefix=prefix, group=group, prop=prop))
+        return listing.List(self, 'allusers', 'au', limit=limit, **params)
+
+    def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None,
+               prop='id|user|by|timestamp|expiry|reason|flags'):
+        """Retrieve blocks as a generator.
+
+        Requires MediaWiki 1.12.  ``start``, ``end``, ``dir``, ``ids``,
+        ``users`` and ``prop`` are forwarded to the API with the ``bk``
+        prefix; ``limit`` is passed to ``listing.List``.
+
+        Each block is a dictionary containing:
+        - user: the username or IP address of the user
+        - id: the ID of the block
+        - timestamp: when the block was added
+        - expiry: when the block runs out (infinity for indefinite blocks)
+        - reason: the reason they are blocked
+        - allowusertalk: key is present (empty string) if the user is allowed to edit their user talk page
+        - by: the administrator who blocked the user
+        - nocreate: key is present (empty string) if the user's ability to create accounts has been disabled.
+
+        """
+
+        self.require(1, 12)
+        # ``ids`` used to be accepted but silently dropped; forward it like
+        # the other filters so callers can select blocks by ID.
+        kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end, dir=dir,
+                                                   ids=ids, users=users, prop=prop))
+        return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs)
+
+    def deletedrevisions(self, start=None, end=None, dir='older', namespace=None,
+                         limit=None, prop='user|comment'):
+        """Retrieve deleted revisions (API list ``deletedrevs``) as a generator.
+
+        Requires MediaWiki 1.12.  The filters are forwarded with the ``dr``
+        prefix.
+        """
+        # TODO: Fix
+        self.require(1, 12)
+
+        params = dict(listing.List.generate_kwargs(
+            'dr', start=start, end=end, dir=dir, namespace=namespace, prop=prop))
+        return listing.List(self, 'deletedrevs', 'dr', limit=limit, **params)
+
+    def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None):
+        """Retrieve the pages that link to a particular domain or URL as a generator.
+
+        This API call mirrors the Special:LinkSearch function on-wiki.
+
+        ``query`` can be a domain like 'bbc.co.uk'. Wildcards can be used,
+        e.g. '*.bbc.co.uk'. Alternatively, a query can contain a full domain
+        name and some or all of a URL: e.g. '*.wikipedia.org/wiki/*'
+
+        See <https://meta.wikimedia.org/wiki/Help:Linksearch> for details.
+
+        The generator returns dictionaries containing four keys:
+        - url: the URL linked to.
+        - ns: namespace of the wiki page
+        - pageid: the ID of the wiki page
+        - title: the page title.
+
+        Requires MediaWiki 1.11.
+        """
+        self.require(1, 11)
+
+        params = dict(listing.List.generate_kwargs(
+            'eu', query=query, prop=prop, protocol=protocol, namespace=namespace))
+        return listing.List(self, 'exturlusage', 'eu', limit=limit, **params)
+
+    def logevents(self, type=None, prop=None, start=None, end=None,
+                  dir='older', user=None, title=None, limit=None, action=None):
+        """Retrieve log events (API list ``logevents``) as a generator.
+
+        Requires MediaWiki 1.10.  All filters are forwarded with the ``le``
+        prefix.
+        """
+        self.require(1, 10)
+
+        params = dict(listing.List.generate_kwargs(
+            'le', prop=prop, type=type, start=start,
+            end=end, dir=dir, user=user, title=title, action=action))
+        return listing.List(self, 'logevents', 'le', limit=limit, **params)
+
+    # def protectedtitles requires 1.15
+    def random(self, namespace, limit=20):
+        """Retrieve a generator of random pages from a particular namespace.
+
+        ``limit`` specifies the number of random articles retrieved.
+        ``namespace`` is a namespace identifier integer.
+
+        The generator yields dictionaries with namespace, page ID and title.
+        Requires MediaWiki 1.12.
+
+        """
+        self.require(1, 12)
+
+        params = dict(listing.List.generate_kwargs('rn', namespace=namespace))
+        return listing.List(self, 'random', 'rn', limit=limit, **params)
+
+    def recentchanges(self, start=None, end=None, dir='older', namespace=None,
+                      prop=None, show=None, limit=None, type=None):
+        """Retrieve recent changes (API list ``recentchanges``) as a generator.
+
+        Requires MediaWiki 1.9.  All filters are forwarded with the ``rc``
+        prefix.
+        """
+        self.require(1, 9)
+
+        params = dict(listing.List.generate_kwargs(
+            'rc', start=start, end=end, dir=dir,
+            namespace=namespace, prop=prop, show=show, type=type))
+        return listing.List(self, 'recentchanges', 'rc', limit=limit, **params)
+
+    def search(self, search, namespace='0', what='title', redirects=False, limit=None):
+        """Search the wiki (API list ``search``) and return a generator.
+
+        Requires MediaWiki 1.11.  ``search``, ``namespace`` and ``what`` are
+        forwarded with the ``sr`` prefix; ``redirects`` adds ``srredirects``.
+        """
+        self.require(1, 11)
+
+        params = dict(listing.List.generate_kwargs(
+            'sr', search=search, namespace=namespace, what=what))
+        if redirects:
+            params['srredirects'] = '1'
+        return listing.List(self, 'search', 'sr', limit=limit, **params)
+
+    def usercontributions(self, user, start=None, end=None, dir='older', namespace=None,
+                          prop=None, show=None, limit=None):
+        """Retrieve the contributions of ``user`` (API list ``usercontribs``).
+
+        Requires MediaWiki 1.9.  All filters are forwarded with the ``uc``
+        prefix.
+        """
+        self.require(1, 9)
+
+        params = dict(listing.List.generate_kwargs(
+            'uc', user=user, start=start, end=end,
+            dir=dir, namespace=namespace, prop=prop, show=show))
+        return listing.List(self, 'usercontribs', 'uc', limit=limit, **params)
+
+    def users(self, users, prop='blockinfo|groups|editcount'):
+        """Retrieve information about the given users (API list ``users``).
+
+        ``users`` is an iterable of user names; they are joined with ``|``.
+        Requires MediaWiki 1.12.
+        """
+        self.require(1, 12)
+
+        joined_names = '|'.join(users)
+        return listing.List(self, 'users', 'us', ususers=joined_names, usprop=prop)
+
+    def watchlist(self, allrev=False, start=None, end=None, namespace=None, dir='older',
+                  prop=None, show=None, limit=None):
+        """Retrieve the watchlist (API list ``watchlist``) as a generator.
+
+        Requires MediaWiki 1.9.  The filters are forwarded with the ``wl``
+        prefix; ``allrev`` adds ``wlallrev``.
+        """
+        self.require(1, 9)
+
+        params = dict(listing.List.generate_kwargs(
+            'wl', start=start, end=end,
+            namespace=namespace, dir=dir, prop=prop, show=show))
+        if allrev:
+            params['wlallrev'] = '1'
+        return listing.List(self, 'watchlist', 'wl', limit=limit, **params)
+
+    def expandtemplates(self, text, title=None, generatexml=False):
+        """Take wikitext (``text``) and expand its templates.
+
+        ``title`` is sent to the API only when it is given.  With
+        ``generatexml`` true the API is also asked for the parse tree.
+
+        Returns the expanded wikitext, or an ``(expanded, parsetree)`` tuple
+        when ``generatexml`` is true.  Requires MediaWiki 1.11.
+        """
+        self.require(1, 11)
+
+        kwargs = {}
+        # Forward the title only when the caller supplied one (the previous
+        # test was inverted and sent it only when it was None).
+        if title is not None:
+            kwargs['title'] = title
+        if generatexml:
+            kwargs['generatexml'] = '1'
+
+        result = self.api('expandtemplates', text=text, **kwargs)
+
+        if generatexml:
+            return result['expandtemplates']['*'], result['parsetree']['*']
+        else:
+            return result['expandtemplates']['*']
+
+    def ask(self, query, title=None):
+        """Ask a query against Semantic MediaWiki.
+
+        ``title`` is sent only when it is given.  Returns the ``results``
+        member of the ``query`` section of the raw API reply.
+        """
+        kwargs = {}
+        # Forward the title only when the caller supplied one (the previous
+        # test was inverted and sent it only when it was None).
+        if title is not None:
+            kwargs['title'] = title
+        result = self.raw_api('ask', query=query, **kwargs)
+        return result['query']['results']
diff --git a/mwclient/compatibility.py b/mwclient/compatibility.py
new file mode 100644 (file)
index 0000000..273a16f
--- /dev/null
@@ -0,0 +1,98 @@
+import upload
+import errors
+
+
+def title(prefix, new_format):
+    """Return the name of the title parameter for the given API format.
+
+    The new format uses ``prefix + 'title'``; the old format always uses
+    the literal ``'titles'`` and ignores ``prefix``.
+    """
+    return prefix + 'title' if new_format else 'titles'
+
+
+def userinfo(data, new_format=None):
+    """Extract the ``userinfo`` section from an API reply.
+
+    With ``new_format`` true the section is read from ``data['query']``,
+    with it false from the top level of ``data``.  When ``new_format`` is
+    None the location is guessed (top level first, then ``query``) and an
+    empty dict is returned if neither holds the section.
+    """
+    if new_format is not None:
+        container = data['query'] if new_format else data
+        return container['userinfo']
+
+    # Unknown version; trying to guess
+    if 'userinfo' in data:
+        return data['userinfo']
+    if 'userinfo' in data.get('query', ()):
+        return data['query']['userinfo']
+    return {}
+
+
+def iiprop(version):
+    """Return the image-info property list supported by ``version``.
+
+    ``metadata`` is added from 1.12 on and ``archivename`` from 1.13 on.
+    """
+    release = version[:2]
+    props = 'timestamp|user|comment|url|size|sha1'
+    if release >= (1, 12):
+        props += '|metadata'
+    if release >= (1, 13):
+        props += '|archivename'
+    return props
+
+
+def cmtitle(page, new_format, prefix=''):
+    """Return the ``(parameter name, value)`` pair that selects a category.
+
+    The new format sends the full page name as ``prefix + 'title'``; the old
+    format sends the name without its namespace as ``prefix + 'category'``.
+    """
+    if not new_format:
+        return prefix + 'category', page.strip_namespace(page.name)
+    return prefix + 'title', page.name
+
+
+def protectright(version):
+    """Return the name of the right checked for protected pages.
+
+    ``'editprotected'`` from MediaWiki 1.13 on, ``'protect'`` before that.
+    """
+    return 'editprotected' if version[:2] >= (1, 13) else 'protect'
+
+from cStringIO import StringIO
+
+
+def old_upload(self, file, filename, description, license='', ignore=False, file_size=None):
+    """Upload a file by posting the Special:Upload form.
+
+    ``self`` is the site object.  ``file`` is either a string holding the
+    contents or a seekable file-like object; ``file_size`` is measured by
+    seeking when it is not given.  ``ignore`` allows overwriting an existing
+    file and sets ``wpIgnoreWarning``.
+
+    Raises ``errors.InsufficientPermission`` without the upload right and
+    ``errors.FileExists`` when the file exists and ``ignore`` is false.
+    Non-5xx HTTP status errors are re-raised; other HTTP failures are
+    retried through ``self.wait``.
+    """
+    image = self.Images[filename]
+    if not image.can('upload'):
+        raise errors.InsufficientPermission(filename)
+    if image.exists and not ignore:
+        raise errors.FileExists(filename)
+
+    if type(file) is str:
+        file_size = len(file)
+        file = StringIO(file)
+    if file_size is None:
+        # Seek to the end to learn the size, then rewind.
+        file.seek(0, 2)
+        file_size = file.tell()
+        file.seek(0, 0)
+
+    # Do this thing later so that an incomplete upload won't work
+    # predata['wpDestFile'] = filename
+    predata = {}
+    predata['wpUploadDescription'] = description
+    predata['wpLicense'] = license
+    if ignore:
+        predata['wpIgnoreWarning'] = 'true'
+    predata['wpUpload'] = 'Upload file'
+    predata['wpSourceType'] = 'file'
+    predata['wpDestFile'] = filename
+    predata['wpEditToken'] = image.get_token('edit')
+
+    postdata = upload.UploadFile('wpUploadFile', filename, file_size, file, predata)
+
+    wait_token = self.wait_token()
+    while True:
+        try:
+            self.connection.post(self.host,
+                                 self.path + 'index.php?title=Special:Upload&maxlag='
+                                 + self.max_lag, data=postdata).read()
+            return
+        except errors.HTTPStatusError, e:
+            status = e[0]
+            if status == 503 and e[1].getheader('X-Database-Lag'):
+                self.wait(wait_token, int(e[1].getheader('Retry-After')))
+            elif 500 <= status <= 599:
+                self.wait(wait_token)
+            else:
+                raise
+        except errors.HTTPError:
+            self.wait(wait_token)
+        # Rewind the stream before the next attempt.
+        file.seek(0, 0)
diff --git a/mwclient/errors.py b/mwclient/errors.py
new file mode 100644 (file)
index 0000000..4075013
--- /dev/null
@@ -0,0 +1,66 @@
+class MwClientError(RuntimeError):
+    """Base class of the exceptions defined in this module."""
+
+
+class MediaWikiVersionError(MwClientError):
+    """The wiki runs a MediaWiki version older than a feature requires."""
+
+
+class APIDisabledError(MwClientError):
+    """The wiki answered that the MediaWiki API is not enabled."""
+
+
+class HTTPError(MwClientError):
+    """Transport-level HTTP failure, e.g. a wrapped ``socket.error``."""
+
+
+class HTTPStatusError(MwClientError):
+    """Non-200 HTTP response; raised with ``(status, response)`` as arguments."""
+
+
+class HTTPRedirectError(HTTPError):
+    """A redirect that cannot be followed (other scheme or other host)."""
+
+
+class MaximumRetriesExceeded(MwClientError):
+    """A request was retried more often than the site's ``max_retries`` allows."""
+
+
+class APIError(MwClientError):
+    """Error carrying an API error ``code`` and ``info`` message.
+
+    ``code`` and ``info`` are kept as attributes; together with ``kwargs``
+    they also form the exception's ``args``.
+    """
+
+    def __init__(self, code, info, kwargs):
+        MwClientError.__init__(self, code, info, kwargs)
+        self.code, self.info = code, info
+
+
+class InsufficientPermission(MwClientError):
+    """The current user lacks the right needed for the requested action."""
+
+
+class UserBlocked(InsufficientPermission):
+    """Permission error for a blocked user (raise sites are not in this module)."""
+
+
+class EditError(MwClientError):
+    """Base class for errors raised while changing wiki content."""
+
+
+class ProtectedPageError(EditError, InsufficientPermission):
+    """Edit refused for a protected page; both an edit and a permission error."""
+
+
+class FileExists(EditError):
+    """Upload refused because the target file already exists."""
+
+
+class LoginError(MwClientError):
+    """The API ``login`` action returned an unexpected result."""
+
+
+class EmailError(MwClientError):
+    """Sending an e-mail through the wiki failed."""
+
+
+class NoSpecifiedEmail(EmailError):
+    """The recipient has not specified a valid e-mail address."""
diff --git a/mwclient/ex.py b/mwclient/ex.py
new file mode 100644 (file)
index 0000000..d0ec8da
--- /dev/null
@@ -0,0 +1,84 @@
+import client
+import http
+
+
+def read_config(config_files, **predata):
+    """Read several config files and merge their settings into one dict.
+
+    Files are processed in order, so later files override earlier ones.
+    The same ``predata`` dict is handed to every file.
+    """
+    merged = {}
+    for path in config_files:
+        for key, value in _read_config_file(path, predata):
+            merged[key] = value
+    return merged
+
+
+def _read_config_file(_config_file, predata):
+    """Execute a Python config file and yield its settings as pairs.
+
+    The file is executed with this module's globals and with ``predata`` as
+    its local namespace, so assignments in the file end up in ``predata``.
+    Every name that does not start with an underscore is yielded as a
+    ``(name, value)`` pair.
+    """
+    _file = open(_config_file)
+    try:
+        # SECURITY: the config file runs as arbitrary Python code; only
+        # load files from trusted locations.
+        exec _file in globals(), predata
+    finally:
+        # Close the handle even when the config file raises.
+        _file.close()
+
+    for _k, _v in predata.iteritems():
+        if not _k.startswith('_'):
+            yield _k, _v
+    # Also yields this function's own public locals, which in practice is
+    # the ``predata`` dict itself; kept for backward compatibility.
+    for _k, _v in locals().iteritems():
+        if not _k.startswith('_'):
+            yield _k, _v
+
+
+class SiteList(object):
+    """Container that creates an empty settings dict per site on first access.
+
+    Indexing with an unknown key stores and returns a new dict; iterating
+    yields the stored dicts.
+    """
+
+    def __init__(self):
+        self.sites = {}
+
+    def __getitem__(self, key):
+        return self.sites.setdefault(key, {})
+
+    def __iter__(self):
+        return self.sites.itervalues()
+
+
+class ConfiguredSite(client.Site):
+    """``client.Site`` whose settings come from Python config files.
+
+    The keyword ``name`` selects an entry of the config's ``sites`` whose
+    values override the top-level settings.  When both ``username`` and
+    ``password`` are configured the site is created without initialization
+    and logged in right away.
+    """
+
+    def __init__(self, *config_files, **kwargs):
+        self.config = read_config(config_files, sites=SiteList())
+
+        cfg = self.config
+        if 'name' in kwargs:
+            cfg.update(cfg['sites'][kwargs['name']])
+
+        do_login = 'username' in cfg and 'password' in cfg
+
+        site_options = dict(
+            host=cfg['host'],
+            path=cfg['path'],
+            ext=cfg.get('ext', '.php'),
+            do_init=not do_login,
+            retry_timeout=cfg.get('retry_timeout', 30),
+            max_retries=cfg.get('max_retries', -1))
+        client.Site.__init__(self, **site_options)
+
+        if do_login:
+            self.login(self.config['username'],
+                       self.config['password'])
+
+
+class ConfiguredPool(list):
+    """List of ``client.Site`` objects built from config files.
+
+    All sites share one ``http.HTTPPool``.  Each entry of the config's
+    ``sites`` is completed with the top-level settings (site values win) and
+    stored on the created site as ``config``.  Sites with both ``username``
+    and ``password`` are logged in.
+    """
+
+    def __init__(self, *config_files):
+        self.config = read_config(config_files, sites=SiteList())
+        self.pool = http.HTTPPool()
+
+        shared = dict((key, value) for key, value in self.config.iteritems()
+                      if key != 'sites')
+
+        for site in self.config['sites']:
+            # Fill in the shared defaults without overriding site values.
+            merged = shared.copy()
+            merged.update(site)
+            site.update(merged)
+
+            do_login = 'username' in site and 'password' in site
+
+            wiki = client.Site(host=site['host'],
+                               path=site['path'], ext=site.get('ext', '.php'),
+                               pool=self.pool, do_init=not do_login,
+                               retry_timeout=site.get('retry_timeout', 30),
+                               max_retries=site.get('max_retries', -1))
+            self.append(wiki)
+            if do_login:
+                wiki.login(site['username'], site['password'])
+            wiki.config = site
diff --git a/mwclient/http.py b/mwclient/http.py
new file mode 100644 (file)
index 0000000..15335d7
--- /dev/null
@@ -0,0 +1,260 @@
+import urllib2
+import urlparse
+import httplib
+import socket
+import time
+
+import upload
+import errors
+
+from client import __ver__
+
+
+class CookieJar(dict):
+    """Minimal cookie store mapping cookie names to their values.
+
+    Iterating the jar yields ``Cookie`` objects rather than keys.
+    """
+
+    def __init__(self):
+        dict.__init__(self, ())
+
+    def extract_cookies(self, response):
+        """Store every ``Set-Cookie`` header of ``response`` in the jar.
+
+        Raises ``RuntimeError`` when the response carries a ``Set-Cookie2``
+        header, which is not supported.
+        """
+        for cookie in response.msg.getallmatchingheaders('Set-Cookie'):
+            self.parse_cookie(cookie.strip())
+        cookie2 = response.getheader('set-cookie2', None)
+        if cookie2:
+            # ...
+            raise RuntimeError('Set-Cookie2', cookie2)
+
+    def parse_cookie(self, cookie):
+        """Parse one raw ``Set-Cookie: name=value; attributes`` header line.
+
+        Attributes after the first ``;`` are ignored.  A cookie without
+        ``=`` removes that name from the jar.  Empty lines and lines without
+        a ``': '`` header separator are skipped.
+        """
+        if not cookie:
+            return
+        _header, separator, content = cookie.partition(': ')
+        if not separator:
+            return
+        # Keep only the name=value pair; attributes may be absent entirely.
+        pair = content.split(';', 1)[0].strip()
+        # Split on the first '=' only: cookie values may contain '='.
+        name, equals, value = pair.partition('=')
+        if equals:
+            self[name] = value
+        else:
+            self.pop(name, None)
+
+    def get_cookie_header(self):
+        """Return the jar formatted as the value of a ``Cookie`` header."""
+        return '; '.join(('%s=%s' % i for i in self.items()))
+
+    def __iter__(self):
+        for k, v in self.items():
+            yield Cookie(k, v)
+
+
+class Cookie(object):
+    """Name/value pair describing a single cookie."""
+
+    def __init__(self, name, value):
+        self.name, self.value = name, value
+
+
+class HTTPPersistentConnection(object):
+    """Keep-alive HTTP connection to one host with cookie handling.
+
+    Class attributes:
+        http_class: httplib connection class used for the socket.
+        scheme_name: URL scheme accepted when following redirects.
+        useragent: User-Agent header value; set per instance in ``__init__``.
+    """
+    http_class = httplib.HTTPConnection
+    scheme_name = 'http'
+    useragent = None
+
+    def __init__(self, host, pool=None, clients_useragent=None):
+        """Connect to ``host``.
+
+        When ``pool`` is given, its cookie store is shared with this
+        connection and redirects are dispatched through the pool.
+        ``clients_useragent`` is prepended to the MwClient User-Agent.
+        """
+        self._conn = self.http_class(host)
+        self._conn.connect()
+        self.last_request = time.time()
+        self.cookies = {}
+
+        self.pool = pool
+        # Compare with None: an HTTPPool is a list and is falsy while empty,
+        # which is exactly the state it is in when it creates its first
+        # connection.
+        if pool is not None:
+            self.cookies = pool.cookies
+
+        clients_useragent = clients_useragent or ""
+        if clients_useragent != "":
+            clients_useragent += " "
+        self.useragent = clients_useragent + 'MwClient/' + __ver__ + ' (https://github.com/mwclient/mwclient)'
+
+    def request(self, method, host, path, headers, data,
+                raise_on_not_ok=True, auto_redirect=True):
+        """Send one request and return the httplib response.
+
+        ``host`` may be a ``(scheme, host)`` tuple.  ``data`` may be a string
+        or an ``upload.Upload`` object, which is sent chunk by chunk.
+        Cookies for ``host`` are sent and updated from the response.
+        3xx responses are followed when ``auto_redirect`` is true.
+
+        Raises ``errors.HTTPError`` on socket errors,
+        ``errors.HTTPRedirectError`` for redirects to another scheme (or
+        another host without a pool) and ``errors.HTTPStatusError`` for a
+        non-200 response when ``raise_on_not_ok`` is true.
+        """
+
+        # Strip scheme
+        if type(host) is tuple:
+            host = host[1]
+
+        # Dirty hack: reconnect when the connection has been idle for more
+        # than a minute.
+        if (time.time() - self.last_request) > 60:
+            self._conn.close()
+            self._conn.connect()
+
+        _headers = headers
+        headers = {}
+
+        headers['Connection'] = 'Keep-Alive'
+        headers['User-Agent'] = self.useragent
+        headers['Host'] = host
+        if host in self.cookies:
+            headers['Cookie'] = self.cookies[host].get_cookie_header()
+        if isinstance(data, upload.Upload):
+            headers['Content-Type'] = data.content_type
+            headers['Content-Length'] = str(data.length)
+        elif data:
+            headers['Content-Length'] = str(len(data))
+
+        # Caller-supplied headers override the defaults above.
+        if _headers:
+            headers.update(_headers)
+
+        try:
+            self._conn.request(method, path, headers=headers)
+            if isinstance(data, upload.Upload):
+                for s in data:
+                    self._conn.send(s)
+            elif data:
+                self._conn.send(data)
+
+            self.last_request = time.time()
+            try:
+                res = self._conn.getresponse()
+            except httplib.BadStatusLine:
+                # Reconnect and retry once.
+                # NOTE(review): this retry hands ``data`` to httplib directly,
+                # which may not work for Upload objects - confirm.
+                self._conn.close()
+                self._conn.connect()
+                self._conn.request(method, path, data, headers)
+                res = self._conn.getresponse()
+        except socket.error, e:
+            self._conn.close()
+            raise errors.HTTPError(e)
+        # except Exception, e:
+        #   raise errors.HTTPError, e
+
+        if host not in self.cookies:
+            self.cookies[host] = CookieJar()
+        self.cookies[host].extract_cookies(res)
+
+        if res.status >= 300 and res.status <= 399 and auto_redirect:
+            res.read()
+
+            location = urlparse.urlparse(res.getheader('Location'))
+            if res.status in (302, 303):
+                # These redirects are followed with a body-less GET.
+                if 'Content-Type' in headers:
+                    del headers['Content-Type']
+                if 'Content-Length' in headers:
+                    del headers['Content-Length']
+                method = 'GET'
+                data = ''
+            old_path = path
+            path = location[2]
+            if location[4]:
+                path = path + '?' + location[4]
+
+            if location[0].lower() != self.scheme_name:
+                raise errors.HTTPRedirectError('Only HTTP connections are supported',
+                                               res.getheader('Location'))
+
+            if self.pool is None:
+                if location[1] != host:
+                    raise errors.HTTPRedirectError('Redirecting to different hosts not supported',
+                                                   res.getheader('Location'))
+
+                # Keep the caller's flags when following the redirect.
+                return self.request(method, host, path, headers, data,
+                                    raise_on_not_ok, auto_redirect)
+            else:
+                if host == location[1] and path == old_path:
+                    conn = self.__class__(location[1], self.pool)
+                    self.pool.append(([location[1]], conn))
+                return self.pool.request(method, location[1], path,
+                                         headers, data, raise_on_not_ok, auto_redirect)
+
+        if res.status != 200 and raise_on_not_ok:
+            try:
+                raise errors.HTTPStatusError(res.status, res)
+            finally:
+                res.close()
+
+        return res
+
+    def get(self, host, path, headers=None):
+        """Send a GET request; see ``request``."""
+        return self.request('GET', host, path, headers, None)
+
+    def post(self, host, path, headers=None, data=None):
+        """Send a POST request with ``data`` as body; see ``request``."""
+        return self.request('POST', host, path, headers, data)
+
+    def head(self, host, path, headers=None, auto_redirect=False):
+        """Send a HEAD request and return ``(status, header list)``.
+
+        Non-200 responses do not raise.
+        """
+        res = self.request('HEAD', host, path, headers,
+                           data=None, raise_on_not_ok=False,
+                           auto_redirect=auto_redirect)
+        res.read()
+        return res.status, res.getheaders()
+
+    def close(self):
+        """Close the underlying connection."""
+        self._conn.close()
+
+    def fileno(self):
+        """Return the file descriptor of the underlying socket."""
+        return self._conn.sock.fileno()
+
+
+class HTTPConnection(HTTPPersistentConnection):
+    """Connection variant that sends ``Connection: Close`` with every request."""
+
+    def request(self, method, host, path, headers, data,
+                raise_on_not_ok=True, auto_redirect=True):
+        """Send a request with the ``Connection`` header forced to ``Close``.
+
+        The header is set on the dict passed by the caller (a new dict is
+        used when none is given).
+        """
+        headers = headers or {}
+        headers['Connection'] = 'Close'
+        return HTTPPersistentConnection.request(
+            self, method, host, path, headers, data,
+            raise_on_not_ok, auto_redirect)
+
+
+class HTTPSPersistentConnection(HTTPPersistentConnection):
+    """Persistent connection that uses ``httplib.HTTPSConnection``."""
+    scheme_name = 'https'
+    http_class = httplib.HTTPSConnection
+
+
+class HTTPPool(list):
+    """Pool of persistent connections sharing one cookie store.
+
+    Each element is a ``(hosts, connection)`` pair where ``hosts`` is a list
+    of the ``(scheme, host)`` tuples served by that connection.
+    """
+
+    def __init__(self, clients_useragent=None):
+        list.__init__(self)
+        self.cookies = {}
+        self.clients_useragent = clients_useragent
+
+    def find_connection(self, host, scheme='http'):
+        """Return a connection for ``host``, creating one if necessary.
+
+        ``host`` may be a ``(scheme, host)`` tuple, which overrides
+        ``scheme``.  Known hosts are looked up first; then every pooled
+        connection is probed with a HEAD request; finally a new connection
+        is opened and added to the pool.
+
+        Raises ``RuntimeError`` for schemes other than http and https.
+        """
+        if type(host) is tuple:
+            scheme, host = host
+        wanted = (scheme, host)
+
+        for hosts, conn in self:
+            if wanted in hosts:
+                return conn
+
+        for hosts, conn in self:
+            status, headers = conn.head(host, '/')
+            serves_host = status == 200
+            if not serves_host and 300 <= status <= 399:
+                # BROKEN!
+                headers = dict(headers)
+                location = urlparse.urlparse(headers.get('location', ''))
+                serves_host = (location[0], location[1]) == wanted
+            if serves_host:
+                hosts.append(wanted)
+                return conn
+
+        if scheme not in ('http', 'https'):
+            raise RuntimeError('Unsupported scheme', scheme)
+        if scheme == 'http':
+            cls = HTTPPersistentConnection
+        else:
+            cls = HTTPSPersistentConnection
+        conn = cls(host, self, self.clients_useragent)
+        self.append(([wanted], conn))
+        return conn
+
+    def get(self, host, path, headers=None):
+        """Send a GET request through the connection for ``host``."""
+        conn = self.find_connection(host)
+        return conn.get(host, path, headers)
+
+    def post(self, host, path, headers=None, data=None):
+        """Send a POST request through the connection for ``host``."""
+        conn = self.find_connection(host)
+        return conn.post(host, path, headers, data)
+
+    def head(self, host, path, headers=None, auto_redirect=False):
+        """Send a HEAD request through the connection for ``host``."""
+        conn = self.find_connection(host)
+        return conn.head(host, path, headers, auto_redirect)
+
+    def request(self, method, host, path, headers, data,
+                raise_on_not_ok, auto_redirect):
+        """Send an arbitrary request through the connection for ``host``."""
+        conn = self.find_connection(host)
+        return conn.request(method, host, path,
+                            headers, data, raise_on_not_ok, auto_redirect)
+
+    def close(self):
+        """Close every pooled connection."""
+        for _hosts, conn in self:
+            conn.close()
diff --git a/mwclient/listing.py b/mwclient/listing.py
new file mode 100644 (file)
index 0000000..9fc9e29
--- /dev/null
@@ -0,0 +1,232 @@
+import client
+import page
+import compatibility
+
+
+class List(object):
+    """Iterator over the items of an API ``query`` list, fetched chunk by chunk.
+
+    Nothing is requested until the first call to ``next``.  Each time the
+    current chunk runs out, ``load_chunk`` issues another request, using
+    the ``query-continue`` values from the previous response to resume.
+    """
+
+    def __init__(self, site, list_name, prefix, limit=None, return_values=None, max_items=None, *args, **kwargs):
+        """Record the query parameters; no request is made here.
+
+        ``args`` is merged into ``kwargs`` with ``dict.update``, so any
+        extra positional arguments must be ``(key, value)`` pairs.
+        ``limit`` falls back to ``site.api_limit`` and is stored under
+        ``<prefix>limit``.  ``return_values`` controls what ``next``
+        returns and ``max_items`` caps how many items are yielded.
+        """
+        # NOTE: Fix limit
+        self.site = site
+        self.list_name = list_name
+        self.generator = 'list'
+        self.prefix = prefix
+
+        kwargs.update(args)
+        self.args = kwargs
+
+        if limit is None:
+            limit = site.api_limit
+        self.args[self.prefix + 'limit'] = str(limit)
+
+        self.count = 0
+        self.max_items = max_items
+
+        # Start with an empty iterator so the first next() triggers load_chunk().
+        self._iter = iter(xrange(0))
+
+        self.last = False
+        self.result_member = list_name
+        self.return_values = return_values
+
+    def __iter__(self):
+        """Return self; the object is its own iterator."""
+        return self
+
+    def next(self, full=False):
+        """Return the next item, loading another chunk when needed.
+
+        With ``full=True`` the item is returned as received.  Otherwise
+        ``return_values`` decides the result: a tuple selects several
+        keys, ``None`` returns the whole item, and anything else is used
+        as a single key.  A ``timestamp`` entry is replaced by the result
+        of ``client.parse_timestamp``.
+
+        Raises StopIteration once ``max_items`` items were returned or
+        the last chunk is exhausted.
+        """
+        if self.max_items is not None:
+            if self.count >= self.max_items:
+                raise StopIteration
+        try:
+            item = self._iter.next()
+            self.count += 1
+            if 'timestamp' in item:
+                item['timestamp'] = client.parse_timestamp(item['timestamp'])
+            if full:
+                return item
+
+            if type(self.return_values) is tuple:
+                return tuple((item[i] for i in self.return_values))
+            elif self.return_values is None:
+                return item
+            else:
+                return item[self.return_values]
+
+        except StopIteration:
+            if self.last:
+                raise StopIteration
+            self.load_chunk()
+            # Call List.next explicitly so a subclass override of next()
+            # is not re-entered while retrying on the fresh chunk.
+            return List.next(self, full=full)
+
+    def load_chunk(self):
+        """Request the next chunk from the API and install it as the iterator.
+
+        Raises StopIteration when the API returns a false value.  Marks
+        the list as finished when the response carries no
+        ``query-continue`` entry for ``list_name``.
+        """
+        data = self.site.api('query', (self.generator, self.list_name), *[(str(k), v) for k, v in self.args.iteritems()])
+        if not data:
+            # Non existent page
+            raise StopIteration
+        self.set_iter(data)
+
+        if self.list_name in data.get('query-continue', ()):
+            # Carry the continuation parameters into the next request.
+            self.args.update(data['query-continue'][self.list_name])
+        else:
+            self.last = True
+
+    def set_iter(self, data):
+        """Point ``_iter`` at ``data['query'][result_member]``.
+
+        A missing member yields an empty iterator; a list is iterated
+        directly; anything else is iterated through ``itervalues()``.
+        """
+        if self.result_member not in data['query']:
+            self._iter = iter(xrange(0))
+        elif type(data['query'][self.result_member]) is list:
+            self._iter = iter(data['query'][self.result_member])
+        else:
+            self._iter = data['query'][self.result_member].itervalues()
+
+    def __repr__(self):
+        return "<List object '%s' for %s>" % (self.list_name, self.site)
+
+    @staticmethod
+    def generate_kwargs(_prefix, *args, **kwargs):
+        """Yield ``(_prefix + key, value)`` for every value that is not None.
+
+        ``args`` must be ``(key, value)`` pairs; they are merged into
+        ``kwargs`` before iteration.
+        """
+        kwargs.update(args)
+        for key, value in kwargs.iteritems():
+            if value is not None:
+                yield _prefix + key, value
+
+    @staticmethod
+    def get_prefix(prefix, generator=False):
+        """Return ``prefix``, with a leading ``'g'`` when ``generator`` is true."""
+        if generator:
+            return 'g' + prefix
+        else:
+            return prefix
+
+    @staticmethod
+    def get_list(generator=False):
+        """Return the class to instantiate: GeneratorList or List."""
+        if generator:
+            return GeneratorList
+        else:
+            return List
+
+
+class GeneratorList(List):
+    """List variant that runs the query as a generator and yields page objects."""
+
+    def __init__(self, site, list_name, prefix, *args, **kwargs):
+        List.__init__(self, site, list_name, prefix, *args, **kwargs)
+
+        # Generator parameters carry a leading 'g', so move the limit
+        # stored by List.__init__ to its prefixed name.
+        limit_key = self.prefix + 'limit'
+        self.args['g' + limit_key] = self.args.pop(limit_key)
+        self.generator = 'generator'
+
+        self.args['inprop'] = 'protection'
+        self.args['prop'] = 'info|imageinfo'
+
+        self.result_member = 'pages'
+
+        self.page_class = page.Page
+
+    def next(self):
+        """Return the next result wrapped in Category, Image or Page by namespace."""
+        info = List.next(self, full=True)
+        namespace = info['ns']
+        if namespace == 14:
+            wrapper = Category
+        elif namespace == 6:
+            wrapper = page.Image
+        else:
+            wrapper = page.Page
+        return wrapper(self.site, u'', info)
+
+    def load_chunk(self):
+        # ``iiprop`` is filled in here rather than in the constructor so
+        # that building the list does not fail on uninitialized sites.
+        site_version = self.site.version
+        self.args['iiprop'] = compatibility.iiprop(site_version)
+        return List.load_chunk(self)
+
+
+class Category(page.Page, GeneratorList):
+    """A category page that can also be iterated to list its members."""
+
+    def __init__(self, site, name, info=None, namespace=None):
+        """Initialise the page part, then the ``categorymembers`` generator.
+
+        ``namespace``, when truthy, is sent as ``gcmnamespace``.
+        """
+        page.Page.__init__(self, site, name, info)
+        kwargs = {}
+        # compatibility.cmtitle() is used as one (key, value) pair here; the
+        # result of site.require(1, 12, raise_error=False) is passed to it.
+        kwargs.update((compatibility.cmtitle(self, self.site.require(
+            1, 12, raise_error=False), prefix='gcm'), ))
+        if namespace:
+            kwargs['gcmnamespace'] = namespace
+        GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs)
+
+    def __repr__(self):
+        return "<Category object '%s' for %s>" % (self.name.encode('utf-8'), self.site)
+
+    def members(self, prop='ids|title', namespace=None, sort='sortkey',
+                dir='asc', start=None, end=None, generator=True):
+        """Return a new list of this category's members.
+
+        Arguments that are None are left out of the query.  ``generator``
+        selects both the parameter prefix (``gcm`` or ``cm``) and the
+        returned class (GeneratorList or List).
+        """
+        prefix = self.get_prefix('cm', generator)
+        # The cmtitle() pair is passed positionally; generate_kwargs merges
+        # positional pairs into the keyword arguments before prefixing.
+        kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace,
+                                           sort=sort, dir=dir, start=start, end=end, *(compatibility.cmtitle(
+                                                                                       self, self.site.require(1, 12, raise_error=False)), )))
+        return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs)
+
+
+class PageList(GeneratorList):
+    """The ``allpages`` list of one namespace, also usable as a page lookup."""
+
+    def __init__(self, site, prefix=None, start=None, namespace=0, redirects='all'):
+        """Set up an ``allpages`` generator for ``namespace``.
+
+        ``prefix`` and ``start``, when truthy, are sent as ``apprefix`` and
+        ``apfrom``; ``redirects`` is sent as ``apfilterredir``.
+        """
+        self.namespace = namespace
+
+        kwargs = {}
+        if prefix:
+            kwargs['apprefix'] = prefix
+        if start:
+            kwargs['apfrom'] = start
+
+        GeneratorList.__init__(self, site, 'allpages', 'ap',
+                               apnamespace=str(namespace), apfilterredir=redirects, **kwargs)
+
+    def __getitem__(self, name):
+        """Shorthand for ``get(name, None)``."""
+        return self.get(name, None)
+
+    def get(self, name, info=()):
+        """Return the page object for ``name`` in this list's namespace.
+
+        For namespaces other than 0 the namespace name is prepended to
+        ``name``.  In namespace 0 the class is guessed from a namespace
+        prefix in ``name`` (integer page ids are never inspected).
+        """
+        if self.namespace == 14:
+            return Category(self.site, self.site.namespaces[14] + ':' + name, info)
+        elif self.namespace == 6:
+            return page.Image(self.site, self.site.namespaces[6] + ':' + name, info)
+        elif self.namespace != 0:
+            return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info)
+        else:
+            # Guessing page class
+            if type(name) is not int:
+                namespace = self.guess_namespace(name)
+                if namespace == 14:
+                    return Category(self.site, name, info)
+                elif namespace == 6:
+                    return page.Image(self.site, name, info)
+            return page.Page(self.site, name, info)
+
+    def guess_namespace(self, name):
+        """Return the id of the namespace whose name prefixes ``name``, else 0.
+
+        Both the site's own namespace names and its default names are
+        tried.  The comparison uses the normalized title (spaces turned
+        into underscores, leading colon dropped, first letter uppercased)
+        because the namespace names are compared in underscore form too.
+        """
+        normal_name = page.Page.normalize_title(name)
+        for ns in self.site.namespaces:
+            if ns == 0:
+                continue
+            local_prefix = u'%s:' % self.site.namespaces[ns].replace(' ', '_')
+            if normal_name.startswith(local_prefix):
+                return ns
+            elif ns in self.site.default_namespaces:
+                default_prefix = u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')
+                if normal_name.startswith(default_prefix):
+                    return ns
+        return 0
+
+
+class PageProperty(List):
+    """List of one property (``prop`` query) belonging to a single page."""
+
+    def __init__(self, page, prop, prefix, *args, **kwargs):
+        List.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs)
+        self.generator = 'prop'
+        self.page = page
+
+    def set_iter(self, data):
+        """Iterate the property of the returned page whose title matches ours.
+
+        Raises StopIteration when no returned page has the expected title.
+        """
+        wanted_title = self.page.name
+        for entry in data['query']['pages'].itervalues():
+            if entry['title'] != wanted_title:
+                continue
+            self._iter = iter(entry.get(self.list_name, ()))
+            return
+        raise StopIteration
+
+
+class PagePropertyGenerator(GeneratorList):
+    """Generator list limited to one page through the ``titles`` argument."""
+
+    def __init__(self, page, prop, prefix, *args, **kwargs):
+        # The query runs on ``page.site`` for ``page.name``; every other
+        # argument is forwarded to GeneratorList unchanged.
+        GeneratorList.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs)
+        self.page = page
+
+
+class RevisionsIterator(PageProperty):
+    """Page property list for revisions."""
+
+    def load_chunk(self):
+        # When both ``rvstartid`` and ``rvstart`` are present, drop
+        # ``rvstart`` before the request is made.
+        query_args = self.args
+        if 'rvstart' in query_args and 'rvstartid' in query_args:
+            query_args.pop('rvstart')
+        return PageProperty.load_chunk(self)
diff --git a/mwclient/page.py b/mwclient/page.py
new file mode 100644 (file)
index 0000000..0e5a1c9
--- /dev/null
@@ -0,0 +1,412 @@
+import client
+import errors
+import listing
+import compatibility
+from page_nowriteapi import OldPage
+
+import urllib
+import urlparse
+import time
+
+
+class Page(object):
+    """A wiki page together with the API calls that read and modify it."""
+
+    def __init__(self, site, name, info=None, extra_properties=None):
+        """Load the page's metadata, querying the API unless ``info`` is given.
+
+        ``name`` may be a title, an integer page id, or another object of
+        exactly this class, whose attributes are then copied.
+        ``extra_properties`` maps additional ``prop`` names to sequences
+        of ``(key, value)`` arguments that are appended to the query.
+        """
+        if type(name) is type(self):
+            # Copy constructor: adopt the other page's state as-is.
+            self.__dict__.update(name.__dict__)
+            return
+        self.site = site
+        self.name = name
+        self.section = None
+
+        if not info:
+            if extra_properties:
+                prop = 'info|' + '|'.join(extra_properties.iterkeys())
+                extra_props = []
+                for extra_prop in extra_properties.itervalues():
+                    extra_props.extend(extra_prop)
+            else:
+                prop = 'info'
+                extra_props = ()
+
+            if type(name) is int:
+                info = self.site.api('query', prop=prop, pageids=name,
+                                     inprop='protection', *extra_props)
+            else:
+                info = self.site.api('query', prop=prop, titles=name,
+                                     inprop='protection', *extra_props)
+            # Exactly one page was asked for; take whichever entry came back.
+            info = info['query']['pages'].itervalues().next()
+        self._info = info
+
+        self.namespace = info.get('ns', 0)
+        self.name = info.get('title', u'')
+        if self.namespace:
+            self.page_title = self.strip_namespace(self.name)
+        else:
+            self.page_title = self.name
+
+        self.touched = client.parse_timestamp(info.get('touched', '0000-00-00T00:00:00Z'))
+        self.revision = info.get('lastrevid', 0)
+        self.exists = 'missing' not in info
+        self.length = info.get('length')
+        # Maps protection type -> (level, expiry).
+        self.protection = dict([(i['type'], (i['level'], i['expiry'])) for i in info.get('protection', ()) if i])
+        self.redirect = 'redirect' in info
+
+        self.last_rev_time = None
+        self.edit_time = None
+
+    def redirects_to(self):
+        """ Returns the redirect target page, or None if the page is not a redirect page."""
+        info = self.site.api('query', prop='pageprops', titles=self.name, redirects='')['query']
+        for redirect in info.get('redirects', ()):
+            if redirect['from'] == self.name:
+                return Page(self.site, redirect['to'])
+        return None
+
+    def resolve_redirect(self):
+        """ Returns the redirect target page, or the current page if it's not a redirect page."""
+        target_page = self.redirects_to()
+        if target_page is None:
+            return self
+        return target_page
+
+    def __repr__(self):
+        return "<Page object '%s' for %s>" % (self.name.encode('utf-8'), self.site)
+
+    def __unicode__(self):
+        return self.name
+
+    @staticmethod
+    def strip_namespace(title):
+        """Return ``title`` without a leading colon and namespace prefix.
+
+        Everything up to and including the first remaining colon is
+        removed; a title without a colon is returned unchanged.
+        """
+        # Slicing keeps an empty title from raising IndexError.
+        if title[:1] == ':':
+            title = title[1:]
+        return title[title.find(':') + 1:]
+
+    @staticmethod
+    def normalize_title(title):
+        """Return ``title`` stripped, without a leading colon, with the first
+        letter uppercased and spaces replaced by underscores.
+        """
+        # TODO: Make site dependent
+        title = title.strip()
+        # Slicing keeps empty (or colon-only) titles from raising IndexError.
+        if title[:1] == ':':
+            title = title[1:]
+        title = title[:1].upper() + title[1:]
+        title = title.replace(' ', '_')
+        return title
+
+    def can(self, action):
+        """Return whether the current user's rights allow ``action``.
+
+        The required right is the protection level recorded for
+        ``action``, or the action name itself when the page has no
+        protection entry for it.
+        """
+        level = self.protection.get(action, (action, ))[0]
+        if level == 'sysop':
+            level = compatibility.protectright(self.site.version)
+
+        return level in self.site.rights
+
+    def get_token(self, type, force=False):
+        """Return the token of kind ``type``, fetching it when missing.
+
+        Tokens are cached in ``site.tokens``; ``force=True`` refetches
+        even when a token is cached.
+        """
+        self.site.require(1, 11)
+
+        if type not in self.site.tokens:
+            self.site.tokens[type] = '0'
+        if self.site.tokens.get(type, '0') == '0' or force:
+            info = self.site.api('query', titles=self.name,
+                                 prop='info', intoken=type)
+            for i in info['query']['pages'].itervalues():
+                if i['title'] == self.name:
+                    self.site.tokens[type] = i['%stoken' % type]
+        return self.site.tokens[type]
+
+    def get_expanded(self):
+        """Return the latest revision's text with templates expanded.
+
+        Returns an empty string when the page has no revisions.
+        """
+        self.site.require(1, 12)
+
+        revs = self.revisions(prop='content', limit=1, expandtemplates=True)
+        try:
+            return revs.next()['*']
+        except StopIteration:
+            return u''
+
+    def edit(self, section=None, readonly=False):
+        """Returns wikitext for a specified section or for the whole page.
+
+        Retrieves the latest edit.  The text, section and revision
+        timestamp are remembered on the object for a later ``save``.
+        ``readonly`` is accepted but not used.
+
+        """
+        if not self.can('read'):
+            raise errors.InsufficientPermission(self)
+        if not self.exists:
+            return u''
+
+        revs = self.revisions(prop='content|timestamp', limit=1, section=section)
+        try:
+            rev = revs.next()
+            self.text = rev['*']
+            self.section = section
+            self.last_rev_time = rev['timestamp']
+        except StopIteration:
+            self.text = u''
+            self.section = None
+            # No revision to base an edit on: forget any stale base
+            # timestamp so save() does not send it.
+            self.last_rev_time = None
+        self.edit_time = time.gmtime()
+        return self.text
+
+    def save(self, text=u'', summary=u'', minor=False, bot=True, section=None, **kwargs):
+        """Save text of page.
+
+        An empty ``text`` falls back to the text loaded by ``edit``, and a
+        falsy ``section`` to the section loaded by ``edit``.  Extra
+        keyword arguments are sent to the API unchanged.  Returns the
+        ``edit`` part of the API response when the write API is used.
+
+        Raises LoginError, UserBlocked or ProtectedPageError when the edit
+        is not permitted, and EditError when the API reports a failure or
+        an edit conflict.
+        """
+        if not self.site.logged_in and self.site.force_login:
+            # Should we really check for this?
+            raise errors.LoginError(self.site)
+        if self.site.blocked:
+            raise errors.UserBlocked(self.site.blocked)
+        if not self.can('edit'):
+            raise errors.ProtectedPageError(self)
+
+        if not text:
+            text = self.text
+        if not section:
+            section = self.section
+
+        if not self.site.writeapi:
+            return OldPage.save(self, text=text, summary=summary, minor=minor)
+
+        data = {}
+        if minor:
+            data['minor'] = '1'
+        else:
+            data['notminor'] = '1'
+        if self.last_rev_time:
+            data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time)
+        if self.edit_time:
+            data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time)
+        if bot:
+            data['bot'] = '1'
+        if section:
+            data['section'] = section
+
+        data.update(kwargs)
+
+        def do_edit():
+            result = self.site.api('edit', title=self.name, text=text,
+                                   summary=summary, token=self.get_token('edit'),
+                                   **data)
+            # A missing 'result' entry is treated as "not a failure".
+            if result['edit'].get('result', '').lower() == 'failure':
+                raise errors.EditError(self, result['edit'])
+            return result
+        try:
+            result = do_edit()
+        except errors.APIError, e:
+            if e.code == 'badtoken':
+                # Retry, but only once to avoid an infinite loop
+                self.get_token('edit', force=True)
+                try:
+                    result = do_edit()
+                except errors.APIError, e:
+                    self.handle_edit_error(e, summary)
+            else:
+                self.handle_edit_error(e, summary)
+
+        edit_result = result['edit']
+        # The timestamp lives inside the 'edit' element and is only
+        # looked up when present.
+        if 'newtimestamp' in edit_result:
+            self.last_rev_time = client.parse_timestamp(edit_result['newtimestamp'])
+        return edit_result
+
+    def handle_edit_error(self, e, summary):
+        """Translate API error ``e`` into EditError or ProtectedPageError.
+
+        Must be called from inside an ``except`` block: unknown error
+        codes are re-raised with a bare ``raise``.
+        """
+        if e.code == 'editconflict':
+            raise errors.EditError(self, summary, e.info)
+        elif e.code in ('protectedtitle', 'cantcreate', 'cantcreate-anon', 'noimageredirect-anon',
+                        'noimageredirect', 'noedit-anon', 'noedit'):
+            raise errors.ProtectedPageError(self, e.code, e.info)
+        else:
+            raise
+
+    def move(self, new_title, reason='', move_talk=True, no_redirect=False):
+        """Move (rename) page to new_title.
+
+        If user account is an administrator, specify no_redirect as True to
+        not leave a redirect.
+
+        If user does not have permission to move page, an InsufficientPermission
+        exception is raised.
+
+        """
+        if not self.can('move'):
+            raise errors.InsufficientPermission(self)
+
+        if not self.site.writeapi:
+            return OldPage.move(self, new_title=new_title,
+                                reason=reason, move_talk=move_talk)
+
+        data = {}
+        if move_talk:
+            data['movetalk'] = '1'
+        if no_redirect:
+            data['noredirect'] = '1'
+        # 'from' is a Python keyword, so it is passed as a positional pair.
+        result = self.site.api('move', ('from', self.name), to=new_title,
+                               token=self.get_token('move'), reason=reason, **data)
+        return result['move']
+
+    def delete(self, reason='', watch=False, unwatch=False, oldimage=False):
+        """Delete page.
+
+        If user does not have permission to delete page, an InsufficientPermission
+        exception is raised.
+
+        """
+        if not self.can('delete'):
+            raise errors.InsufficientPermission(self)
+
+        if not self.site.writeapi:
+            return OldPage.delete(self, reason=reason)
+
+        data = {}
+        if watch:
+            data['watch'] = '1'
+        if unwatch:
+            data['unwatch'] = '1'
+        if oldimage:
+            data['oldimage'] = oldimage
+        result = self.site.api('delete', title=self.name,
+                               token=self.get_token('delete'),
+                               reason=reason, **data)
+        return result['delete']
+
+    def purge(self):
+        """Purge server-side cache of page. This will re-render templates and other
+        dynamic content.
+
+        """
+        self.site.raw_index('purge', title=self.name)
+
+    # def watch: requires 1.14
+
+    # Properties
+    def backlinks(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True):
+        """Return a list built from the ``backlinks`` query for this page."""
+        self.site.require(1, 9)
+        # Fix title for < 1.11 !!
+        prefix = listing.List.get_prefix('bl', generator)
+        kwargs = dict(listing.List.generate_kwargs(prefix,
+                                                   namespace=namespace, filterredir=filterredir))
+        if redirect:
+            kwargs['%sredirect' % prefix] = '1'
+        kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error=False))] = self.name
+
+        return listing.List.get_list(generator)(self.site, 'backlinks', 'bl', limit=limit, return_values='title', **kwargs)
+
+    def categories(self, generator=True):
+        """Return a list built from the ``categories`` property of this page."""
+        self.site.require(1, 11)
+        if generator:
+            return listing.PagePropertyGenerator(self, 'categories', 'cl')
+        else:
+            # TODO: return sortkey if wanted
+            return listing.PageProperty(self, 'categories', 'cl', return_values='title')
+
+    def embeddedin(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True):
+        """Return a list built from the ``embeddedin`` query for this page."""
+        self.site.require(1, 9)
+        # Fix title for < 1.11 !!
+        prefix = listing.List.get_prefix('ei', generator)
+        kwargs = dict(listing.List.generate_kwargs(prefix,
+                                                   namespace=namespace, filterredir=filterredir))
+        if redirect:
+            kwargs['%sredirect' % prefix] = '1'
+        kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error=False))] = self.name
+
+        return listing.List.get_list(generator)(self.site, 'embeddedin', 'ei', limit=limit, return_values='title', **kwargs)
+
+    def extlinks(self):
+        """Return a list built from the ``extlinks`` property of this page."""
+        self.site.require(1, 11)
+        return listing.PageProperty(self, 'extlinks', 'el', return_values='*')
+
+    def images(self, generator=True):
+        """Return a list built from the ``images`` property of this page."""
+        self.site.require(1, 9)
+        if generator:
+            return listing.PagePropertyGenerator(self, 'images', '')
+        else:
+            return listing.PageProperty(self, 'images', '', return_values='title')
+
+    def iwlinks(self):
+        """Return ``(prefix, '*')`` values from the ``iwlinks`` property."""
+        self.site.require(1, 9)  # guessing...
+        return listing.PageProperty(self, 'iwlinks', 'iw', return_values=('prefix', '*'))
+
+    def langlinks(self, **kwargs):
+        """Return ``(lang, '*')`` values from the ``langlinks`` property."""
+        self.site.require(1, 9)
+        return listing.PageProperty(self, 'langlinks', 'll', return_values=('lang', '*'), **kwargs)
+
+    def links(self, namespace=None, generator=True, redirects=False):
+        """Return a list built from the ``links`` property of this page."""
+        self.site.require(1, 9)
+        # In generator mode the module's parameters need the 'g' prefix,
+        # as backlinks() and embeddedin() already do.
+        prefix = listing.List.get_prefix('pl', generator)
+        kwargs = dict(listing.List.generate_kwargs(prefix, namespace=namespace))
+        if redirects:
+            kwargs['redirects'] = '1'
+        if generator:
+            return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs)
+        else:
+            return listing.PageProperty(self, 'links', 'pl', return_values='title', **kwargs)
+
+    def revisions(self, startid=None, endid=None, start=None, end=None,
+                  dir='older', user=None, excludeuser=None, limit=50,
+                  prop='ids|timestamp|flags|comment|user', expandtemplates=False, section=None):
+        """Return an iterator over this page's revisions.
+
+        Arguments that are None are left out of the query.
+        """
+        self.site.require(1, 8)
+        kwargs = dict(listing.List.generate_kwargs('rv', startid=startid, endid=endid,
+                                                   start=start, end=end, user=user, excludeuser=excludeuser))
+        kwargs['rvdir'] = dir
+        kwargs['rvprop'] = prop
+        if expandtemplates:
+            kwargs['rvexpandtemplates'] = '1'
+        if section:
+            kwargs['rvsection'] = section
+
+        return listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, **kwargs)
+
+    def templates(self, namespace=None, generator=True):
+        """Return a list built from the ``templates`` property of this page."""
+        self.site.require(1, 8)
+        # The namespace filter must actually be passed on, with the 'g'
+        # prefix in generator mode.
+        prefix = listing.List.get_prefix('tl', generator)
+        kwargs = dict(listing.List.generate_kwargs(prefix, namespace=namespace))
+        if generator:
+            return listing.PagePropertyGenerator(self, 'templates', 'tl', **kwargs)
+        else:
+            return listing.PageProperty(self, 'templates', 'tl', return_values='title', **kwargs)
+
+
+class Image(Page):
+    """A file page, with access to its image information and contents."""
+
+    def __init__(self, site, name, info=None):
+        """Load the page, also requesting the ``imageinfo`` property."""
+        site.require(1, 11)
+        Page.__init__(self, site, name, info,
+                      extra_properties={'imageinfo': (('iiprop',
+                                                       compatibility.iiprop(site.version)), )})
+        self.imagerepository = self._info.get('imagerepository', '')
+        # Only the first imageinfo entry is kept; an empty dict when none came back.
+        self.imageinfo = self._info.get('imageinfo', ({}, ))[0]
+
+    def imagehistory(self):
+        """Return a list built from the ``imageinfo`` property of this file."""
+        return listing.PageProperty(self, 'imageinfo', 'ii',
+                                    iiprop=compatibility.iiprop(self.site.version))
+
+    def imageusage(self, namespace=None, filterredir='all', redirect=False,
+                   limit=None, generator=True):
+        """Return a list built from the ``imageusage`` query for this file."""
+        self.site.require(1, 11)
+        # TODO: Fix for versions < 1.11
+        prefix = listing.List.get_prefix('iu', generator)
+        kwargs = dict(listing.List.generate_kwargs(prefix, title=self.name,
+                                                   namespace=namespace, filterredir=filterredir))
+        if redirect:
+            kwargs['%sredirect' % prefix] = '1'
+        return listing.List.get_list(generator)(self.site, 'imageusage', 'iu',
+                                                limit=limit, return_values='title', **kwargs)
+
+    def duplicatefiles(self, limit=None):
+        """Return a list built from the ``duplicatefiles`` property of this file."""
+        # The version check lives on the site object; Page has no require().
+        self.site.require(1, 14)
+        return listing.PageProperty(self, 'duplicatefiles', 'df',
+                                    dflimit=limit)
+
+    def download(self):
+        """Fetch the file through the site's connection and return the result.
+
+        The URL comes from ``imageinfo['url']``.  Absolute ``http://`` and
+        ``https://`` URLs are used as they are, protocol-relative URLs
+        (``//host/path``) get an ``http:`` scheme, and anything else is
+        treated as a path on ``site.host``.
+        """
+        url = self.imageinfo['url']
+        if url.startswith('//'):
+            url = 'http:' + url
+        elif not (url.startswith('http://') or url.startswith('https://')):
+            url = 'http://' + self.site.host + url
+        url = urlparse.urlparse(url)
+        # TODO: query string
+        # Only the host (url[1]) and path (url[2]) are passed on.
+        return self.site.connection.get(url[1], url[2])
+
+    def __repr__(self):
+        return "<Image object '%s' for %s>" % (self.name.encode('utf-8'), self.site)
diff --git a/mwclient/page_nowriteapi.py b/mwclient/page_nowriteapi.py
new file mode 100644 (file)
index 0000000..d6f68cd
--- /dev/null
@@ -0,0 +1,133 @@
+import time
+from HTMLParser import HTMLParser
+from htmlentitydefs import name2codepoint
+
+import errors
+
+
+class OldPage(object):
+    """Fallbacks that go through ``site.raw_index`` instead of the write API.
+
+    The methods are static but take the page object as their first
+    argument (named ``self``), so they are called as
+    ``OldPage.save(page, ...)``.
+    """
+
+    @staticmethod
+    def save(self, text=u'', summary=u'', minor=False):
+        """Submit the edit form for the page.
+
+        Raises ProtectedPageError when the returned edit form is
+        read-only, and EditError when an edit form with data comes back
+        at all.
+        """
+        data = {}
+        data['wpTextbox1'] = text
+        data['wpSummary'] = summary
+        data['wpSave'] = 'Save page'
+        data['wpEditToken'] = self.get_token('edit')
+        if self.last_rev_time:
+            data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time)
+        else:
+            data['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
+        # Send the time editing started when it is known; the current
+        # time is only a fallback.
+        if self.edit_time:
+            data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', self.edit_time)
+        else:
+            data['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
+
+        if minor:
+            data['wpMinoredit'] = '1'
+        data['title'] = self.name
+
+        page_data = self.site.raw_index('submit', **data)
+
+        page = EditPage('editform')
+        page.feed(page_data)
+        page.close()
+
+        # Form fields in the response mean the edit form was shown again.
+        if page.data:
+            if page.readonly:
+                raise errors.ProtectedPageError(self)
+            self.get_token('edit', True)
+            raise errors.EditError(page.title, data)
+
+    @staticmethod
+    def move(self, new_title, reason='', move_talk=True):
+        """Submit the move form for the page.
+
+        Raises EditError when the response still contains the move form's
+        ``wpEditToken`` field.
+        """
+        postdata = {'wpNewTitle': new_title,
+                    'wpOldTitle': self.name,
+                    'wpReason': reason,
+                    'wpMove': '1',
+                    'wpEditToken': self.get_token('move')}
+        if move_talk:
+            postdata['wpMovetalk'] = '1'
+        postdata['title'] = 'Special:Movepage'
+
+        page_data = self.site.raw_index('submit', **postdata)
+
+        page = EditPage('movepage')
+        page.feed(page_data.decode('utf-8', 'ignore'))
+        page.close()
+
+        if 'wpEditToken' in page.data:
+            raise errors.EditError(page.title, postdata)
+
+    @staticmethod
+    def delete(self, reason=''):
+        """Submit the delete form for the page; the response is not inspected."""
+        postdata = {'wpReason': reason,
+                    'wpConfirmB': 'Delete',
+                    'mw-filedelete-submit': 'Delete',
+                    'wpEditToken': self.get_token('delete'),
+                    'title': self.name}
+
+        self.site.raw_index('delete', **postdata)
+
+
+class EditPage(HTMLParser):
+    """HTML parser that collects the fields of one form plus the page title.
+
+    After feeding a page, ``data`` holds the named ``<input>`` values of
+    the form whose ``id`` equals ``form`` (submit buttons and checkboxes
+    excluded), ``textdata`` the text chunks of its ``<textarea>``,
+    ``title`` the contents of ``<title>``, and ``readonly`` whether the
+    textarea carried ``readonly="readonly"``.
+    """
+
+    def __init__(self, form):
+        HTMLParser.__init__(self)
+
+        self.form = form
+
+        self.in_form = False
+        self.in_text = False
+        self.in_title = False
+
+        self.data = {}
+        self.textdata = []
+        self.title = u''
+
+        self.readonly = True
+
+    def handle_starttag(self, tag, attrs):
+        self.in_title = (tag == 'title')
+
+        if (u'id', self.form) in attrs:
+            attrs = dict(attrs)
+            self.in_form = True
+            self.action = attrs['action']
+
+        if tag == 'input' and self.in_form and (u'type', u'submit') \
+                not in attrs and (u'type', u'checkbox') not in attrs:
+            attrs = dict(attrs)
+            if u'name' in attrs:
+                self.data[attrs[u'name']] = attrs.get(u'value', u'')
+
+        if self.in_form and tag == 'textarea':
+            self.in_text = True
+            self.readonly = (u'readonly', u'readonly') in attrs
+
+    def handle_endtag(self, tag):
+        if self.in_title and tag == 'title':
+            self.in_title = False
+        if self.in_form and tag == 'form':
+            self.in_form = False
+        if self.in_text and tag == 'textarea':
+            self.in_text = False
+
+    def handle_data(self, data):
+        if self.in_text:
+            self.textdata.append(data)
+        if self.in_title:
+            self.title += data
+
+    def handle_entityref(self, name):
+        """Feed the character for a named entity, or the entity text if unknown."""
+        if name in name2codepoint:
+            self.handle_data(unichr(name2codepoint[name]))
+        else:
+            self.handle_data(u'&%s;' % name)
+
+    def handle_charref(self, name):
+        """Feed the character for a numeric reference such as ``&#65;`` or ``&#x41;``.
+
+        References that cannot be decoded are passed through as text.
+        """
+        try:
+            # A leading 'x' or 'X' marks a hexadecimal reference.
+            if name[:1] in (u'x', u'X'):
+                char = unichr(int(name[1:], 16))
+            else:
+                char = unichr(int(name))
+        except ValueError:
+            char = u'&#%s;' % name
+        self.handle_data(char)
diff --git a/mwclient/upload.py b/mwclient/upload.py
new file mode 100644 (file)
index 0000000..4d79cc2
--- /dev/null
@@ -0,0 +1,135 @@
+import random
+from cStringIO import StringIO
+
+
+class Upload(object):
+
+    """
+    Common base of all upload objects. Subclasses must call this
+    constructor and provide a ``read(length)`` method.
+
+    Upload objects are file-like iterators that additionally expose the
+    attributes ``length`` and ``content_type``.
+    """
+
+    BLOCK_SIZE = 8192
+
+    def __init__(self, length, content_type):
+        self.content_type = content_type
+        self.length = length
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        # Hand out BLOCK_SIZE-sized pieces until read() returns ''.
+        chunk = self.read(self.BLOCK_SIZE)
+        if chunk != '':
+            return chunk
+        raise StopIteration
+
+    @staticmethod
+    def encode(s):
+        # Only exact ``unicode`` objects are converted (to UTF-8 bytes);
+        # everything else is returned untouched.
+        if type(s) is unicode:
+            return s.encode('utf-8')
+        return s
+
+
+class UploadRawData(Upload):
+
+    """
+    Upload object that serves an in-memory string through StringIO
+    """
+
+    def __init__(self, data, content_type='application/x-www-form-urlencoded'):
+        Upload.__init__(self, len(data), content_type)
+        self.fstr = StringIO(data)
+
+    def read(self, length=-1):
+        buf = self.fstr
+        return buf.read(length)
+
+
+class UploadDict(UploadRawData):
+
+    """
+    Builds a ``key=value&key=value`` body from a dict and serves it
+    through the parent class UploadRawData
+    """
+
+    def __init__(self, data):
+        # NOTE(review): keys and values are only UTF-8 encoded, not
+        # percent-escaped -- confirm that callers pass pre-escaped values.
+        pairs = []
+        for key in data:
+            pairs.append('%s=%s' % (self.encode(key), self.encode(data[key])))
+        UploadRawData.__init__(self, '&'.join(pairs))
+
+
+class UploadFile(Upload):
+
+    """
+    This class accepts a file with information and a postdata dictionary
+    and creates a multipart/form-data representation from it.
+
+    The body is produced lazily by read() in this order: file header,
+    file contents, a CRLF, the form fields, the closing boundary.
+    """
+    # Stages of read(), in the order they are passed through.
+    STAGE_FILEHEADER = 0
+    STAGE_FILE = 1
+    STAGE_POSTDATA = 2
+    STAGE_FOOTER = 3
+    STAGE_DONE = 4
+
+    def __init__(self, filefield, filename, filelength, file, data):
+        """Prepare the multipart pieces; nothing is read from ``file`` yet.
+
+        ``filefield`` is the form field name for the file, ``filename`` the
+        name sent with it, ``filelength`` the number of bytes to take from
+        ``file``, and ``data`` a dict of additional form fields.
+        """
+        self.stage = self.STAGE_FILEHEADER
+        self.boundary = self.generate_boundary()
+        self.postdata = self.generate_multipart_from_dict(data)
+        self.footer = '\r\n--%s--\r\n' % self.boundary
+        self.fileheader = ('--%s\r\n' % self.boundary +
+                           'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' %
+                          (self.encode(filefield), self.encode(filename)) +
+                           'Content-Type: application/octet-stream\r\n\r\n')
+        self.file = file
+        self.length_left = filelength
+        self.str_data = None
+
+        # The extra 2 bytes are the '\r\n' that read() emits after the file.
+        Upload.__init__(self, len(self.fileheader) + filelength + len(self.postdata) + len(self.footer) + 2,
+                        'multipart/form-data; boundary=' + self.boundary)
+
+    def read(self, length):
+        """Return up to ``length`` bytes of the body, or '' when it is finished.
+
+        The CRLF that follows the file contents is returned as one piece
+        regardless of ``length``.
+        """
+        if self.stage == self.STAGE_DONE:
+            return ''
+        elif self.stage != self.STAGE_FILE:
+            # String stages are served from a StringIO created on first use.
+            if self.str_data is None:
+                if self.stage == self.STAGE_FILEHEADER:
+                    self.str_data = StringIO(self.fileheader)
+                elif self.stage == self.STAGE_POSTDATA:
+                    self.str_data = StringIO(self.postdata)
+                elif self.stage == self.STAGE_FOOTER:
+                    self.str_data = StringIO(self.footer)
+            data = self.str_data.read(length)
+        else:
+            if self.length_left:
+                # Never read more than the announced file length.
+                if length > self.length_left:
+                    length = self.length_left
+                data = self.file.read(length)
+                self.length_left -= len(data)
+            else:
+                # File fully sent: emit the separating CRLF and move on.
+                self.stage += 1
+                return '\r\n'
+
+        if data == '':
+            # Current piece exhausted: advance and read from the next one.
+            self.stage += 1
+            self.str_data = None
+            return self.read(length)
+        return data
+
+    @staticmethod
+    def generate_boundary():
+        """Return a boundary of 32 random alphanumerics wrapped in dashes."""
+        return '----%s----' % ''.join((random.choice(
+            'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789')
+            for i in xrange(32)))
+
+    def generate_multipart_from_dict(self, data):
+        """Return the form fields of ``data`` as multipart sections.
+
+        Each field contributes a boundary line, a Content-Disposition
+        header, an empty line and the value, all joined with CRLF.
+        """
+        postdata = []
+        for i in data:
+            postdata.append('--' + self.boundary)
+            postdata.append('Content-Disposition: form-data; name="%s"' % self.encode(i))
+            postdata.append('')
+            postdata.append(self.encode(data[i]))
+        return '\r\n'.join(postdata)
diff --git a/simplejson/__init__.py b/simplejson/__init__.py
new file mode 100644 (file)
index 0000000..702ddab
--- /dev/null
@@ -0,0 +1,547 @@
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print(json.dumps("\"foo\bar"))
+    "\"foo\bar"
+    >>> print(json.dumps(u'\u1234'))
+    "\u1234"
+    >>> print(json.dumps('\\'))
+    "\\"
+    >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
+    {"a": 0, "b": 0, "c": 0}
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO()
+    >>> json.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson as json
+    >>> obj = [1,2,3,{'4': 5, '6': 7}]
+    >>> json.dumps(obj, separators=(',',':'), sort_keys=True)
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson as json
+    >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent='    '))
+    {
+        "4": 5,
+        "6": 7
+    }
+
+Decoding JSON::
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> json.load(io)[0] == 'streaming API'
+    True
+
+Specializing JSON object decoding::
+
+    >>> import simplejson as json
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+    >>> from decimal import Decimal
+    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 3 (char 2)
+"""
+from __future__ import absolute_import
+__version__ = '3.4.0'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
+    'OrderedDict', 'simple_first',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from decimal import Decimal
+
+from .scanner import JSONDecodeError
+from .decoder import JSONDecoder
+from .encoder import JSONEncoder, JSONEncoderForHTML
+def _import_OrderedDict():
+    """Return ``collections.OrderedDict``, or the bundled backport if absent."""
+    import collections
+    if hasattr(collections, 'OrderedDict'):
+        return collections.OrderedDict
+    # Older interpreters: fall back to the copy shipped with this package.
+    from . import ordered_dict
+    return ordered_dict.OrderedDict
+OrderedDict = _import_OrderedDict()
+
+def _import_c_make_encoder():
+    """Return the C ``make_encoder`` speedup, or ``None`` if it is unavailable."""
+    try:
+        from ._speedups import make_encoder as c_encoder_factory
+    except ImportError:
+        c_encoder_factory = None
+    return c_encoder_factory
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+    use_decimal=True,
+    namedtuple_as_object=True,
+    tuple_as_array=True,
+    bigint_as_string=False,
+    item_sort_key=None,
+    for_json=False,
+    ignore_nan=False,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, use_decimal=True,
+        namedtuple_as_object=True, tuple_as_array=True,
+        bigint_as_string=False, sort_keys=False, item_sort_key=None,
+        for_json=False, ignore_nan=False, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If *skipkeys* is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If *ensure_ascii* is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If *check_circular* is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If *allow_nan* is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance of the original JSON specification, instead of using
+    the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See
+    *ignore_nan* for ECMA-262 compliant behavior.
+
+    If *indent* is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, *separators* should be an
+    ``(item_separator, key_separator)`` tuple.  The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise.  To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    *encoding* is the character encoding for str instances, default is UTF-8.
+
+    *default(obj)* is a function that should return a serializable version
+    of obj or raise ``TypeError``. The default simply raises ``TypeError``.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings. This is to avoid the
+    rounding that happens in Javascript otherwise. Note that this is still a
+    lossy operation that will not round-trip correctly and should be used
+    sparingly.
+
+    If specified, *item_sort_key* is a callable used to sort the items in
+    each dictionary. This is useful if you want to sort items other than
+    in alphabetical order by key. This option takes precedence over
+    *sort_keys*.
+
+    If *sort_keys* is true (default: ``False``), the output of dictionaries
+    will be sorted by item.
+
+    If *for_json* is true (default: ``False``), objects with a ``for_json()``
+    method will use the return value of that method for encoding as JSON
+    instead of the object.
+
+    If *ignore_nan* is true (default: ``False``), then out of range
+    :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+    ``null`` in compliance with the ECMA-262 specification. If true, this will
+    override *allow_nan*.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
+    of subclassing whenever possible.
+
+    """
+    # cached encoder: only usable when *every* option is at its default
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and use_decimal
+        and namedtuple_as_object and tuple_as_array
+        and not bigint_as_string and not sort_keys and not item_sort_key
+        and not for_json and not ignore_nan and not kw):
+        iterable = _default_encoder.iterencode(obj)
+    else:
+        if cls is None:
+            cls = JSONEncoder
+        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+            separators=separators, encoding=encoding,
+            default=default, use_decimal=use_decimal,
+            namedtuple_as_object=namedtuple_as_object,
+            tuple_as_array=tuple_as_array,
+            bigint_as_string=bigint_as_string,
+            sort_keys=sort_keys,
+            item_sort_key=item_sort_key,
+            for_json=for_json,
+            ignore_nan=ignore_nan,
+            **kw).iterencode(obj)
+    # could accelerate with writelines in some versions of Python, at
+    # a debuggability cost
+    for chunk in iterable:
+        fp.write(chunk)
+
+
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, use_decimal=True,
+        namedtuple_as_object=True, tuple_as_array=True,
+        bigint_as_string=False, sort_keys=False, item_sort_key=None,
+        for_json=False, ignore_nan=False, **kw):
+    """Serialize ``obj`` to a JSON formatted ``str``.
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then the return value will be a
+    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+    coercion rules instead of being escaped to an ASCII ``str``.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+    strict compliance of the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, ``separators`` should be an
+    ``(item_separator, key_separator)`` tuple.  The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise.  To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (not the default), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings. This is to avoid the
+    rounding that happens in Javascript otherwise.
+
+    If specified, *item_sort_key* is a callable used to sort the items in
+    each dictionary. This is useful if you want to sort items other than
+    in alphabetical order by key. This option takes precedence over
+    *sort_keys*.
+
+    If *sort_keys* is true (default: ``False``), the output of dictionaries
+    will be sorted by item.
+
+    If *for_json* is true (default: ``False``), objects with a ``for_json()``
+    method will use the return value of that method for encoding as JSON
+    instead of the object.
+
+    If *ignore_nan* is true (default: ``False``), then out of range
+    :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+    ``null`` in compliance with the ECMA-262 specification. If true, this will
+    override *allow_nan*.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
+    whenever possible.
+
+    """
+    # cached encoder
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and use_decimal
+        and namedtuple_as_object and tuple_as_array
+        and not bigint_as_string and not sort_keys
+        and not item_sort_key and not for_json
+        and not ignore_nan and not kw):
+        return _default_encoder.encode(obj)
+    if cls is None:
+        cls = JSONEncoder
+    return cls(
+        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+        separators=separators, encoding=encoding, default=default,
+        use_decimal=use_decimal,
+        namedtuple_as_object=namedtuple_as_object,
+        tuple_as_array=tuple_as_array,
+        bigint_as_string=bigint_as_string,
+        sort_keys=sort_keys,
+        item_sort_key=item_sort_key,
+        for_json=for_json,
+        ignore_nan=ignore_nan,
+        **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None,
+                               object_pairs_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, object_pairs_hook=None,
+        use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
+        **kw):
+    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+    a JSON document) to a Python object.
+
+    *encoding* determines the encoding used to interpret any
+    :class:`str` objects decoded by this instance (``'utf-8'`` by
+    default).  It has no effect when decoding :class:`unicode` objects.
+
+    Note that currently only encodings that are a superset of ASCII work,
+    strings of other encodings should be passed in as :class:`unicode`.
+
+    *object_hook*, if specified, will be called with the result of every
+    JSON object decoded and its return value will be used in place of the
+    given :class:`dict`.  This can be used to provide custom
+    deserializations (e.g. to support JSON-RPC class hinting).
+
+    *object_pairs_hook* is an optional function that will be called with
+    the result of any object literal decode with an ordered list of pairs.
+    The return value of *object_pairs_hook* will be used instead of the
+    :class:`dict`.  This feature can be used to implement custom decoders
+    that rely on the order that the key and value pairs are decoded (for
+    example, :func:`collections.OrderedDict` will remember the order of
+    insertion). If *object_hook* is also defined, the *object_pairs_hook*
+    takes priority.
+
+    *parse_float*, if specified, will be called with the string of every
+    JSON float to be decoded.  By default, this is equivalent to
+    ``float(num_str)``. This can be used to use another datatype or parser
+    for JSON floats (e.g. :class:`decimal.Decimal`).
+
+    *parse_int*, if specified, will be called with the string of every
+    JSON int to be decoded.  By default, this is equivalent to
+    ``int(num_str)``.  This can be used to use another datatype or parser
+    for JSON integers (e.g. :class:`float`).
+
+    *parse_constant*, if specified, will be called with one of the
+    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
+    can be used to raise an exception if invalid JSON numbers are
+    encountered.
+
+    If *use_decimal* is true (default: ``False``) then it implies
+    parse_float=decimal.Decimal for parity with ``dump``.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
+    of subclassing whenever possible.
+
+    """
+    return loads(fp.read(),
+        encoding=encoding, cls=cls, object_hook=object_hook,
+        parse_float=parse_float, parse_int=parse_int,
+        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
+        use_decimal=use_decimal, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, object_pairs_hook=None,
+        use_decimal=False, **kw):
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    *encoding* determines the encoding used to interpret any
+    :class:`str` objects decoded by this instance (``'utf-8'`` by
+    default).  It has no effect when decoding :class:`unicode` objects.
+
+    Note that currently only encodings that are a superset of ASCII work,
+    strings of other encodings should be passed in as :class:`unicode`.
+
+    *object_hook*, if specified, will be called with the result of every
+    JSON object decoded and its return value will be used in place of the
+    given :class:`dict`.  This can be used to provide custom
+    deserializations (e.g. to support JSON-RPC class hinting).
+
+    *object_pairs_hook* is an optional function that will be called with
+    the result of any object literal decode with an ordered list of pairs.
+    The return value of *object_pairs_hook* will be used instead of the
+    :class:`dict`.  This feature can be used to implement custom decoders
+    that rely on the order that the key and value pairs are decoded (for
+    example, :func:`collections.OrderedDict` will remember the order of
+    insertion). If *object_hook* is also defined, the *object_pairs_hook*
+    takes priority.
+
+    *parse_float*, if specified, will be called with the string of every
+    JSON float to be decoded.  By default, this is equivalent to
+    ``float(num_str)``. This can be used to use another datatype or parser
+    for JSON floats (e.g. :class:`decimal.Decimal`).
+
+    *parse_int*, if specified, will be called with the string of every
+    JSON int to be decoded.  By default, this is equivalent to
+    ``int(num_str)``.  This can be used to use another datatype or parser
+    for JSON integers (e.g. :class:`float`).
+
+    *parse_constant*, if specified, will be called with one of the
+    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
+    can be used to raise an exception if invalid JSON numbers are
+    encountered.
+
+    If *use_decimal* is true (default: ``False``) then it implies
+    parse_float=decimal.Decimal for parity with ``dump``.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
+    of subclassing whenever possible.
+
+    """
+    if (cls is None and encoding is None and object_hook is None and
+            parse_int is None and parse_float is None and
+            parse_constant is None and object_pairs_hook is None
+            and not use_decimal and not kw):
+        return _default_decoder.decode(s)
+    if cls is None:
+        cls = JSONDecoder
+    if object_hook is not None:
+        kw['object_hook'] = object_hook
+    if object_pairs_hook is not None:
+        kw['object_pairs_hook'] = object_pairs_hook
+    if parse_float is not None:
+        kw['parse_float'] = parse_float
+    if parse_int is not None:
+        kw['parse_int'] = parse_int
+    if parse_constant is not None:
+        kw['parse_constant'] = parse_constant
+    if use_decimal:
+        if parse_float is not None:
+            raise TypeError("use_decimal=True implies parse_float=Decimal")
+        kw['parse_float'] = Decimal
+    return cls(encoding=encoding, **kw).decode(s)
+
+
+def _toggle_speedups(enabled):
+    """Switch the package between the C speedups and the pure-Python code.
+
+    When *enabled* is true each hook is pointed at its C implementation if
+    one is available, falling back to the Python version; otherwise the
+    Python versions are forced.  The cached module-level default decoder and
+    encoder are rebuilt afterwards (presumably so they bind the newly
+    selected functions).
+    """
+    from . import decoder as dec
+    from . import encoder as enc
+    from . import scanner as scan
+    c_make_encoder = _import_c_make_encoder()
+    if enabled:
+        dec.scanstring = dec.c_scanstring or dec.py_scanstring
+        enc.c_make_encoder = c_make_encoder
+        enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
+            enc.py_encode_basestring_ascii)
+        scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
+    else:
+        dec.scanstring = dec.py_scanstring
+        enc.c_make_encoder = None
+        enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
+        scan.make_scanner = scan.py_make_scanner
+    dec.make_scanner = scan.make_scanner
+    global _default_decoder
+    _default_decoder = JSONDecoder(
+        encoding=None,
+        object_hook=None,
+        object_pairs_hook=None,
+    )
+    global _default_encoder
+    # Keep this argument list identical to the module-level
+    # ``_default_encoder`` so toggling never changes the cached defaults.
+    _default_encoder = JSONEncoder(
+        skipkeys=False,
+        ensure_ascii=True,
+        check_circular=True,
+        allow_nan=True,
+        indent=None,
+        separators=None,
+        encoding='utf-8',
+        default=None,
+        use_decimal=True,
+        namedtuple_as_object=True,
+        tuple_as_array=True,
+        bigint_as_string=False,
+        item_sort_key=None,
+        for_json=False,
+        ignore_nan=False,
+    )
+
+def simple_first(kv):
+    """Sort key for ``item_sort_key``: simple values first, containers after.
+
+    *kv* is a ``(key, value)`` pair; items within each group are ordered by
+    key.
+    """
+    is_container = isinstance(kv[1], (list, dict, tuple))
+    return (is_container, kv[0])
diff --git a/simplejson/_speedups.c b/simplejson/_speedups.c
new file mode 100644 (file)
index 0000000..e888873
--- /dev/null
@@ -0,0 +1,3296 @@
+/* -*- mode: C; c-file-style: "python"; c-basic-offset: 4 -*- */
+#include "Python.h"
+#include "structmember.h"
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_FromSsize_t PyLong_FromSsize_t
+#define PyInt_AsSsize_t PyLong_AsSsize_t
+#define PyString_Check PyBytes_Check
+#define PyString_GET_SIZE PyBytes_GET_SIZE
+#define PyString_AS_STRING PyBytes_AS_STRING
+#define PyString_FromStringAndSize PyBytes_FromStringAndSize
+#define PyInt_Check(obj) 0
+#define JSON_UNICHR Py_UCS4
+#define JSON_InternFromString PyUnicode_InternFromString
+#define JSON_Intern_GET_SIZE PyUnicode_GET_SIZE
+#define JSON_ASCII_Check PyUnicode_Check
+#define JSON_ASCII_AS_STRING PyUnicode_AsUTF8
+#define PyInt_Type PyLong_Type
+#define PyInt_FromString PyLong_FromString
+#define PY2_UNUSED
+#define PY3_UNUSED UNUSED
+#define JSON_NewEmptyUnicode() PyUnicode_New(0, 127)
+#else /* PY_MAJOR_VERSION >= 3 */
+#define PY2_UNUSED UNUSED
+#define PY3_UNUSED
+#define PyUnicode_READY(obj) 0
+#define PyUnicode_KIND(obj) (sizeof(Py_UNICODE))
+#define PyUnicode_DATA(obj) ((void *)(PyUnicode_AS_UNICODE(obj)))
+#define PyUnicode_READ(kind, data, index) ((JSON_UNICHR)((const Py_UNICODE *)(data))[(index)])
+#define PyUnicode_GetLength PyUnicode_GET_SIZE
+#define JSON_UNICHR Py_UNICODE
+#define JSON_ASCII_Check PyString_Check
+#define JSON_ASCII_AS_STRING PyString_AS_STRING
+#define JSON_InternFromString PyString_InternFromString
+#define JSON_Intern_GET_SIZE PyString_GET_SIZE
+#define JSON_NewEmptyUnicode() PyUnicode_FromUnicode(NULL, 0)
+#endif /* PY_MAJOR_VERSION < 3 */
+
+#if PY_VERSION_HEX < 0x02070000
+#if !defined(PyOS_string_to_double)
+#define PyOS_string_to_double json_PyOS_string_to_double
+static double
+json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
+/* Fallback for PyOS_string_to_double, compiled only when PY_VERSION_HEX is
+ * below 0x02070000 and no macro of that name exists.  It supports only the
+ * call shape where both endptr and overflow_exception are NULL (asserted
+ * below) and delegates the conversion to PyOS_ascii_atof. */
+static double
+json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception)
+{
+    double x;
+    /* The extra arguments exist for signature compatibility only. */
+    assert(endptr == NULL);
+    assert(overflow_exception == NULL);
+    /* Floating-point exception guard; yields -1.0 if the guard trips. */
+    PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
+    x = PyOS_ascii_atof(s);
+    PyFPE_END_PROTECT(x)
+    return x;
+}
+#endif
+#endif /* PY_VERSION_HEX < 0x02070000 */
+
+#if PY_VERSION_HEX < 0x02060000
+#if !defined(Py_TYPE)
+#define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
+#endif
+#if !defined(Py_SIZE)
+#define Py_SIZE(ob)     (((PyVarObject*)(ob))->ob_size)
+#endif
+#if !defined(PyVarObject_HEAD_INIT)
+#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+#endif /* PY_VERSION_HEX < 0x02060000 */
+
+#if PY_VERSION_HEX < 0x02050000
+#if !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#define PyInt_FromSsize_t PyInt_FromLong
+#define PyInt_AsSsize_t PyInt_AsLong
+#endif
+#if !defined(Py_IS_FINITE)
+#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
+#endif
+#endif /* PY_VERSION_HEX < 0x02050000 */
+
+#ifdef __GNUC__
+#define UNUSED __attribute__((__unused__))
+#else
+#define UNUSED
+#endif
+
+#define DEFAULT_ENCODING "utf-8"
+
+#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
+#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
+#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
+#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
+
+#define JSON_ALLOW_NAN 1
+#define JSON_IGNORE_NAN 2
+
+static PyTypeObject PyScannerType;
+static PyTypeObject PyEncoderType;
+
+typedef struct {
+    PyObject *large_strings;  /* A list of previously accumulated large strings */
+    PyObject *small_strings;  /* Pending small strings */
+} JSON_Accu;
+
+static int
+JSON_Accu_Init(JSON_Accu *acc);
+static int
+JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode);
+static PyObject *
+JSON_Accu_FinishAsList(JSON_Accu *acc);
+static void
+JSON_Accu_Destroy(JSON_Accu *acc);
+
+#define ERR_EXPECTING_VALUE "Expecting value"
+#define ERR_ARRAY_DELIMITER "Expecting ',' delimiter or ']'"
+#define ERR_ARRAY_VALUE_FIRST "Expecting value or ']'"
+#define ERR_OBJECT_DELIMITER "Expecting ',' delimiter or '}'"
+#define ERR_OBJECT_PROPERTY "Expecting property name enclosed in double quotes"
+#define ERR_OBJECT_PROPERTY_FIRST "Expecting property name enclosed in double quotes or '}'"
+#define ERR_OBJECT_PROPERTY_DELIMITER "Expecting ':' delimiter"
+#define ERR_STRING_UNTERMINATED "Unterminated string starting at"
+#define ERR_STRING_CONTROL "Invalid control character %r at"
+#define ERR_STRING_ESC1 "Invalid \\X escape sequence %r"
+#define ERR_STRING_ESC4 "Invalid \\uXXXX escape sequence"
+
+typedef struct _PyScannerObject {
+    PyObject_HEAD
+    PyObject *encoding;
+    PyObject *strict;
+    PyObject *object_hook;
+    PyObject *pairs_hook;
+    PyObject *parse_float;
+    PyObject *parse_int;
+    PyObject *parse_constant;
+    PyObject *memo;
+} PyScannerObject;
+
+static PyMemberDef scanner_members[] = {
+    {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
+    {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
+    {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
+    {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
+    {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
+    {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
+    {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
+    {NULL}
+};
+
+typedef struct _PyEncoderObject {
+    PyObject_HEAD
+    PyObject *markers;
+    PyObject *defaultfn;
+    PyObject *encoder;
+    PyObject *indent;
+    PyObject *key_separator;
+    PyObject *item_separator;
+    PyObject *sort_keys;
+    PyObject *key_memo;
+    PyObject *encoding;
+    PyObject *Decimal;
+    PyObject *skipkeys_bool;
+    int skipkeys;
+    int fast_encode;
+    /* 0, JSON_ALLOW_NAN, JSON_IGNORE_NAN */
+    int allow_or_ignore_nan;
+    int use_decimal;
+    int namedtuple_as_object;
+    int tuple_as_array;
+    int bigint_as_string;
+    PyObject *item_sort_key;
+    PyObject *item_sort_kw;
+    int for_json;
+} PyEncoderObject;
+
+/* Read-only attributes exposed on the C encoder object.  Each entry maps a
+ * Python attribute name to the PyEncoderObject field holding its value. */
+static PyMemberDef encoder_members[] = {
+    {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+    {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+    {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+    /* Must point at the encoding field, not at the encoder callable. */
+    {"encoding", T_OBJECT, offsetof(PyEncoderObject, encoding), READONLY, "encoding"},
+    {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+    {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+    {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+    {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+    /* Python 2.5 does not support T_BOOL */
+    {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys_bool), READONLY, "skipkeys"},
+    {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
+    {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
+    {NULL}
+};
+/*
+ * Forward declarations for the static helpers defined later in this file.
+ * The prototypes inside the PY_MAJOR_VERSION < 3 guard are only declared
+ * when building for Python 2.
+ */
+static PyObject *
+join_list_unicode(PyObject *lst);
+static PyObject *
+JSON_ParseEncoding(PyObject *encoding);
+static PyObject *
+JSON_UnicodeFromChar(JSON_UNICHR c);
+static PyObject *
+maybe_quote_bigint(PyObject *encoded, PyObject *obj);
+static Py_ssize_t
+ascii_char_size(JSON_UNICHR c);
+static Py_ssize_t
+ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+join_list_string(PyObject *lst);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+#endif
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static PyObject *
+encoder_stringify_key(PyEncoderObject *s, PyObject *key);
+static int
+encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *obj);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+static int
+_is_namedtuple(PyObject *obj);
+static int
+_has_for_json_hook(PyObject *obj);
+static PyObject *
+moduleinit(void);
+
+/* True when c is a printable ASCII character that can appear unescaped in a
+ * JSON string: space through '~', excluding backslash and double quote.
+ * The argument is parenthesised so that expression arguments expand
+ * correctly; note that it is still evaluated more than once. */
+#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
+/* True when c is a space, tab, line feed or carriage return. */
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+/* Size used by ascii_char_size() for a character that is written as one
+ * six-character "\uXXXX" escape. */
+#define MIN_EXPANSION 6
+
+/*
+ * Prepare an accumulator for use.
+ *
+ * small_strings is set to a new empty list; large_strings is left NULL and
+ * is only created when flush_accumulator() first needs it.
+ *
+ * Returns 0 on success, -1 if the list could not be created.
+ */
+static int
+JSON_Accu_Init(JSON_Accu *acc)
+{
+    PyObject *pending = PyList_New(0);
+
+    /* Created on demand by flush_accumulator() */
+    acc->large_strings = NULL;
+    acc->small_strings = pending;
+    return (pending != NULL) ? 0 : -1;
+}
+
+static int
+flush_accumulator(JSON_Accu *acc)
+{
+    Py_ssize_t nsmall = PyList_GET_SIZE(acc->small_strings);
+    if (nsmall) {
+        int ret;
+        PyObject *joined;
+        if (acc->large_strings == NULL) {
+            acc->large_strings = PyList_New(0);
+            if (acc->large_strings == NULL)
+                return -1;
+        }
+#if PY_MAJOR_VERSION >= 3
+        joined = join_list_unicode(acc->small_strings);
+#else /* PY_MAJOR_VERSION >= 3 */
+        joined = join_list_string(acc->small_strings);
+#endif /* PY_MAJOR_VERSION < 3 */
+        if (joined == NULL)
+            return -1;
+        if (PyList_SetSlice(acc->small_strings, 0, nsmall, NULL)) {
+            Py_DECREF(joined);
+            return -1;
+        }
+        ret = PyList_Append(acc->large_strings, joined);
+        Py_DECREF(joined);
+        return ret;
+    }
+    return 0;
+}
+
+/*
+ * Queue one string on the accumulator.
+ *
+ * The string is appended to acc->small_strings; once that list holds
+ * 100000 entries it is collapsed through flush_accumulator().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+JSON_Accu_Accumulate(JSON_Accu *acc, PyObject *unicode)
+{
+#if PY_MAJOR_VERSION >= 3
+    assert(PyUnicode_Check(unicode));
+#else /* PY_MAJOR_VERSION >= 3 */
+    assert(JSON_ASCII_Check(unicode) || PyUnicode_Check(unicode));
+#endif /* PY_MAJOR_VERSION < 3 */
+
+    if (PyList_Append(acc->small_strings, unicode) != 0)
+        return -1;
+
+    /* Each item in a list of unicode objects has an overhead (in 64-bit
+     * builds) of:
+     *   - 8 bytes for the list slot
+     *   - 56 bytes for the header of the unicode object
+     * that is, 64 bytes.  100000 such objects waste more than 6MB
+     * compared to a single concatenated string.
+     */
+    if (PyList_GET_SIZE(acc->small_strings) >= 100000)
+        return flush_accumulator(acc);
+    return 0;
+}
+
+/*
+ * Flush the accumulator and hand its list of joined strings to the caller.
+ *
+ * Both fields of acc are NULL afterwards. Returns the large_strings list
+ * (or a new empty list if nothing was ever flushed), or NULL when the
+ * flush failed.
+ */
+static PyObject *
+JSON_Accu_FinishAsList(JSON_Accu *acc)
+{
+    PyObject *chunks;
+    int failed = flush_accumulator(acc);
+
+    Py_CLEAR(acc->small_strings);
+    if (failed != 0) {
+        Py_CLEAR(acc->large_strings);
+        return NULL;
+    }
+
+    /* Detach the list from the accumulator so the caller owns it. */
+    chunks = acc->large_strings;
+    acc->large_strings = NULL;
+    return (chunks != NULL) ? chunks : PyList_New(0);
+}
+/*
+ * Drop the accumulator's references to both of its lists.
+ * Py_CLEAR tolerates fields that are already NULL and leaves them NULL.
+ */
+static void
+JSON_Accu_Destroy(JSON_Accu *acc)
+{
+    Py_CLEAR(acc->small_strings);
+    Py_CLEAR(acc->large_strings);
+}
+
+/* Return non-zero when c is an ASCII decimal digit ('0' through '9'). */
+static int
+IS_DIGIT(JSON_UNICHR c)
+{
+    if (c < '0')
+        return 0;
+    return c <= '9';
+}
+/*
+ * Build a new unicode object of length one that holds the character c.
+ *
+ * Python 3: allocates with PyUnicode_New(1, c) and writes c at index 0.
+ * Python 2: delegates to PyUnicode_FromUnicode().
+ *
+ * Returns the new object, or NULL if it could not be created.
+ */
+static PyObject *
+JSON_UnicodeFromChar(JSON_UNICHR c)
+{
+#if PY_MAJOR_VERSION >= 3
+    PyObject *rval = PyUnicode_New(1, c);
+    if (rval)
+        PyUnicode_WRITE(PyUnicode_KIND(rval), PyUnicode_DATA(rval), 0, c);
+    return rval;
+#else /* PY_MAJOR_VERSION >= 3 */
+    return PyUnicode_FromUnicode(&c, 1);
+#endif /* PY_MAJOR_VERSION < 3 */
+}
+
+static PyObject *
+maybe_quote_bigint(PyObject *encoded, PyObject *obj)
+{
+    static PyObject *big_long = NULL;
+    static PyObject *small_long = NULL;
+    if (big_long == NULL) {
+        big_long = PyLong_FromLongLong(1LL << 53);
+        if (big_long == NULL) {
+            Py_DECREF(encoded);
+            return NULL;
+        }
+    }
+    if (small_long == NULL) {
+        small_long = PyLong_FromLongLong(-1LL << 53);
+        if (small_long == NULL) {
+            Py_DECREF(encoded);
+            return NULL;
+        }
+    }
+    if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
+        PyObject_RichCompareBool(obj, small_long, Py_LE)) {
+#if PY_MAJOR_VERSION >= 3
+        PyObject* quoted = PyUnicode_FromFormat("\"%U\"", encoded);
+#else
+        PyObject* quoted = PyString_FromFormat("\"%s\"",
+                                               PyString_AsString(encoded));
+#endif
+        Py_DECREF(encoded);
+        encoded = quoted;
+    }
+    return encoded;
+}
+
+/*
+ * Report whether obj has a callable attribute named "_asdict".
+ * A failed attribute lookup counts as "no" and its exception is cleared.
+ */
+static int
+_is_namedtuple(PyObject *obj)
+{
+    int callable;
+    PyObject *attr = PyObject_GetAttrString(obj, "_asdict");
+
+    if (attr != NULL) {
+        callable = PyCallable_Check(attr);
+        Py_DECREF(attr);
+        return callable;
+    }
+    /* Lookup failed: discard the pending exception and answer "no". */
+    PyErr_Clear();
+    return 0;
+}
+
+/*
+ * Report whether obj has a callable attribute named "for_json".
+ * A failed attribute lookup counts as "no" and its exception is cleared.
+ */
+static int
+_has_for_json_hook(PyObject *obj)
+{
+    int callable;
+    PyObject *hook = PyObject_GetAttrString(obj, "for_json");
+
+    if (hook != NULL) {
+        callable = PyCallable_Check(hook);
+        Py_DECREF(hook);
+        return callable;
+    }
+    /* Lookup failed: discard the pending exception and answer "no". */
+    PyErr_Clear();
+    return 0;
+}
+
+/*
+ * "O&" converter from a Python integer to Py_ssize_t.
+ * Stores the converted value in *size_ptr and returns 1; returns 0 when the
+ * conversion raised an exception.
+ */
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    Py_ssize_t converted = PyInt_AsSsize_t(o);
+
+    *size_ptr = converted;
+    /* -1 is a legitimate value unless an exception is pending as well */
+    return !(converted == -1 && PyErr_Occurred());
+}
+
+/*
+ * "O&" converter from a Py_ssize_t (passed by address) to a new Python
+ * integer object. Returns NULL if the object could not be created.
+ */
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
+{
+    PyObject *boxed = PyInt_FromSsize_t(*size_ptr);
+
+    return boxed;
+}
+/*
+ * Write the ASCII-only JSON representation of one character into a buffer.
+ *
+ * c      - character to emit
+ * output - destination buffer (see the space requirement noted below)
+ * chars  - index in output at which writing starts
+ *
+ * Returns the index just past the last character written.
+ */
+static Py_ssize_t
+ascii_escape_char(JSON_UNICHR c, char *output, Py_ssize_t chars)
+{
+    /* Escape unicode code point c to ASCII escape sequences
+    in char *output. output must have at least 12 bytes unused to
+    accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
+    if (S_CHAR(c)) {
+        output[chars++] = (char)c; /* copied through unchanged */
+    }
+    else {
+        output[chars++] = '\\';
+        switch (c) {
+            case '\\': output[chars++] = (char)c; break;
+            case '"': output[chars++] = (char)c; break;
+            case '\b': output[chars++] = 'b'; break;
+            case '\f': output[chars++] = 'f'; break;
+            case '\n': output[chars++] = 'n'; break;
+            case '\r': output[chars++] = 'r'; break;
+            case '\t': output[chars++] = 't'; break;
+            default:
+#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
+                if (c >= 0x10000) {
+                    /* UTF-16 surrogate pair */
+                    JSON_UNICHR v = c - 0x10000;
+                    c = 0xd800 | ((v >> 10) & 0x3ff); /* high surrogate */
+                    output[chars++] = 'u';
+                    output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
+                    output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
+                    output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
+                    output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
+                    c = 0xdc00 | (v & 0x3ff); /* low surrogate, written by the code below */
+                    output[chars++] = '\\';
+                }
+#endif
+                output[chars++] = 'u';
+                output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
+                output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
+        }
+    }
+    return chars;
+}
+
+static Py_ssize_t
+ascii_char_size(JSON_UNICHR c)
+{
+    if (S_CHAR(c)) {
+        return 1;
+    }
+    else if (c == '\\' ||
+               c == '"'  ||
+               c == '\b' ||
+               c == '\f' ||
+               c == '\n' ||
+               c == '\r' ||
+               c == '\t') {
+        return 2;
+    }
+#if defined(Py_UNICODE_WIDE) || PY_MAJOR_VERSION >= 3
+    else if (c >= 0x10000U) {
+        return 2 * MIN_EXPANSION;
+    }
+#endif
+    else {
+        return MIN_EXPANSION;
+    }
+}
+/*
+ * Return the JSON string literal (including the surrounding double quotes)
+ * for a unicode object, using only ASCII characters.
+ *
+ * Works in two passes: the first sums ascii_char_size() over the input to
+ * size the result exactly, the second fills it in with ascii_escape_char().
+ * The result is a str object on Python 3 and a byte string on Python 2.
+ * Returns NULL on failure.
+ *
+ * NOTE(review): the PyUnicode_READY/KIND/DATA/READ calls are used on both
+ * major versions; presumably a compatibility layer outside this view
+ * supplies them for Python 2 - confirm.
+ */
+static PyObject *
+ascii_escape_unicode(PyObject *pystr)
+{
+    /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t chars;
+    PY2_UNUSED int kind;
+    void *data;
+    PyObject *rval;
+    char *output;
+
+    if (PyUnicode_READY(pystr))
+        return NULL;
+
+    kind = PyUnicode_KIND(pystr);
+    data = PyUnicode_DATA(pystr);
+    input_chars = PyUnicode_GetLength(pystr);
+    output_size = 2; /* opening and closing quote */
+    for (i = 0; i < input_chars; i++) {
+        output_size += ascii_char_size(PyUnicode_READ(kind, data, i));
+    }
+#if PY_MAJOR_VERSION >= 3
+    rval = PyUnicode_New(output_size, 127);
+    if (rval == NULL) {
+        return NULL;
+    }
+    assert(PyUnicode_KIND(rval) == PyUnicode_1BYTE_KIND);
+    output = (char *)PyUnicode_DATA(rval);
+#else
+    rval = PyString_FromStringAndSize(NULL, output_size);
+    if (rval == NULL) {
+        return NULL;
+    }
+    output = PyString_AS_STRING(rval);
+#endif
+    chars = 0;
+    output[chars++] = '"';
+    for (i = 0; i < input_chars; i++) {
+        chars = ascii_escape_char(PyUnicode_READ(kind, data, i), output, chars);
+    }
+    output[chars++] = '"';
+    assert(chars == output_size); /* both passes must agree on the length */
+    return rval;
+}
+/*
+ * ascii_escape_str: return the ASCII-only JSON string literal for a byte
+ * string. Two implementations follow, selected by Python major version.
+ * Both return NULL on failure.
+ */
+#if PY_MAJOR_VERSION >= 3
+/* Python 3: decode the bytes as UTF-8, then reuse ascii_escape_unicode(). */
+static PyObject *
+ascii_escape_str(PyObject *pystr)
+{
+    PyObject *rval;
+    PyObject *input = PyUnicode_DecodeUTF8(PyString_AS_STRING(pystr), PyString_GET_SIZE(pystr), NULL);
+    if (input == NULL)
+        return NULL;
+    rval = ascii_escape_unicode(input);
+    Py_DECREF(input);
+    return rval;
+}
+
+#else /* PY_MAJOR_VERSION >= 3 */
+/* Python 2: escape directly while every byte is ASCII; otherwise decode. */
+static PyObject *
+ascii_escape_str(PyObject *pystr)
+{
+    /* Take a PyString pystr and return a new ASCII-only escaped PyString */
+    Py_ssize_t i;
+    Py_ssize_t input_chars;
+    Py_ssize_t output_size;
+    Py_ssize_t chars;
+    PyObject *rval;
+    char *output;
+    char *input_str;
+
+    input_chars = PyString_GET_SIZE(pystr);
+    input_str = PyString_AS_STRING(pystr);
+    output_size = 2; /* opening and closing quote */
+
+    /* Fast path for a string that's already ASCII */
+    for (i = 0; i < input_chars; i++) {
+        JSON_UNICHR c = (JSON_UNICHR)input_str[i];
+        if (c > 0x7f) {
+            /* We hit a non-ASCII character, bail to unicode mode */
+            PyObject *uni;
+            uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
+            if (uni == NULL) {
+                return NULL;
+            }
+            rval = ascii_escape_unicode(uni);
+            Py_DECREF(uni);
+            return rval;
+        }
+        output_size += ascii_char_size(c);
+    }
+
+    rval = PyString_FromStringAndSize(NULL, output_size);
+    if (rval == NULL) {
+        return NULL;
+    }
+    chars = 0;
+    output = PyString_AS_STRING(rval);
+    output[chars++] = '"';
+    for (i = 0; i < input_chars; i++) {
+        chars = ascii_escape_char((JSON_UNICHR)input_str[i], output, chars);
+    }
+    output[chars++] = '"';
+    assert(chars == output_size); /* both passes must agree on the length */
+    return rval;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+/*
+ * Convert a dictionary key to the string object used for it in the output.
+ *
+ * Returns a new reference:
+ *   - the key itself when it is already a unicode object (or, on Python 2,
+ *     a byte string);
+ *   - on Python 3, a byte-string key decoded with s->encoding;
+ *   - the encoder's representation for floats and for True/False/None;
+ *   - str(key) for integers, and for Decimal instances when s->use_decimal
+ *     is set;
+ *   - Py_None for any other type when s->skipkeys is set, which tells the
+ *     caller to skip the entry.
+ * Otherwise raises TypeError and returns NULL.
+ *
+ * NOTE(review): PyString_* names appear in the Python 3 branch as well;
+ * presumably a compatibility header maps them to bytes - confirm.
+ */
+static PyObject *
+encoder_stringify_key(PyEncoderObject *s, PyObject *key)
+{
+    if (PyUnicode_Check(key)) {
+        Py_INCREF(key);
+        return key;
+    }
+    else if (PyString_Check(key)) {
+#if PY_MAJOR_VERSION >= 3
+        return PyUnicode_Decode(
+            PyString_AS_STRING(key),
+            PyString_GET_SIZE(key),
+            JSON_ASCII_AS_STRING(s->encoding),
+            NULL);
+#else /* PY_MAJOR_VERSION >= 3 */
+        Py_INCREF(key);
+        return key;
+#endif /* PY_MAJOR_VERSION < 3 */
+    }
+    else if (PyFloat_Check(key)) {
+        return encoder_encode_float(s, key);
+    }
+    else if (key == Py_True || key == Py_False || key == Py_None) {
+        /* This must come before the PyInt_Check because
+           True and False are also 1 and 0.*/
+        return _encoded_const(key);
+    }
+    else if (PyInt_Check(key) || PyLong_Check(key)) {
+        return PyObject_Str(key);
+    }
+    else if (s->use_decimal && PyObject_TypeCheck(key, (PyTypeObject *)s->Decimal)) {
+        return PyObject_Str(key);
+    }
+    else if (s->skipkeys) {
+        Py_INCREF(Py_None); /* sentinel meaning "skip this key" */
+        return Py_None;
+    }
+    PyErr_SetString(PyExc_TypeError, "keys must be a string");
+    return NULL;
+}
+
+static PyObject *
+encoder_dict_iteritems(PyEncoderObject *s, PyObject *dct)
+{
+    PyObject *items;
+    PyObject *iter = NULL;
+    PyObject *lst = NULL;
+    PyObject *item = NULL;
+    PyObject *kstr = NULL;
+    static PyObject *sortfun = NULL;
+    static PyObject *sortargs = NULL;
+
+    if (sortargs == NULL) {
+        sortargs = PyTuple_New(0);
+        if (sortargs == NULL)
+            return NULL;
+    }
+
+    if (PyDict_CheckExact(dct))
+        items = PyDict_Items(dct);
+    else
+        items = PyMapping_Items(dct);
+    if (items == NULL)
+        return NULL;
+    iter = PyObject_GetIter(items);
+    Py_DECREF(items);
+    if (iter == NULL)
+        return NULL;
+    if (s->item_sort_kw == Py_None)
+        return iter;
+    lst = PyList_New(0);
+    if (lst == NULL)
+        goto bail;
+    while ((item = PyIter_Next(iter))) {
+        PyObject *key, *value;
+        if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
+            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+            goto bail;
+        }
+        key = PyTuple_GET_ITEM(item, 0);
+        if (key == NULL)
+            goto bail;
+#if PY_MAJOR_VERSION < 3
+        else if (PyString_Check(key)) {
+            /* item can be added as-is */
+        }
+#endif /* PY_MAJOR_VERSION < 3 */
+        else if (PyUnicode_Check(key)) {
+            /* item can be added as-is */
+        }
+        else {
+            PyObject *tpl;
+            kstr = encoder_stringify_key(s, key);
+            if (kstr == NULL)
+                goto bail;
+            else if (kstr == Py_None) {
+                /* skipkeys */
+                Py_DECREF(kstr);
+                continue;
+            }
+            value = PyTuple_GET_ITEM(item, 1);
+            if (value == NULL)
+                goto bail;
+            tpl = PyTuple_Pack(2, kstr, value);
+            if (tpl == NULL)
+                goto bail;
+            Py_CLEAR(kstr);
+            Py_DECREF(item);
+            item = tpl;
+        }
+        if (PyList_Append(lst, item))
+            goto bail;
+        Py_DECREF(item);
+    }
+    Py_CLEAR(iter);
+    if (PyErr_Occurred())
+        goto bail;
+    sortfun = PyObject_GetAttrString(lst, "sort");
+    if (sortfun == NULL)
+        goto bail;
+    if (!PyObject_Call(sortfun, sortargs, s->item_sort_kw))
+        goto bail;
+    Py_CLEAR(sortfun);
+    iter = PyObject_GetIter(lst);
+    Py_CLEAR(lst);
+    return iter;
+bail:
+    Py_XDECREF(sortfun);
+    Py_XDECREF(kstr);
+    Py_XDECREF(item);
+    Py_XDECREF(lst);
+    Py_XDECREF(iter);
+    return NULL;
+}
+/*
+ * Raise simplejson.scanner.JSONDecodeError(msg, s, end).
+ *
+ * The exception class is imported on first use and cached in a static.
+ * If the import or the attribute lookup fails, the function returns early
+ * without raising JSONDecodeError. Nothing is returned: callers are
+ * expected to follow up with their own error return.
+ */
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
+{
+    /* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
+    static PyObject *JSONDecodeError = NULL;
+    PyObject *exc;
+    if (JSONDecodeError == NULL) {
+        PyObject *scanner = PyImport_ImportModule("simplejson.scanner");
+        if (scanner == NULL)
+            return;
+        JSONDecodeError = PyObject_GetAttrString(scanner, "JSONDecodeError");
+        Py_DECREF(scanner);
+        if (JSONDecodeError == NULL)
+            return;
+    }
+    exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
+    if (exc) {
+        PyErr_SetObject(JSONDecodeError, exc);
+        Py_DECREF(exc);
+    }
+}
+
+/*
+ * Equivalent of u''.join(lst).
+ *
+ * The bound join method of an empty unicode string is looked up on the
+ * first call and cached in a static for later calls.
+ * Returns the joined string, or NULL on failure.
+ */
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    static PyObject *joinfn = NULL;
+
+    if (joinfn == NULL) {
+        PyObject *empty = JSON_NewEmptyUnicode();
+
+        if (empty != NULL) {
+            joinfn = PyObject_GetAttrString(empty, "join");
+            Py_DECREF(empty);
+        }
+        if (joinfn == NULL)
+            return NULL;
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+/*
+ * join_list_string: on Python 3 this is simply an alias for
+ * join_list_unicode; on Python 2 it is the byte-string counterpart below.
+ */
+#if PY_MAJOR_VERSION >= 3
+#define join_list_string join_list_unicode
+#else /* PY_MAJOR_VERSION >= 3 */
+static PyObject *
+join_list_string(PyObject *lst)
+{
+    /* return ''.join(lst) */
+    static PyObject *joinfn = NULL; /* bound ''.join, cached after the first call */
+    if (joinfn == NULL) {
+        PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
+        if (ustr == NULL)
+            return NULL;
+
+        joinfn = PyObject_GetAttrString(ustr, "join");
+        Py_DECREF(ustr);
+        if (joinfn == NULL)
+            return NULL;
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+/*
+ * Pack a parsed value and an index into the tuple (rval, idx).
+ *
+ * The reference to rval is stolen: it ends up inside the tuple on success
+ * and is released on failure. A NULL rval is passed straight through as
+ * NULL; an exception is expected to be set already in that case.
+ */
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx)
+{
+    PyObject *pair;
+    PyObject *boxed_idx;
+
+    if (rval == NULL) {
+        assert(PyErr_Occurred());
+        return NULL;
+    }
+
+    boxed_idx = PyInt_FromSsize_t(idx);
+    /* only try to allocate the tuple once the index object exists */
+    pair = (boxed_idx != NULL) ? PyTuple_New(2) : NULL;
+    if (pair == NULL) {
+        Py_XDECREF(boxed_idx);
+        Py_DECREF(rval);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(pair, 0, rval);
+    PyTuple_SET_ITEM(pair, 1, boxed_idx);
+    return pair;
+}
+/*
+ * Helper for the scanstring functions: if a chunk is pending, append it to
+ * the chunks list (creating the list on first use) and reset chunk to NULL.
+ * The expanding function must provide PyObject* locals named chunk and
+ * chunks and a label named bail, which is jumped to on failure.
+ */
+#define APPEND_OLD_CHUNK \
+    if (chunk != NULL) { \
+        if (chunks == NULL) { \
+            chunks = PyList_New(0); \
+            if (chunks == NULL) { \
+                goto bail; \
+            } \
+        } \
+        if (PyList_Append(chunks, chunk)) { \
+            goto bail; \
+        } \
+        Py_CLEAR(chunk); \
+    }
+
+#if PY_MAJOR_VERSION < 3
+/*
+ * Scan one JSON string literal out of a Python 2 byte string.
+ *
+ * pystr        - byte string being parsed
+ * end          - index of the first character after the opening quote
+ * encoding     - encoding of pystr, used to decode non-ASCII chunks
+ * strict       - when non-zero, literal control characters are rejected
+ * next_end_ptr - receives the index after the closing quote, or -1 on
+ *                failure
+ *
+ * The literal is collected as a sequence of chunks (runs of plain
+ * characters and single decoded escapes) which are joined at the end.
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyString pystr.
+    end is the index of the first character after the quote.
+    encoding is the encoding of pystr (must be an ASCII superset)
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyString (if ASCII-only) or PyUnicode
+    */
+    PyObject *rval;
+    Py_ssize_t len = PyString_GET_SIZE(pystr);
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    int has_unicode = 0;
+    char *buf = PyString_AS_STRING(pystr);
+    PyObject *chunks = NULL;
+    PyObject *chunk = NULL;
+    PyObject *strchunk = NULL;
+
+    if (len == end) {
+        raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+        goto bail;
+    }
+    else if (end < 0 || len < end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        Py_UNICODE c = 0;
+        for (next = end; next < len; next++) {
+            c = (unsigned char)buf[next];
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg(ERR_STRING_CONTROL, pystr, next);
+                goto bail;
+            }
+            else if (c > 0x7f) {
+                has_unicode = 1;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            APPEND_OLD_CHUNK
+#if PY_MAJOR_VERSION >= 3
+            if (!has_unicode) {
+                chunk = PyUnicode_DecodeASCII(&buf[end], next - end, NULL);
+            }
+            else {
+                chunk = PyUnicode_Decode(&buf[end], next - end, encoding, NULL);
+            }
+            if (chunk == NULL) {
+                goto bail;
+            }
+#else /* PY_MAJOR_VERSION >= 3 */
+            strchunk = PyString_FromStringAndSize(&buf[end], next - end);
+            if (strchunk == NULL) {
+                goto bail;
+            }
+            if (has_unicode) {
+                chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
+                Py_DECREF(strchunk);
+                if (chunk == NULL) {
+                    goto bail;
+                }
+            }
+            else {
+                chunk = strchunk;
+            }
+#endif /* PY_MAJOR_VERSION < 3 */
+        }
+        next++;
+        if (c == '"') {
+            end = next;
+            break;
+        }
+        if (next == len) {
+            raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+            goto bail;
+        }
+        c = buf[next];
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0;
+            }
+            if (c == 0) {
+                raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                JSON_UNICHR digit = (JSON_UNICHR)buf[next];
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
+                        goto bail;
+                }
+            }
+#if (PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE))
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                if (end + 6 < len && buf[next] == '\\' && buf[next+1] == 'u') {
+                   JSON_UNICHR c2 = 0;
+                   end += 6;
+                   /* Decode 4 hex digits */
+                   for (next += 2; next < end; next++) {
+                       /* Declared before the first statement of the block so
+                          that C89 compilers accept it, matching the same
+                          loop in scanstring_unicode. */
+                       JSON_UNICHR digit = (JSON_UNICHR)buf[next];
+                       c2 <<= 4;
+                       switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
+                            goto bail;
+                       }
+                   }
+                   if ((c2 & 0xfc00) != 0xdc00) {
+                       /* not a low surrogate, rewind */
+                       end -= 6;
+                       next = end;
+                   }
+                   else {
+                       c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+                   }
+               }
+           }
+#endif /* PY_MAJOR_VERSION >= 3 || Py_UNICODE_WIDE */
+        }
+        if (c > 0x7f) {
+            has_unicode = 1;
+        }
+        APPEND_OLD_CHUNK
+#if PY_MAJOR_VERSION >= 3
+        chunk = JSON_UnicodeFromChar(c);
+        if (chunk == NULL) {
+            goto bail;
+        }
+#else /* PY_MAJOR_VERSION >= 3 */
+        if (has_unicode) {
+            chunk = JSON_UnicodeFromChar(c);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        else {
+            char c_char = Py_CHARMASK(c);
+            chunk = PyString_FromStringAndSize(&c_char, 1);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+#endif
+    }
+
+    if (chunks == NULL) {
+        /* Zero or one chunk: no join is needed */
+        if (chunk != NULL)
+            rval = chunk;
+        else
+            rval = JSON_NewEmptyUnicode();
+    }
+    else {
+        APPEND_OLD_CHUNK
+        rval = join_list_string(chunks);
+        if (rval == NULL) {
+            goto bail;
+        }
+        Py_CLEAR(chunks);
+    }
+
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunk);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+/*
+ * Scan one JSON string literal out of a unicode object.
+ *
+ * pystr        - unicode object being parsed
+ * end          - index of the first character after the opening quote
+ * strict       - when non-zero, literal control characters are rejected
+ * next_end_ptr - receives the index after the closing quote, or -1 on
+ *                failure
+ *
+ * The literal is collected as a sequence of chunks (runs of plain
+ * characters and single decoded escapes) which are joined at the end.
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyUnicode pystr.
+    end is the index of the first character after the quote.
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyUnicode
+    */
+    PyObject *rval;
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+    Py_ssize_t len = PyUnicode_GetLength(pystr);
+    void *buf = PyUnicode_DATA(pystr);
+    PyObject *chunks = NULL;
+    PyObject *chunk = NULL;
+
+    if (len == end) {
+        raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+        goto bail;
+    }
+    else if (end < 0 || len < end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        JSON_UNICHR c = 0;
+        for (next = end; next < len; next++) {
+            c = PyUnicode_READ(kind, buf, next);
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg(ERR_STRING_CONTROL, pystr, next);
+                goto bail;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            APPEND_OLD_CHUNK
+#if PY_MAJOR_VERSION < 3
+            chunk = PyUnicode_FromUnicode(&((const Py_UNICODE *)buf)[end], next - end);
+#else
+            chunk = PyUnicode_Substring(pystr, end, next);
+#endif
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        next++;
+        if (c == '"') {
+            end = next; /* index just past the closing quote */
+            break;
+        }
+        if (next == len) {
+            raise_errmsg(ERR_STRING_UNTERMINATED, pystr, begin);
+            goto bail;
+        }
+        c = PyUnicode_READ(kind, buf, next);
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0; /* marks an unrecognised escape */
+            }
+            if (c == 0) {
+                raise_errmsg(ERR_STRING_ESC1, pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg(ERR_STRING_ESC4, pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
+                        goto bail;
+                }
+            }
+#if PY_MAJOR_VERSION >= 3 || defined(Py_UNICODE_WIDE)
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                JSON_UNICHR c2 = 0;
+               if (end + 6 < len &&
+                   PyUnicode_READ(kind, buf, next) == '\\' &&
+                   PyUnicode_READ(kind, buf, next + 1) == 'u') {
+                   end += 6;
+                   /* Decode 4 hex digits */
+                   for (next += 2; next < end; next++) {
+                       JSON_UNICHR digit = PyUnicode_READ(kind, buf, next);
+                       c2 <<= 4;
+                       switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg(ERR_STRING_ESC4, pystr, end - 5);
+                            goto bail;
+                       }
+                   }
+                   if ((c2 & 0xfc00) != 0xdc00) {
+                       /* not a low surrogate, rewind */
+                       end -= 6;
+                       next = end;
+                   }
+                   else {
+                       c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+                   }
+               }
+           }
+#endif
+        }
+        APPEND_OLD_CHUNK
+        chunk = JSON_UnicodeFromChar(c);
+        if (chunk == NULL) {
+            goto bail;
+        }
+    }
+
+    if (chunks == NULL) {
+        if (chunk != NULL)
+            rval = chunk; /* a single chunk needs no join */
+        else
+            rval = JSON_NewEmptyUnicode();
+    }
+    else {
+        APPEND_OLD_CHUNK
+        rval = join_list_unicode(chunks);
+        if (rval == NULL) {
+            goto bail;
+        }
+        Py_CLEAR(chunks);
+    }
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunk);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+PyDoc_STRVAR(pydoc_scanstring,
+    "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
+    "\n"
+    "Scan the string s for a JSON string. End is the index of the\n"
+    "character in s after the quote that started the JSON string.\n"
+    "Unescapes all valid JSON string escape sequences and raises ValueError\n"
+    "on attempt to decode an invalid string. If strict is False then literal\n"
+    "control characters are allowed in the string.\n"
+    "\n"
+    "Returns a tuple of the decoded string and the index of the character in s\n"
+    "after the end quote."
+);
+
+static PyObject *
+py_scanstring(PyObject* self UNUSED, PyObject *args)
+{
+    PyObject *pystr;
+    PyObject *rval;
+    Py_ssize_t end;
+    Py_ssize_t next_end = -1;
+    char *encoding = NULL;
+    int strict = 1;
+    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
+        return NULL;
+    }
+    if (encoding == NULL) {
+        encoding = DEFAULT_ENCODING;
+    }
+    if (PyUnicode_Check(pystr)) {
+        rval = scanstring_unicode(pystr, end, strict, &next_end);
+    }
+#if PY_MAJOR_VERSION < 3
+    /* Using a bytes input is unsupported for scanning in Python 3.
+       It is coerced to str in the decoder before it gets here. */
+    else if (PyString_Check(pystr)) {
+        rval = scanstring_str(pystr, end, encoding, strict, &next_end);
+    }
+#endif
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    return _build_rval_index_tuple(rval, next_end);
+}
+
+PyDoc_STRVAR(pydoc_encode_basestring_ascii,
+    "encode_basestring_ascii(basestring) -> str\n"
+    "\n"
+    "Return an ASCII-only JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
+{
+    /* METH_O: hand pystr to the escaper that matches its type; anything
+       that is neither a byte string nor a unicode string raises
+       TypeError */
+    if (PyString_Check(pystr))
+        return ascii_escape_str(pystr);
+    if (PyUnicode_Check(pystr))
+        return ascii_escape_unicode(pystr);
+
+    PyErr_Format(PyExc_TypeError,
+                 "first argument must be a string, not %.80s",
+                 Py_TYPE(pystr)->tp_name);
+    return NULL;
+}
+
+static void
+scanner_dealloc(PyObject *self)
+{
+    /* Deallocate scanner object.
+
+       The object is removed from the cyclic garbage collector's tracking
+       before any member is cleared: scanner_clear drops references, which
+       can run arbitrary code, and the collector must not traverse the
+       scanner while it is half torn down.
+       NOTE(review): this assumes the scanner type is declared with
+       Py_TPFLAGS_HAVE_GC (it provides traverse/clear slots) -- confirm
+       against the type object definition. */
+    PyObject_GC_UnTrack(self);
+    scanner_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+scanner_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    /* Report every object reference held by the scanner to visit.
+       Py_VISIT returns from this function as soon as visit gives a
+       non-zero result; 0 is returned when all members were visited. */
+    PyScannerObject *scanner = (PyScannerObject *)self;
+    assert(PyScanner_Check(self));
+
+    Py_VISIT(scanner->encoding);
+    Py_VISIT(scanner->strict);
+    Py_VISIT(scanner->object_hook);
+    Py_VISIT(scanner->pairs_hook);
+    Py_VISIT(scanner->parse_float);
+    Py_VISIT(scanner->parse_int);
+    Py_VISIT(scanner->parse_constant);
+    Py_VISIT(scanner->memo);
+    return 0;
+}
+
+static int
+scanner_clear(PyObject *self)
+{
+    /* Drop every object reference held by the scanner. Py_CLEAR sets
+       each member to NULL before releasing it, so calling this more than
+       once is harmless. Always returns 0. */
+    PyScannerObject *scanner = (PyScannerObject *)self;
+    assert(PyScanner_Check(self));
+
+    Py_CLEAR(scanner->encoding);
+    Py_CLEAR(scanner->strict);
+    Py_CLEAR(scanner->object_hook);
+    Py_CLEAR(scanner->pairs_hook);
+    Py_CLEAR(scanner->parse_float);
+    Py_CLEAR(scanner->parse_int);
+    Py_CLEAR(scanner->parse_constant);
+    Py_CLEAR(scanner->memo);
+    return 0;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON object from PyString pystr.
+    idx is the index of the first character after the opening curly brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing curly brace. It is only written on success.
+
+    Returns a new PyObject (usually a dict, but object_hook or
+    object_pairs_hook can change that), or NULL on failure.
+
+    When s->pairs_hook is not None the members are collected as a list of
+    (key, value) tuples, the result is whatever pairs_hook returns for that
+    list, and object_hook is not called.
+    Every key is looked up in s->memo; a key equal to one already stored
+    there is replaced by the stored object, otherwise it is added.
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; /* index of the last character */
+    PyObject *rval = NULL;
+    PyObject *pairs = NULL;
+    PyObject *item;
+    PyObject *key = NULL;
+    PyObject *val = NULL;
+    char *encoding = JSON_ASCII_AS_STRING(s->encoding);
+    int strict = PyObject_IsTrue(s->strict); /* NOTE(review): a -1 error result is not checked */
+    int has_pairs_hook = (s->pairs_hook != Py_None);
+    int did_parse = 0; /* set once a complete key/value pair has been read */
+    Py_ssize_t next_idx;
+    if (has_pairs_hook) {
+        pairs = PyList_New(0);
+        if (pairs == NULL)
+            return NULL;
+    }
+    else {
+        rval = PyDict_New();
+        if (rval == NULL)
+            return NULL;
+    }
+
+    /* skip whitespace after { */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the object is non-empty */
+    if (idx <= end_idx && str[idx] != '}') {
+       int trailing_delimiter = 0; /* 1 while a ',' was consumed and no member has followed it */
+        while (idx <= end_idx) {
+            PyObject *memokey;
+           trailing_delimiter = 0;
+
+            /* read key */
+            if (str[idx] != '"') {
+                raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+                goto bail;
+            }
+            key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
+            if (key == NULL)
+                goto bail;
+            memokey = PyDict_GetItem(s->memo, key); /* borrowed reference, NULL if absent */
+            if (memokey != NULL) {
+                Py_INCREF(memokey);
+                Py_DECREF(key);
+                key = memokey;
+            }
+            else { /* first occurrence: remember this key object */
+                if (PyDict_SetItem(s->memo, key, key) < 0)
+                    goto bail;
+            }
+            idx = next_idx;
+
+            /* skip whitespace between key and : delimiter, read :, skip whitespace */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+            if (idx > end_idx || str[idx] != ':') {
+                raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* read any JSON data type */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (has_pairs_hook) {
+                item = PyTuple_Pack(2, key, val); /* the tuple takes its own references */
+                if (item == NULL)
+                    goto bail;
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+                if (PyList_Append(pairs, item) == -1) {
+                    Py_DECREF(item);
+                    goto bail;
+                }
+                Py_DECREF(item);
+            }
+            else {
+                if (PyDict_SetItem(rval, key, val) < 0)
+                    goto bail;
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+            }
+            idx = next_idx;
+
+            /* skip whitespace before } or , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the object is closed or we didn't get the , delimiter */
+           did_parse = 1;
+            if (idx > end_idx) break;
+            if (str[idx] == '}') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , delimiter */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+           trailing_delimiter = 1;
+        }
+       if (trailing_delimiter) { /* input ended right after a ',' */
+           raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+           goto bail;
+       }
+    }
+    /* verify that idx <= end_idx and that str[idx] is the closing '}' */
+    if (idx > end_idx || str[idx] != '}') {
+       if (did_parse) {
+           raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+       } else {
+           raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+       }
+        goto bail;
+    }
+
+    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+    if (s->pairs_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(pairs);
+        *next_idx_ptr = idx + 1;
+        return val;
+    }
+
+    /* if object_hook is not None: rval = object_hook(rval) */
+    if (s->object_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(rval);
+        rval = val;
+        val = NULL; /* ownership moved to rval */
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail: /* release whatever is still owned; unused slots are NULL */
+    Py_XDECREF(rval);
+    Py_XDECREF(key);
+    Py_XDECREF(val);
+    Py_XDECREF(pairs);
+    return NULL;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+static PyObject *
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON object from PyUnicode pystr.
+    idx is the index of the first character after the opening curly brace.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing curly brace. It is only written on success.
+
+    Returns a new PyObject (usually a dict, but object_hook or
+    object_pairs_hook can change that), or NULL on failure.
+
+    When s->pairs_hook is not None the members are collected as a list of
+    (key, value) tuples, the result is whatever pairs_hook returns for that
+    list, and object_hook is not called.
+    Every key is looked up in s->memo; a key equal to one already stored
+    there is replaced by the stored object, otherwise it is added.
+    */
+    void *str = PyUnicode_DATA(pystr);
+    Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; /* index of the last character */
+    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+    PyObject *rval = NULL;
+    PyObject *pairs = NULL;
+    PyObject *item;
+    PyObject *key = NULL;
+    PyObject *val = NULL;
+    int strict = PyObject_IsTrue(s->strict); /* NOTE(review): a -1 error result is not checked */
+    int has_pairs_hook = (s->pairs_hook != Py_None);
+    int did_parse = 0; /* set once a complete key/value pair has been read */
+    Py_ssize_t next_idx;
+
+    if (has_pairs_hook) {
+        pairs = PyList_New(0);
+        if (pairs == NULL)
+            return NULL;
+    }
+    else {
+        rval = PyDict_New();
+        if (rval == NULL)
+            return NULL;
+    }
+
+    /* skip whitespace after { */
+    while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+
+    /* only loop if the object is non-empty */
+    if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
+       int trailing_delimiter = 0; /* 1 while a ',' was consumed and no member has followed it */
+        while (idx <= end_idx) {
+            PyObject *memokey;
+           trailing_delimiter = 0;
+
+            /* read key */
+            if (PyUnicode_READ(kind, str, idx) != '"') {
+                raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+                goto bail;
+            }
+            key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
+            if (key == NULL)
+                goto bail;
+            memokey = PyDict_GetItem(s->memo, key); /* borrowed reference, NULL if absent */
+            if (memokey != NULL) {
+                Py_INCREF(memokey);
+                Py_DECREF(key);
+                key = memokey;
+            }
+            else { /* first occurrence: remember this key object */
+                if (PyDict_SetItem(s->memo, key, key) < 0)
+                    goto bail;
+            }
+            idx = next_idx;
+
+            /* skip whitespace between key and : delimiter, read :, skip
+               whitespace */
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
+                raise_errmsg(ERR_OBJECT_PROPERTY_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+
+            /* read any JSON term */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL)
+                goto bail;
+
+            if (has_pairs_hook) {
+                item = PyTuple_Pack(2, key, val); /* the tuple takes its own references */
+                if (item == NULL)
+                    goto bail;
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+                if (PyList_Append(pairs, item) == -1) {
+                    Py_DECREF(item);
+                    goto bail;
+                }
+                Py_DECREF(item);
+            }
+            else {
+                if (PyDict_SetItem(rval, key, val) < 0)
+                    goto bail;
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+            }
+            idx = next_idx;
+
+            /* skip whitespace before } or , */
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+
+            /* bail if the object is closed or we didn't get the ,
+               delimiter */
+           did_parse = 1;
+            if (idx > end_idx) break;
+            if (PyUnicode_READ(kind, str, idx) == '}') {
+                break;
+            }
+            else if (PyUnicode_READ(kind, str, idx) != ',') {
+                raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , delimiter */
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+           trailing_delimiter = 1;
+        }
+       if (trailing_delimiter) { /* input ended right after a ',' */
+           raise_errmsg(ERR_OBJECT_PROPERTY, pystr, idx);
+           goto bail;
+       }
+    }
+
+    /* verify that idx <= end_idx and that str[idx] is the closing '}' */
+    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
+       if (did_parse) {
+           raise_errmsg(ERR_OBJECT_DELIMITER, pystr, idx);
+       } else {
+           raise_errmsg(ERR_OBJECT_PROPERTY_FIRST, pystr, idx);
+       }
+        goto bail;
+    }
+
+    /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+    if (s->pairs_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(pairs);
+        *next_idx_ptr = idx + 1;
+        return val;
+    }
+
+    /* if object_hook is not None: rval = object_hook(rval) */
+    if (s->object_hook != Py_None) {
+        val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+        if (val == NULL)
+            goto bail;
+        Py_DECREF(rval);
+        rval = val;
+        val = NULL; /* ownership moved to rval */
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail: /* release whatever is still owned; unused slots are NULL */
+    Py_XDECREF(rval);
+    Py_XDECREF(key);
+    Py_XDECREF(val);
+    Py_XDECREF(pairs);
+    return NULL;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON array from PyString pystr.
+    idx is the index of the first character after the opening bracket.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing bracket. It is only written on success.
+
+    Returns a new PyList, or NULL on failure (an element failed to scan,
+    a delimiter was missing, or the input ended before the closing ']').
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; /* index of the last character */
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && str[idx] != ']') {
+       int trailing_delimiter = 0; /* 1 while a ',' was consumed and no element has followed it */
+        while (idx <= end_idx) {
+           trailing_delimiter = 0;
+            /* read any JSON term; next_idx receives the index just past it */
+            val = scan_once_str(s, pystr, idx, &next_idx);
+            if (val == NULL) {
+                goto bail;
+            }
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val); /* the list holds its own reference now */
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (str[idx] == ']') {
+                break;
+            }
+            else if (str[idx] != ',') {
+                raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+           trailing_delimiter = 1;
+        }
+       if (trailing_delimiter) { /* input ended right after a ',' */
+           raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+           goto bail;
+       }
+    }
+
+    /* verify that idx <= end_idx and that str[idx] is the closing ']' */
+    if (idx > end_idx || str[idx] != ']') {
+       if (PyList_GET_SIZE(rval)) {
+           raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+       } else {
+           raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+       }
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON array from PyUnicode pystr.
+    idx is the index of the first character after the opening bracket.
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the closing bracket. It is only written on success.
+
+    Returns a new PyList, or NULL on failure (an element failed to scan,
+    a delimiter was missing, or the input ended before the closing ']').
+    */
+    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+    void *str = PyUnicode_DATA(pystr);
+    Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; /* index of the last character */
+    PyObject *val = NULL;
+    PyObject *rval = PyList_New(0);
+    Py_ssize_t next_idx;
+    if (rval == NULL)
+        return NULL;
+
+    /* skip whitespace after [ */
+    while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+
+    /* only loop if the array is non-empty */
+    if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
+       int trailing_delimiter = 0; /* 1 while a ',' was consumed and no element has followed it */
+        while (idx <= end_idx) {
+           trailing_delimiter = 0;
+            /* read any JSON term; next_idx receives the index just past it */
+            val = scan_once_unicode(s, pystr, idx, &next_idx);
+            if (val == NULL) {
+                goto bail;
+            }
+
+            if (PyList_Append(rval, val) == -1)
+                goto bail;
+
+            Py_CLEAR(val); /* the list holds its own reference now */
+            idx = next_idx;
+
+            /* skip whitespace between term and , */
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+
+            /* bail if the array is closed or we didn't get the , delimiter */
+            if (idx > end_idx) break;
+            if (PyUnicode_READ(kind, str, idx) == ']') {
+                break;
+            }
+            else if (PyUnicode_READ(kind, str, idx) != ',') {
+                raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+                goto bail;
+            }
+            idx++;
+
+            /* skip whitespace after , */
+            while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
+           trailing_delimiter = 1;
+        }
+       if (trailing_delimiter) { /* input ended right after a ',' */
+           raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+           goto bail;
+       }
+    }
+
+    /* verify that idx <= end_idx and that str[idx] is the closing ']' */
+    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
+       if (PyList_GET_SIZE(rval)) {
+           raise_errmsg(ERR_ARRAY_DELIMITER, pystr, idx);
+       } else {
+           raise_errmsg(ERR_ARRAY_VALUE_FIRST, pystr, idx);
+       }
+        goto bail;
+    }
+    *next_idx_ptr = idx + 1;
+    return rval;
+bail:
+    Py_XDECREF(val);
+    Py_DECREF(rval);
+    return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Hand a named JSON constant to the scanner's parse_constant callable.
+    constant is the text that was matched
+        ("NaN", "Infinity", "-Infinity").
+    idx is the index of its first character
+    *next_idx_ptr receives idx plus the length of the constant; it is
+        written whenever JSON_InternFromString succeeded, whether or not
+        the call to parse_constant did.
+
+    Returns the result of s->parse_constant(constant), or NULL if creating
+        the name object or calling parse_constant fails.
+    */
+    PyObject *result;
+    PyObject *name = JSON_InternFromString(constant);
+    if (name == NULL)
+        return NULL;
+
+    /* result = parse_constant(constant) */
+    result = PyObject_CallFunctionObjArgs(s->parse_constant, name, NULL);
+
+    /* read the length before giving up our reference to name */
+    *next_idx_ptr = idx + JSON_Intern_GET_SIZE(name);
+    Py_DECREF(name);
+    return result;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON number from PyString pystr.
+    start is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number. It is not written when no number is found at start.
+
+    Returns a new PyObject representation of that number:
+        PyInt, PyLong, or PyFloat.
+        May return other types if parse_int or parse_float are set
+    Returns NULL if no number starts at start (reported through
+        raise_errmsg) or if converting the matched text fails.
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        if (idx >= end_idx) {
+            raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+            return NULL;
+        }
+        idx++;
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+
+        /* save the index of the 'e' or 'E' just in case we need to backtrack */
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float. if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            /* fast path: convert the copied text directly to a C double */
+            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
+                                             NULL, NULL);
+            if (d == -1.0 && PyErr_Occurred()) {
+                /* release the temporary copy before propagating the
+                   error; returning without this leaked numstr */
+                Py_DECREF(numstr);
+                return NULL;
+            }
+            rval = PyFloat_FromDouble(d);
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr)
+{
+    /* Read a JSON number from PyUnicode pystr.
+    start is the index of the first character of the number
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the number. It is not written when no number is found at start or
+        when the matched text cannot be copied.
+
+    Returns a new PyObject representation of that number:
+        PyInt, PyLong, or PyFloat.
+        May return other types if parse_int or parse_float are set
+    Returns NULL if no number starts at start (reported through
+        raise_errmsg) or if converting the matched text fails.
+    */
+    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+    void *str = PyUnicode_DATA(pystr);
+    Py_ssize_t end_idx = PyUnicode_GetLength(pystr) - 1; /* index of the last character */
+    Py_ssize_t idx = start;
+    int is_float = 0; /* set when a fraction or an exponent was matched */
+    JSON_UNICHR c;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (PyUnicode_READ(kind, str, idx) == '-') {
+        if (idx >= end_idx) {
+            raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+            return NULL;
+        }
+        idx++;
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    c = PyUnicode_READ(kind, str, idx);
+    if (c == '0') {
+        /* if it starts with 0 we only expect one integer digit */
+        idx++;
+    }
+    else if (IS_DIGIT(c)) {
+        idx++;
+        while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) {
+            idx++;
+        }
+    }
+    else {
+        /* no integer digits, error */
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx &&
+        PyUnicode_READ(kind, str, idx) == '.' &&
+        IS_DIGIT(PyUnicode_READ(kind, str, idx + 1))) {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx &&
+        (PyUnicode_READ(kind, str, idx) == 'e' ||
+            PyUnicode_READ(kind, str, idx) == 'E')) {
+        Py_ssize_t e_start = idx; /* index of the 'e'/'E', kept for backtracking */
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx &&
+            (PyUnicode_READ(kind, str, idx) == '-' ||
+                PyUnicode_READ(kind, str, idx) == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && IS_DIGIT(PyUnicode_READ(kind, str, idx))) idx++;
+
+        /* if we got a digit, then parse as float. if not, backtrack */
+        if (IS_DIGIT(PyUnicode_READ(kind, str, idx - 1))) {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+#if PY_MAJOR_VERSION >= 3
+    numstr = PyUnicode_Substring(pystr, start, idx);
+#else
+    numstr = PyUnicode_FromUnicode(&((Py_UNICODE *)str)[start], idx - start);
+#endif
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+#if PY_MAJOR_VERSION >= 3
+            rval = PyFloat_FromString(numstr);
+#else
+            rval = PyFloat_FromString(numstr, NULL);
+#endif
+        }
+    }
+    else {
+        /* no fast path for unicode -> int, just call */
+        rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx; /* written even when rval is NULL */
+    return rval;
+}
+
+#if PY_MAJOR_VERSION < 3
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyString pystr.
+    idx is the index of the first character of the term
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the term.
+
+    The first character selects the reader: '"' string, '{' object,
+    '[' array, or one of the literals null, true, false, NaN, Infinity,
+    -Infinity. Anything else, including a literal that does not match in
+    full, is handed to _match_number_str.
+
+    Returns a new PyObject representation of the term, or NULL on failure.
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t length = PyString_GET_SIZE(pystr);
+    PyObject *rval = NULL;
+    int fallthrough = 0; /* set when the term must be tried as a number */
+    if (idx >= length) {
+       raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        return NULL;
+    }
+    switch (str[idx]) {
+        case '"':
+            /* string */
+            rval = scanstring_str(pystr, idx + 1,
+                JSON_ASCII_AS_STRING(s->encoding),
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+            break;
+        case '{':
+            /* object; nesting depth is bounded by the interpreter's
+               recursion guard */
+            if (Py_EnterRecursiveCall(" while decoding a JSON object "
+                                      "from a string"))
+                return NULL;
+            rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
+            Py_LeaveRecursiveCall();
+            break;
+        case '[':
+            /* array; nesting depth is bounded by the interpreter's
+               recursion guard */
+            if (Py_EnterRecursiveCall(" while decoding a JSON array "
+                                      "from a string"))
+                return NULL;
+            rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
+            Py_LeaveRecursiveCall();
+            break;
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                rval = Py_None;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                rval = Py_True;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                rval = Py_False;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+                rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+                rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        case '-':
+            /* -Infinity; any other term starting with '-' is tried as a
+               number below */
+            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+                rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        default:
+            fallthrough = 1;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    if (fallthrough)
+        rval = _match_number_str(s, pystr, idx, next_idx_ptr);
+    return rval;
+}
+#endif /* PY_MAJOR_VERSION < 3 */
+
+
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyUnicode pystr.
+    idx is the index of the first character of the term
+    *next_idx_ptr is a return-by-reference index to the first character after
+        the term.
+
+    Returns a new PyObject representation of the term, or NULL on failure.
+
+    idx is supplied by the Python-level caller (see scanner_call), so it is
+    checked against both ends of the string before any character is read;
+    a negative idx would otherwise index memory in front of the buffer.
+    */
+    PY2_UNUSED int kind = PyUnicode_KIND(pystr);
+    void *str = PyUnicode_DATA(pystr);
+    Py_ssize_t length = PyUnicode_GetLength(pystr);
+    PyObject *rval = NULL;
+    int fallthrough = 0;
+    if (idx < 0 || idx >= length) {
+        raise_errmsg(ERR_EXPECTING_VALUE, pystr, idx);
+        return NULL;
+    }
+    /* Dispatch on the first character; the named constants are matched
+    character by character, each guarded by a bounds check on the last
+    character that will be read. */
+    switch (PyUnicode_READ(kind, str, idx)) {
+        case '"':
+            /* string */
+            rval = scanstring_unicode(pystr, idx + 1,
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+            break;
+        case '{':
+            /* object */
+            if (Py_EnterRecursiveCall(" while decoding a JSON object "
+                                      "from a unicode string"))
+                return NULL;
+            rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
+            Py_LeaveRecursiveCall();
+            break;
+        case '[':
+            /* array */
+            if (Py_EnterRecursiveCall(" while decoding a JSON array "
+                                      "from a unicode string"))
+                return NULL;
+            rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
+            Py_LeaveRecursiveCall();
+            break;
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'u' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'l' &&
+                PyUnicode_READ(kind, str, idx + 3) == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                rval = Py_None;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'r' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'u' &&
+                PyUnicode_READ(kind, str, idx + 3) == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                rval = Py_True;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'l' &&
+                PyUnicode_READ(kind, str, idx + 3) == 's' &&
+                PyUnicode_READ(kind, str, idx + 4) == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                rval = Py_False;
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'a' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'N') {
+                rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'n' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'f' &&
+                PyUnicode_READ(kind, str, idx + 3) == 'i' &&
+                PyUnicode_READ(kind, str, idx + 4) == 'n' &&
+                PyUnicode_READ(kind, str, idx + 5) == 'i' &&
+                PyUnicode_READ(kind, str, idx + 6) == 't' &&
+                PyUnicode_READ(kind, str, idx + 7) == 'y') {
+                rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        case '-':
+            /* -Infinity */
+            if ((idx + 8 < length) &&
+                PyUnicode_READ(kind, str, idx + 1) == 'I' &&
+                PyUnicode_READ(kind, str, idx + 2) == 'n' &&
+                PyUnicode_READ(kind, str, idx + 3) == 'f' &&
+                PyUnicode_READ(kind, str, idx + 4) == 'i' &&
+                PyUnicode_READ(kind, str, idx + 5) == 'n' &&
+                PyUnicode_READ(kind, str, idx + 6) == 'i' &&
+                PyUnicode_READ(kind, str, idx + 7) == 't' &&
+                PyUnicode_READ(kind, str, idx + 8) == 'y') {
+                rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            else
+                fallthrough = 1;
+            break;
+        default:
+            fallthrough = 1;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    if (fallthrough)
+        rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
+    return rval;
+}
+
+static PyObject *
+scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Python callable interface to scan_once_{str,unicode} */
+    PyObject *pystr;
+    PyObject *rval;
+    Py_ssize_t idx;
+    Py_ssize_t next_idx = -1;
+    static char *kwlist[] = {"string", "idx", NULL};
+    PyScannerObject *s;
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
+        return NULL;
+
+    if (PyUnicode_Check(pystr)) {
+        rval = scan_once_unicode(s, pystr, idx, &next_idx);
+    }
+#if PY_MAJOR_VERSION < 3
+    else if (PyString_Check(pystr)) {
+        rval = scan_once_str(s, pystr, idx, &next_idx);
+    }
+#endif /* PY_MAJOR_VERSION < 3 */
+    else {
+        PyErr_Format(PyExc_TypeError,
+                 "first argument must be a string, not %.80s",
+                 Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    PyDict_Clear(s->memo);
+    return _build_rval_index_tuple(rval, next_idx);
+}
+
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* Allocate a Scanner through the type's tp_alloc and set its option
+    slots to NULL. args and kwds are not used here. Returns NULL when the
+    allocation fails. */
+    PyScannerObject *scanner = (PyScannerObject *)type->tp_alloc(type, 0);
+    if (scanner == NULL)
+        return NULL;
+
+    scanner->encoding = NULL;
+    scanner->strict = NULL;
+    scanner->object_hook = NULL;
+    scanner->pairs_hook = NULL;
+    scanner->parse_float = NULL;
+    scanner->parse_int = NULL;
+    scanner->parse_constant = NULL;
+    return (PyObject *)scanner;
+}
+
+static PyObject *
+JSON_ParseEncoding(PyObject *encoding)
+{
+    /* Normalize an `encoding` option into a new reference.
+
+    NULL       -> NULL (passed straight through, no exception is set here)
+    None       -> interned DEFAULT_ENCODING
+    unicode    -> encoded string (Python 2 only)
+    anything accepted by JSON_ASCII_Check -> the same object, INCREF'd
+    otherwise  -> NULL with TypeError set
+    */
+    PyObject *parsed = NULL;
+
+    if (encoding == NULL) {
+        /* nothing to parse; the caller's lookup already failed */
+    }
+    else if (encoding == Py_None) {
+        parsed = JSON_InternFromString(DEFAULT_ENCODING);
+    }
+#if PY_MAJOR_VERSION < 3
+    else if (PyUnicode_Check(encoding)) {
+        parsed = PyUnicode_AsEncodedString(encoding, NULL, NULL);
+    }
+#endif
+    else if (JSON_ASCII_Check(encoding)) {
+        Py_INCREF(encoding);
+        parsed = encoding;
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError, "encoding must be a string");
+    }
+    return parsed;
+}
+
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Initialize Scanner object from a single `context` argument.
+
+    The scanner copies its settings from attributes of the context object:
+    encoding, strict, object_hook, object_pairs_hook, parse_float,
+    parse_int and parse_constant. A memo dict is created if the object does
+    not have one yet.
+
+    Returns 0 on success, -1 on failure. On failure every attribute slot
+    filled so far is cleared again (the memo dict is left in place).
+    */
+    PyObject *ctx;
+    static char *kwlist[] = {"context", NULL};
+    PyScannerObject *s;
+    PyObject *encoding;
+
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
+        return -1;
+
+    if (s->memo == NULL) {
+        s->memo = PyDict_New();
+        if (s->memo == NULL)
+            goto bail;
+    }
+
+    /* JSON_ASCII_AS_STRING is used on encoding */
+    /* A failed attribute lookup yields NULL, which JSON_ParseEncoding passes
+    through, so one NULL check below covers both failure modes. */
+    encoding = PyObject_GetAttrString(ctx, "encoding");
+    s->encoding = JSON_ParseEncoding(encoding);
+    Py_XDECREF(encoding);
+    if (s->encoding == NULL)
+        goto bail;
+
+    /* Each lookup returns a new reference that the scanner keeps; a missing
+    attribute aborts initialization. */
+    s->strict = PyObject_GetAttrString(ctx, "strict");
+    if (s->strict == NULL)
+        goto bail;
+    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
+    if (s->object_hook == NULL)
+        goto bail;
+    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
+    if (s->pairs_hook == NULL)
+        goto bail;
+    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
+    if (s->parse_float == NULL)
+        goto bail;
+    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
+    if (s->parse_int == NULL)
+        goto bail;
+    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
+    if (s->parse_constant == NULL)
+        goto bail;
+
+    return 0;
+
+bail:
+    /* Py_CLEAR tolerates slots that are still NULL. */
+    Py_CLEAR(s->encoding);
+    Py_CLEAR(s->strict);
+    Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->pairs_hook);
+    Py_CLEAR(s->parse_float);
+    Py_CLEAR(s->parse_int);
+    Py_CLEAR(s->parse_constant);
+    return -1;
+}
+
+PyDoc_STRVAR(scanner_doc, "JSON scanner object");
+
+/* Type object for the Scanner; moduleinit publishes it on the module as
+   "make_scanner". Slots are filled positionally, so the order of the
+   entries below must not change. Note that moduleinit assigns
+   PyType_GenericNew to tp_new before calling PyType_Ready, replacing the
+   scanner_new entry given here. The slots left at 0 with a commented-out
+   name are presumably meant to be inherited when the type is readied. */
+static
+PyTypeObject PyScannerType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "simplejson._speedups.Scanner",       /* tp_name */
+    sizeof(PyScannerObject), /* tp_basicsize */
+    0,                    /* tp_itemsize */
+    scanner_dealloc, /* tp_dealloc */
+    0,                    /* tp_print */
+    0,                    /* tp_getattr */
+    0,                    /* tp_setattr */
+    0,                    /* tp_compare */
+    0,                    /* tp_repr */
+    0,                    /* tp_as_number */
+    0,                    /* tp_as_sequence */
+    0,                    /* tp_as_mapping */
+    0,                    /* tp_hash */
+    scanner_call,         /* tp_call */
+    0,                    /* tp_str */
+    0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
+    0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
+    0,                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
+    scanner_doc,          /* tp_doc */
+    scanner_traverse,                    /* tp_traverse */
+    scanner_clear,                    /* tp_clear */
+    0,                    /* tp_richcompare */
+    0,                    /* tp_weaklistoffset */
+    0,                    /* tp_iter */
+    0,                    /* tp_iternext */
+    0,                    /* tp_methods */
+    scanner_members,                    /* tp_members */
+    0,                    /* tp_getset */
+    0,                    /* tp_base */
+    0,                    /* tp_dict */
+    0,                    /* tp_descr_get */
+    0,                    /* tp_descr_set */
+    0,                    /* tp_dictoffset */
+    scanner_init,                    /* tp_init */
+    0,/* PyType_GenericAlloc, */        /* tp_alloc */
+    scanner_new,          /* tp_new */
+    0,/* PyObject_GC_Del, */              /* tp_free */
+};
+
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* Allocate an Encoder through the type's tp_alloc and set its object
+    slots to NULL. args and kwds are not used here. Returns NULL when the
+    allocation fails. */
+    PyEncoderObject *enc = (PyEncoderObject *)type->tp_alloc(type, 0);
+    if (enc == NULL)
+        return NULL;
+
+    enc->markers = NULL;
+    enc->defaultfn = NULL;
+    enc->encoder = NULL;
+    enc->encoding = NULL;
+    enc->indent = NULL;
+    enc->key_separator = NULL;
+    enc->item_separator = NULL;
+    enc->key_memo = NULL;
+    enc->sort_keys = NULL;
+    enc->item_sort_key = NULL;
+    enc->item_sort_kw = NULL;
+    enc->Decimal = NULL;
+    return (PyObject *)enc;
+}
+
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* Initialize Encoder object from the make_encoder arguments (all 19 are
+    required; see kwlist for their order).
+
+    Returns 0 on success, -1 with an exception set on failure.
+
+    Everything that can fail is computed into locals first. The argument
+    objects are borrowed references, so they are only stored on the object
+    (and INCREF'd) once nothing can fail any more; an error return therefore
+    never leaves the object holding pointers it does not own, which
+    encoder_clear would later DECREF.
+    */
+    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "encoding", "for_json", "ignore_nan", "Decimal", NULL};
+
+    PyEncoderObject *s;
+    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
+    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
+    PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
+    PyObject *bigint_as_string, *item_sort_key, *encoding, *for_json;
+    PyObject *ignore_nan, *Decimal;
+    PyObject *parsed_encoding = NULL;   /* owned */
+    PyObject *item_sort_kw = NULL;      /* owned */
+    int is_skipkeys, is_ignore_nan, is_allow_nan, is_use_decimal;
+    int is_namedtuple, is_tuple, is_bigint, is_for_json, is_sort_keys;
+
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOOOOO:make_encoder", kwlist,
+        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
+        &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
+        &namedtuple_as_object, &tuple_as_array, &bigint_as_string,
+        &item_sort_key, &encoding, &for_json, &ignore_nan, &Decimal))
+        return -1;
+
+    parsed_encoding = JSON_ParseEncoding(encoding);
+    if (parsed_encoding == NULL)
+        goto bail;
+
+    /* PyObject_IsTrue returns -1 when the truth test itself raises; treat
+    that as an initialization error instead of storing -1 as a flag. */
+    if ((is_skipkeys = PyObject_IsTrue(skipkeys)) < 0 ||
+        (is_ignore_nan = PyObject_IsTrue(ignore_nan)) < 0 ||
+        (is_allow_nan = PyObject_IsTrue(allow_nan)) < 0 ||
+        (is_use_decimal = PyObject_IsTrue(use_decimal)) < 0 ||
+        (is_namedtuple = PyObject_IsTrue(namedtuple_as_object)) < 0 ||
+        (is_tuple = PyObject_IsTrue(tuple_as_array)) < 0 ||
+        (is_bigint = PyObject_IsTrue(bigint_as_string)) < 0 ||
+        (is_for_json = PyObject_IsTrue(for_json)) < 0 ||
+        (is_sort_keys = PyObject_IsTrue(sort_keys)) < 0)
+        goto bail;
+
+    if (item_sort_key != Py_None) {
+        if (!PyCallable_Check(item_sort_key)) {
+            PyErr_SetString(PyExc_TypeError, "item_sort_key must be None or callable");
+            goto bail;
+        }
+    }
+    else if (is_sort_keys) {
+        /* sort_keys without an explicit key function sorts the (key, value)
+        items by key: operator.itemgetter(0), created once and cached. */
+        static PyObject *itemgetter0 = NULL;
+        if (!itemgetter0) {
+            PyObject *operator_mod = PyImport_ImportModule("operator");
+            if (!operator_mod)
+                goto bail;
+            itemgetter0 = PyObject_CallMethod(operator_mod, "itemgetter", "i", 0);
+            Py_DECREF(operator_mod);
+        }
+        item_sort_key = itemgetter0;
+        if (!item_sort_key)
+            goto bail;
+    }
+
+    /* item_sort_kw is None when no sorting was requested, otherwise a dict
+    {"key": item_sort_key}. */
+    if (item_sort_key == Py_None) {
+        Py_INCREF(Py_None);
+        item_sort_kw = Py_None;
+    }
+    else {
+        item_sort_kw = PyDict_New();
+        if (item_sort_kw == NULL)
+            goto bail;
+        if (PyDict_SetItemString(item_sort_kw, "key", item_sort_key))
+            goto bail;
+    }
+
+    /* Nothing below can fail: commit the state to the object. */
+    s->markers = markers;
+    s->defaultfn = defaultfn;
+    s->encoder = encoder;
+    s->encoding = parsed_encoding;      /* ownership transferred */
+    s->indent = indent;
+    s->key_separator = key_separator;
+    s->item_separator = item_separator;
+    s->skipkeys_bool = skipkeys;
+    s->skipkeys = is_skipkeys;
+    s->key_memo = key_memo;
+    /* Call the C string encoder directly when `encoder` is exactly the
+    built-in py_encode_basestring_ascii. */
+    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+    s->allow_or_ignore_nan = (
+        (is_ignore_nan ? JSON_IGNORE_NAN : 0) |
+        (is_allow_nan ? JSON_ALLOW_NAN : 0));
+    s->use_decimal = is_use_decimal;
+    s->namedtuple_as_object = is_namedtuple;
+    s->tuple_as_array = is_tuple;
+    s->bigint_as_string = is_bigint;
+    s->item_sort_kw = item_sort_kw;     /* ownership transferred */
+    s->sort_keys = sort_keys;
+    s->item_sort_key = item_sort_key;
+    s->Decimal = Decimal;
+    s->for_json = is_for_json;
+
+    Py_INCREF(s->markers);
+    Py_INCREF(s->defaultfn);
+    Py_INCREF(s->encoder);
+    Py_INCREF(s->indent);
+    Py_INCREF(s->key_separator);
+    Py_INCREF(s->item_separator);
+    Py_INCREF(s->key_memo);
+    Py_INCREF(s->skipkeys_bool);
+    Py_INCREF(s->sort_keys);
+    Py_INCREF(s->item_sort_key);
+    Py_INCREF(s->Decimal);
+    return 0;
+
+bail:
+    /* Only the two locally created objects are owned at this point. */
+    Py_XDECREF(parsed_encoding);
+    Py_XDECREF(item_sort_kw);
+    return -1;
+}
+
+static PyObject *
+encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* tp_call for Encoder: _iterencode(obj, _current_indent_level).
+
+    Encodes obj into a JSON_Accu via encoder_listencode_obj and returns the
+    accumulated chunks through JSON_Accu_FinishAsList. Returns NULL when
+    argument parsing, accumulator setup or encoding fails.
+    */
+    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
+    PyEncoderObject *enc;
+    PyObject *target;
+    Py_ssize_t level;
+    JSON_Accu chunks;
+
+    assert(PyEncoder_Check(self));
+    enc = (PyEncoderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
+        &target, _convertPyInt_AsSsize_t, &level))
+        return NULL;
+    if (JSON_Accu_Init(&chunks))
+        return NULL;
+
+    if (encoder_listencode_obj(enc, &chunks, target, level) == 0)
+        return JSON_Accu_FinishAsList(&chunks);
+
+    /* Encoding failed: throw away whatever was accumulated so far. */
+    JSON_Accu_Destroy(&chunks);
+    return NULL;
+}
+
+static PyObject *
+_encoded_const(PyObject *obj)
+{
+    /* Return the JSON string representation of None, True, False.
+
+    The three strings are interned once and cached in function-level
+    statics; each call returns a new reference. Returns NULL if the string
+    could not be created, or with ValueError set if obj is none of the
+    three constants.
+    */
+    if (obj == Py_None) {
+        static PyObject *s_null = NULL;
+        if (s_null == NULL) {
+            s_null = JSON_InternFromString("null");
+        }
+        /* XINCREF: interning may have failed and left the cache NULL */
+        Py_XINCREF(s_null);
+        return s_null;
+    }
+    else if (obj == Py_True) {
+        static PyObject *s_true = NULL;
+        if (s_true == NULL) {
+            s_true = JSON_InternFromString("true");
+        }
+        Py_XINCREF(s_true);
+        return s_true;
+    }
+    else if (obj == Py_False) {
+        static PyObject *s_false = NULL;
+        if (s_false == NULL) {
+            s_false = JSON_InternFromString("false");
+        }
+        Py_XINCREF(s_false);
+        return s_false;
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "not a const");
+        return NULL;
+    }
+}
+
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a PyFloat.
+
+    Finite values are rendered with PyObject_Repr. For non-finite values:
+      - neither allow_nan nor ignore_nan set: ValueError, returns NULL
+      - JSON_IGNORE_NAN set: the encoding of None
+      - otherwise (JSON_ALLOW_NAN): "Infinity", "-Infinity" or "NaN",
+        interned once and cached in the statics below
+    */
+    static PyObject *sInfinity = NULL;
+    static PyObject *sNegInfinity = NULL;
+    static PyObject *sNaN = NULL;
+    PyObject **cache;
+    const char *literal;
+    double value = PyFloat_AS_DOUBLE(obj);
+
+    /* Use a better float format here? */
+    if (Py_IS_FINITE(value))
+        return PyObject_Repr(obj);
+
+    if (!s->allow_or_ignore_nan) {
+        PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
+        return NULL;
+    }
+    if (s->allow_or_ignore_nan & JSON_IGNORE_NAN)
+        return _encoded_const(Py_None);
+
+    /* JSON_ALLOW_NAN is set: pick the cache slot for this value. NaN
+    compares neither greater nor less than zero, so it takes the last arm. */
+    if (value > 0) {
+        cache = &sInfinity;
+        literal = "Infinity";
+    }
+    else if (value < 0) {
+        cache = &sNegInfinity;
+        literal = "-Infinity";
+    }
+    else {
+        cache = &sNaN;
+        literal = "NaN";
+    }
+    if (*cache == NULL)
+        *cache = JSON_InternFromString(literal);
+    if (*cache)
+        Py_INCREF(*cache);
+    return *cache;
+}
+
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a string: through the user supplied
+    encoder callable, unless fast_encode says that callable is the built-in
+    C implementation, in which case it is invoked directly. */
+    if (!s->fast_encode)
+        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
+    return py_encode_basestring_ascii(NULL, obj);
+}
+
+static int
+_steal_accumulate(JSON_Accu *accu, PyObject *stolen)
+{
+    /* Hand `stolen` to the accumulator and drop the caller's reference to
+    it, whatever the outcome. Returns JSON_Accu_Accumulate's status. */
+    int status;
+
+    status = JSON_Accu_Accumulate(accu, stolen);
+    Py_DECREF(stolen);
+    return status;
+}
+
+static int
+encoder_listencode_obj(PyEncoderObject *s, JSON_Accu *rval, PyObject *obj, Py_ssize_t indent_level)
+{
+    /* Encode Python object obj to a JSON term, appending the pieces to the
+    accumulator rval.
+
+    Returns 0 on success and -1 on failure. The type tests are tried in the
+    order written below; the first match wins. Anything that matches none
+    of them is passed to s->defaultfn and the result is encoded recursively,
+    with s->markers (when not None) used to detect reference cycles.
+    */
+    int rv = -1;
+    do {
+        if (obj == Py_None || obj == Py_True || obj == Py_False) {
+            PyObject *cstr = _encoded_const(obj);
+            if (cstr != NULL)
+                rv = _steal_accumulate(rval, cstr);
+        }
+        else if (PyString_Check(obj) || PyUnicode_Check(obj))
+        {
+            PyObject *encoded = encoder_encode_string(s, obj);
+            if (encoded != NULL)
+                rv = _steal_accumulate(rval, encoded);
+        }
+        else if (PyInt_Check(obj) || PyLong_Check(obj)) {
+            PyObject *encoded = PyObject_Str(obj);
+            if (encoded != NULL) {
+                if (s->bigint_as_string) {
+                    encoded = maybe_quote_bigint(encoded, obj);
+                    if (encoded == NULL)
+                        break;
+                }
+                rv = _steal_accumulate(rval, encoded);
+            }
+        }
+        else if (PyFloat_Check(obj)) {
+            PyObject *encoded = encoder_encode_float(s, obj);
+            if (encoded != NULL)
+                rv = _steal_accumulate(rval, encoded);
+        }
+        else if (s->for_json && _has_for_json_hook(obj)) {
+            PyObject *newobj;
+            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+                return rv;
+            newobj = PyObject_CallMethod(obj, "for_json", NULL);
+            if (newobj != NULL) {
+                rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+                Py_DECREF(newobj);
+            }
+            Py_LeaveRecursiveCall();
+        }
+        else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
+            PyObject *newobj;
+            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+                return rv;
+            newobj = PyObject_CallMethod(obj, "_asdict", NULL);
+            if (newobj != NULL) {
+                rv = encoder_listencode_dict(s, rval, newobj, indent_level);
+                Py_DECREF(newobj);
+            }
+            Py_LeaveRecursiveCall();
+        }
+        else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
+            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+                return rv;
+            rv = encoder_listencode_list(s, rval, obj, indent_level);
+            Py_LeaveRecursiveCall();
+        }
+        else if (PyDict_Check(obj)) {
+            if (Py_EnterRecursiveCall(" while encoding a JSON object"))
+                return rv;
+            rv = encoder_listencode_dict(s, rval, obj, indent_level);
+            Py_LeaveRecursiveCall();
+        }
+        else if (s->use_decimal && PyObject_TypeCheck(obj, (PyTypeObject *)s->Decimal)) {
+            PyObject *encoded = PyObject_Str(obj);
+            if (encoded != NULL)
+                rv = _steal_accumulate(rval, encoded);
+        }
+        else {
+            /* Unknown type: ask the default hook for a substitute object. */
+            PyObject *ident = NULL;
+            PyObject *newobj;
+            if (s->markers != Py_None) {
+                /* markers maps id(obj) -> obj for objects currently being
+                encoded; finding obj there means we came back to it. */
+                int has_key;
+                ident = PyLong_FromVoidPtr(obj);
+                if (ident == NULL)
+                    break;
+                has_key = PyDict_Contains(s->markers, ident);
+                if (has_key) {
+                    if (has_key != -1)
+                        PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+                    Py_DECREF(ident);
+                    break;
+                }
+                if (PyDict_SetItem(s->markers, ident, obj)) {
+                    Py_DECREF(ident);
+                    break;
+                }
+            }
+            if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
+                /* do not leak the marker key on the recursion-limit path */
+                Py_XDECREF(ident);
+                return rv;
+            }
+            newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
+            if (newobj == NULL) {
+                Py_XDECREF(ident);
+                Py_LeaveRecursiveCall();
+                break;
+            }
+            rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+            Py_LeaveRecursiveCall();
+            Py_DECREF(newobj);
+            if (rv) {
+                rv = -1;
+            }
+            else if (ident != NULL) {
+                /* obj is done: remove it from the in-progress set */
+                if (PyDict_DelItem(s->markers, ident))
+                    rv = -1;
+            }
+            /* Single release point for ident on all three outcomes, so it
+            is dropped exactly once. */
+            Py_XDECREF(ident);
+        }
+    } while (0);
+    return rv;
+}
+
+static int
+encoder_listencode_dict(PyEncoderObject *s, JSON_Accu *rval, PyObject *dct, Py_ssize_t indent_level)
+{
+    /* Encode Python dict dct a JSON term, appending the pieces to rval.
+
+    Returns 0 on success, -1 on failure. Items come from
+    encoder_dict_iteritems; each key is stringified and encoded (encoded
+    keys are cached in s->key_memo), each value goes through
+    encoder_listencode_obj. When s->markers is not None it is used to detect
+    reference cycles.
+
+    Ownership: kstr, ident, iter, item and encoded are the only owned
+    references; each is reset to NULL as soon as it is released, so the
+    bail label can release whatever is still live without double-freeing.
+    */
+    static PyObject *open_dict = NULL;
+    static PyObject *close_dict = NULL;
+    static PyObject *empty_dict = NULL;
+    PyObject *kstr = NULL;
+    PyObject *ident = NULL;
+    PyObject *iter = NULL;
+    PyObject *item = NULL;
+    PyObject *encoded = NULL;
+    Py_ssize_t idx;
+
+    if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
+        open_dict = JSON_InternFromString("{");
+        close_dict = JSON_InternFromString("}");
+        empty_dict = JSON_InternFromString("{}");
+        if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
+            return -1;
+    }
+    if (PyDict_Size(dct) == 0)
+        return JSON_Accu_Accumulate(rval, empty_dict);
+
+    if (s->markers != Py_None) {
+        int has_key;
+        ident = PyLong_FromVoidPtr(dct);
+        if (ident == NULL)
+            goto bail;
+        has_key = PyDict_Contains(s->markers, ident);
+        if (has_key) {
+            if (has_key != -1)
+                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+            goto bail;
+        }
+        if (PyDict_SetItem(s->markers, ident, dct)) {
+            goto bail;
+        }
+    }
+
+    if (JSON_Accu_Accumulate(rval, open_dict))
+        goto bail;
+
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level += 1;
+        /*
+            newline_indent = '\n' + (_indent * _current_indent_level)
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        */
+    }
+
+    iter = encoder_dict_iteritems(s, dct);
+    if (iter == NULL)
+        goto bail;
+
+    /* idx counts the items actually written; it decides whether a
+    separator is needed before the next one. */
+    idx = 0;
+    while ((item = PyIter_Next(iter))) {
+        PyObject *key, *value;
+        if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
+            PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
+            goto bail;
+        }
+        /* key and value are borrowed from item */
+        key = PyTuple_GET_ITEM(item, 0);
+        if (key == NULL)
+            goto bail;
+        value = PyTuple_GET_ITEM(item, 1);
+        if (value == NULL)
+            goto bail;
+
+        /* PyDict_GetItem returns a borrowed reference; take our own so
+        both branches below leave `encoded` owned. */
+        encoded = PyDict_GetItem(s->key_memo, key);
+        if (encoded != NULL) {
+            Py_INCREF(encoded);
+        } else {
+            kstr = encoder_stringify_key(s, key);
+            if (kstr == NULL)
+                goto bail;
+            else if (kstr == Py_None) {
+                /* skipkeys */
+                Py_CLEAR(item);
+                Py_CLEAR(kstr);
+                continue;
+            }
+        }
+        if (idx) {
+            if (JSON_Accu_Accumulate(rval, s->item_separator))
+                goto bail;
+        }
+        if (encoded == NULL) {
+            encoded = encoder_encode_string(s, kstr);
+            Py_CLEAR(kstr);
+            if (encoded == NULL)
+                goto bail;
+            if (PyDict_SetItem(s->key_memo, key, encoded))
+                goto bail;
+        }
+        if (JSON_Accu_Accumulate(rval, encoded)) {
+            goto bail;
+        }
+        Py_CLEAR(encoded);
+        if (JSON_Accu_Accumulate(rval, s->key_separator))
+            goto bail;
+        if (encoder_listencode_obj(s, rval, value, indent_level))
+            goto bail;
+        Py_CLEAR(item);
+        idx += 1;
+    }
+    Py_CLEAR(iter);
+    /* PyIter_Next returns NULL both at exhaustion and on error */
+    if (PyErr_Occurred())
+        goto bail;
+    if (ident != NULL) {
+        if (PyDict_DelItem(s->markers, ident))
+            goto bail;
+        Py_CLEAR(ident);
+    }
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level -= 1;
+        /*
+            yield '\n' + (_indent * _current_indent_level)
+        */
+    }
+    if (JSON_Accu_Accumulate(rval, close_dict))
+        goto bail;
+    return 0;
+
+bail:
+    Py_XDECREF(encoded);
+    Py_XDECREF(item);
+    Py_XDECREF(iter);
+    Py_XDECREF(kstr);
+    Py_XDECREF(ident);
+    return -1;
+}
+
+
+static int
+encoder_listencode_list(PyEncoderObject *s, JSON_Accu *rval, PyObject *seq, Py_ssize_t indent_level)
+{
+    /* Encode Python list seq to a JSON term, appending the pieces to rval.
+
+    Returns 0 on success, -1 on failure. A falsy seq is written as "[]"
+    without touching s->markers. Otherwise the elements are obtained with
+    PyObject_GetIter and each one is encoded with encoder_listencode_obj,
+    separated by s->item_separator. When s->markers is not None it is used
+    to detect reference cycles.
+    */
+    static PyObject *open_array = NULL;
+    static PyObject *close_array = NULL;
+    static PyObject *empty_array = NULL;
+    PyObject *ident = NULL;
+    PyObject *iter = NULL;
+    PyObject *obj = NULL;
+    int is_true;
+    int i = 0;
+
+    /* The bracket strings are interned once and cached across calls. */
+    if (open_array == NULL || close_array == NULL || empty_array == NULL) {
+        open_array = JSON_InternFromString("[");
+        close_array = JSON_InternFromString("]");
+        empty_array = JSON_InternFromString("[]");
+        if (open_array == NULL || close_array == NULL || empty_array == NULL)
+            return -1;
+    }
+    ident = NULL;
+    is_true = PyObject_IsTrue(seq);
+    if (is_true == -1)
+        return -1;
+    else if (is_true == 0)
+        return JSON_Accu_Accumulate(rval, empty_array);
+
+    if (s->markers != Py_None) {
+        /* markers maps id(seq) -> seq while seq is being encoded */
+        int has_key;
+        ident = PyLong_FromVoidPtr(seq);
+        if (ident == NULL)
+            goto bail;
+        has_key = PyDict_Contains(s->markers, ident);
+        if (has_key) {
+            if (has_key != -1)
+                PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+            goto bail;
+        }
+        if (PyDict_SetItem(s->markers, ident, seq)) {
+            goto bail;
+        }
+    }
+
+    iter = PyObject_GetIter(seq);
+    if (iter == NULL)
+        goto bail;
+
+    if (JSON_Accu_Accumulate(rval, open_array))
+        goto bail;
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level += 1;
+        /*
+            newline_indent = '\n' + (_indent * _current_indent_level)
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        */
+    }
+    /* i counts the elements written so far; a separator precedes every
+    element but the first. */
+    while ((obj = PyIter_Next(iter))) {
+        if (i) {
+            if (JSON_Accu_Accumulate(rval, s->item_separator))
+                goto bail;
+        }
+        if (encoder_listencode_obj(s, rval, obj, indent_level))
+            goto bail;
+        i++;
+        Py_CLEAR(obj);
+    }
+    Py_CLEAR(iter);
+    /* PyIter_Next returns NULL both at exhaustion and on error */
+    if (PyErr_Occurred())
+        goto bail;
+    if (ident != NULL) {
+        if (PyDict_DelItem(s->markers, ident))
+            goto bail;
+        Py_CLEAR(ident);
+    }
+    if (s->indent != Py_None) {
+        /* TODO: DOES NOT RUN */
+        indent_level -= 1;
+        /*
+            yield '\n' + (_indent * _current_indent_level)
+        */
+    }
+    if (JSON_Accu_Accumulate(rval, close_array))
+        goto bail;
+    return 0;
+
+bail:
+    /* Owned references are reset to NULL when released above, so whatever
+    is still non-NULL here is live. */
+    Py_XDECREF(obj);
+    Py_XDECREF(iter);
+    Py_XDECREF(ident);
+    return -1;
+}
+
+static void
+encoder_dealloc(PyObject *self)
+{
+    /* Deallocate Encoder.
+
+    The type is declared with Py_TPFLAGS_HAVE_GC, so the object must be
+    removed from the collector's tracking before any of its fields are
+    invalidated; releasing a field can run arbitrary code, which may trigger
+    a collection that would otherwise traverse a half-cleared object.
+    */
+    PyObject_GC_UnTrack(self);
+    encoder_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+encoder_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    /* tp_traverse for Encoder: report every object reference the encoder
+    owns to the garbage collector. The set of fields visited here matches
+    the set released by encoder_clear (including skipkeys_bool, which
+    encoder_init stores and INCREFs like the others). Py_VISIT returns early
+    with the visitor's result if it is non-zero. */
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    Py_VISIT(s->markers);
+    Py_VISIT(s->defaultfn);
+    Py_VISIT(s->encoder);
+    Py_VISIT(s->encoding);
+    Py_VISIT(s->indent);
+    Py_VISIT(s->key_separator);
+    Py_VISIT(s->item_separator);
+    Py_VISIT(s->key_memo);
+    Py_VISIT(s->skipkeys_bool);
+    Py_VISIT(s->sort_keys);
+    Py_VISIT(s->item_sort_kw);
+    Py_VISIT(s->item_sort_key);
+    Py_VISIT(s->Decimal);
+    return 0;
+}
+
+static int
+encoder_clear(PyObject *self)
+{
+    /* tp_clear for Encoder: release every object reference held by the
+    encoder and reset the slots to NULL. Also called by encoder_dealloc
+    before the memory is freed. Always returns 0. */
+    PyEncoderObject *s;
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+    /* Py_CLEAR is a no-op on slots that are already NULL, so this is safe
+    to run more than once and on partially initialized objects. */
+    Py_CLEAR(s->markers);
+    Py_CLEAR(s->defaultfn);
+    Py_CLEAR(s->encoder);
+    Py_CLEAR(s->encoding);
+    Py_CLEAR(s->indent);
+    Py_CLEAR(s->key_separator);
+    Py_CLEAR(s->item_separator);
+    Py_CLEAR(s->key_memo);
+    Py_CLEAR(s->skipkeys_bool);
+    Py_CLEAR(s->sort_keys);
+    Py_CLEAR(s->item_sort_kw);
+    Py_CLEAR(s->item_sort_key);
+    Py_CLEAR(s->Decimal);
+    return 0;
+}
+
+PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
+
+/* Type object for the Encoder; moduleinit publishes it on the module as
+   "make_encoder". Slots are filled positionally, so the order of the
+   entries below must not change. Note that moduleinit assigns
+   PyType_GenericNew to tp_new before calling PyType_Ready, replacing the
+   encoder_new entry given here. */
+static
+PyTypeObject PyEncoderType = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "simplejson._speedups.Encoder",       /* tp_name */
+    sizeof(PyEncoderObject), /* tp_basicsize */
+    0,                    /* tp_itemsize */
+    encoder_dealloc, /* tp_dealloc */
+    0,                    /* tp_print */
+    0,                    /* tp_getattr */
+    0,                    /* tp_setattr */
+    0,                    /* tp_compare */
+    0,                    /* tp_repr */
+    0,                    /* tp_as_number */
+    0,                    /* tp_as_sequence */
+    0,                    /* tp_as_mapping */
+    0,                    /* tp_hash */
+    encoder_call,         /* tp_call */
+    0,                    /* tp_str */
+    0,                    /* tp_getattro */
+    0,                    /* tp_setattro */
+    0,                    /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
+    encoder_doc,          /* tp_doc */
+    encoder_traverse,     /* tp_traverse */
+    encoder_clear,        /* tp_clear */
+    0,                    /* tp_richcompare */
+    0,                    /* tp_weaklistoffset */
+    0,                    /* tp_iter */
+    0,                    /* tp_iternext */
+    0,                    /* tp_methods */
+    encoder_members,      /* tp_members */
+    0,                    /* tp_getset */
+    0,                    /* tp_base */
+    0,                    /* tp_dict */
+    0,                    /* tp_descr_get */
+    0,                    /* tp_descr_set */
+    0,                    /* tp_dictoffset */
+    encoder_init,         /* tp_init */
+    0,                    /* tp_alloc */
+    encoder_new,          /* tp_new */
+    0,                    /* tp_free */
+};
+
+/* Module-level functions of _speedups. encode_basestring_ascii takes a
+   single object (METH_O); scanstring takes positional arguments
+   (METH_VARARGS). The all-NULL entry terminates the table. */
+static PyMethodDef speedups_methods[] = {
+    {"encode_basestring_ascii",
+        (PyCFunction)py_encode_basestring_ascii,
+        METH_O,
+        pydoc_encode_basestring_ascii},
+    {"scanstring",
+        (PyCFunction)py_scanstring,
+        METH_VARARGS,
+        pydoc_scanstring},
+    {NULL, NULL, 0, NULL}
+};
+
+PyDoc_STRVAR(module_doc,
+"simplejson speedups\n");
+
+#if PY_MAJOR_VERSION >= 3
+/* Python 3 module definition, passed to PyModule_Create in moduleinit.
+   m_size is -1 and no reload/traverse/clear/free hooks are provided. */
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "_speedups",        /* m_name */
+    module_doc,         /* m_doc */
+    -1,                 /* m_size */
+    speedups_methods,   /* m_methods */
+    NULL,               /* m_reload */
+    NULL,               /* m_traverse */
+    NULL,               /* m_clear*/
+    NULL,               /* m_free */
+};
+#endif
+
+static PyObject *
+moduleinit(void)
+{
+    /* Shared module initialization for Python 2 and 3.
+
+    Readies the Scanner and Encoder types (with PyType_GenericNew installed
+    as tp_new), creates the _speedups module and publishes the two types on
+    it as make_scanner and make_encoder. Returns the module, or NULL if a
+    type could not be readied or the module could not be created.
+    */
+    PyObject *m;
+    PyScannerType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyScannerType) < 0)
+        return NULL;
+    PyEncoderType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyEncoderType) < 0)
+        return NULL;
+
+#if PY_MAJOR_VERSION >= 3
+    m = PyModule_Create(&moduledef);
+#else
+    m = Py_InitModule3("_speedups", speedups_methods, module_doc);
+#endif
+    /* Module creation can fail; do not hand NULL to PyModule_AddObject. */
+    if (m == NULL)
+        return NULL;
+    /* PyModule_AddObject steals a reference on success, hence the INCREFs
+    on the statically allocated type objects. */
+    Py_INCREF((PyObject*)&PyScannerType);
+    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
+    Py_INCREF((PyObject*)&PyEncoderType);
+    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
+    return m;
+}
+
+/* Import entry points. Both delegate to moduleinit: the Python 3 entry
+   point returns the module object (or NULL), the Python 2 one returns
+   nothing and discards moduleinit's result. */
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC
+PyInit__speedups(void)
+{
+    return moduleinit();
+}
+#else
+void
+init_speedups(void)
+{
+    moduleinit();
+}
+#endif
diff --git a/simplejson/compat.py b/simplejson/compat.py
new file mode 100644 (file)
index 0000000..a0af4a1
--- /dev/null
@@ -0,0 +1,46 @@
+"""Python 3 compatibility shims
+"""
+import sys
+
+# True when running under Python 3 or later.
+PY3 = sys.version_info[0] >= 3
+
+if PY3:
+    import codecs
+    import io
+
+    # reload() lives in importlib from 3.4 on and in imp before that
+    if sys.version_info[:2] < (3, 4):
+        from imp import reload as reload_module
+    else:
+        from importlib import reload as reload_module
+
+    text_type = str
+    binary_type = bytes
+    string_types = (str,)
+    integer_types = (int,)
+    StringIO = io.StringIO
+    BytesIO = io.BytesIO
+
+    def b(s):
+        """Turn the text literal *s* into bytes using latin-1."""
+        encoded, _ = codecs.latin_1_encode(s)
+        return encoded
+
+    def u(s):
+        """Text literals are already unicode; return *s* unchanged."""
+        return s
+
+    def unichr(s):
+        """Return the one-character string for code point *s*."""
+        return u(chr(s))
+
+    def fromhex(s):
+        """Decode the hexadecimal string *s* into bytes."""
+        return bytes.fromhex(s)
+
+else:
+    from cStringIO import StringIO
+    BytesIO = StringIO
+
+    text_type = unicode
+    binary_type = str
+    string_types = (basestring,)
+    integer_types = (int, long)
+    unichr = unichr
+    reload_module = reload
+
+    def b(s):
+        """Byte-string literals are already bytes; return *s* unchanged."""
+        return s
+
+    def u(s):
+        """Decode the byte-string literal *s*, honouring unicode escapes."""
+        return unicode(s, 'unicode_escape')
+
+    def fromhex(s):
+        """Decode the hexadecimal string *s* into a byte string."""
+        return s.decode('hex')
+
+# The widest integer type on this interpreter (long on 2.x, int on 3.x).
+long_type = integer_types[-1]
diff --git a/simplejson/decoder.py b/simplejson/decoder.py
new file mode 100644 (file)
index 0000000..38cb027
--- /dev/null
@@ -0,0 +1,389 @@
+"""Implementation of JSONDecoder
+"""
+from __future__ import absolute_import
+import re
+import sys
+import struct
+from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
+from .scanner import make_scanner, JSONDecodeError
+
+def _import_c_scanstring():
+    """Return the C ``scanstring`` speedup, or ``None`` when the
+    ``_speedups`` extension cannot be imported."""
+    try:
+        from ._speedups import scanstring as speedup
+    except ImportError:
+        return None
+    else:
+        return speedup
+c_scanstring = _import_c_scanstring()
+
+# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
+# compatibility, but it was never in the __all__
+__all__ = ['JSONDecoder']
+
+FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
+def _floatconstants():
+    """Build ``(nan, inf, -inf)`` from their big-endian IEEE 754 bit
+    patterns."""
+    raw = fromhex('7FF80000000000007FF0000000000000')
+    nan_bytes, inf_bytes = raw[:8], raw[8:]
+    # The struct module in Python 2.4 would get frexp() out of range here
+    # when an endian is specified in the format string, so the bytes are
+    # swapped by hand and unpacked in native order instead.
+    if sys.byteorder != 'big':
+        nan_bytes = nan_bytes[::-1]
+        inf_bytes = inf_bytes[::-1]
+    nan, inf = struct.unpack('dd', nan_bytes + inf_bytes)
+    return nan, inf, -inf
+
+NaN, PosInf, NegInf = _floatconstants()
+
+# Non-standard JSON literals and the float each one stands for; used as the
+# default ``parse_constant`` lookup by JSONDecoder.
+_CONSTANTS = {
+    '-Infinity': NegInf,
+    'Infinity': PosInf,
+    'NaN': NaN,
+}
+
+# Group 1 is a (possibly empty) run of ordinary characters, group 2 the
+# first character that needs special handling: a double quote, a backslash
+# or a control character below 0x20.
+STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
+# Maps the character following a backslash to the text it decodes to.
+# Values go through u() so they are unicode on Python 2 as well; there u()
+# decodes unicode escapes, which is why the backslash is spelled \u005c.
+BACKSLASH = {
+    '"': u('"'), '\\': u('\u005c'), '/': u('/'),
+    'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
+}
+
+# Encoding used for byte strings when the caller does not supply one.
+DEFAULT_ENCODING = "utf-8"
+
+def py_scanstring(s, end, encoding=None, strict=True,
+        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
+        _PY3=PY3, _maxunicode=sys.maxunicode):
+    """Scan the string s for a JSON string. End is the index of the
+    character in s after the quote that started the JSON string.
+    Unescapes all valid JSON string escape sequences and raises
+    JSONDecodeError on attempt to decode an invalid string. If strict is
+    False then literal control characters are allowed in the string.
+
+    On Python 2, byte-string content is decoded using *encoding*
+    (DEFAULT_ENCODING when it is None).
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    # Index of the opening quote, used when reporting unterminated strings
+    begin = end - 1
+    while True:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not _PY3 and not isinstance(content, text_type):
+                content = text_type(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at"
+                # end is already one past the terminator; report the
+                # position of the control character itself
+                raise JSONDecodeError(msg, s, end - 1)
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\X escape sequence %r"
+                raise JSONDecodeError(msg, s, end)
+            end += 1
+        else:
+            # Unicode escape sequence
+            msg = "Invalid \\uXXXX escape sequence"
+            esc = s[end + 1:end + 5]
+            # int(..., 16) would accept a "0x" prefix, so reject it by hand
+            escX = esc[1:2]
+            if len(esc) != 4 or escX == 'x' or escX == 'X':
+                raise JSONDecodeError(msg, s, end - 1)
+            try:
+                uni = int(esc, 16)
+            except ValueError:
+                raise JSONDecodeError(msg, s, end - 1)
+            end += 5
+            # Check for surrogate pair on UCS-4 systems
+            # Note that this will join high/low surrogate pairs
+            # but will also pass unpaired surrogates through
+            if (_maxunicode > 65535 and
+                uni & 0xfc00 == 0xd800 and
+                s[end:end + 2] == '\\u'):
+                esc2 = s[end + 2:end + 6]
+                escX = esc2[1:2]
+                if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
+                    try:
+                        uni2 = int(esc2, 16)
+                    except ValueError:
+                        raise JSONDecodeError(msg, s, end)
+                    if uni2 & 0xfc00 == 0xdc00:
+                        uni = 0x10000 + (((uni - 0xd800) << 10) |
+                                         (uni2 - 0xdc00))
+                        end += 6
+            char = unichr(uni)
+        # Append the unescaped character
+        _append(char)
+    return _join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject(state, encoding, strict, scan_once, object_hook,
+        object_pairs_hook, memo=None,
+        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    # Parse the members of a JSON object and return ``(result, end)`` where
+    # ``end`` is the index just past the closing '}'.
+    #
+    # ``state`` is ``(s, end)``; ``end`` is presumably the index just past
+    # the opening '{' (the caller is not visible here -- TODO confirm).
+    # ``object_pairs_hook`` receives the list of (key, value) pairs and wins
+    # over ``object_hook``, which receives the resulting dict.
+    # Raises JSONDecodeError on malformed input.
+    (s, end) = state
+    # Backwards compatibility
+    if memo is None:
+        memo = {}
+    # setdefault returns the first-seen object for an equal key, so repeated
+    # keys share a single string object
+    memo_get = memo.setdefault
+    pairs = []
+    # Use a slice to prevent IndexError from being raised, the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            if object_pairs_hook is not None:
+                result = object_pairs_hook(pairs)
+                return result, end + 1
+            pairs = {}
+            if object_hook is not None:
+                pairs = object_hook(pairs)
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise JSONDecodeError(
+                "Expecting property name enclosed in double quotes",
+                s, end)
+    # Step over the opening quote of the first key
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+        key = memo_get(key, key)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise JSONDecodeError("Expecting ':' delimiter", s, end)
+
+        end += 1
+
+        # Skip whitespace after the ':'; running off the end of the input
+        # is left for scan_once to report
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        value, end = scan_once(s, end)
+        pairs.append((key, value))
+
+        # Find the delimiter after the value; '' marks end of input
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
+
+        # After a ',' the next non-whitespace character must open a key
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
+        end += 1
+        if nextchar != '"':
+            raise JSONDecodeError(
+                "Expecting property name enclosed in double quotes",
+                s, end - 1)
+
+    if object_pairs_hook is not None:
+        result = object_pairs_hook(pairs)
+        return result, end
+    pairs = dict(pairs)
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    # Parse the elements of a JSON array and return ``(values, end)`` where
+    # ``values`` is a list and ``end`` is the index just past the closing
+    # ']'.  ``state`` is ``(s, end)``; ``end`` is presumably the index just
+    # past the opening '[' (the caller is not visible here -- TODO confirm).
+    # Each element is decoded by ``scan_once``.  Raises JSONDecodeError on
+    # malformed input.
+    (s, end) = state
+    values = []
+    # Slicing yields '' at end of input instead of raising IndexError; note
+    # that '' is "in" every string, so end of input also takes this branch
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    elif nextchar == '':
+        raise JSONDecodeError("Expecting value or ']'", s, end)
+    _append = values.append
+    while True:
+        value, end = scan_once(s, end)
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
+
+        # Skip whitespace after the ','; running off the end of the input
+        # is left for scan_once to report
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+    """Simple JSON <http://json.org> decoder
+
+    Performs the following translations in decoding by default:
+
+    +---------------+-------------------+
+    | JSON          | Python            |
+    +===============+===================+
+    | object        | dict              |
+    +---------------+-------------------+
+    | array         | list              |
+    +---------------+-------------------+
+    | string        | str, unicode      |
+    +---------------+-------------------+
+    | number (int)  | int, long         |
+    +---------------+-------------------+
+    | number (real) | float             |
+    +---------------+-------------------+
+    | true          | True              |
+    +---------------+-------------------+
+    | false         | False             |
+    +---------------+-------------------+
+    | null          | None              |
+    +---------------+-------------------+
+
+    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    their corresponding ``float`` values, which is outside the JSON spec.
+
+    """
+
+    def __init__(self, encoding=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, strict=True,
+            object_pairs_hook=None):
+        """
+        *encoding* determines the encoding used to interpret any
+        :class:`str` objects decoded by this instance (``'utf-8'`` by
+        default).  It has no effect when decoding :class:`unicode` objects.
+
+        Note that currently only encodings that are a superset of ASCII work,
+        strings of other encodings should be passed in as :class:`unicode`.
+
+        *object_hook*, if specified, will be called with the result of every
+        JSON object decoded and its return value will be used in place of the
+        given :class:`dict`.  This can be used to provide custom
+        deserializations (e.g. to support JSON-RPC class hinting).
+
+        *object_pairs_hook* is an optional function that will be called with
+        the result of any object literal decode with an ordered list of pairs.
+        The return value of *object_pairs_hook* will be used instead of the
+        :class:`dict`.  This feature can be used to implement custom decoders
+        that rely on the order that the key and value pairs are decoded (for
+        example, :class:`collections.OrderedDict` will remember the order of
+        insertion). If *object_hook* is also defined, the *object_pairs_hook*
+        takes priority.
+
+        *parse_float*, if specified, will be called with the string of every
+        JSON float to be decoded.  By default, this is equivalent to
+        ``float(num_str)``. This can be used to use another datatype or parser
+        for JSON floats (e.g. :class:`decimal.Decimal`).
+
+        *parse_int*, if specified, will be called with the string of every
+        JSON int to be decoded.  By default, this is equivalent to
+        ``int(num_str)``.  This can be used to use another datatype or parser
+        for JSON integers (e.g. :class:`float`).
+
+        *parse_constant*, if specified, will be called with one of the
+        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
+        can be used to raise an exception if invalid JSON numbers are
+        encountered.
+
+        *strict* controls the parser's behavior when it encounters an
+        invalid control character in a string. The default setting of
+        ``True`` means that unescaped control characters are parse errors, if
+        ``False`` then control characters will be allowed in strings.
+
+        """
+        if encoding is None:
+            encoding = DEFAULT_ENCODING
+        self.encoding = encoding
+        self.object_hook = object_hook
+        self.object_pairs_hook = object_pairs_hook
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.memo = {}
+        # Built last: the decoder passes itself to make_scanner, so every
+        # attribute above is already in place (presumably the scanner reads
+        # them -- make_scanner is not visible here).
+        self.scan_once = make_scanner(self)
+
+    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        instance containing a JSON document)
+
+        """
+        # On Python 3 bytes are decoded up front with the configured encoding
+        if _PY3 and isinstance(s, binary_type):
+            s = s.decode(self.encoding)
+        obj, end = self.raw_decode(s)
+        # Only whitespace may follow the document
+        end = _w(s, end).end()
+        if end != len(s):
+            raise JSONDecodeError("Extra data", s, end, len(s))
+        return obj
+
+    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
+        beginning with a JSON document) and return a 2-tuple of the Python
+        representation and the index in ``s`` where the document ended.
+        Optionally, ``idx`` can be used to specify an offset in ``s`` where
+        the JSON document begins.
+
+        This can be used to decode a JSON document from a string that may
+        have extraneous data at the end.
+
+        """
+        if _PY3 and not isinstance(s, text_type):
+            raise TypeError("Input string must be text, not bytes")
+        # Skip leading whitespace before handing off to the scanner
+        return self.scan_once(s, idx=_w(s, idx).end())
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
new file mode 100644 (file)
index 0000000..9815ee5
--- /dev/null
@@ -0,0 +1,628 @@
+"""Implementation of JSONEncoder
+"""
+from __future__ import absolute_import
+import re
+from operator import itemgetter
+from decimal import Decimal
+from .compat import u, unichr, binary_type, string_types, integer_types, PY3
+def _import_speedups():
+    """Return ``(encode_basestring_ascii, make_encoder)`` from the C
+    ``_speedups`` extension, or ``(None, None)`` when it cannot be
+    imported."""
+    try:
+        from . import _speedups as speedups
+    except ImportError:
+        return None, None
+    return speedups.encode_basestring_ascii, speedups.make_encoder
+c_encode_basestring_ascii, c_make_encoder = _import_speedups()
+
+from simplejson.decoder import PosInf
+
+#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+# This is required because u() will mangle the string and ur'' isn't valid
+# python3 syntax
+ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
+# Backslash, double quote, or any character outside the printable ASCII
+# range from space to tilde.
+ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
+# Any character in the range 0x80-0xff; encode_basestring and
+# py_encode_basestring_ascii use it on Python 2 to decide whether a str
+# needs decoding as UTF-8.
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
+# Replacement text for each character that must be escaped.  The short
+# forms are listed explicitly; the loops below fill in \uXXXX for the rest.
+ESCAPE_DCT = {
+    '\\': '\\\\',
+    '"': '\\"',
+    '\b': '\\b',
+    '\f': '\\f',
+    '\n': '\\n',
+    '\r': '\\r',
+    '\t': '\\t',
+}
+# setdefault keeps the short escapes above and adds \uXXXX for every other
+# control character below 0x20.
+for i in range(0x20):
+    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+# U+2028 and U+2029 are escaped as well.
+for i in [0x2028, 0x2029]:
+    ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
+
+# Callable used to render ordinary floats.
+FLOAT_REPR = repr
+
+def encode_basestring(s, _PY3=PY3, _q=u('"')):
+    """Return a JSON representation of a Python string
+
+    Byte strings are decoded as UTF-8 first (on Python 2 only when they
+    contain bytes matched by HAS_UTF8).  Characters matched by ESCAPE are
+    replaced using ESCAPE_DCT and the result is wrapped in double quotes.
+
+    """
+    if _PY3:
+        must_decode = isinstance(s, binary_type)
+    else:
+        must_decode = isinstance(s, str) and HAS_UTF8.search(s) is not None
+    if must_decode:
+        s = s.decode('utf-8')
+    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
+    return _q + escaped + _q
+
+
+def py_encode_basestring_ascii(s, _PY3=PY3):
+    """Return an ASCII-only JSON representation of a Python string
+
+    Byte strings are decoded as UTF-8 first (on Python 2 only when they
+    contain bytes matched by HAS_UTF8).  Characters matched by
+    ESCAPE_ASCII are replaced by their ESCAPE_DCT entry or, failing that,
+    by a ``\\uXXXX`` escape (a surrogate pair above U+FFFF).
+
+    """
+    if _PY3:
+        must_decode = isinstance(s, binary_type)
+    else:
+        must_decode = isinstance(s, str) and HAS_UTF8.search(s) is not None
+    if must_decode:
+        s = s.decode('utf-8')
+
+    def replace(match):
+        char = match.group(0)
+        if char in ESCAPE_DCT:
+            return ESCAPE_DCT[char]
+        codepoint = ord(char)
+        if codepoint < 0x10000:
+            return '\\u%04x' % (codepoint,)
+        # Outside the BMP: emit a UTF-16 surrogate pair
+        offset = codepoint - 0x10000
+        high = 0xd800 | ((offset >> 10) & 0x3ff)
+        low = 0xdc00 | (offset & 0x3ff)
+        return '\\u%04x\\u%04x' % (high, low)
+
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = (
+    c_encode_basestring_ascii or py_encode_basestring_ascii)
+
+class JSONEncoder(object):
+    """Extensible JSON <http://json.org> encoder for Python data structures.
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict, namedtuple  | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method with another method that returns a serializable
+    object for ``o`` if possible, otherwise it should call the superclass
+    implementation (to raise ``TypeError``).
+
+    """
+    # Class-level defaults; __init__ overrides them per instance when
+    # *separators* or *indent* is given.
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None,
+            use_decimal=True, namedtuple_as_object=True,
+            tuple_as_array=True, bigint_as_string=False,
+            item_sort_key=None, for_json=False, ignore_nan=False):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None.  If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped.  If
+        ensure_ascii is false, the output will be unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such.  This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a string, then JSON array elements and object members
+        will be pretty-printed with a newline followed by that string repeated
+        for each level of nesting. ``None`` (the default) selects the most compact
+        representation without any newlines. For backwards compatibility with
+        versions of simplejson earlier than 2.1.0, an integer is also accepted
+        and is converted to a string with that many spaces.
+
+        If specified, separators should be an (item_separator, key_separator)
+        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
+        (',', ': ') otherwise.  To get the most compact JSON representation,
+        you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized.  It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
+
+        If use_decimal is true (the default), ``decimal.Decimal`` will
+        be supported directly by the encoder. For the inverse, decode JSON
+        with ``parse_float=decimal.Decimal``.
+
+        If namedtuple_as_object is true (the default), objects with
+        ``_asdict()`` methods will be encoded as JSON objects.
+
+        If tuple_as_array is true (the default), tuple (and subclasses) will
+        be encoded as JSON arrays.
+
+        If bigint_as_string is true (not the default), ints 2**53 and higher
+        or lower than -2**53 will be encoded as strings. This is to avoid the
+        rounding that happens in Javascript otherwise.
+
+        If specified, item_sort_key is a callable used to sort the items in
+        each dictionary. This is useful if you want to sort items other than
+        in alphabetical order by key.
+
+        If for_json is true (not the default), objects with a ``for_json()``
+        method will use the return value of that method for encoding as JSON
+        instead of the object.
+
+        If *ignore_nan* is true (default: ``False``), then out of range
+        :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
+        as ``null`` in compliance with the ECMA-262 specification. If true,
+        this will override *allow_nan*.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.use_decimal = use_decimal
+        self.namedtuple_as_object = namedtuple_as_object
+        self.tuple_as_array = tuple_as_array
+        self.bigint_as_string = bigint_as_string
+        self.item_sort_key = item_sort_key
+        self.for_json = for_json
+        self.ignore_nan = ignore_nan
+        # Backwards compatibility: an integer indent means that many spaces
+        if indent is not None and not isinstance(indent, string_types):
+            indent = indent * ' '
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        elif indent is not None:
+            # Each item goes on its own line, so drop the trailing space
+            self.item_separator = ','
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+
+        """
+        raise TypeError(repr(o) + " is not JSON serializable")
+
+    def encode(self, o):
+        """Return a JSON string representation of a Python data structure.
+
+        >>> from simplejson import JSONEncoder
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+
+        """
+        # This is for extremely simple cases and benchmarks.
+        if isinstance(o, binary_type):
+            _encoding = self.encoding
+            if (_encoding is not None and not (_encoding == 'utf-8')):
+                o = o.decode(_encoding)
+        if isinstance(o, string_types):
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            else:
+                return encode_basestring(o)
+        # This doesn't pass the iterator directly to ''.join() because the
+        # exceptions aren't as detailed.  The list call should be roughly
+        # equivalent to the PySequence_Fast that ''.join() would do.
+        chunks = self.iterencode(o, _one_shot=True)
+        if not isinstance(chunks, (list, tuple)):
+            chunks = list(chunks)
+        if self.ensure_ascii:
+            return ''.join(chunks)
+        else:
+            return u''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        """Encode the given object and yield each string
+        representation as available.
+
+        For example::
+
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        if self.ensure_ascii:
+            _encoder = encode_basestring_ascii
+        else:
+            _encoder = encode_basestring
+        # Only wrap the string encoder when there is a real non-UTF-8
+        # encoding to apply.  encode() skips decoding when encoding is None;
+        # doing the same here avoids calling bytes.decode(None).
+        if self.encoding is not None and self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, binary_type):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
+
+        def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
+                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
+            # Check for specials. Note that this type of test is processor
+            # and/or platform-specific, so do tests which don't depend on
+            # the internals.
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            # Only the three special values reach this point
+            if ignore_nan:
+                text = 'null'
+            elif not allow_nan:
+                raise ValueError(
+                    "Out of range float values are not JSON compliant: " +
+                    repr(o))
+
+            return text
+
+
+        key_memo = {}
+        # The C encoder is only used for one-shot encoding without indent
+        if (_one_shot and c_make_encoder is not None
+                and self.indent is None):
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                self.bigint_as_string, self.item_sort_key,
+                self.encoding, self.for_json, self.ignore_nan,
+                Decimal)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                self.bigint_as_string, self.item_sort_key,
+                self.encoding, self.for_json,
+                Decimal=Decimal)
+        try:
+            return _iterencode(o, 0)
+        finally:
+            key_memo.clear()
+
+
+class JSONEncoderForHTML(JSONEncoder):
+    """An encoder that produces JSON safe to embed in HTML.
+
+    To embed JSON content in, say, a script tag on a web page, the
+    characters &, < and > should be escaped. They cannot be escaped
+    with the usual entities (e.g. &amp;) because they are not expanded
+    within <script> tags.
+    """
+
+    def encode(self, o):
+        """Return the HTML-safe JSON text for ``o``."""
+        # Override JSONEncoder.encode because it has hacks for
+        # performance that make things more complicated.
+        pieces = self.iterencode(o, True)
+        if not self.ensure_ascii:
+            return u''.join(pieces)
+        return ''.join(pieces)
+
+    def iterencode(self, o, _one_shot=False):
+        """Yield the chunks of the base encoder with ``&``, ``<`` and ``>``
+        replaced by their ``\\uXXXX`` escapes."""
+        base_chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
+        for piece in base_chunks:
+            yield (piece.replace('&', '\\u0026')
+                        .replace('<', '\\u003c')
+                        .replace('>', '\\u003e'))
+
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
+        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        _use_decimal, _namedtuple_as_object, _tuple_as_array,
+        _bigint_as_string, _item_sort_key, _encoding, _for_json,
+        ## HACK: hand-optimized bytecode; turn globals into locals
+        _PY3=PY3,
+        ValueError=ValueError,
+        string_types=string_types,
+        Decimal=Decimal,
+        dict=dict,
+        float=float,
+        id=id,
+        integer_types=integer_types,
+        isinstance=isinstance,
+        list=list,
+        str=str,
+        tuple=tuple,
+    ):
+    """Build and return the pure-Python ``_iterencode(o, indent_level)``
+    generator function, which yields JSON text chunks for ``o``.
+
+    Parameters as used by the code below:
+
+    markers -- dict keyed by ``id(obj)`` used to detect circular
+        references, or None to disable the check.
+    _default -- called on objects no other branch handles; its result
+        is encoded in place of the object.
+    _encoder -- called on string values (and binary values when _PY3)
+        and on every dict key to produce the quoted JSON string.
+    _indent -- string repeated once per nesting level after a newline,
+        or None for output without newlines.
+    _floatstr -- called on float values and float keys.
+    _key_separator, _item_separator -- text emitted between a key and
+        its value, and between consecutive items.
+    _sort_keys -- when true and no _item_sort_key is given, dict items
+        are sorted by key (``itemgetter(0)``).
+    _skipkeys -- when true, dict keys of unsupported types are skipped
+        instead of raising TypeError.
+    _one_shot -- accepted but not referenced in this function.
+    _use_decimal -- when true, Decimal values/keys are emitted via str().
+    _namedtuple_as_object -- when true, objects with a callable
+        ``_asdict`` are encoded as the dict it returns.
+    _tuple_as_array -- when true, tuples are encoded like lists.
+    _bigint_as_string -- when true, integers outside the open interval
+        (-2**53, 2**53) are emitted inside double quotes.
+    _item_sort_key -- None or a callable used as the sort key for
+        ``(key, value)`` pairs of every dict.
+    _encoding -- encoding used to decode binary dict keys.
+    _for_json -- when true, objects with a callable ``for_json`` are
+        encoded as the value it returns.
+
+    The remaining keyword parameters only rebind globals as locals and
+    are not meant to be passed by callers.
+
+    Raises TypeError if _item_sort_key is truthy but not callable.
+    """
+    if _item_sort_key and not callable(_item_sort_key):
+        raise TypeError("item_sort_key must be None or callable")
+    elif _sort_keys and not _item_sort_key:
+        _item_sort_key = itemgetter(0)
+
+    # Yield the chunks for a list (or a tuple routed here by a caller).
+    def _iterencode_list(lst, _current_indent_level):
+        if not lst:
+            yield '[]'
+            return
+        if markers is not None:
+            # Remember this container until it is fully emitted so that
+            # meeting it again while nested is reported as a cycle.
+            markerid = id(lst)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = lst
+        # buf is the text that must precede the next item: the opening
+        # bracket (plus indentation) for the first item, the separator
+        # for every later one.
+        buf = '['
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (_indent * _current_indent_level)
+            separator = _item_separator + newline_indent
+            buf += newline_indent
+        else:
+            newline_indent = None
+            separator = _item_separator
+        first = True
+        for value in lst:
+            if first:
+                first = False
+            else:
+                buf = separator
+            # Scalars are emitted together with buf in a single chunk.
+            if (isinstance(value, string_types) or
+                (_PY3 and isinstance(value, binary_type))):
+                yield buf + _encoder(value)
+            elif value is None:
+                yield buf + 'null'
+            elif value is True:
+                yield buf + 'true'
+            elif value is False:
+                yield buf + 'false'
+            elif isinstance(value, integer_types):
+                # Quote the integer only when _bigint_as_string is set and
+                # the value lies outside (-2**53, 2**53).
+                yield ((buf + str(value))
+                       if (not _bigint_as_string or
+                           (-1 << 53) < value < (1 << 53))
+                           else (buf + '"' + str(value) + '"'))
+            elif isinstance(value, float):
+                yield buf + _floatstr(value)
+            elif _use_decimal and isinstance(value, Decimal):
+                yield buf + str(value)
+            else:
+                # Nested or custom value: emit buf on its own, then
+                # delegate to the generator matching the value's type.
+                yield buf
+                for_json = _for_json and getattr(value, 'for_json', None)
+                if for_json and callable(for_json):
+                    chunks = _iterencode(for_json(), _current_indent_level)
+                elif isinstance(value, list):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                else:
+                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+                    if _asdict and callable(_asdict):
+                        chunks = _iterencode_dict(_asdict(),
+                                                  _current_indent_level)
+                    elif _tuple_as_array and isinstance(value, tuple):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            # Closing bracket goes on its own line at the outer indent.
+            _current_indent_level -= 1
+            yield '\n' + (_indent * _current_indent_level)
+        yield ']'
+        if markers is not None:
+            del markers[markerid]
+
+    # Convert a non-string dict key to its string form.  Returns None
+    # when the key type is unsupported and _skipkeys is true; raises
+    # TypeError for unsupported keys otherwise.
+    def _stringify_key(key):
+        if isinstance(key, string_types): # pragma: no cover
+            pass
+        elif isinstance(key, binary_type):
+            key = key.decode(_encoding)
+        elif isinstance(key, float):
+            key = _floatstr(key)
+        elif key is True:
+            key = 'true'
+        elif key is False:
+            key = 'false'
+        elif key is None:
+            key = 'null'
+        elif isinstance(key, integer_types):
+            key = str(key)
+        elif _use_decimal and isinstance(key, Decimal):
+            key = str(key)
+        elif _skipkeys:
+            key = None
+        else:
+            raise TypeError("key " + repr(key) + " is not a string")
+        return key
+
+    # Yield the chunks for a dict (or the dict returned by _asdict()).
+    def _iterencode_dict(dct, _current_indent_level):
+        if not dct:
+            yield '{}'
+            return
+        if markers is not None:
+            # Same cycle bookkeeping as in _iterencode_list.
+            markerid = id(dct)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = dct
+        yield '{'
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (_indent * _current_indent_level)
+            item_separator = _item_separator + newline_indent
+            yield newline_indent
+        else:
+            newline_indent = None
+            item_separator = _item_separator
+        first = True
+        if _PY3:
+            iteritems = dct.items()
+        else:
+            iteritems = dct.iteritems()
+        if _item_sort_key:
+            # Sorting needs the final string form of every key, so keys
+            # are stringified (and skipped keys dropped) before sorting.
+            items = []
+            for k, v in dct.items():
+                if not isinstance(k, string_types):
+                    k = _stringify_key(k)
+                    if k is None:
+                        continue
+                items.append((k, v))
+            items.sort(key=_item_sort_key)
+        else:
+            items = iteritems
+        for key, value in items:
+            # In the sorted case keys were already stringified above.
+            if not (_item_sort_key or isinstance(key, string_types)):
+                key = _stringify_key(key)
+                if key is None:
+                    # _skipkeys must be True
+                    continue
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield _encoder(key)
+            yield _key_separator
+            if (isinstance(value, string_types) or
+                (_PY3 and isinstance(value, binary_type))):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, integer_types):
+                # Quote the integer only when _bigint_as_string is set and
+                # the value lies outside (-2**53, 2**53).
+                yield (str(value)
+                       if (not _bigint_as_string or
+                           (-1 << 53) < value < (1 << 53))
+                           else ('"' + str(value) + '"'))
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            elif _use_decimal and isinstance(value, Decimal):
+                yield str(value)
+            else:
+                # Nested or custom value: delegate by type, in the same
+                # precedence order as _iterencode_list.
+                for_json = _for_json and getattr(value, 'for_json', None)
+                if for_json and callable(for_json):
+                    chunks = _iterencode(for_json(), _current_indent_level)
+                elif isinstance(value, list):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                else:
+                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+                    if _asdict and callable(_asdict):
+                        chunks = _iterencode_dict(_asdict(),
+                                                  _current_indent_level)
+                    elif _tuple_as_array and isinstance(value, tuple):
+                        chunks = _iterencode_list(value, _current_indent_level)
+                    elif isinstance(value, dict):
+                        chunks = _iterencode_dict(value, _current_indent_level)
+                    else:
+                        chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
+        if newline_indent is not None:
+            # Closing brace goes on its own line at the outer indent.
+            _current_indent_level -= 1
+            yield '\n' + (_indent * _current_indent_level)
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    # Entry point returned to the caller: yield the chunks for any value.
+    def _iterencode(o, _current_indent_level):
+        if (isinstance(o, string_types) or
+            (_PY3 and isinstance(o, binary_type))):
+            yield _encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, integer_types):
+            # Quote the integer only when _bigint_as_string is set and
+            # the value lies outside (-2**53, 2**53).
+            yield (str(o)
+                   if (not _bigint_as_string or
+                       (-1 << 53) < o < (1 << 53))
+                       else ('"' + str(o) + '"'))
+        elif isinstance(o, float):
+            yield _floatstr(o)
+        else:
+            for_json = _for_json and getattr(o, 'for_json', None)
+            if for_json and callable(for_json):
+                for chunk in _iterencode(for_json(), _current_indent_level):
+                    yield chunk
+            elif isinstance(o, list):
+                for chunk in _iterencode_list(o, _current_indent_level):
+                    yield chunk
+            else:
+                _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
+                if _asdict and callable(_asdict):
+                    for chunk in _iterencode_dict(_asdict(),
+                            _current_indent_level):
+                        yield chunk
+                elif (_tuple_as_array and isinstance(o, tuple)):
+                    for chunk in _iterencode_list(o, _current_indent_level):
+                        yield chunk
+                elif isinstance(o, dict):
+                    for chunk in _iterencode_dict(o, _current_indent_level):
+                        yield chunk
+                elif _use_decimal and isinstance(o, Decimal):
+                    yield str(o)
+                else:
+                    # Unknown type: hand it to _default and encode whatever
+                    # comes back.  The marker guards against a _default
+                    # that keeps leading back to the same object.
+                    if markers is not None:
+                        markerid = id(o)
+                        if markerid in markers:
+                            raise ValueError("Circular reference detected")
+                        markers[markerid] = o
+                    o = _default(o)
+                    for chunk in _iterencode(o, _current_indent_level):
+                        yield chunk
+                    if markers is not None:
+                        del markers[markerid]
+
+    return _iterencode
diff --git a/simplejson/ordered_dict.py b/simplejson/ordered_dict.py
new file mode 100644 (file)
index 0000000..87ad888
--- /dev/null
@@ -0,0 +1,119 @@
+"""Drop-in replacement for collections.OrderedDict by Raymond Hettinger
+
+http://code.activestate.com/recipes/576693/
+
+"""
+from UserDict import DictMixin
+
+# Modified from original to support Python 2.4, see
+# http://code.google.com/p/simplejson/issues/detail?id=53
+try:
+    all
+except NameError:
+    # No builtin all() on this interpreter: define an equivalent that
+    # stops at the first falsy element.
+    def all(seq):
+        for elem in seq:
+            if elem:
+                continue
+            return False
+        return True
+
+class OrderedDict(dict, DictMixin):
+    """Dictionary that iterates over its keys in insertion order.
+
+    Order is tracked by a circular doubly linked list of
+    ``[key, prev, next]`` nodes anchored at a sentinel (``self.__end``),
+    with ``self.__map`` mapping each key to its node.
+    """
+
+    def __init__(self, *args, **kwds):
+        # Accepts the same arguments as update(), with at most one
+        # positional argument.
+        if len(args) > 1:
+            raise TypeError('expected at most 1 arguments, got %d' % len(args))
+        try:
+            self.__end
+        except AttributeError:
+            # The linked list does not exist yet; clear() creates it.
+            self.clear()
+        self.update(*args, **kwds)
+
+    def clear(self):
+        # Reset to an empty mapping with a fresh sentinel and node map.
+        self.__end = end = []
+        end += [None, end, end]         # sentinel node for doubly linked list
+        self.__map = {}                 # key --> [key, prev, next]
+        dict.clear(self)
+
+    def __setitem__(self, key, value):
+        # A new key is linked in just before the sentinel (i.e. at the
+        # end); an existing key keeps its position.
+        if key not in self:
+            end = self.__end
+            curr = end[1]
+            curr[2] = end[1] = self.__map[key] = [key, curr, end]
+        dict.__setitem__(self, key, value)
+
+    def __delitem__(self, key):
+        # dict.__delitem__ runs first so a missing key raises KeyError
+        # before the linked list is touched.
+        dict.__delitem__(self, key)
+        key, prev, next = self.__map.pop(key)
+        prev[2] = next
+        next[1] = prev
+
+    def __iter__(self):
+        # Walk the "next" links from the sentinel: oldest key first.
+        end = self.__end
+        curr = end[2]
+        while curr is not end:
+            yield curr[0]
+            curr = curr[2]
+
+    def __reversed__(self):
+        # Walk the "prev" links from the sentinel: newest key first.
+        end = self.__end
+        curr = end[1]
+        while curr is not end:
+            yield curr[0]
+            curr = curr[1]
+
+    def popitem(self, last=True):
+        # Remove and return a (key, value) pair: the newest entry when
+        # ``last`` is true, the oldest otherwise.  Raises KeyError when
+        # the dictionary is empty.
+        if not self:
+            raise KeyError('dictionary is empty')
+        # Modified from original to support Python 2.4, see
+        # http://code.google.com/p/simplejson/issues/detail?id=53
+        if last:
+            key = reversed(self).next()
+        else:
+            key = iter(self).next()
+        value = self.pop(key)
+        return key, value
+
+    def __reduce__(self):
+        # Pickle as (class, (items,)) plus any extra instance attributes.
+        # The two private bookkeeping attributes are removed temporarily
+        # so the copy of vars(self) does not include them.
+        items = [[k, self[k]] for k in self]
+        tmp = self.__map, self.__end
+        del self.__map, self.__end
+        inst_dict = vars(self).copy()
+        self.__map, self.__end = tmp
+        if inst_dict:
+            return (self.__class__, (items,), inst_dict)
+        return self.__class__, (items,)
+
+    def keys(self):
+        # Keys as a list, in insertion order.
+        return list(self)
+
+    # Take these from DictMixin rather than dict so they are built on
+    # the ordered methods defined in this class.
+    setdefault = DictMixin.setdefault
+    update = DictMixin.update
+    pop = DictMixin.pop
+    values = DictMixin.values
+    items = DictMixin.items
+    iterkeys = DictMixin.iterkeys
+    itervalues = DictMixin.itervalues
+    iteritems = DictMixin.iteritems
+
+    def __repr__(self):
+        if not self:
+            return '%s()' % (self.__class__.__name__,)
+        return '%s(%r)' % (self.__class__.__name__, self.items())
+
+    def copy(self):
+        # New instance of the same class built from this mapping.
+        return self.__class__(self)
+
+    @classmethod
+    def fromkeys(cls, iterable, value=None):
+        # Alternate constructor: every key from ``iterable`` maps to
+        # ``value``.
+        d = cls()
+        for key in iterable:
+            d[key] = value
+        return d
+
+    def __eq__(self, other):
+        # Against another OrderedDict the comparison is order-sensitive;
+        # against anything else it falls back to plain dict equality.
+        if isinstance(other, OrderedDict):
+            return len(self)==len(other) and \
+                   all(p==q for p, q in  zip(self.items(), other.items()))
+        return dict.__eq__(self, other)
+
+    def __ne__(self, other):
+        return not self == other
diff --git a/simplejson/scanner.py b/simplejson/scanner.py
new file mode 100644 (file)
index 0000000..b7918b3
--- /dev/null
@@ -0,0 +1,128 @@
+"""JSON token scanner
+"""
+import re
+def _import_c_make_scanner():
+    """Return ``make_scanner`` from ``simplejson._speedups``, or None
+    when importing it raises ImportError."""
+    try:
+        from simplejson._speedups import make_scanner
+    except ImportError:
+        make_scanner = None
+    return make_scanner
+c_make_scanner = _import_c_make_scanner()
+
+# Names exported by ``from simplejson.scanner import *``.
+__all__ = ['make_scanner', 'JSONDecodeError']
+
+# Matches a JSON number.  Three capture groups: the integer part with an
+# optional leading minus sign (no leading zeros), an optional fraction
+# beginning with '.', and an optional exponent beginning with 'e' or 'E'.
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
+
+class JSONDecodeError(ValueError):
+    """ValueError subclass raised for malformed JSON documents.
+
+    Attributes set on every instance:
+
+    msg: The unformatted error message
+    doc: The JSON document being parsed
+    pos: The start index of doc where parsing failed
+    end: The end index of doc where parsing failed (may be None)
+    lineno: The line corresponding to pos
+    colno: The column corresponding to pos
+    endlineno: The line corresponding to end (may be None)
+    endcolno: The column corresponding to end (may be None)
+
+    """
+    # Note that this exception is used from _speedups
+    def __init__(self, msg, doc, pos, end=None):
+        # The exception text is the fully formatted message; the raw
+        # pieces stay available as attributes.
+        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
+        self.msg, self.doc, self.pos, self.end = msg, doc, pos, end
+        self.lineno, self.colno = linecol(doc, pos)
+        if end is None:
+            self.endlineno = self.endcolno = None
+        else:
+            self.endlineno, self.endcolno = linecol(doc, end)
+
+    def __reduce__(self):
+        # Rebuild from the original constructor arguments when unpickled.
+        ctor_args = (self.msg, self.doc, self.pos, self.end)
+        return self.__class__, ctor_args
+
+
+def linecol(doc, pos):
+    """Return the 1-based ``(line, column)`` of index ``pos`` in ``doc``."""
+    newlines_before = doc.count('\n', 0, pos)
+    if not newlines_before:
+        # Still on the first line: the column is the offset plus one.
+        return 1, pos + 1
+    # Column is the distance from the most recent newline before pos.
+    return newlines_before + 1, pos - doc.rindex('\n', 0, pos)
+
+
+def errmsg(msg, doc, pos, end=None):
+    """Format ``msg`` with the line/column location of ``pos`` (and of
+    ``end`` when given) within ``doc``.
+
+    Any ``%r`` in ``msg`` is replaced by the repr of the single
+    character of ``doc`` at ``pos``.
+    """
+    start_line, start_col = linecol(doc, pos)
+    text = msg.replace('%r', repr(doc[pos:pos + 1]))
+    if end is not None:
+        end_line, end_col = linecol(doc, end)
+        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
+            text, start_line, start_col, end_line, end_col, pos, end)
+    return '%s: line %d column %d (char %d)' % (
+        text, start_line, start_col, pos)
+
+
+def py_make_scanner(context):
+    """Return a pure-Python ``scan_once(string, idx)`` function.
+
+    ``context`` supplies the parse callbacks and options read below.
+    The returned function decodes the JSON value starting at ``idx``:
+    for literals and numbers it returns ``(value, end_index)``; for
+    strings, objects and arrays it returns whatever the corresponding
+    parse callback returns.  JSONDecodeError is raised when no value
+    starts at ``idx``.
+    """
+    # Read everything off the context once so the scanner closure only
+    # touches local names.
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
+    object_pairs_hook = context.object_pairs_hook
+    memo = context.memo
+
+    def _scan_once(string, idx):
+        expecting = 'Expecting value'
+        try:
+            ch = string[idx]
+        except IndexError:
+            raise JSONDecodeError(expecting, string, idx)
+
+        # Containers and strings are delegated to the context's parsers.
+        if ch == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        if ch == '{':
+            return parse_object((string, idx + 1), encoding, strict,
+                _scan_once, object_hook, object_pairs_hook, memo)
+        if ch == '[':
+            return parse_array((string, idx + 1), _scan_once)
+
+        # Fixed literals.
+        if ch == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        if ch == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        if ch == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
+
+        number = match_number(string, idx)
+        if number is None:
+            # Not a number either: only the special constants remain.
+            if ch == 'N' and string[idx:idx + 3] == 'NaN':
+                return parse_constant('NaN'), idx + 3
+            if ch == 'I' and string[idx:idx + 8] == 'Infinity':
+                return parse_constant('Infinity'), idx + 8
+            if ch == '-' and string[idx:idx + 9] == '-Infinity':
+                return parse_constant('-Infinity'), idx + 9
+            raise JSONDecodeError(expecting, string, idx)
+
+        integer, frac, exp = number.groups()
+        if not (frac or exp):
+            value = parse_int(integer)
+        else:
+            # A fraction or exponent makes it a float.
+            value = parse_float(integer + (frac or '') + (exp or ''))
+        return value, number.end()
+
+    def scan_once(string, idx):
+        # The memo is emptied after every top-level scan, whether it
+        # succeeded or raised.
+        try:
+            return _scan_once(string, idx)
+        finally:
+            memo.clear()
+
+    return scan_once
+
+make_scanner = c_make_scanner or py_make_scanner
diff --git a/simplejson/tool.py b/simplejson/tool.py
new file mode 100644 (file)
index 0000000..062e8e2
--- /dev/null
@@ -0,0 +1,42 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name: line 1 column 3 (char 2)
+
+"""
+from __future__ import with_statement
+import sys
+import simplejson as json
+
+def main():
+    """Validate a JSON document and pretty-print it.
+
+    Command line: ``[infile [outfile]]``.  With no arguments the
+    document is read from stdin and written to stdout; one argument
+    names the input file; two arguments name the input and output files.
+
+    The output file is opened only after the input has been parsed
+    successfully, so invalid input never truncates an existing output
+    file, and a failure to open the output cannot leak the input handle.
+
+    Raises SystemExit with a usage string when more than two arguments
+    are given, or with the decoder's error when the input is not valid
+    JSON.
+    """
+    argc = len(sys.argv)
+    out_path = None
+    if argc == 1:
+        infile = sys.stdin
+    elif argc == 2:
+        infile = open(sys.argv[1], 'r')
+    elif argc == 3:
+        infile = open(sys.argv[1], 'r')
+        out_path = sys.argv[2]
+    else:
+        raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+    with infile:
+        try:
+            # OrderedDict and use_decimal keep the parsed document as
+            # close to the input text as the library allows.
+            obj = json.load(infile,
+                            object_pairs_hook=json.OrderedDict,
+                            use_decimal=True)
+        except ValueError:
+            # Exit with the decoder's message instead of a traceback.
+            # NOTE(review): sys.exc_info() is presumably used for
+            # compatibility with interpreters lacking "except ... as".
+            raise SystemExit(sys.exc_info()[1])
+    # Deferred until the input is known to be valid: opening with 'w'
+    # truncates the target immediately.
+    if out_path is None:
+        outfile = sys.stdout
+    else:
+        outfile = open(out_path, 'w')
+    with outfile:
+        json.dump(obj, outfile, sort_keys=True, indent='    ', use_decimal=True)
+        outfile.write('\n')
+
+
+if __name__ == '__main__':
+    main()

Benjamin Mako Hill || Want to submit a patch?