1 # python-simplemediawiki - Extremely low-level wrapper to the MediaWiki API
2 # Copyright (C) 2010 Red Hat, Inc.
4 # This library is free software; you can redistribute it and/or modify it under
5 # the terms of the GNU Lesser General Public License as published by the Free
6 # Software Foundation; either version 2.1 of the License, or (at your option)
9 # This library is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 # You should have received a copy of the GNU General Public License along with
15 # this program. If not, see <http://www.gnu.org/licenses/>.
18 simplemediawiki is an extremely low-level wrapper to the MediaWiki API. It
19 automatically handles cookies and gzip compression so that you can make basic
20 calls to the API in the easiest way possible. It also provides a few functions
21 to make day-to-day API access easier.
23 To use this module, instantiate a MediaWiki object, passing it the URL of
24 api.php for the wiki you want to work with. Calls go through MediaWiki.call().
25 A generic login wrapper as well as functions to determine limits and get a list
26 of namespaces are provided for your convenience.
28 >>> from simplemediawiki import MediaWiki
29 >>> wiki = MediaWiki('http://en.wikipedia.org/w/api.php')
30 >>> wiki.call({'action': 'query', 'prop': 'revisions', 'titles': 'Main Page'})
31 {u'query': {u'pages': {...}}}
38 from StringIO import StringIO
42 __author__ = 'Ian Weller <ian@ianweller.org>'
44 DEFAULT_UA = ('python-simplemediawiki/%s '
45 '+https://github.com/ianweller/python-simplemediawiki') \
51 Class to represent a MediaWiki installation with an enabled API.
53 api_url: URL to api.php (usually similar to http://example.com/w/api.php)
57 _psuedo_namespaces = None
58 _mediawiki_version = None
60 def __init__(self, api_url, cookie_file=None, user_agent=DEFAULT_UA):
61 self._api_url = api_url
63 self._cj = cookielib.MozillaCookieJar(cookie_file)
70 self._cj = cookielib.CookieJar()
71 self._opener = urllib2.build_opener(
72 urllib2.HTTPCookieProcessor(self._cj)
74 self._opener.addheaders = [('User-agent', user_agent)]
76 def _fetch_http(self, url, params):
78 Standard HTTP request handler for this class with gzip and cookie
81 request = urllib2.Request(url, urllib.urlencode(params))
82 request.add_header('Accept-encoding', 'gzip')
83 response = self._opener.open(request)
84 if isinstance(self._cj, cookielib.MozillaCookieJar):
86 if response.headers.get('Content-Encoding') == 'gzip':
87 compressed = StringIO(response.read())
88 gzipper = gzip.GzipFile(fileobj=compressed)
91 data = response.read()
def call(self, params):
    """
    Make a call to the wiki and return the decoded JSON response.

    The request is sent to this object's API URL (self._api_url) through
    self._fetch_http(), and the response body is decoded with json.loads().

    params: dictionary of API parameters. It is copied before the request
            is built, so the caller's dictionary is left untouched. Any
            'format' entry is overridden with 'json', because the response
            is always decoded as JSON.

    Raises ValueError if the response body is not valid JSON; anything
    raised by self._fetch_http() propagates unchanged.
    """
    # Work on a copy: assigning into the caller's dictionary would leak the
    # forced 'format' key back into their data.
    request_params = dict(params)
    request_params['format'] = 'json'
    return json.loads(self._fetch_http(self._api_url, request_params))
102 def normalize_api_url(self):
104 This function checks the given URL for a correct API endpoint and
105 returns that URL, while also helpfully setting this object's API URL to
106 it. If it can't magically conjure an API endpoint, it returns False.
108 def tester(self, api_url):
110 Attempts to fetch general information about the MediaWiki instance
111 in order to test whether the given URL will return JSON.
113 data = self._fetch_http(api_url, {'action': 'query',
118 data_json = json.loads(data)
119 # may as well set the version
121 version_string = data_json['query']['general']['generator']
122 self._mediawiki_version = version_string.split(' ', 1)[1]
125 return (data, data_json)
129 data, data_json = tester(self, self._api_url)
133 # if there's an index.php in the URL, we might find the API
134 if 'index.php' in self._api_url:
135 test_api_url = self._api_url.split('index.php')[0] + 'api.php'
137 test_data, test_data_json = tester(self, test_api_url)
138 print (test_data, test_data_json)
140 self._api_url = test_api_url
145 def login(self, user, passwd, token=None):
147 Convenience function for logging into the wiki. It should never be
148 necessary to provide a token argument; it is part of the login process
149 since MediaWiki 1.15.3 (see MediaWiki bug 23076).
151 data = {'action': 'login',
153 'lgpassword': passwd}
155 data['lgtoken'] = token
156 result = self.call(data)
157 if result['login']['result'] == 'Success':
158 self._high_limits = None
160 elif result['login']['result'] == 'NeedToken' and not token:
161 return self.login(user, passwd, result['login']['token'])
167 Convenience function for logging out of the wiki.
169 data = {'action': 'logout'}
171 self._high_limits = None
174 def limits(self, low, high):
176 Convenience function for determining appropriate limits in the API. If
177 the logged in user has the "apihighlimits" right, it will return the
178 high argument; otherwise it will return the low argument.
180 if self._high_limits == None:
181 result = self.call({'action': 'query',
184 self._high_limits = 'apihighlimits' in \
185 result['query']['userinfo']['rights']
186 if self._high_limits:
191 def namespaces(self, psuedo=True):
193 Fetches a list of namespaces for this wiki.
195 if self._namespaces == None:
196 result = self.call({'action': 'query',
198 'siprop': 'namespaces'})
199 self._namespaces = {}
200 self._psuedo_namespaces = {}
201 for nsid in result['query']['namespaces']:
203 self._namespaces[int(nsid)] = \
204 result['query']['namespaces'][nsid]['*']
206 self._psuedo_namespaces[int(nsid)] = \
207 result['query']['namespaces'][nsid]['*']
210 retval.update(self._namespaces)
211 retval.update(self._psuedo_namespaces)
214 return self._namespaces
def parse_date(date):
    """
    Converts dates provided by the MediaWiki API into datetime.datetime
    objects.

    date: the date value to convert; it is handed unchanged to
          iso8601.parse_date(), whose result is returned as-is.
    """
    # NOTE(review): there is no self parameter, so if this is defined inside
    # a class it needs a @staticmethod decorator -- none is visible from
    # here; confirm against the enclosing scope.
    return iso8601.parse_date(date)