1 # python-simplemediawiki - Extremely low-level wrapper to the MediaWiki API
2 # Copyright (C) 2010 Red Hat, Inc.
4 # This library is free software; you can redistribute it and/or modify it under
5 # the terms of the GNU Lesser General Public License as published by the Free
6 # Software Foundation; either version 2.1 of the License, or (at your option)
9 # This library is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 # You should have received a copy of the GNU General Public License along with
15 # this program. If not, see <http://www.gnu.org/licenses/>.
18 simplemediawiki is an extremely low-level wrapper to the MediaWiki API. It
19 automatically handles cookies and gzip compression so that you can make basic
20 calls to the API in the easiest way possible. It also provides a few functions
21 to make day-to-day API access easier.
23 To use this module, instantiate a MediaWiki object, passing it the URL of
24 api.php for the wiki you want to work with. Calls go through MediaWiki.call().
25 A generic login wrapper as well as functions to determine limits and get a list
26 of namespaces are provided for your convenience.
28 >>> from simplemediawiki import MediaWiki
29 >>> wiki = MediaWiki('http://en.wikipedia.org/w/api.php')
30 >>> wiki.call({'action': 'query', 'prop': 'revisions', 'titles': 'Main Page'})
31 {u'query': {u'pages': {...}}}
36 import iso8601.iso8601 as iso8601
38 from StringIO import StringIO
42 __author__ = 'Ian Weller <ian@ianweller.org>'
44 DEFAULT_UA = ('python-simplemediawiki/%s '
45 '+https://github.com/ianweller/python-simplemediawiki') \
51 Class to represent a MediaWiki installation with an enabled API.
53 api_url: URL to api.php (usually similar to http://example.com/w/api.php)
57 _psuedo_namespaces = None
58 _mediawiki_version = None
def __init__(self, api_url, cookie_file=None, user_agent=DEFAULT_UA):
    """
    Prepare the cookie jar and HTTP opener used for every API request.

    api_url: URL to api.php for the wiki.
    cookie_file: optional path of a Mozilla-format cookie file. When
        given, cookies are kept in that file; otherwise they are kept in
        memory only.
    user_agent: value sent as the User-agent header on each request.
    """
    self._api_url = api_url
    if cookie_file:
        self._cj = cookielib.MozillaCookieJar(cookie_file)
        try:
            self._cj.load()
        except IOError:
            # No readable cookie file yet: write out the (empty) jar so
            # the file exists for later saves.
            self._cj.save()
    else:
        self._cj = cookielib.CookieJar()
    self._opener = urllib2.build_opener(
        urllib2.HTTPCookieProcessor(self._cj)
    )
    self._opener.addheaders = [('User-agent', user_agent)]
def _fetch_http(self, url, params):
    """
    Send params, URL-encoded, as the request body to url and return the
    response body, transparently decompressing gzip responses.

    url: address to request.
    params: mapping (or sequence of pairs) accepted by urllib.urlencode.
    """
    request = urllib2.Request(url, urllib.urlencode(params))
    request.add_header('Accept-encoding', 'gzip')
    response = self._opener.open(request)
    try:
        # Persist cookies only when the jar is file-backed.
        if isinstance(self._cj, cookielib.MozillaCookieJar):
            self._cj.save()
        content_encoding = response.headers.get('Content-Encoding')
        body = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    if content_encoding == 'gzip':
        body = gzip.GzipFile(fileobj=StringIO(body)).read()
    return body
def call(self, params):
    """
    Make a call to the wiki. Returns a dictionary that represents the JSON
    returned by the API.

    params: dictionary of API parameters. It is not modified; the
        'format' parameter is forced to 'json' on a private copy.
    """
    query = dict(params)
    query['format'] = 'json'
    return json.loads(self._fetch_http(self._api_url, query))
def normalize_api_url(self):
    """
    This function checks the given URL for a correct API endpoint and
    returns that URL, while also helpfully setting this object's API URL to
    it. If it can't magically conjure an API endpoint, it returns False.
    """
    if self._normalize_api_url_tester(self._api_url)[1]:
        return self._api_url
    # if there's an index.php in the URL, we might find the API next to it
    if 'index.php' in self._api_url:
        test_api_url = self._api_url.split('index.php')[0] + 'api.php'
        if self._normalize_api_url_tester(test_api_url)[1]:
            self._api_url = test_api_url
            return self._api_url
    return False
def _normalize_api_url_tester(self, api_url):
    """
    Probe api_url with a siteinfo query.

    Returns a (raw_body, parsed_json) tuple; parsed_json is None when the
    body is not valid JSON. When the reply carries a generator string of
    the form "<name> <version>", the version part is remembered in
    self._mediawiki_version.
    """
    data = self._fetch_http(api_url, {'action': 'query',
                                      'meta': 'siteinfo',
                                      'siprop': 'general',
                                      'format': 'json'})
    try:
        data_json = json.loads(data)
    except ValueError:
        return (data, None)
    # may as well set the version
    try:
        version_string = data_json['query']['general']['generator']
        self._mediawiki_version = version_string.split(' ', 1)[1]
    except (KeyError, IndexError, TypeError, AttributeError):
        # Reply is JSON but not shaped like a siteinfo answer; the
        # version is simply left as it was.
        pass
    return (data, data_json)
def login(self, user, passwd, token=None):
    """
    Convenience function for logging into the wiki. It should never be
    necessary to provide a token argument; it is part of the login process
    since MediaWiki 1.15.3 (see MediaWiki bug 23076).

    Returns True when the wiki reports 'Success' and False otherwise,
    including when the reply carries no 'login' section at all.
    """
    data = {'action': 'login',
            'lgname': user,
            'lgpassword': passwd}
    if token:
        data['lgtoken'] = token
    outcome = self.call(data).get('login', {})
    status = outcome.get('result')
    if status == 'Success':
        # Rights may differ for the new user; force limits() to re-query.
        self._high_limits = None
        return True
    if status == 'NeedToken' and not token:
        # Retry once with the token the wiki handed back.
        return self.login(user, passwd, outcome['token'])
    return False
159 Convenience function for logging out of the wiki.
161 data = {'action': 'logout'}
163 self._high_limits = None
def limits(self, low, high):
    """
    Convenience function for determining appropriate limits in the API. If
    the logged in user has the "apihighlimits" right, it will return the
    high argument; otherwise it will return the low argument.

    The answer is cached on the object after the first lookup.
    """
    if self._high_limits is None:
        result = self.call({'action': 'query',
                            'meta': 'userinfo',
                            'uiprop': 'rights'})
        rights = result['query']['userinfo']['rights']
        self._high_limits = 'apihighlimits' in rights
    if self._high_limits:
        return high
    return low
def namespaces(self, psuedo=True):
    """
    Fetches a list of namespaces for this wiki.

    Returns a dictionary mapping integer namespace id to namespace name.
    Namespaces with a negative id are included only when psuedo is true.
    The wiki is queried once; later calls use the cached result.
    """
    if self._namespaces is None:
        result = self.call({'action': 'query',
                            'meta': 'siteinfo',
                            'siprop': 'namespaces'})
        regular = {}
        negative = {}
        for nsid, info in result['query']['namespaces'].items():
            if int(nsid) >= 0:
                regular[int(nsid)] = info['*']
            else:
                negative[int(nsid)] = info['*']
        # Publish the cache only once it is complete, so an exception
        # above cannot leave a half-filled cache behind.
        self._namespaces = regular
        self._psuedo_namespaces = negative
    if psuedo:
        retval = {}
        retval.update(self._namespaces)
        retval.update(self._psuedo_namespaces)
        return retval
    return self._namespaces
def parse_date(date):
    """
    Turn a date string as provided by the MediaWiki API into the value
    produced by iso8601.parse_date (a datetime.datetime).
    """
    parsed = iso8601.parse_date(date)
    return parsed