0e5a1c9b5e69fd8c21b740b2f54b2dd43788f96f
[wikipedia-api-cdsw] / mwclient / page.py
1 import client
2 import errors
3 import listing
4 import compatibility
5 from page_nowriteapi import OldPage
6
7 import urllib
8 import urlparse
9 import time
10
11
class Page(object):
    """A single wiki page, addressed by title or page id, on a given site.

    The constructor caches the metadata returned by the ``info`` query
    (namespace, title, last revision id, protection levels, ...). The methods
    read, save, move, delete and purge the page and list related pages,
    links and revisions through the site's API.
    """

    def __init__(self, site, name, info=None, extra_properties=None):
        """Create the page object, querying the API for metadata if needed.

        site -- site object used for every API call made by this page.
        name -- page title, integer page id, or an object of exactly this
            class, in which case its attributes are copied and nothing is
            queried.
        info -- page info dict taken from a ``query`` result; fetched from
            the site when omitted or empty.
        extra_properties -- optional mapping of additional ``prop`` names to
            an iterable of extra positional arguments for the API call.
            Only used when ``info`` has to be fetched.
        """
        if type(name) is type(self):
            # Copy constructor; deliberately limited to the exact same class.
            self.__dict__.update(name.__dict__)
            return
        self.site = site
        self.name = name
        self.section = None

        if not info:
            extra_props = []
            if extra_properties:
                prop = 'info|' + '|'.join(extra_properties)
                for extra_prop in extra_properties.values():
                    extra_props.extend(extra_prop)
            else:
                prop = 'info'

            # Integer names are page ids, anything else is a title.
            selector = 'pageids' if isinstance(name, int) else 'titles'
            query = {'prop': prop, 'inprop': 'protection', selector: name}
            info = self.site.api('query', *extra_props, **query)
            # Only one page was requested, so take the single result entry.
            info = next(iter(info['query']['pages'].values()))
        self._info = info

        self.namespace = info.get('ns', 0)
        self.name = info.get('title', u'')
        if self.namespace:
            self.page_title = self.strip_namespace(self.name)
        else:
            self.page_title = self.name

        self.touched = client.parse_timestamp(info.get('touched', '0000-00-00T00:00:00Z'))
        self.revision = info.get('lastrevid', 0)
        self.exists = 'missing' not in info
        self.length = info.get('length')
        # Maps protection type -> (level, expiry).
        self.protection = dict((entry['type'], (entry['level'], entry['expiry']))
                               for entry in info.get('protection', ()) if entry)
        self.redirect = 'redirect' in info

        self.last_rev_time = None
        self.edit_time = None

    def redirects_to(self):
        """ Returns the redirect target page, or None if the page is not a redirect page."""
        info = self.site.api('query', prop='pageprops', titles=self.name, redirects='')['query']
        for redirect in info.get('redirects', ()):
            if redirect['from'] == self.name:
                return Page(self.site, redirect['to'])
        return None

    def resolve_redirect(self):
        """ Returns the redirect target page, or the current page if it's not a redirect page."""
        target_page = self.redirects_to()
        if target_page is None:
            return self
        return target_page

    def __repr__(self):
        return "<Page object '%s' for %s>" % (self.name.encode('utf-8'), self.site)

    def __unicode__(self):
        return self.name

    @staticmethod
    def strip_namespace(title):
        """Return title without its namespace prefix.

        A single leading colon is dropped first; everything up to and
        including the first remaining colon is then removed. Titles without
        a colon (including the empty string) are returned unchanged.
        """
        if title.startswith(':'):
            title = title[1:]
        return title[title.find(':') + 1:]

    @staticmethod
    def normalize_title(title):
        """Return title stripped, without a leading colon, with the first
        letter upper-cased and spaces replaced by underscores.

        Empty or whitespace-only titles yield an empty string.
        """
        # TODO: Make site dependent
        title = title.strip()
        if title.startswith(':'):
            title = title[1:]
        if not title:
            return title
        title = title[0].upper() + title[1:]
        return title.replace(' ', '_')

    def can(self, action):
        """Return True if the site's current rights allow action on this page.

        The required right is the protection level recorded for the action,
        or the action name itself when the page is not protected for it.
        """
        level = self.protection.get(action, (action, ))[0]
        if level == 'sysop':
            level = compatibility.protectright(self.site.version)

        return level in self.site.rights

    def get_token(self, type, force=False):
        """Return the token of the given type, fetching it when not cached.

        Tokens are cached in ``self.site.tokens``; ``'0'`` marks a missing
        token. Pass force=True to refresh a cached token.
        """
        self.site.require(1, 11)

        if type not in self.site.tokens:
            self.site.tokens[type] = '0'
        if self.site.tokens.get(type, '0') == '0' or force:
            info = self.site.api('query', titles=self.name,
                                 prop='info', intoken=type)
            for page_info in info['query']['pages'].values():
                if page_info['title'] == self.name:
                    self.site.tokens[type] = page_info['%stoken' % type]
        return self.site.tokens[type]

    def get_expanded(self):
        """Return the latest revision's text with templates expanded, or u''
        when no revision is returned."""
        self.site.require(1, 12)

        revs = self.revisions(prop='content', limit=1, expandtemplates=True)
        try:
            return next(revs)['*']
        except StopIteration:
            return u''

    def edit(self, section=None, readonly=False):
        """Returns wikitext for a specified section or for the whole page.

        Retrieves the latest edit and remembers its text, section and
        timestamp on the instance so that a following save() can use them.
        Returns u'' for a page that does not exist. Raises
        errors.InsufficientPermission if the 'read' right is missing.

        """
        if not self.can('read'):
            raise errors.InsufficientPermission(self)
        if not self.exists:
            return u''

        revs = self.revisions(prop='content|timestamp', limit=1, section=section)
        try:
            rev = next(revs)
            self.text = rev['*']
            self.section = section
            self.last_rev_time = rev['timestamp']
        except StopIteration:
            self.text = u''
            self.section = None
            # No revision was read, so there is no base timestamp to send.
            self.last_rev_time = None
        self.edit_time = time.gmtime()
        return self.text

    def save(self, text=u'', summary=u'', minor=False, bot=True, section=None, **kwargs):
        """Save text of page.

        text and section default to the values remembered by the last
        edit() call. Extra keyword arguments are passed on to the edit API
        call. Returns the ``edit`` part of the API result.

        Raises errors.LoginError, errors.UserBlocked,
        errors.ProtectedPageError or errors.EditError when the edit is not
        allowed or fails.
        """
        if not self.site.logged_in and self.site.force_login:
            # Should we really check for this?
            raise errors.LoginError(self.site)
        if self.site.blocked:
            raise errors.UserBlocked(self.site.blocked)
        if not self.can('edit'):
            raise errors.ProtectedPageError(self)

        if not text:
            text = self.text
        if not section:
            section = self.section

        if not self.site.writeapi:
            return OldPage.save(self, text=text, summary=summary, minor=minor)

        data = {'minor': '1'} if minor else {'notminor': '1'}
        # The timestamps are sent with the edit when available.
        if self.last_rev_time:
            data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time)
        if self.edit_time:
            data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time)
        if bot:
            data['bot'] = '1'
        if section:
            data['section'] = section

        data.update(kwargs)

        def do_edit():
            result = self.site.api('edit', title=self.name, text=text,
                                   summary=summary, token=self.get_token('edit'),
                                   **data)
            if (result['edit'].get('result') or '').lower() == 'failure':
                raise errors.EditError(self, result['edit'])
            return result

        try:
            result = do_edit()
        except errors.APIError as e:
            if e.code == 'badtoken':
                # Retry, but only once to avoid an infinite loop
                self.get_token('edit', force=True)
                try:
                    result = do_edit()
                except errors.APIError as retry_error:
                    self.handle_edit_error(retry_error, summary)
            else:
                self.handle_edit_error(e, summary)

        edit_result = result['edit']
        # The status lives inside the 'edit' dict; the new timestamp is read
        # only when the API actually returned one.
        if edit_result.get('result') == 'Success' and 'newtimestamp' in edit_result:
            self.last_rev_time = client.parse_timestamp(edit_result['newtimestamp'])
        return edit_result

    def handle_edit_error(self, e, summary):
        """Translate an API error from an edit into a more specific exception.

        Must be called from inside an ``except`` block: unknown error codes
        are re-raised with a bare ``raise``.
        """
        if e.code == 'editconflict':
            raise errors.EditError(self, summary, e.info)
        elif e.code in ('protectedtitle', 'cantcreate', 'cantcreate-anon', 'noimageredirect-anon',
                        'noimageredirect', 'noedit-anon', 'noedit'):
            raise errors.ProtectedPageError(self, e.code, e.info)
        else:
            raise

    def move(self, new_title, reason='', move_talk=True, no_redirect=False):
        """Move (rename) page to new_title.

        If user account is an administrator, specify no_redirect as True to
        not leave a redirect.

        If user does not have permission to move page, an InsufficientPermission
        exception is raised.

        """
        if not self.can('move'):
            raise errors.InsufficientPermission(self)

        if not self.site.writeapi:
            return OldPage.move(self, new_title=new_title,
                                reason=reason, move_talk=move_talk)

        data = {}
        if move_talk:
            data['movetalk'] = '1'
        if no_redirect:
            data['noredirect'] = '1'
        # 'from' is a Python keyword, so it is passed as a (name, value) pair.
        result = self.site.api('move', ('from', self.name), to=new_title,
                               token=self.get_token('move'), reason=reason, **data)
        return result['move']

    def delete(self, reason='', watch=False, unwatch=False, oldimage=False):
        """Delete page.

        If user does not have permission to delete page, an InsufficientPermission
        exception is raised.

        """
        if not self.can('delete'):
            raise errors.InsufficientPermission(self)

        if not self.site.writeapi:
            return OldPage.delete(self, reason=reason)

        data = {}
        if watch:
            data['watch'] = '1'
        if unwatch:
            data['unwatch'] = '1'
        if oldimage:
            data['oldimage'] = oldimage
        result = self.site.api('delete', title=self.name,
                               token=self.get_token('delete'),
                               reason=reason, **data)
        return result['delete']

    def purge(self):
        """Purge server-side cache of page. This will re-render templates and other
        dynamic content.

        """
        self.site.raw_index('purge', title=self.name)

    # def watch: requires 1.14

    # Properties
    def _pages_referring(self, list_name, prefix_base, namespace, filterredir,
                         redirect, limit, generator):
        """Build the listing shared by backlinks() and embeddedin()."""
        # Fix title for < 1.11 !!
        prefix = listing.List.get_prefix(prefix_base, generator)
        kwargs = dict(listing.List.generate_kwargs(prefix,
                                                   namespace=namespace, filterredir=filterredir))
        if redirect:
            kwargs['%sredirect' % prefix] = '1'
        kwargs[compatibility.title(prefix, self.site.require(1, 11, raise_error=False))] = self.name

        return listing.List.get_list(generator)(self.site, list_name, prefix_base, limit=limit,
                                                return_values='title', **kwargs)

    def backlinks(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True):
        """Return the 'backlinks' listing for this page."""
        self.site.require(1, 9)
        return self._pages_referring('backlinks', 'bl', namespace, filterredir,
                                     redirect, limit, generator)

    def categories(self, generator=True):
        """Return the 'categories' page property listing."""
        self.site.require(1, 11)
        if generator:
            return listing.PagePropertyGenerator(self, 'categories', 'cl')
        else:
            # TODO: return sortkey if wanted
            return listing.PageProperty(self, 'categories', 'cl', return_values='title')

    def embeddedin(self, namespace=None, filterredir='all', redirect=False, limit=None, generator=True):
        """Return the 'embeddedin' listing for this page."""
        self.site.require(1, 9)
        return self._pages_referring('embeddedin', 'ei', namespace, filterredir,
                                     redirect, limit, generator)

    def extlinks(self):
        """Return the 'extlinks' page property listing."""
        self.site.require(1, 11)
        return listing.PageProperty(self, 'extlinks', 'el', return_values='*')

    def images(self, generator=True):
        """Return the 'images' page property listing."""
        self.site.require(1, 9)
        if generator:
            return listing.PagePropertyGenerator(self, 'images', '')
        else:
            return listing.PageProperty(self, 'images', '', return_values='title')

    def iwlinks(self):
        """Return the 'iwlinks' page property listing as (prefix, *) values."""
        self.site.require(1, 9)  # guessing...
        return listing.PageProperty(self, 'iwlinks', 'iw', return_values=('prefix', '*'))

    def langlinks(self, **kwargs):
        """Return the 'langlinks' page property listing as (lang, *) values."""
        self.site.require(1, 9)
        return listing.PageProperty(self, 'langlinks', 'll', return_values=('lang', '*'), **kwargs)

    def links(self, namespace=None, generator=True, redirects=False):
        """Return the 'links' page property listing, optionally filtered by
        namespace."""
        self.site.require(1, 9)
        kwargs = dict(listing.List.generate_kwargs('pl', namespace=namespace))
        if redirects:
            kwargs['redirects'] = '1'
        if generator:
            return listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs)
        else:
            return listing.PageProperty(self, 'links', 'pl', return_values='title', **kwargs)

    def revisions(self, startid=None, endid=None, start=None, end=None,
                  dir='older', user=None, excludeuser=None, limit=50,
                  prop='ids|timestamp|flags|comment|user', expandtemplates=False, section=None):
        """Return an iterator over the revisions of this page.

        The arguments are passed to the API as ``rv``-prefixed parameters.
        """
        self.site.require(1, 8)
        kwargs = dict(listing.List.generate_kwargs('rv', startid=startid, endid=endid,
                                                   start=start, end=end, user=user, excludeuser=excludeuser))
        kwargs['rvdir'] = dir
        kwargs['rvprop'] = prop
        if expandtemplates:
            kwargs['rvexpandtemplates'] = '1'
        if section:
            kwargs['rvsection'] = section

        return listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, **kwargs)

    def templates(self, namespace=None, generator=True):
        """Return the 'templates' page property listing, optionally filtered
        by namespace."""
        self.site.require(1, 8)
        kwargs = dict(listing.List.generate_kwargs('tl', namespace=namespace))
        # Pass the namespace filter on, the same way links() does.
        if generator:
            return listing.PagePropertyGenerator(self, 'templates', 'tl', **kwargs)
        else:
            return listing.PageProperty(self, 'templates', 'tl', return_values='title', **kwargs)
370
371
class Image(Page):
    """A page that additionally carries image information (``imageinfo``)
    and offers file-related listings and download."""

    def __init__(self, site, name, info=None):
        """Create the image page, requesting ``imageinfo`` along with the
        regular page info when it has to be fetched."""
        site.require(1, 11)
        Page.__init__(self, site, name, info,
                      extra_properties={'imageinfo': (('iiprop',
                                                       compatibility.iiprop(site.version)), )})
        self.imagerepository = self._info.get('imagerepository', '')
        # First imageinfo entry, or an empty dict when none was returned.
        self.imageinfo = self._info.get('imageinfo', ({}, ))[0]

    def imagehistory(self):
        """Return the 'imageinfo' page property listing for this file."""
        return listing.PageProperty(self, 'imageinfo', 'ii',
                                    iiprop=compatibility.iiprop(self.site.version))

    def imageusage(self, namespace=None, filterredir='all', redirect=False,
                   limit=None, generator=True):
        """Return the 'imageusage' listing for this file."""
        self.site.require(1, 11)
        # TODO: Fix for versions < 1.11
        prefix = listing.List.get_prefix('iu', generator)
        kwargs = dict(listing.List.generate_kwargs(prefix, title=self.name,
                                                   namespace=namespace, filterredir=filterredir))
        if redirect:
            kwargs['%sredirect' % prefix] = '1'
        return listing.List.get_list(generator)(self.site, 'imageusage', 'iu',
                                                limit=limit, return_values='title', **kwargs)

    def duplicatefiles(self, limit=None):
        """Return the 'duplicatefiles' page property listing for this file."""
        # The version check lives on the site object; the page has no
        # require() method of its own.
        self.site.require(1, 14)
        return listing.PageProperty(self, 'duplicatefiles', 'df',
                                    dflimit=limit)

    def download(self):
        """Fetch the file through the site's connection and return the result
        of ``connection.get``.

        URLs without a host part are resolved against ``self.site.host``;
        absolute URLs (``http://``, ``https://`` or protocol-relative) are
        used as they are.
        """
        url = self.imageinfo['url']
        parts = urlparse.urlparse(url)
        if not parts[1]:
            # No network location: treat the URL as a path on the wiki host.
            parts = urlparse.urlparse('http://' + self.site.host + url)
        # TODO: query string
        return self.site.connection.get(parts[1], parts[2])

    def __repr__(self):
        return "<Image object '%s' for %s>" % (self.name.encode('utf-8'), self.site)

Benjamin Mako Hill || Want to submit a patch?