X-Git-Url: https://projects.mako.cc/source/wikipedia-api-cdsw/blobdiff_plain/f17f0a3f63dd03d70cdc693da0bda53a1e85671b..99337ede51f8abbcf0663e7c93672f70f9b6ddd9:/mwclient/listing.py diff --git a/mwclient/listing.py b/mwclient/listing.py deleted file mode 100644 index d6bc1e8..0000000 --- a/mwclient/listing.py +++ /dev/null @@ -1,237 +0,0 @@ -import client -import page - - -class List(object): - - def __init__(self, site, list_name, prefix, limit=None, return_values=None, max_items=None, *args, **kwargs): - # NOTE: Fix limit - self.site = site - self.list_name = list_name - self.generator = 'list' - self.prefix = prefix - - kwargs.update(args) - self.args = kwargs - - if limit is None: - limit = site.api_limit - self.args[self.prefix + 'limit'] = str(limit) - if 'continue' not in self.args: - self.args['continue'] = '' - - self.count = 0 - self.max_items = max_items - - self._iter = iter(xrange(0)) - - self.last = False - self.result_member = list_name - self.return_values = return_values - - def __iter__(self): - return self - - def next(self, full=False): - if self.max_items is not None: - if self.count >= self.max_items: - raise StopIteration - try: - item = self._iter.next() - self.count += 1 - if 'timestamp' in item: - item['timestamp'] = client.parse_timestamp(item['timestamp']) - if full: - return item - - if type(self.return_values) is tuple: - return tuple((item[i] for i in self.return_values)) - elif self.return_values is None: - return item - else: - return item[self.return_values] - - except StopIteration: - if self.last: - raise StopIteration - self.load_chunk() - return List.next(self, full=full) - - def load_chunk(self): - data = self.site.api('query', (self.generator, self.list_name), *[(str(k), v) for k, v in self.args.iteritems()]) - if not data: - # Non existent page - raise StopIteration - self.set_iter(data) - - if data.get('continue'): - # New style continuation, added in MediaWiki 1.21 - self.args.update(data['continue']) - - elif self.list_name in data.get('query-continue', ()): - # Old style continuation - self.args.update(data['query-continue'][self.list_name]) - - else: - self.last = True - - def set_iter(self, data): - if self.result_member not in data['query']: - self._iter = iter(xrange(0)) - elif type(data['query'][self.result_member]) is list: - self._iter = iter(data['query'][self.result_member]) - else: - self._iter = data['query'][self.result_member].itervalues() - - def __repr__(self): - return "" % (self.list_name, self.site) - - @staticmethod - def generate_kwargs(_prefix, *args, **kwargs): - kwargs.update(args) - for key, value in kwargs.iteritems(): - if value is not None and value is not False: - yield _prefix + key, value - - @staticmethod - def get_prefix(prefix, generator=False): - if generator: - return 'g' + prefix - else: - return prefix - - @staticmethod - def get_list(generator=False): - if generator: - return GeneratorList - else: - return List - - -class GeneratorList(List): - - def __init__(self, site, list_name, prefix, *args, **kwargs): - List.__init__(self, site, list_name, prefix, *args, **kwargs) - - self.args['g' + self.prefix + 'limit'] = self.args[self.prefix + 'limit'] - del self.args[self.prefix + 'limit'] - self.generator = 'generator' - - self.args['prop'] = 'info|imageinfo' - self.args['inprop'] = 'protection' - - self.result_member = 'pages' - - self.page_class = page.Page - - def next(self): - info = List.next(self, full=True) - if info['ns'] == 14: - return Category(self.site, u'', info) - if info['ns'] == 6: - return page.Image(self.site, u'', info) - return page.Page(self.site, u'', info) - - def load_chunk(self): - # Put this here so that the constructor does not fail - # on uninitialized sites - self.args['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata|archivename' - return List.load_chunk(self) - - -class Category(page.Page, GeneratorList): - - def __init__(self, site, name, info=None, namespace=None): - page.Page.__init__(self, site, name, info) - kwargs = {} - kwargs['gcmtitle'] = self.name - if namespace: - kwargs['gcmnamespace'] = namespace - GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs) - - def __repr__(self): - return "" % (self.name.encode('utf-8'), self.site) - - def members(self, prop='ids|title', namespace=None, sort='sortkey', - dir='asc', start=None, end=None, generator=True): - prefix = self.get_prefix('cm', generator) - kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace, - sort=sort, dir=dir, start=start, end=end, title=self.name)) - return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs) - - -class PageList(GeneratorList): - - def __init__(self, site, prefix=None, start=None, namespace=0, redirects='all'): - self.namespace = namespace - - kwargs = {} - if prefix: - kwargs['apprefix'] = prefix - if start: - kwargs['apfrom'] = start - - GeneratorList.__init__(self, site, 'allpages', 'ap', - apnamespace=str(namespace), apfilterredir=redirects, **kwargs) - - def __getitem__(self, name): - return self.get(name, None) - - def get(self, name, info=()): - if self.namespace == 14: - return Category(self.site, self.site.namespaces[14] + ':' + name, info) - elif self.namespace == 6: - return page.Image(self.site, self.site.namespaces[6] + ':' + name, info) - elif self.namespace != 0: - return page.Page(self.site, self.site.namespaces[self.namespace] + ':' + name, info) - else: - # Guessing page class - if type(name) is not int: - namespace = self.guess_namespace(name) - if namespace == 14: - return Category(self.site, name, info) - elif namespace == 6: - return page.Image(self.site, name, info) - return page.Page(self.site, name, info) - - def guess_namespace(self, name): - normal_name = page.Page.normalize_title(name) - for ns in self.site.namespaces: - if ns == 0: - continue - if name.startswith(u'%s:' % self.site.namespaces[ns].replace(' ', '_')): - return ns - elif ns in self.site.default_namespaces: - if name.startswith(u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')): - return ns - return 0 - - -class PageProperty(List): - - def __init__(self, page, prop, prefix, *args, **kwargs): - List.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs) - self.page = page - self.generator = 'prop' - - def set_iter(self, data): - for page in data['query']['pages'].itervalues(): - if page['title'] == self.page.name: - self._iter = iter(page.get(self.list_name, ())) - return - raise StopIteration - - -class PagePropertyGenerator(GeneratorList): - - def __init__(self, page, prop, prefix, *args, **kwargs): - GeneratorList.__init__(self, page.site, prop, prefix, titles=page.name, *args, **kwargs) - self.page = page - - -class RevisionsIterator(PageProperty): - - def load_chunk(self): - if 'rvstartid' in self.args and 'rvstart' in self.args: - del self.args['rvstart'] - return PageProperty.load_chunk(self)