From dfc0b520a1cbd590a3bf2941c66e3b7556a6cb2d Mon Sep 17 00:00:00 2001
From: Dafydd Harries
Date: Thu, 7 Apr 2011 22:30:06 -0400
Subject: [PATCH] import

---
 backwash.js |   3 +
 backwash.py | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 notes       |  44 ++++++++++++
 sample.wpl  |  14 ++++
 4 files changed, 291 insertions(+)
 create mode 100644 backwash.js
 create mode 100644 backwash.py
 create mode 100644 notes
 create mode 100644 sample.wpl

diff --git a/backwash.js b/backwash.js
new file mode 100644
index 0000000..8f8c6cc
--- /dev/null
+++ b/backwash.js
@@ -0,0 +1,3 @@
+
+
+http://luasog.sourceforge.net/
diff --git a/backwash.py b/backwash.py
new file mode 100644
index 0000000..c712028
--- /dev/null
+++ b/backwash.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Turn comments made in an offline wiki reader into talk-page comments.
+
+Edit records are read from standard input (the record format is described
+in ``notes``; ``sample.wpl`` is an example).  For every record the page
+revision it refers to and the talk-page comment built from it are printed.
+"""
+
+# http://pywikipediabot.sourceforge.net/
+# svn co http://svn.wikimedia.org/svnroot/pywikipedia/trunk/pywikipedia pywikipedia
+
+from __future__ import print_function
+
+import sys
+import re
+import iso8601
+from simplemediawiki import MediaWiki
+
+# TODO:
+# - any space names in page names need to be turned into '_'s
+#   (only make_link() does so for now)
+# - we need to de-interwikify any interwiki links given to Wiki:
+
+
+class Wiki:
+    """Small client for the MediaWiki API of a single wiki."""
+
+    def __init__(self, url):
+        """Create a client for the wiki whose base URL is *url*.
+
+        ``api.php`` is expected directly under *url*; a trailing '/' on
+        *url* is optional.
+        """
+        self.url = url
+        self.wiki = MediaWiki(url.rstrip('/') + '/api.php')
+
+    @staticmethod
+    def _only_page(response):
+        """Return the first page record of a ``query`` API *response*.
+
+        The 'pages' mapping is keyed by page id, which is not known in
+        advance, so the first value is taken instead of looking up a key.
+        """
+        return next(iter(response['query']['pages'].values()))
+
+    def get_revid_for_date(self, page, date):
+        """Return the id of the first revision of *page* at or after *date*.
+
+        *date* is handed to the API unchanged as ``rvstart``.
+        """
+        # NOTE(review): 'newer' selects the first revision made after the
+        # export; if the revision the reader actually saw is wanted, this
+        # presumably needs 'older' -- confirm the intent.
+        response = self.wiki.call({
+            'action': 'query',
+            'prop': 'revisions',
+            'titles': page,
+            'rvlimit': 1,
+            'redirects': 'true',
+            'rvstart': date,
+            'rvdir': 'newer',
+            'rvprop': 'timestamp|ids'})
+        return self._only_page(response)['revisions'][0]['revid']
+
+    def get_edit_token(self, page):
+        """Return an edit token for *page*."""
+        response = self.wiki.call({
+            'action': 'query',
+            'prop': 'info',
+            # Pages are selected with 'titles'; a 'page' parameter is not
+            # part of action=query.
+            'titles': page,
+            'intoken': 'edit'})
+        return self._only_page(response)['edittoken']
+
+    def get_talk_page(self, page):
+        """Return the name of the talk page that belongs to *page*.
+
+        A name that already is a talk page is returned unchanged.  Text
+        before the first ':' is taken to be a namespace, so a main-namespace
+        title that contains a ':' is not handled correctly.
+        """
+        if ':' not in page:
+            return 'Talk:%s' % page
+        namespace, rest = page.split(':', 1)
+        normalized = namespace.strip().lower().replace(' ', '_')
+        if normalized == 'talk' or normalized.endswith('_talk'):
+            return page
+        return '%s_talk:%s' % (namespace, rest)
+
+    def get_page_text(self, page):
+        """Return the wikitext of the current revision of *page*."""
+        # query&prop=revisions&rvprop=content&format=xml&titles=Main%20Page
+        response = self.wiki.call({
+            'action': 'query',
+            'prop': 'revisions',
+            'titles': page,
+            'redirects': 'true',
+            'rvprop': 'content'})
+        # The revision text is returned under the '*' key.
+        return self._only_page(response)['revisions'][0]['*']
+
+    def append_to_talk_page(self, page, text=''):
+        """Append *text* to the talk page of *page*; return the API response.
+
+        *page* may itself be a talk page.
+        """
+        talk_page = self.get_talk_page(page)
+        return self.wiki.call({
+            'action': 'edit',
+            'title': talk_page,
+            'appendtext': text,
+            'bot': 'true',
+            # The token has to belong to the page that is edited.
+            'token': self.get_edit_token(talk_page)})
+
+
+def make_link(wiki, page, revision):
+    """Return the URL of *revision* of *page* on the wiki at URL *wiki*."""
+    return '%s/index.php?title=%s&oldid=%s' % (
+        wiki.rstrip('/'), page.replace(' ', '_'), revision)
+
+
+class WikiEdit:
+    """One edit record: lower-cased header fields plus a free-text body."""
+
+    fields = ['page', 'date', 'e-mail', 'export-date', 'redirect',
+              'page-revision', 'user-agent', 'username', 'wiki']
+    datefields = ['date', 'export-date']
+
+    def __init__(self, headers=None, body=''):
+        """Store *headers* (a dict keyed by lower-case name) and *body*."""
+        # A fresh dict per instance; a shared {} default would leak
+        # headers from one edit into the next.
+        self.headers = {} if headers is None else headers
+        self.body = body
+
+    def to_comment(self):
+        """Return the wikitext comment announcing and quoting this edit."""
+        headers = self.headers
+        parts = []
+        if 'date' in headers:
+            parts.append('On %s,' % headers['date'])
+        if 'username' in headers:
+            parts.append('[[User:%s]]' % headers['username'])
+        else:
+            parts.append('an anonymous user')
+        if 'e-mail' in headers:
+            # Only the fact that an address was given is mentioned.
+            parts.append('(e-mail)')
+        parts.append('made a comment')
+        if 'user-agent' in headers:
+            parts.append('(using %s)' % headers['user-agent'])
+        return ': ' + ' '.join(parts) + ':\n\n' + self.body
+
+    @classmethod
+    def from_string(cls, s):
+        """Build a WikiEdit from the text of one record.
+
+        A record is a run of 'Key: value' header lines, a blank line and
+        the body.  Line endings are normalised to line feeds, header lines
+        without a ':' are skipped and blank lines around the record are
+        dropped.  A record without a blank line has an empty body.
+        """
+        s = re.sub(r'\r\n?', '\n', s).strip('\n')
+        head, _, body = s.partition('\n\n')
+        headers = {}
+        for line in head.splitlines():
+            key, colon, value = line.partition(':')
+            if colon:
+                headers[key.strip().lower()] = value.strip()
+        return cls(headers, body)
+
+
+def parse_edits(lines):
+    """Return the list of WikiEdit records found in *lines*.
+
+    *lines* is any iterable of text lines.  A line that starts with
+    'Page:' begins a new record; blank stretches produce no record.
+    """
+    edits = []
+    record = []
+
+    def flush():
+        text = ''.join(record)
+        if text.strip():
+            edits.append(WikiEdit.from_string(text))
+        del record[:]
+
+    for line in lines:
+        if line.startswith('Page:'):
+            flush()
+        record.append(line)
+    flush()
+    return edits
+
+
+def resolve_revision(edit):
+    """Return the revision id *edit* refers to, or None if it is unknown.
+
+    An explicit Page-revision header is used as is.  Otherwise, when both
+    an Export-date and a Wiki header are present, the wiki is asked for
+    the revision matching the export date.
+    """
+    headers = edit.headers
+    if 'page-revision' in headers:
+        return headers['page-revision']
+    if 'export-date' in headers and 'wiki' in headers:
+        wiki = Wiki(headers['wiki'])
+        return wiki.get_revid_for_date(headers['page'],
+                                       headers['export-date'])
+    return None
+
+
+def main():
+    """Print revision id and talk-page comment of each record on stdin."""
+    for edit in parse_edits(sys.stdin):
+        if 'page' not in edit.headers:
+            print('skipping record without a Page header',
+                  file=sys.stderr)
+            continue
+        print(resolve_revision(edit))
+        print(edit.to_comment())
+
+
+if __name__ == '__main__':
+    main()
+
+# iso8601.parse_date("2007-06-20T12:34:40+03:00")
diff --git a/notes b/notes
new file mode 100644
index 0000000..32b851f
--- /dev/null
+++ b/notes
@@ -0,0 +1,44 @@
+Page [MANDATORY]
+
+    Page can either be a talk page or can be any other page in the
+    wiki. If the page is not a talk page, the message will be appended
+    to the associated talk page.
+
+    Section can be indicated in this field with a #
+
+
+Date [OPTIONAL]
+
+    The time that the change was made (based on the local clock).
+
+Username [OPTIONAL]
+
+    The Username, on the wiki in question, of the person who made the edit.
+
+E-mail [OPTIONAL]
+
+    Email address of the person who made the edit.
+
+Export-date [OPTIONAL]
+
+    The date that the offline reader's database was exported.
+
+Redirect [OPTIONAL]
+
+    Can be set to `Nofollow' for
+
+Page-revision [OPTIONAL]
+
+    The MediaWiki revision ID of the page.
+
+User-agent [OPTIONAL]
+
+    Name and version of the program that generated the query.
+
+Username [OPTIONAL]
+Wiki [OPTIONAL]
+
+    Identification of the wiki to push changes to.
+
+    The form should be a link to the MediaWiki API or, optionally, an
+    interwiki link "code" to a known wiki.
diff --git a/sample.wpl b/sample.wpl
new file mode 100644
index 0000000..b004822
--- /dev/null
+++ b/sample.wpl
@@ -0,0 +1,14 @@
+Page: Benjamin_Mako_Hill
+E-mail: mako@atdot.cc
+Username: Benjamin Mako Hill
+Export-date: 2009-06-21T05:45:00Z
+Wiki: http://wiki.mako.cc/
+
+This is the first test text from backwash.py. If you read this it means that we've posted it successfully!
+
+
+Page: Benjamin Mako Hill
+Page-revision: 2810
+Wiki: http://wiki.mako.cc/
+
+This is the second test item uploaded from backwash.py.
-- 
2.30.2