--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# http://pywikipediabot.sourceforge.net/
+# svn co http://svn.wikimedia.org/svnroot/pywikipedia/trunk/pywikipedia pywikipedia
+
+
+import sys
+import re
+import iso8601
+from simplemediawiki import MediaWiki
+
+# TODO:
+# - any space names in page names need to be turned into '_'s
+# - we need to de-interwikify any interwiki links given to Wiki:
+# - normalize messages for non-unix linebreaks
+
class Wiki:
    """Small helper around the web API of a single MediaWiki installation.

    All requests go through a ``simplemediawiki.MediaWiki`` client stored in
    ``self.wiki``; the methods below only build the request parameters and
    pick the interesting value out of the decoded response.
    """

    def __init__(self, url):
        """Create a client for the wiki whose script path is *url*.

        *url* may be given with or without a trailing slash; ``api.php`` is
        appended to it to form the API endpoint.
        """
        self.url = url
        # Normalise the slash so both 'http://host/w' and 'http://host/w/'
        # resolve to 'http://host/w/api.php'.
        self.wiki = MediaWiki(url.rstrip('/') + '/api.php')

    @staticmethod
    def _first_page(response):
        """Return the first entry of ``response['query']['pages']``.

        Raises ``LookupError`` when the response lists no pages at all.
        """
        pages = response['query']['pages']
        # Iterating the values works on Python 2 lists and Python 3 views
        # alike (``pages.keys()[0]`` only works on Python 2).
        for entry in pages.values():
            return entry
        raise LookupError('API response contains no pages')

    def get_revid_for_date(self, page, date):
        """Return the id of the first revision of *page* at or after *date*.

        *date* is passed to the API unchanged as ``rvstart``.  Raises
        ``KeyError`` when the response carries no ``revisions`` entry for
        the page.
        """
        wc = self.wiki.call({
            'action': 'query',
            'prop': 'revisions',
            'titles': page,
            'rvlimit': 1,
            'redirects': 'true',
            'rvstart': date,
            'rvdir': 'newer',
            'rvprop': 'timestamp|ids'})
        return self._first_page(wc)['revisions'][0]['revid']

    def get_edit_token(self, page):
        """Return the edit token the API reports for *page*."""
        wc = self.wiki.call({
            'action': 'query',
            'prop': 'info',
            # The query module selects pages via 'titles', as the other
            # methods of this class already do.
            'titles': page,
            'intoken': 'edit'})
        return self._first_page(wc)['edittoken']

    def get_talk_page(self, page):
        """Return the title of the talk page belonging to *page*.

        Everything before the first ``:`` is treated as the namespace.
        NOTE(review): a title that merely contains a colon, or that already
        is a talk page, is not detected -- confirm whether callers need that.
        """
        if ':' in page:
            (namespace, rest) = page.split(':', 1)
            return '%s_talk:%s' % (namespace, rest)
        return 'Talk:%s' % page

    def get_page_text(self, page):
        """Return the wikitext of the current revision of *page*."""
        wc = self.wiki.call({
            'action': 'query',
            'prop': 'revisions',
            'titles': page,
            'redirects': 'true',
            'rvprop': 'content'})
        # Only the content was requested, so there is no 'revid' to read;
        # presumably the client uses the API's default JSON layout, where
        # the revision text sits under the '*' key -- TODO confirm.
        return self._first_page(wc)['revisions'][0]['*']

    def append_to_talk_page(self, page, text=''):
        """Append *text* to the talk page of *page* and return the response.

        The edit is flagged as a bot edit.  When *text* is empty no
        ``appendtext`` parameter is sent, which reproduces the request the
        method issued before it accepted a *text* argument.
        """
        params = {
            'action': 'edit',
            'title': self.get_talk_page(page),
            'bot': 'true',
            # Fetch the token once; it is only needed for this request.
            'token': self.get_edit_token(page)}
        if text:
            params['appendtext'] = text
        # An edit response has no 'query' section, so hand the decoded
        # response back to the caller instead of digging into it.
        return self.wiki.call(params)
+
+
+
def make_link(wiki, page, revision):
    """Return the ``index.php`` URL of *revision* of *page* on *wiki*.

    *wiki* is the wiki's script path, with or without a trailing slash.
    Spaces in *page* are turned into underscores; other reserved URL
    characters are not escaped.
    """
    base = wiki.rstrip('/')          # avoid '//index.php'
    title = page.replace(' ', '_')   # a raw space is not valid in a URL
    return '%s/index.php?title=%s&oldid=%s' % (base, title, revision)
+
class WikiEdit:
    """One comment record: a dict of lower-cased headers plus a text body."""

    # Header names listed for a record, and the subset that holds dates.
    fields = ['page', 'date', 'e-mail', 'export-date', 'redirect', 'page-revision', 'user-agent', 'username', 'wiki']
    datefields = ['date', 'export-date']

    def __init__(self, headers=None, body=''):
        """Store *headers* (a fresh dict when omitted) and *body*."""
        # A literal {} default would be shared between all instances.
        self.headers = {} if headers is None else headers
        self.body = body

    def to_comment(self):
        """Return the record rendered as an indented wiki talk-page comment.

        The introduction line mentions the date, the user (or "an anonymous
        user" when no username header is present), the e-mail address and
        the user agent, each only when available; the body follows after a
        blank line.
        """
        # (header name or None for fixed text, template, fallback or None)
        assembly = [
            ('date', 'On %s,', None),
            ('username', '[[User:%s]]', 'an anonymous user'),
            ('e-mail', '(<a href="mailto:%s">e-mail</a>)', None),
            (None, 'made a comment', None),
            ('user-agent', '(using %s)', None),
        ]

        parts = []
        for key, template, fallback in assembly:
            if key is None:
                parts.append(template)
            elif key in self.headers:
                parts.append(template % (self.headers[key],))
            elif fallback is not None:
                parts.append(fallback)

        # Joining with single spaces keeps the spacing right no matter
        # which of the optional parts are missing.
        return ': ' + ' '.join(parts) + ':\n\n' + self.body

    @classmethod
    def from_string(cls, s):
        """Build a record from ``Key: value`` lines, a blank line and a body.

        Header names are lower-cased.  Line endings are normalised to
        ``\\n`` first.  Input without a blank line is treated as headers
        only (empty body), and header lines without a colon are ignored.
        """
        s = s.replace('\r\n', '\n').replace('\r', '\n')
        head, _separator, body = s.partition('\n\n')

        headers = {}
        for line in head.splitlines():
            pieces = re.split(': *', line, maxsplit=1)
            if len(pieces) != 2:
                continue  # not a "Key: value" line
            key, value = pieces
            headers[key.lower()] = value
        return cls(headers, body)
+
+
def parse_edits(lines):
    """Split *lines* into records and return them as a list of WikiEdit.

    *lines* is any iterable of text lines with their line endings kept
    (for example an open file).  A line starting with ``Page:`` begins a
    new record, but only once the text collected so far already contains
    a blank line, i.e. the previous record has reached its body.  Trailing
    input that consists of whitespace only is ignored.
    """
    edits = []
    pending = []  # lines of the record currently being collected

    for line in lines:
        if line.startswith('Page:'):
            record = ''.join(pending)
            if '\n\n' in record:
                edits.append(WikiEdit.from_string(record))
                pending = []
        pending.append(line)

    # Whatever is left after the last 'Page:' line is the final record.
    record = ''.join(pending)
    if record.strip():
        edits.append(WikiEdit.from_string(record))

    return edits
+
if __name__ == '__main__':
    # Parse the .wpe file given on standard input and, for every record,
    # print the revision id it refers to followed by the rendered comment.
    for edit in parse_edits(sys.stdin):
        headers = edit.headers

        if 'page-revision' in headers:
            revid = headers['page-revision']
        elif 'export-date' in headers:
            # There is an export date but no revision: ask the wiki which
            # revision was current at that time.
            wiki = Wiki(headers['wiki'])
            revid = wiki.get_revid_for_date(headers['page'],
                                            headers['export-date'])
        else:
            # Without either header the revision cannot be determined;
            # report it and carry on instead of dying with a KeyError.
            sys.stderr.write(
                'skipping record without page-revision or export-date\n')
            continue

        print(revid)
        print(edit.to_comment())
+
+# iso8601.parse_date("2007-06-20T12:34:40+03:00")