.gitignore
[iron-blogger] / import.py
#!/usr/bin/python
"""Convert the HTML table saved at /tmp/iron-blogger.html into YAML.

Every ``<tr>`` in the document except the first is read as one entry:

* the text of ``td[1]/tt`` becomes the entry's key (the username),
* each ``<a>`` in ``td[2]`` becomes a ``(link text, href)`` pair under
  ``links``,
* the stripped text of ``td[3]`` is stored under ``start``.

The resulting mapping is printed to standard output as a YAML document.
"""
from lxml import html
import yaml

# Use a context manager so the input file is closed deterministically
# instead of leaking the handle until interpreter exit.
with open('/tmp/iron-blogger.html') as page:
    tree = html.fromstring(page.read())

who = {}
# The first <tr> is skipped (presumably the table's header row -- verify
# against the saved page).
for tr in tree.xpath('//tr')[1:]:
    # Each [0] raises IndexError when a row lacks the expected cell, so a
    # malformed row aborts the import rather than being silently dropped.
    # str() turns lxml's XPath string results into plain strings for YAML.
    username = str(tr.xpath('td[1]/tt/text()')[0])
    links = [(a.text, a.attrib['href']) for a in tr.xpath('td[2]/a')]
    start = str(tr.xpath('td[3]/text()')[0]).strip()
    who[username] = dict(links=links, start=start)

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(yaml.safe_dump(who))

Benjamin Mako Hill || Want to submit a patch?