projects.mako.cc - iron-blogger/blob - import-feeds.py
Weekly report in HTML
[iron-blogger] / import-feeds.py
1 #!/usr/bin/python
2 from lxml import html
3 import yaml
4 import urllib2
5 import urlparse
6
# Read the blogger registry from disk, then parse it once the file is closed.
with open('bloggers.yml') as config:
    raw_yaml = config.read()
users = yaml.safe_load(raw_yaml)
9
def fetch_links(url):
    """Return the href of the feed advertised by the HTML page at *url*.

    The page is downloaded and searched for ``<link rel="alternate">``
    elements whose ``type`` mentions rss, atom or rdf.  An Atom feed that
    does not look like a comments feed is preferred; failing that, the
    first advertised feed of any kind is returned.

    The href is returned exactly as written in the page, so it may be
    relative to *url*; resolving it is left to the caller.

    Raises IndexError if the page advertises no feed link with an href.
    Errors raised while downloading or parsing the page propagate.
    """
    # Imported locally so the function does not depend on a new
    # module-level import.
    from contextlib import closing

    # urllib2 responses are not context managers in Python 2; closing()
    # releases the connection even if read() or parsing fails.
    with closing(urllib2.urlopen(url)) as response:
        tree = html.fromstring(response.read())

    links = tree.xpath(
        '//link[@rel="alternate"][contains(@type, "rss") or '
        'contains(@type, "atom") or contains(@type, "rdf")]')
    # A <link> without an href cannot be used, and would otherwise raise
    # KeyError below.
    links = [l for l in links if l.attrib.get('href')]
    if not links:
        # Same exception type the bare links[0] used to raise, but with a
        # message that says which page was at fault.
        raise IndexError("no RSS/Atom/RDF feed link found at %s" % url)

    for l in links:
        if 'atom' not in l.attrib['type']:
            continue
        # Compare case-insensitively in both places: titles such as
        # "Comments Feed" are capitalised on many blogs.
        if 'comments' in l.attrib['href'].lower():
            continue
        if 'comments' in l.attrib.get('title', '').lower():
            continue
        return l.attrib['href']

    # No suitable Atom feed: fall back to the first advertised feed.
    return links[0].attrib['href']
22
# Walk every blogger's link entries and append a feed URL to each entry
# that does not already have three elements.  The first two elements of an
# entry are its title and page URL.
for name, blogger in users.items():
    print("[%s]" % name)
    for entry in blogger['links']:
        title, url = entry[:2]
        clean_title = title.strip()
        print(" - %s:" % clean_title)
        # Store the stripped title so the whitespace is not written back.
        entry[0] = clean_title
        if len(entry) == 3:
            # Third element already present: nothing to look up.
            continue
        feed = fetch_links(url)
        # Anything that does not start with http: is resolved against the
        # page URL it came from.
        feed = feed if feed.startswith('http:') else urlparse.urljoin(url, feed)
        print("   %s" % (feed,))
        entry.append(feed)
36
# Serialize before opening the file: opening with 'w' truncates
# bloggers.yml immediately, so an error raised inside safe_dump would
# otherwise leave the registry empty.
serialized = yaml.safe_dump(users)
with open('bloggers.yml', 'w') as f:
    f.write(serialized)

Benjamin Mako Hill || Want to submit a patch?