projects.mako.cc - iron-blogger/blob - import-feeds.py

   1 #!/usr/bin/python
   2 from lxml import html
   3 import yaml
   4 import sys
   5 import urllib2
   6 import urlparse
   7
   8 with open('bloggers.yml') as f:
   9     users = yaml.safe_load(f.read())
  10
  11 def fetch_links(url):
  12     tree = html.fromstring(urllib2.urlopen(url).read())
  13     links = tree.xpath(
  14         '//link[@rel="alternate"][contains(@type, "rss") or ' +
  15         'contains(@type, "atom") or contains(@type, "rdf")]')
  16     candidates = [l for l in links if
  17                   'atom' in l.attrib['type'] and
  18                   'comments' not in l.attrib['href'].lower() and
  19                   'comments' not in l.attrib.get('title','')]
  20     if candidates:
  21         return candidates[0].attrib['href']
  22     elif links:
  23         return links[0].attrib['href']
  24     else:
  25         print >>sys.stderr, "No link found for %s" % (url,)
  26         return None
  27
  28 for (name, u) in users.items():
  29     for e in u['links']:
  30         (title, url) = e[0:2]
  31         e[0] = e[0].strip()
  32         if len(e) == 3:
  33             continue
  34         link = fetch_links(url)
  35         if link:
  36             if not link.startswith('http:'):
  37                 link = urlparse.urljoin(url, link)
  38             e.append(link)
  39
  40 with open('bloggers.yml', 'w') as f:
  41     yaml.safe_dump(users, f)