# Recovered from the git blobdiff at:
# https://projects.mako.cc/source/iron-blogger/blobdiff_plain/0d15f2365b270c0e5c101c65e77371b4c7ada00c..d97eb533020e1dc9fde31d86b6d8bf5a4fc30b9a:/import-feeds.py
"""Discover feed URLs for the blogs listed in ``bloggers.yml``.

The file maps a user name to a record whose ``links`` value is a list of
``[title, url]`` entries.  For every entry that does not yet have a third
item, the blog page at ``url`` is downloaded, the feed it advertises through
``<link rel="alternate">`` is looked up, and the feed URL is appended to the
entry.  The updated structure is then written back to ``bloggers.yml``.
"""

import contextlib

try:  # Python 2 module names, as the script was originally written
    import urllib2
    import urlparse
except ImportError:  # Python 3: bind the equivalent modules to the old names
    import urllib.parse as urlparse
    import urllib.request as urllib2

from lxml import html
import yaml

BLOGGERS_FILE = 'bloggers.yml'

# Any <link rel="alternate"> whose type mentions rss, atom or rdf.
FEED_XPATH = (
    '//link[@rel="alternate"][contains(@type, "rss") or '
    'contains(@type, "atom") or contains(@type, "rdf")]'
)


def _pick_feed_href(links):
    """Return the preferred feed ``href`` among *links*, or ``None``.

    *links* are elements exposing an ``attrib`` mapping.  Elements without a
    non-empty ``href`` are ignored.  An Atom link whose ``href`` and ``title``
    do not mention "comments" (case-insensitively) wins; otherwise the first
    usable link is returned.
    """
    usable = [link for link in links if link.attrib.get('href')]
    for link in usable:
        attrs = link.attrib
        if ('atom' in attrs.get('type', '')
                and 'comments' not in attrs['href'].lower()
                and 'comments' not in attrs.get('title', '').lower()):
            return attrs['href']
    if usable:
        return usable[0].attrib['href']
    return None


def fetch_links(url):
    """Download the page at *url* and return the feed ``href`` it advertises.

    The ``href`` is returned exactly as written in the page, so it may be
    relative.  Returns ``None`` when the page advertises no usable feed link.
    Errors raised by ``urlopen`` or by the HTML parser propagate to the
    caller.
    """
    # closing() releases the connection on both Python 2 and Python 3.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        page = response.read()
    tree = html.fromstring(page)
    return _pick_feed_href(tree.xpath(FEED_XPATH))


def main():
    """Fill in missing feed URLs in ``bloggers.yml`` and rewrite the file."""
    with open(BLOGGERS_FILE) as f:
        users = yaml.safe_load(f)
    if not users:
        # Empty file: nothing to look up and nothing worth rewriting.
        return

    for name, user in users.items():
        print("[%s]" % name)
        for entry in user['links']:
            title, url = entry[0:2]
            entry[0] = title.strip()
            print(" - %s:" % entry[0])
            if len(entry) == 3:
                # A feed URL is already recorded for this blog.
                continue
            # A single unreachable or feed-less blog must not abort the run:
            # the results are only saved at the very end.  Skipped entries
            # keep two items, so running the script again retries them.
            try:
                link = fetch_links(url)
            except IOError as err:
                print(" could not fetch %s: %s" % (url, err))
                continue
            if link is None:
                print(" no feed link found at %s" % (url,))
                continue
            if not link.startswith(('http:', 'https:')):
                # Relative href: resolve it against the page it came from.
                link = urlparse.urljoin(url, link)
            print(" %s" % (link,))
            entry.append(link)

    with open(BLOGGERS_FILE, 'w') as f:
        yaml.safe_dump(users, f)


if __name__ == '__main__':
    main()