X-Git-Url: https://projects.mako.cc/source/iron-blogger/blobdiff_plain/0d15f2365b270c0e5c101c65e77371b4c7ada00c..d97eb533020e1dc9fde31d86b6d8bf5a4fc30b9a:/import-feeds.py

diff --git a/import-feeds.py b/import-feeds.py
new file mode 100644
index 0000000..fa370c4
--- /dev/null
+++ b/import-feeds.py
@@ -0,0 +1,59 @@
+from lxml import html
+import yaml
+import urllib2
+import urlparse
+
+with open('bloggers.yml') as f:
+    users = yaml.safe_load(f.read())
+
+def fetch_links(url):
+    """Return the href of the feed <link> advertised by the page at `url`.
+
+    Atom links whose href and title do not mention "comments" are
+    preferred; otherwise the first RSS/Atom/RDF alternate link is used.
+    The href is returned as written in the page, so it may be relative.
+    Returns None when the page advertises no feed link.
+    """
+    response = urllib2.urlopen(url)
+    try:
+        tree = html.fromstring(response.read())
+    finally:
+        # urllib2 responses are not context managers; close explicitly.
+        response.close()
+    # Require @href so the attribute lookups below cannot raise KeyError.
+    links = tree.xpath(
+        '//link[@rel="alternate"][@href][contains(@type, "rss") or ' +
+        'contains(@type, "atom") or contains(@type, "rdf")]')
+    if not links:
+        return None
+    candidates = [l for l in links if
+                  'atom' in l.attrib['type'] and
+                  'comments' not in l.attrib['href'].lower() and
+                  'comments' not in l.attrib.get('title','').lower()]
+    if candidates:
+        return candidates[0].attrib['href']
+    return links[0].attrib['href']
+
+for (name, u) in users.items():
+    print "[%s]" % name
+    for e in u['links']:
+        (title, url) = e[0:2]
+        print " - %s:" % title.strip()
+        e[0] = e[0].strip()
+        if len(e) == 3:
+            # Entry already has the third element this script appends.
+            continue
+        link = fetch_links(url)
+        if link is None:
+            # Leave the entry at two elements so a later run retries it.
+            print "   (no feed link found)"
+            continue
+        if not link.startswith('http:'):
+            # urljoin resolves relative hrefs against the page URL; other
+            # absolute hrefs (e.g. https:) still point at the same place.
+            link = urlparse.urljoin(url, link)
+        print "   %s" % (link,)
+        e.append(link)
+
+with open('bloggers.yml', 'w') as f:
+    yaml.safe_dump(users, f)