import-feeds: Handle blogs without <link> tags.
author: Nelson Elhage <nelhage@mit.edu>
Tue, 9 Feb 2010 22:23:57 +0000 (17:23 -0500)
committer: Nelson Elhage <nelhage@mit.edu>
Tue, 9 Feb 2010 22:23:57 +0000 (17:23 -0500)
import-feeds.py

index 9d4f9bb4268fd9b9ee1b13d7317fc3a117cf6571..55741672d4daba1a26b48526a362a963e8347a5f 100755 (executable)
@@ -1,6 +1,7 @@
 #!/usr/bin/python
 from lxml import html
 import yaml
+import sys
 import urllib2
 import urlparse
 
@@ -18,7 +19,11 @@ def fetch_links(url):
                   'comments' not in l.attrib.get('title','')]
     if candidates:
         return candidates[0].attrib['href']
-    return links[0].attrib['href']
+    elif links:
+        return links[0].attrib['href']
+    else:
+        print >>sys.stderr, "No link found for %s" % (url,)
+        return None
 
 for (name, u) in users.items():
     for e in u['links']:
@@ -27,9 +32,10 @@ for (name, u) in users.items():
         if len(e) == 3:
             continue
         link = fetch_links(url)
-        if not link.startswith('http:'):
-            link = urlparse.urljoin(url, link)
-        e.append(link)
+        if link:
+            if not link.startswith('http:'):
+                link = urlparse.urljoin(url, link)
+            e.append(link)
 
 with open('bloggers.yml', 'w') as f:
     yaml.safe_dump(users, f)

Benjamin Mako Hill || Want to submit a patch?