scan-feeds: Import feeds into the same file.
author    Nelson Elhage <nelhage@mit.edu>
          Tue, 19 Jan 2010 15:52:29 +0000 (10:52 -0500)
committer Nelson Elhage <nelhage@mit.edu>
          Tue, 19 Jan 2010 15:52:29 +0000 (10:52 -0500)
This allows me to accumulate feeds over time in case someone's blog is
down at any given moment.
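
In outline, the change makes each run a load-if-present / merge / rewrite cycle over out/report.yml. A minimal sketch of that pattern (the merge step, done by parse_feeds in the script, is elided here):

    import yaml

    # Start from the previous report when it exists, so posts collected on
    # earlier runs survive a feed being unreachable on this one.
    try:
        with open('out/report.yml') as f:
            log = yaml.safe_load(f.read())
    except IOError:
        log = {}

    # ... fetch each blogger's feeds and merge new posts into log ...

    with open('out/report.yml', 'w') as f:
        yaml.safe_dump(log, f)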

scan-feeds.py

index 9801762b242691d2a66e56216a2e97d01624a9a4..7e27b8c2a5c8e0e9012b744a19f2950747e7e9ff 100755 (executable)
@@ -9,7 +9,11 @@ import dateutil.tz as tz
 with open('bloggers.yml') as f:
     users = yaml.safe_load(f.read())
 
-log = {}
+try:
+    with open('out/report.yml') as f:
+        log = yaml.safe_load(f.read())
+except IOError:
+    log = {}
 
 START = datetime.datetime(2009, 12, 21, 6)
 
@@ -37,19 +41,17 @@ def parse_feeds(weeks, uri):
 
         while len(weeks) <= wn:
             weeks.append([])
-        weeks[wn].append(dict(
-                date=date,
-                title=post.title,
-                url=get_link(post)))
+
+        post = dict(date=date,
+                    title=post.title,
+                    url=get_link(post))
+        if post['url'] not in [p['url'] for p in weeks[wn]]:
+            weeks[wn].append(post)
 
 for (username, u) in users.items():
-    weeks = []
-    print "[%s]" % (username)
+    weeks = log.setdefault(username, [])
     for l in u['links']:
         parse_feeds(weeks, l[2])
-    log[username] = weeks
-    for (i, w) in enumerate(weeks):
-        print " [%d]: %s" % (i, w)
 
 with open('out/report.yml', 'w') as f:
     yaml.safe_dump(log, f)
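
Because parse_feeds now appends into week lists carried over from the previous report (via log.setdefault), re-scanning a feed would record the same post twice without the URL check. A small illustration of that guard, using hypothetical post data in the shape the script builds:

    import datetime

    # One list of posts per week number, as accumulated in report.yml.
    weeks = [[dict(date=datetime.datetime(2009, 12, 28, 6),
                   title='First post',
                   url='http://example.com/1')]]

    # The same post, seen again on a later run.
    post = dict(date=datetime.datetime(2009, 12, 28, 6),
                title='First post',
                url='http://example.com/1')

    wn = 0
    # Only append when no post with this URL is already logged for the week.
    if post['url'] not in [p['url'] for p in weeks[wn]]:
        weeks[wn].append(post)

    assert len(weeks[wn]) == 1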
