Rename some scripts to make more sense.
[iron-blogger] / scan-feeds.py
diff --git a/scan-feeds.py b/scan-feeds.py
new file mode 100755 (executable)
index 0000000..998a1cc
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+import yaml
+import feedparser
+import datetime
+from dateutil.parser import parse
+import dateutil.tz as tz
+
+with open('bloggers.yml') as f:
+    users = yaml.safe_load(f.read())
+
+log = {}
+
+START = datetime.datetime(2009, 12, 21, 6)
+
+def parse_published(pub):
+    return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None)
+
+def get_date(post):
+    if 'published' in post:
+        return post.published
+    return post.updated
+
+def get_link(post):
+    if 'links' in post:
+        links = dict((l.rel, l) for l in post.links if 'html' in l.type)
+        if 'self' in links:
+            return links['self'].href
+        elif 'alternate' in links:
+            return links['alternate'].href
+    if 'href' in post:
+        return post.href
+    return None
+
+def parse_feeds(weeks, uri):
+    feed = feedparser.parse(uri)
+    for post in feed.entries:
+        date = parse_published(get_date(post))
+
+        if date < START:
+            continue
+        wn = (date - START).days / 7
+
+        while len(weeks) <= wn:
+            weeks.append([])
+        weeks[wn].append(dict(
+                date=date,
+                title=post.title,
+                url=get_link(post)))
+
+for (username, u) in users.items():
+    weeks = []
+    print "[%s]" % (username)
+    for l in u['links']:
+        parse_feeds(weeks, l[2])
+    log[username] = weeks
+    for (i, w) in enumerate(weeks):
+        print " [%d]: %s" % (i, w)
+
+with open('out/report.yml', 'w') as f:
+    yaml.safe_dump(log, f)

Benjamin Mako Hill || Want to submit a patch?