#!/usr/bin/python
# scan-feeds.py -- restored from a whitespace-collapsed git diff
# (new file, mode 100755). Origin:
# https://projects.mako.cc/source/iron-blogger/blobdiff_plain/bf1fb66fb40e3ad7a6bb8a3c361aa3318cfc0331..75eca5ec298ea59b5dbd3f968f5104cd2a690579:/scan-feeds.py
"""Scan every blogger's feeds and bucket their posts into 7-day weeks.

The script loads ``bloggers.yml``, parses each feed listed under a user's
``links`` entry, groups the posts dated on or after ``START`` by week number
(week 0 begins at ``START``), prints the per-user result and finally dumps the
whole mapping to ``out/report.yml``.
"""
import datetime

import dateutil.tz as tz
import feedparser
import yaml
from dateutil.parser import parse

# Beginning of week 0. Naive on purpose: it is compared against the naive
# local-time datetimes produced by parse_published().
START = datetime.datetime(2009, 12, 21, 6)


def parse_published(pub):
    """Parse the date string *pub* into a naive local-time datetime.

    The parsed value is converted to the local timezone and its tzinfo is
    then dropped so it can be compared with ``START``.
    """
    # NOTE(review): if *pub* carries no UTC offset the parsed datetime is
    # naive, and astimezone() rejects naive datetimes on Python 2 -- confirm
    # that the scanned feeds always include an offset.
    return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None)


def get_date(post):
    """Return the date string of *post*, preferring ``published``.

    Falls back to ``updated``; returns None when the entry has neither, so
    the caller can skip it instead of crashing on a missing attribute.
    """
    if 'published' in post:
        return post.published
    return getattr(post, 'updated', None)


def get_link(post):
    """Return a URL for *post*, or None when no candidate is found.

    Among the entry's links whose type mentions ``html``, the ``self`` link
    wins over the ``alternate`` link; failing both, the entry's own ``href``
    is used if present.
    """
    if 'links' in post:
        by_rel = dict((link.rel, link) for link in post.links
                      if 'html' in link.type)
        for rel in ('self', 'alternate'):
            if rel in by_rel:
                return by_rel[rel].href
    if 'href' in post:
        return post.href
    return None


def parse_feeds(weeks, uri):
    """Parse the feed at *uri* and file its posts into *weeks* in place.

    *weeks* is a list of lists; index ``n`` collects the posts dated within
    the n-th 7-day window after ``START``. The list is grown with empty
    buckets as needed. Posts dated before ``START`` are ignored.
    """
    feed = feedparser.parse(uri)
    for post in feed.entries:
        raw_date = get_date(post)
        if raw_date is None:
            # An entry without any date cannot be assigned to a week.
            continue
        date = parse_published(raw_date)

        if date < START:
            continue
        # Floor division keeps the week number an int (usable as a list
        # index) regardless of the interpreter's division semantics.
        wn = (date - START).days // 7

        while len(weeks) <= wn:
            weeks.append([])
        weeks[wn].append(dict(
            date=date,
            title=post.title,
            url=get_link(post)))


with open('bloggers.yml') as f:
    users = yaml.safe_load(f.read())

# Maps each username to its list of weekly post buckets.
log = {}

for (username, u) in users.items():
    weeks = []
    print("[%s]" % (username))
    for link in u['links']:
        # The third field of each links item is what gets parsed as a feed.
        parse_feeds(weeks, link[2])
    log[username] = weeks
    for (i, w) in enumerate(weeks):
        print(" [%d]: %s" % (i, w))

with open('out/report.yml', 'w') as f:
    yaml.safe_dump(log, f)