X-Git-Url: https://projects.mako.cc/source/iron-blogger/blobdiff_plain/8c882ab61f0be6f76df016d98a20db04323c6be1..da7ddff377022280bcba9dc20fefaf9e8ec53b17:/scan-feeds.py?ds=inline diff --git a/scan-feeds.py b/scan-feeds.py index 6373316..82438bb 100755 --- a/scan-feeds.py +++ b/scan-feeds.py @@ -2,13 +2,18 @@ import yaml import feedparser import datetime +import sys from dateutil.parser import parse import dateutil.tz as tz with open('bloggers.yml') as f: users = yaml.safe_load(f.read()) -log = {} +try: + with open('out/report.yml') as f: + log = yaml.safe_load(f.read()) +except IOError: + log = {} START = datetime.datetime(2009, 12, 21, 6) @@ -16,25 +21,17 @@ def parse_published(pub): return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None) def get_date(post): - if 'published' in post: - return post.published - return post.updated + for k in ('published', 'created', 'updated'): + if k in post: + return post[k] def get_link(post): - if 'links' in post: - links = dict((l.rel, l) for l in post.links if 'html' in l.type) - if 'self' in links: - return links['self'].href - elif 'alternate' in links: - return links['alternate'].href - if 'href' in post: - return post.href - if 'link' in post: - return post.link - return None + return post.link def parse_feeds(weeks, uri): feed = feedparser.parse(uri) + if not feed.entries: + print >>sys.stderr, "WARN: no entries for ", uri for post in feed.entries: date = parse_published(get_date(post)) @@ -44,19 +41,23 @@ def parse_feeds(weeks, uri): while len(weeks) <= wn: weeks.append([]) - weeks[wn].append(dict( - date=date, - title=post.title, - url=get_link(post))) - -for (username, u) in users.items(): - weeks = [] - print "[%s]" % (username) - for l in u['links']: - parse_feeds(weeks, l[2]) - log[username] = weeks - for (i, w) in enumerate(weeks): - print " [%d]: %s" % (i, w) + + post = dict(date=date, + title=post.title, + url=get_link(post)) + if post['url'] not in [p['url'] for p in weeks[wn]]: + weeks[wn].append(post) + +if len(sys.argv) > 1: + for username in sys.argv[1:]: + weeks = log.setdefault(username, []) + for l in users[username]['links']: + parse_feeds(weeks, l[2]) +else: + for (username, u) in users.items(): + weeks = log.setdefault(username, []) + for l in u['links']: + parse_feeds(weeks, l[2]) with open('out/report.yml', 'w') as f: yaml.safe_dump(log, f)