From: Nelson Elhage
Date: Tue, 19 Jan 2010 15:52:29 +0000 (-0500)
Subject: scan-feeds: Import feeds into the same file.
X-Git-Url: https://projects.mako.cc/source/iron-blogger/commitdiff_plain/ab6966baf120f059c56d1f3185e65988e2adbb0b?hp=11dec64ca0ef2b3b1123bf4668da395867ebabe2

scan-feeds: Import feeds into the same file.

This allows me to accumulate feeds over time in case someone's blog is
down at any given moment.
---

diff --git a/scan-feeds.py b/scan-feeds.py
index 9801762..7e27b8c 100755
--- a/scan-feeds.py
+++ b/scan-feeds.py
@@ -9,7 +9,11 @@ import dateutil.tz as tz
 with open('bloggers.yml') as f:
     users = yaml.safe_load(f.read())
 
-log = {}
+try:
+    with open('out/report.yml') as f:
+        log = yaml.safe_load(f.read())
+except IOError:
+    log = {}
 
 START = datetime.datetime(2009, 12, 21, 6)
 
@@ -37,19 +41,17 @@ def parse_feeds(weeks, uri):
         while len(weeks) <= wn:
             weeks.append([])
 
-        weeks[wn].append(dict(
-            date=date,
-            title=post.title,
-            url=get_link(post)))
+
+        post = dict(date=date,
+                    title=post.title,
+                    url=get_link(post))
+        if post['url'] not in [p['url'] for p in weeks[wn]]:
+            weeks[wn].append(post)
 
 for (username, u) in users.items():
-    weeks = []
-    print "[%s]" % (username)
+    weeks = log.setdefault(username, [])
     for l in u['links']:
         parse_feeds(weeks, l[2])
-    log[username] = weeks
-    for (i, w) in enumerate(weeks):
-        print "  [%d]: %s" % (i, w)
 
 with open('out/report.yml', 'w') as f:
     yaml.safe_dump(log, f)