import yaml
import feedparser
import datetime
+import sys
from dateutil.parser import parse
import dateutil.tz as tz
with open('bloggers.yml') as f:
users = yaml.safe_load(f.read())
-log = {}
+# Resume from the report written by a previous run so that already-recorded
+# posts are kept; start with an empty log when no report exists yet.
+try:
+    with open('out/report.yml') as f:
+        # safe_load() yields None for an empty or comment-only file; fall
+        # back to {} so the later log.setdefault() call cannot hit None.
+        log = yaml.safe_load(f.read()) or {}
+except IOError:
+    log = {}
START = datetime.datetime(2009, 12, 21, 6)
return post.updated
def get_link(post):
- if 'links' in post:
- links = dict((l.rel, l) for l in post.links if 'html' in l.type)
- if 'self' in links:
- return links['self'].href
- elif 'alternate' in links:
- return links['alternate'].href
- if 'href' in post:
- return post.href
- return None
+    """Return the URL of a feed entry, or None when it has no link."""
+    # The removed implementation returned None for link-less entries; use
+    # .get() so such an entry does not raise AttributeError and abort the
+    # run before the report is written.
+    return post.get('link')
def parse_feeds(weeks, uri):
feed = feedparser.parse(uri)
+ if not feed.entries:
+ print >>sys.stderr, "WARN: no entries for ", uri
for post in feed.entries:
date = parse_published(get_date(post))
while len(weeks) <= wn:
weeks.append([])
- weeks[wn].append(dict(
- date=date,
- title=post.title,
- url=get_link(post)))
+
+ post = dict(date=date,
+ title=post.title,
+ url=get_link(post))
+ if post['url'] not in [p['url'] for p in weeks[wn]]:
+ weeks[wn].append(post)
for (username, u) in users.items():
- weeks = []
- print "[%s]" % (username)
+ weeks = log.setdefault(username, [])
for l in u['links']:
parse_feeds(weeks, l[2])
- log[username] = weeks
- for (i, w) in enumerate(weeks):
- print " [%d]: %s" % (i, w)
with open('out/report.yml', 'w') as f:
yaml.safe_dump(log, f)