scan-feeds: Warn if we can't find someone's entries.
[iron-blogger] / scan-feeds.py
1 #!/usr/bin/python
2 import yaml
3 import feedparser
4 import datetime
5 import sys
6 from dateutil.parser import parse
7 import dateutil.tz as tz
8
# Week 0 begins at this (naive) moment; earlier posts are ignored.
START = datetime.datetime(2009, 12, 21, 6)

# Per-user scan results, accumulated over the run.
log = {}

# Blogger definitions loaded from the YAML config, keyed by username.
with open('bloggers.yml') as f:
    users = yaml.safe_load(f)
15
def parse_published(pub):
    """Convert a feed timestamp string into a naive local-time datetime.

    Args:
        pub: Date/time string taken from a feed entry.

    Returns:
        A ``datetime.datetime`` with ``tzinfo`` stripped. Zone-aware stamps
        are first shifted into the machine's local timezone; stamps that
        carry no zone are returned unchanged.
    """
    stamp = parse(pub)
    if stamp.tzinfo is None:
        # astimezone() raises ValueError on a naive datetime under Python 2,
        # which would abort the whole scan. A stamp without a zone is taken
        # to be local time already (the same assumption Python 3.6+ makes).
        return stamp
    return stamp.astimezone(tz.tzlocal()).replace(tzinfo=None)
18
def get_date(post):
    """Pick the timestamp string to use for a feed entry.

    The entry's ``published`` value wins when the entry contains one;
    otherwise its ``updated`` value is returned.
    """
    return post.published if 'published' in post else post.updated
23
def get_link(post):
    """Return the URL held in the entry's ``link`` attribute."""
    url = post.link
    return url
26
def parse_feeds(weeks, uri):
    """Fetch one feed and file its posts into per-week buckets.

    Args:
        weeks: List of lists, mutated in place. ``weeks[n]`` collects the
            posts dated in the n-th 7-day period counted from ``START``;
            the list is extended with empty buckets as needed.
        uri: Feed location handed to ``feedparser.parse``.

    Posts dated before ``START`` are skipped. A warning is written to
    stderr when the feed yields no entries at all.
    """
    feed = feedparser.parse(uri)
    if not feed.entries:
        # sys.stderr.write is spelled the same on Python 2 and 3, unlike the
        # ``print >>`` statement; the emitted text is unchanged.
        sys.stderr.write("WARN: no entries for  %s\n" % (uri,))
    for post in feed.entries:
        date = parse_published(get_date(post))

        if date < START:
            continue
        # Floor division keeps the week number an int (usable as a list
        # index) even where ``/`` means true division.
        wn = (date - START).days // 7

        # Grow the list so that index ``wn`` exists.
        while len(weeks) <= wn:
            weeks.append([])
        weeks[wn].append(dict(
                date=date,
                title=post.title,
                url=get_link(post)))
44
# Scan every blogger's feeds, echoing the per-week results as we go.
for username, info in users.items():
    buckets = []
    print("[%s]" % (username))
    for link in info['links']:
        # The third field of each link entry is passed on as the feed URI.
        parse_feeds(buckets, link[2])
    log[username] = buckets
    for week_no, posts in enumerate(buckets):
        print(" [%d]: %s" % (week_no, posts))

# Persist the collected results for all users.
with open('out/report.yml', 'w') as f:
    yaml.safe_dump(log, f)

Benjamin Mako Hill || Want to submit a patch?