projects.mako.cc - iron-blogger/blob - scan-feeds.py
geofft gave me money
[iron-blogger] / scan-feeds.py
1 #!/usr/bin/python
2 import yaml
3 import feedparser
4 import datetime
5 import sys
6 from dateutil.parser import parse
7 import dateutil.tz as tz
8
# Roster of participants; each entry is expected to carry a 'links' list
# (see the driver loop at the bottom of the script).
with open('bloggers.yml') as f:
    users = yaml.safe_load(f)

# Previously accumulated report, keyed by username.  It is re-read so that
# posts which have since dropped out of a feed are not lost.
try:
    with open('out/report.yml') as f:
        # safe_load() returns None for an empty document; fall back to an
        # empty report so the later log.setdefault() call still works.
        log = yaml.safe_load(f) or {}
except IOError:
    # No report has been written yet: start from scratch.
    log = {}

# Start of week 0 (a naive datetime); posts dated before it are ignored.
START = datetime.datetime(2009, 12, 21, 6)
19
def parse_published(pub):
    """Parse a feed timestamp string into a naive datetime in local time.

    ``pub`` is parsed with dateutil, converted to the system's local
    timezone and stripped of its tzinfo, so the result can be compared
    with other naive datetimes.
    """
    stamp = parse(pub)
    if stamp.tzinfo is None:
        # The timestamp carried no UTC offset.  astimezone() refuses naive
        # datetimes on Python 2 (ValueError), so treat the value as already
        # being local time -- which is what Python 3.6+ does implicitly.
        return stamp
    return stamp.astimezone(tz.tzlocal()).replace(tzinfo=None)
22
def get_date(post):
    """Return the entry's 'published' value, or 'updated' if it has none."""
    return post.published if 'published' in post else post.updated
27
def get_link(post):
    """Return the ``link`` attribute of a feed entry."""
    url = post.link
    return url
30
def parse_feeds(weeks, uri):
    """Fetch the feed at ``uri`` and file its posts into ``weeks``.

    ``weeks`` is a list of per-week lists of post dicts (keys ``date``,
    ``title`` and ``url``); index 0 is the week beginning at ``START``.
    The list is extended in place as needed.  Entries dated before
    ``START`` are skipped, as are entries whose URL is already recorded
    for the same week.  A warning is written to stderr when the feed has
    no entries.
    """
    feed = feedparser.parse(uri)
    if not feed.entries:
        # Emits the same text as the old ``print >>sys.stderr, a, b``
        # statement, in a form that also runs on Python 3.
        sys.stderr.write("WARN: no entries for  %s\n" % (uri,))
    for entry in feed.entries:
        date = parse_published(get_date(entry))

        if date < START:
            continue
        # Floor division: plain ``/`` yields a float on Python 3, which
        # cannot be used as a list index below.
        wn = (date - START).days // 7

        # Pad with empty weeks so that weeks[wn] exists.
        while len(weeks) <= wn:
            weeks.append([])

        post = dict(date=date,
                    title=entry.title,
                    url=get_link(entry))
        # The report is re-read on every run, so skip already-seen URLs.
        if not any(p['url'] == post['url'] for p in weeks[wn]):
            weeks[wn].append(post)
50
# Merge every blogger's feeds into that blogger's per-week post lists.
for username, info in users.items():
    user_weeks = log.setdefault(username, [])
    for link in info['links']:
        # The feed URI is the third element of each links entry.
        feed_uri = link[2]
        parse_feeds(user_weeks, feed_uri)

# Persist the merged report for the next run.
with open('out/report.yml', 'w') as report:
    yaml.safe_dump(log, report)

Benjamin Mako Hill || Want to submit a patch?