projects.mako.cc - iron-blogger/blob - scan-feeds.py
scan-feeds: Accept a list of users
[iron-blogger] / scan-feeds.py
1 #!/usr/bin/python
2 import yaml
3 import feedparser
4 import datetime
5 import sys
6 from dateutil.parser import parse
7 import dateutil.tz as tz
8
# Load the roster of bloggers; each user's 'links' entries are read further
# down to find the feeds to scan.
with open('bloggers.yml') as f:
    users = yaml.safe_load(f)

# Load the report written by a previous run so that newly found posts are
# merged into it.  If the file cannot be opened, start from an empty log.
try:
    with open('out/report.yml') as f:
        # safe_load returns None for an empty document (e.g. a report left
        # truncated by an interrupted run); fall back to an empty log so the
        # later log.setdefault() calls still work.
        log = yaml.safe_load(f) or {}
except IOError:
    log = {}
17
# Start of week 0, as a naive datetime: parse_feeds ignores posts dated
# before this and numbers weeks in 7-day spans counted from it.
START = datetime.datetime(year=2009, month=12, day=21, hour=6)
19
def parse_published(pub):
    """Parse a feed date string into a naive datetime in local time.

    The string is parsed with dateutil, converted to the machine's local
    timezone, and then has its tzinfo removed, yielding a naive datetime
    like START.
    """
    parsed = parse(pub)
    in_local_zone = parsed.astimezone(tz.tzlocal())
    return in_local_zone.replace(tzinfo=None)
22
def get_date(post):
    """Return the first date field present on a feed entry.

    The keys 'published', 'created' and 'updated' are tried in that order
    and the value of the first one found is returned.  Returns None when
    the entry has none of them.
    """
    # Lazy generator: stops at the first key that is present, so no later
    # key is looked up once a match is found.
    candidates = (post[field]
                  for field in ('published', 'created', 'updated')
                  if field in post)
    return next(candidates, None)
27
def get_link(post):
    """Return the ``link`` attribute of a feed entry."""
    link = post.link
    return link
30
def parse_feeds(weeks, uri):
    """Fetch the feed at *uri* and file its posts into *weeks*.

    weeks is a list of per-week post lists, indexed by the number of whole
    7-day spans between START and the post's date; it is extended in place
    with empty lists as needed.  Each stored post is a dict with 'date',
    'title' and 'url' keys.

    Skipped entries: posts dated before START, posts whose URL is already
    recorded for the same week, and entries with no date field at all.
    A warning is written to stderr when the feed yields no entries.
    """
    feed = feedparser.parse(uri)
    if not feed.entries:
        # Written via sys.stderr.write so it works on Python 2 and 3; the
        # double space reproduces what the original print statement emitted.
        sys.stderr.write("WARN: no entries for  %s\n" % (uri,))
    for entry in feed.entries:
        raw_date = get_date(entry)
        if raw_date is None:
            # Without a date the post cannot be assigned to a week; skip it
            # rather than letting one bad entry abort the whole scan.
            sys.stderr.write("WARN: no date for entry in %s\n" % (uri,))
            continue
        date = parse_published(raw_date)

        if date < START:
            continue
        # Floor division keeps the week index an int on Python 3 as well.
        wn = (date - START).days // 7

        while len(weeks) <= wn:
            weeks.append([])

        record = dict(date=date,
                      title=entry.title,
                      url=get_link(entry))
        # Only record a URL once per week.
        if not any(p['url'] == record['url'] for p in weeks[wn]):
            weeks[wn].append(record)
50
# Scan only the users named on the command line, or every user in the
# roster when no names are given.
usernames = sys.argv[1:] or list(users)
for username in usernames:
    weeks = log.setdefault(username, [])
    for link in users[username]['links']:
        # The third element of each link entry is passed on as the feed URI.
        parse_feeds(weeks, link[2])

# Persist the merged log for the next run.
with open('out/report.yml', 'w') as report:
    yaml.safe_dump(log, report)

Benjamin Mako Hill || Want to submit a patch?