]> projects.mako.cc - iron-blogger/blob - scan-feeds.py
998a1cc5e4c2dc722e8a28d970ec200ed8dcd334
[iron-blogger] / scan-feeds.py
1 #!/usr/bin/python
2 import yaml
3 import feedparser
4 import datetime
5 from dateutil.parser import parse
6 import dateutil.tz as tz
7
# Origin of week 0: posts are bucketed into 7-day windows counted from here.
START = datetime.datetime(year=2009, month=12, day=21, hour=6)

# Per-user results; filled in by the scan loop and dumped to the report file.
log = {}

# Load the blogger roster from the YAML file in the working directory.
with open('bloggers.yml') as roster:
    users = yaml.safe_load(roster.read())
14
def parse_published(pub):
    """Parse the date string *pub* into a naive datetime in local time.

    The parsed value is shifted into the machine's local timezone and its
    tzinfo is then dropped, so the result can be compared with naive
    datetimes such as ``START``.
    """
    stamp = parse(pub)
    local_stamp = stamp.astimezone(tz.tzlocal())
    return local_stamp.replace(tzinfo=None)
17
def get_date(post):
    """Return the entry's ``published`` value, or ``updated`` if it has none."""
    return post.published if 'published' in post else post.updated
22
def get_link(post):
    """Return the best URL for *post*, or None when nothing usable is found.

    Only links whose ``type`` contains ``'html'`` are considered.  Among
    those, a ``rel`` of ``'self'`` is preferred over ``'alternate'``; when a
    rel occurs more than once the last such link wins.  If no link matches,
    the entry's own ``href`` is used when present.
    """
    if 'links' in post:
        html_by_rel = {}
        for link in post.links:
            if 'html' in link.type:
                # Later links overwrite earlier ones with the same rel.
                html_by_rel[link.rel] = link
        for wanted in ('self', 'alternate'):
            if wanted in html_by_rel:
                return html_by_rel[wanted].href
    if 'href' in post:
        return post.href
    return None
33
def parse_feeds(weeks, uri):
    """Fetch the feed at *uri* and file its entries into *weeks*.

    *weeks* is a list of lists that is mutated in place: index ``n`` holds
    the posts dated in the n-th 7-day window after ``START``.  Entries dated
    before ``START`` are skipped.  Each stored post is a dict with ``date``,
    ``title`` and ``url`` keys.
    """
    feed = feedparser.parse(uri)
    for post in feed.entries:
        date = parse_published(get_date(post))

        if date < START:
            continue
        # Floor division keeps the week number an int (usable as a list
        # index) regardless of the interpreter's "/" semantics.
        wn = (date - START).days // 7

        # Grow the list until weeks[wn] exists; skipped weeks stay empty.
        while len(weeks) <= wn:
            weeks.append([])
        weeks[wn].append(dict(
                date=date,
                title=post.title,
                url=get_link(post)))
49
# Scan every blogger's feeds, echoing progress and collecting posts per week.
# The parenthesised print form emits the same text under the Python 2 print
# statement and also parses as a call on Python 3.
for (username, u) in users.items():
    weeks = []
    print("[%s]" % (username))
    # Element 2 of each link entry is handed to parse_feeds as the feed URI.
    for link in u['links']:
        parse_feeds(weeks, link[2])
    log[username] = weeks
    for (i, w) in enumerate(weeks):
        print(" [%d]: %s" % (i, w))

# Write the collected per-user weekly log to the report file.
with open('out/report.yml', 'w') as f:
    yaml.safe_dump(log, f)

Benjamin Mako Hill || Want to submit a patch?