import yaml
import feedparser
import datetime
+import sys
from dateutil.parser import parse
import dateutil.tz as tz
# Load the blogger roster. The loops at the bottom of the script read a
# 'links' list from each entry of `users`.
with open('bloggers.yml') as f:
users = yaml.safe_load(f.read())
# Patch: instead of always starting from an empty log, reuse the report
# written by a previous run. If the report cannot be opened (IOError) the
# log starts out empty, as before.
# NOTE(review): an existing but empty report.yml makes safe_load return None,
# and the later log.setdefault(...) calls would then fail -- confirm whether
# that case can occur.
-log = {}
+try:
+ with open('out/report.yml') as f:
+ log = yaml.safe_load(f.read())
+except IOError:
+ log = {}
# Patch: move the reference start timestamp from 2009-12-21 06:00 to
# 2011-10-24 06:00.
# NOTE(review): presumably the origin from which week indices are counted;
# that computation is not visible in this excerpt -- confirm.
-START = datetime.datetime(2009, 12, 21, 6)
+START = datetime.datetime(2011, 10, 24, 6)
# Parse the timestamp string `pub` with dateutil, convert it to the machine's
# local timezone, and return it as a naive datetime (tzinfo removed).
# NOTE(review): astimezone() is called on whatever parse() returns; a
# timestamp without a UTC offset parses to a naive datetime -- confirm the
# feeds always carry an offset.
def parse_published(pub):
return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None)
# Return the first timestamp field present on the feed entry `post`.
# Patch: the old code returned `published` when present and otherwise read
# `updated` unconditionally; the new code tries 'published', 'created' and
# 'updated' in that order.
# NOTE(review): when none of the three keys is present the new loop falls off
# the end and the function returns None, which the caller passes straight to
# parse_published().
def get_date(post):
- if 'published' in post:
- return post.published
- return post.updated
+ for k in ('published', 'created', 'updated'):
+ if k in post:
+ return post[k]
# Return the URL recorded for the feed entry `post`.
# Patch: drops the manual search through post.links ('self' first, then
# 'alternate', restricted to links whose type contains 'html') and the
# post.href fallback, and returns post.link directly.
# NOTE(review): the removed code returned None when no link was found; the
# new code has no such fallback -- confirm every entry exposes `link`.
def get_link(post):
- if 'links' in post:
- links = dict((l.rel, l) for l in post.links if 'html' in l.type)
- if 'self' in links:
- return links['self'].href
- elif 'alternate' in links:
- return links['alternate'].href
- if 'href' in post:
- return post.href
- if 'link' in post:
- return post.link
- return None
+ return post.link
# Fetch the feed at `uri` and file each of its entries into `weeks`, a list
# of per-week lists of post dicts (keys: date, title, url). `weeks` is
# modified in place; nothing is returned.
# NOTE(review): `wn` is used below but never assigned in the lines visible
# here; its computation is presumably in context omitted from this
# excerpt -- confirm before relying on this hunk.
def parse_feeds(weeks, uri):
feed = feedparser.parse(uri)
+
# Patch: progress and warning messages are written to stderr (Python 2
# `print >>stream` syntax).
+ print >>sys.stderr, "Parsing: %s" % uri
+
+ if not feed.entries:
+ print >>sys.stderr, "WARN: no entries for ", uri
for post in feed.entries:
date = parse_published(get_date(post))
# Grow `weeks` with empty lists until index `wn` exists.
while len(weeks) <= wn:
weeks.append([])
# Patch: the unconditional append is removed (its replacement is at the end
# of this block), together with the old driver loop, which rebuilt every
# user's weeks from an empty list and printed them to stdout.
- weeks[wn].append(dict(
- date=date,
- title=post.title,
- url=get_link(post)))
-
-for (username, u) in users.items():
- weeks = []
- print "[%s]" % (username)
- for l in u['links']:
- parse_feeds(weeks, l[2])
- log[username] = weeks
- for (i, w) in enumerate(weeks):
- print " [%d]: %s" % (i, w)
+
# Patch: add the post only when no post already filed under this week has the
# same URL -- presumably so that re-scanning a feed on top of a previously
# saved report does not duplicate entries.
# NOTE(review): the local name `post` is rebound here from the feed entry to
# the new dict.
+ post = dict(date=date,
+ title=post.title,
+ url=get_link(post))
+ if post['url'] not in [p['url'] for p in weeks[wn]]:
+ weeks[wn].append(post)
+
# Patch: the driver now accepts optional usernames on the command line. With
# arguments, only those users' feeds are scanned; without, every user loaded
# from bloggers.yml is scanned. In both cases the user's week list is taken
# from the loaded log (created empty if absent) and element [2] of each
# 'links' entry is passed to parse_feeds as the feed URI.
# NOTE(review): a command-line username that is not a key of `users` raises
# KeyError at users[username].
+if len(sys.argv) > 1:
+ for username in sys.argv[1:]:
+ weeks = log.setdefault(username, [])
+ for l in users[username]['links']:
+ parse_feeds(weeks, l[2])
+else:
+ for (username, u) in users.items():
+ weeks = log.setdefault(username, [])
+ for l in u['links']:
+ parse_feeds(weeks, l[2])
# Write the updated log back to the report file.
with open('out/report.yml', 'w') as f:
yaml.safe_dump(log, f)