#!/usr/bin/python
from lxml import html
import yaml
+import sys
import urllib2
import urlparse
'comments' not in l.attrib.get('title','')]
if candidates:
return candidates[0].attrib['href']
- return links[0].attrib['href']
+ elif links:
+ return links[0].attrib['href']
+ else:
+ print >>sys.stderr, "No link found for %s" % (url,)
+ return None
# NOTE(review): this text appears to be a unified-diff fragment whose
# indentation was stripped; a leading '-' presumably marks a line from the old
# version and a leading '+' a line from the new version -- confirm before
# treating it as runnable Python.
#
# Purpose: walk the link entries of every user, resolve a link for each entry
# that does not have three elements yet, append it to the entry, and finally
# write the whole `users` mapping to bloggers.yml.
for (name, u) in users.items():
- print "[%s]" % name
for e in u['links']:
# The first two elements of an entry are unpacked as its title and URL.
(title, url) = e[0:2]
- print " - %s:" % title.strip()
# Store the title back with surrounding whitespace removed.
e[0] = e[0].strip()
# An entry with exactly three elements is skipped (presumably the third
# element is a link appended by an earlier run -- verify against the data).
if len(e) == 3:
continue
link = fetch_links(url)
# '-' version: uses the result of fetch_links unconditionally, prints it and
# appends it to the entry.
- if not link.startswith('http:'):
- link = urlparse.urljoin(url, link)
- print " %s" % (link,)
- e.append(link)
# '+' version: only acts when fetch_links returned a truthy value, and no
# longer prints the link. A link not starting with 'http:' is resolved
# against the entry's URL with urljoin.
# NOTE(review): 'https:' links also fail the startswith('http:') test and go
# through urljoin; presumably urljoin returns absolute URLs unchanged -- confirm.
+ if link:
+ if not link.startswith('http:'):
+ link = urlparse.urljoin(url, link)
+ e.append(link)
# Write the (possibly updated) users mapping to bloggers.yml, opened in write
# mode so existing file content is replaced.
with open('bloggers.yml', 'w') as f:
yaml.safe_dump(users, f)