merged in all the code from ariel's branches and moved wikibios to a subdir
[matplotlib-cdsw] / load_hp_data.py
1 """ load_hp_data.py 
2
3 A module for loading data from the Harry Potter Wikipedia data set
4
5 """ 
6 import csv
7 from datetime import datetime
8
# Load the Harry Potter edit data from 'hp_wiki.tsv' (tab-separated, with a
# header line) and expose it in two shapes:
#
#   rows    -- list of per-row dicts, sorted by 'timestamp' (oldest first)
#   columns -- dict mapping each header name to the list of that column's
#              values, in the same sorted order as ``rows``
#
# The ``with`` block guarantees the file handle is closed once parsing
# finishes (or fails).  All reading has to happen inside it because
# csv.DictReader pulls lines from the open file lazily.
with open('hp_wiki.tsv', 'r') as f:
    reader = csv.DictReader(f, delimiter='\t')

    # One (initially empty) list per column named in the header line.
    # ``fieldnames`` is None when the file is completely empty, hence ``or []``.
    columns = {}
    for fieldname in reader.fieldnames or []:
        columns[fieldname] = []

    rows = []
    for row in reader:
        # Convert timestamp from a string to a datetime:
        row['timestamp'] = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
        # Convert size from a string to an integer:
        row['size'] = int(row['size'])
        rows.append(row)

# Sort these things, so that they give you nice ordered time-series
# (ascending by timestamp; sorted() is stable, so ties keep file order).
sort_rows = sorted(rows, key=lambda row: row['timestamp'])

# ``rows`` and ``sort_rows`` deliberately name the same sorted list.
rows = sort_rows

# Transpose the sorted rows into per-column lists.
for row in sort_rows:
    for fieldname, value in row.items():
        columns[fieldname].append(value)

Benjamin Mako Hill || Want to submit a patch?