Start implementing plotting from the Harry Potter wikipedia data-set.
author arokem <arokem@gmail.com>
Thu, 7 May 2015 03:16:24 +0000 (20:16 -0700)
committer Ariel Rokem <arokem@gmail.com>
Thu, 7 May 2015 03:16:24 +0000 (20:16 -0700)
001-hello-plot.py [new file with mode: 0644]
README
load_hp_data.py [new file with mode: 0644]

diff --git a/001-hello-plot.py b/001-hello-plot.py
new file mode 100644 (file)
index 0000000..ef749ff
--- /dev/null
@@ -0,0 +1,16 @@
+"""
+001-hello-plot.py
+
+A first plot with matplotlib: draw the line through (1, 1), (2, 2), (3, 3)
+on a single set of axes and display the figure.
+"""
+
+import matplotlib.pyplot as plt
+
+figure = plt.figure()
+# 111 = a 1x1 grid of subplots, first (and only) cell.
+axis = figure.add_subplot(111)
+# Draw on the axes created above rather than on pyplot's implicit
+# "current axes", so it is explicit which axes receive the line.
+axis.plot([1, 2, 3], [1, 2, 3])
+plt.show()
diff --git a/README b/README
index 46259447c3f1261cba7cb1c3ee4bb6c9af625cb2..34835e1de48095776dc54bcd7cb38139c18619a7 100644 (file)
--- a/README
+++ b/README
@@ -1,4 +1,3 @@
 Using this dataset requires that you first download the following dataset:
 
-http://communitydata.cc/~mako/wikipedia_bios.csv
-
+http://communitydata.cc/~mako/hp_wiki.tsv
diff --git a/load_hp_data.py b/load_hp_data.py
new file mode 100644 (file)
index 0000000..85cf142
--- /dev/null
@@ -0,0 +1,35 @@
+""" load_hp_data.py
+
+A module for loading data from the Harry Potter wikipedia data set.
+
+Reads the tab-separated file 'hp_wiki.tsv' (path relative to the current
+working directory) at import time and exposes two module-level views:
+
+rows    -- list with one mapping per record (field name -> value)
+columns -- dict mapping each field name to the list of that field's values
+
+In both views the 'timestamp' field is a datetime object, not a string.
+"""
+import csv
+from datetime import datetime
+
+# Layout of the 'timestamp' field as it appears in the file.
+TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
+
+rows = []
+columns = {}
+
+# The with-block closes the file even if a row fails to parse.
+with open('hp_wiki.tsv', 'r') as f:
+    reader = csv.DictReader(f, delimiter='\t')
+
+    # fieldnames is None for an empty file; treat that as "no columns".
+    for fieldname in reader.fieldnames or []:
+        columns[fieldname] = []
+
+    for row in reader:
+        # Convert timestamp from a string to a datetime:
+        row['timestamp'] = datetime.strptime(row['timestamp'], TIMESTAMP_FORMAT)
+        rows.append(row)
+        for fieldname, value in row.items():
+            columns[fieldname].append(value)
\ No newline at end of file

Benjamin Mako Hill || Want to submit a patch?