From 224a19e5f73433a38828d6bd8a655fce7c220058 Mon Sep 17 00:00:00 2001
From: arokem
Date: Wed, 6 May 2015 20:16:24 -0700
Subject: [PATCH 1/1] Start implementing plotting from the Harry Potter wikipedia data-set.

---
Review note: load_hp_data.py now reads the data file inside a `with`
block so the handle is always closed, and an empty file yields empty
`rows`/`columns` instead of a TypeError. 001-hello-plot.py plots on the
axes object it creates. The blob ids on the `index` lines below are
carried over from the original commit and do not describe the revised
file contents.

 001-hello-plot.py | 13 +++++++++++++
 README            |  3 +--
 load_hp_data.py   | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 001-hello-plot.py
 create mode 100644 load_hp_data.py

diff --git a/001-hello-plot.py b/001-hello-plot.py
new file mode 100644
index 0000000..ef749ff
--- /dev/null
+++ b/001-hello-plot.py
@@ -0,0 +1,13 @@
+"""
+
+hello_plot.py
+
+A first plot with matplotlib
+
+"""
+
+import matplotlib.pyplot as plt
+figure = plt.figure()
+axis = figure.add_subplot(111)
+axis.plot([1, 2, 3], [1, 2, 3])
+plt.show()
diff --git a/README b/README
index 4625944..34835e1 100644
--- a/README
+++ b/README
@@ -1,4 +1,3 @@
 Using this dataset requires that you first download the following dataset:
 
-http://communitydata.cc/~mako/wikipedia_bios.csv
-
+http://communitydata.cc/~mako/hp_wiki.tsv
diff --git a/load_hp_data.py b/load_hp_data.py
new file mode 100644
index 0000000..85cf142
--- /dev/null
+++ b/load_hp_data.py
@@ -0,0 +1,32 @@
+""" load_hp_data.py
+
+A module for loading data from the Harry Potter wikipedia data set
+
+Importing this module reads 'hp_wiki.tsv' from the current working
+directory and exposes two module-level names:
+
+rows : list with one mapping per record, keyed by column name
+columns : dict mapping each column name to the list of its values
+
+"""
+import csv
+from datetime import datetime
+
+rows = []
+columns = {}
+
+# Use a context manager so the file is closed even if a row fails to parse:
+with open('hp_wiki.tsv', 'r') as f:
+    reader = csv.DictReader(f, delimiter='\t')
+
+    # fieldnames is None for an empty file; treat that as "no columns":
+    for fieldname in reader.fieldnames or ():
+        columns[fieldname] = []
+
+    for row in reader:
+        # Convert timestamp from a string to a datetime:
+        row['timestamp'] = datetime.strptime(row['timestamp'],
+                                             '%Y-%m-%d %H:%M:%S')
+        rows.append(row)
+        for fieldname, value in row.items():
+            columns[fieldname].append(value)
-- 
2.30.2