import matplotlib.pyplot as plt
figure, axis = plt.subplots(1)
-plt.plot([1,2,3], [1,2,3])
+plt.plot([1,2,3], [2,4,8])
plt.show()
--- /dev/null
+"""
+003-plot-timeseries.py
+
+Plot edit sizes from the Harry Potter data set as a time series.
+
+"""
+
+
+import matplotlib.pyplot as plt
+import load_hp_data as hp
+
+# We can play with styles:
+#plt.style.use('bmh')
+plt.style.use('ggplot')
+# To see available styles, type:
+#plt.style.available
+
+fig, ax = plt.subplots(1)
+ax.plot(hp.columns['timestamp'], hp.columns['size'])
+ax.set_xlabel('Time')
+ax.set_ylabel('Size of the edit')
+
+plt.show()
+
+
+# Challenge: plot the relationship between edit size and the time since the previous edit. Use the hints below.
+
+## Hint 1:
+
+#delta_time1 = hp.columns['timestamp'][1] - hp.columns['timestamp'][0]
+
+## Hint 2:
+
+# You can give `ax.plot` (just like `plt.plot`) more arguments to control the
+# shape/size/color of the markers used. For example, try:
+
+# ax.plot([1,2,3], [2,4,8], '.')
+# ax.plot([1,2,3], [2,4,8], 'r.')
--- /dev/null
+"""
+004-plot-histogram.py
+
+Plot a histogram of edit sizes
+
+"""
+
+import matplotlib.pyplot as plt
+import load_hp_data as hp
+
+plt.style.use('ggplot')
+
+fig, ax = plt.subplots(1)
+ax.hist(hp.columns['size'], bins=1000)
+ax.set_xlabel('Size of the edit')
+ax.set_ylabel('')
+ax.set_title('Edit size distribution')
+
+# The x-axis probably doesn't need to extend this far; uncomment to trim it:
+# ax.set_xlim([0, 200000])
+
+plt.show()
rows = []
for row in reader:
- # Convert timestamp from a string to a date:
- row['timestamp'] = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
- rows.append(row)
- for fieldname, value in row.items():
- columns[fieldname].append(value)
\ No newline at end of file
+ # Convert timestamp from a string to a date:
+ row['timestamp'] = datetime.strptime(row['timestamp'], '%Y-%m-%d %H:%M:%S')
+ # Convert size from a string to an integer:
+ row['size'] = int(row['size'])
+ rows.append(row)
+
+# Sort the rows by timestamp, so that they form a proper time series:
+sort_rows = sorted(rows, key=lambda row: row['timestamp'], reverse=False)
+
+rows = sort_rows
+for row in sort_rows:
+ for fieldname, value in row.items():
+ columns[fieldname].append(value)