From 4869907119b3a0609fa6b58ea68f2e6545d3d49e Mon Sep 17 00:00:00 2001 From: arokem Date: Thu, 7 May 2015 17:50:46 -0700 Subject: [PATCH] Added some challenges to the HP analysis. --- 003-plot-timeseries.py | 13 ++++++++++--- 004-plot-histogram.py | 4 +++- load_hp_data.py | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/003-plot-timeseries.py b/003-plot-timeseries.py index ec98776..f0fe255 100644 --- a/003-plot-timeseries.py +++ b/003-plot-timeseries.py @@ -23,11 +23,12 @@ ax.set_ylabel('Size of the edit') plt.show() -# Challenge: plot the relationship between edit size. Use +# Challenge: Is edit size related to how long it's been since the last edit? +# => Plot the relationship between edit size and the time since the last edit: -## Hint 1: +## Hint 1: the number of seconds between two edits is: -#delta_time1 = hp.columns['timestamp'][1] - hp.columns['timestamp'][0] +#delta_time1 = (hp.columns['timestamp'][1] - hp.columns['timestamp'][0]).total_seconds() ## Hint 2: @@ -36,3 +37,9 @@ plt.show() # ax.plot([1,2,3], [2,4,8], '.') # ax.plot([1,2,3], [2,4,8], 'r.') + +# And see online documentation here: +# http://matplotlib.org/api/pyplot_summary.html +# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot + + diff --git a/004-plot-histogram.py b/004-plot-histogram.py index 785a068..b938fea 100644 --- a/004-plot-histogram.py +++ b/004-plot-histogram.py @@ -18,5 +18,7 @@ ax.set_title('Edit size distribution') # Maybe don't really need that axis to be so long: # ax.set_xlim([0, 200000]) - plt.show() + +## Challenge: A 'mega-user' is a user with more than 1000 edits.
+# Plot a bar chart showing the maximum edit size for each of the mega-users. diff --git a/load_hp_data.py b/load_hp_data.py index 0fe78c5..55d497c 100644 --- a/load_hp_data.py +++ b/load_hp_data.py @@ -22,7 +22,7 @@ for row in reader: row['size'] = int(row['size']) rows.append(row) -# Sort these things, so that they give you nice time-series +# Sort the rows by timestamp (ascending), so that they give you nicely ordered time-series sort_rows = sorted(rows, key=lambda row: row['timestamp'], reverse=False) rows = sort_rows -- 2.30.2