From: arokem Date: Fri, 8 May 2015 00:50:46 +0000 (-0700) Subject: Added some challenges to the HP analysis. X-Git-Url: https://projects.mako.cc/source/matplotlib-cdsw/commitdiff_plain/4869907119b3a0609fa6b58ea68f2e6545d3d49e?hp=7bd9bbbbb11d7585b76e63e354c0c779d5a8c547 Added some challenges to the HP analysis. --- diff --git a/003-plot-timeseries.py b/003-plot-timeseries.py index ec98776..f0fe255 100644 --- a/003-plot-timeseries.py +++ b/003-plot-timeseries.py @@ -23,11 +23,12 @@ ax.set_ylabel('Size of the edit') plt.show() -# Challenge: plot the relationship between edit size. Use +# Challenge: Is edit size related to how long it's been since the last edit? +# => Plot the relationship between edit size and the time since the last edit: -## Hint 1: +## Hint 1: the number of seconds between two edits is: -#delta_time1 = hp.columns['timestamp'][1] - hp.columns['timestamp'][0] +#delta_time1 = (hp.columns['timestamp'][1] - hp.columns['timestamp'][0]).total_seconds() ## Hint 2: @@ -36,3 +37,9 @@ plt.show() # ax.plot([1,2,3], [2,4,8], '.') # ax.plot([1,2,3], [2,4,8], 'r.') + +# And see online documentation here: +# http://matplotlib.org/api/pyplot_summary.html +# http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.plot + + diff --git a/004-plot-histogram.py b/004-plot-histogram.py index 785a068..b938fea 100644 --- a/004-plot-histogram.py +++ b/004-plot-histogram.py @@ -18,5 +18,7 @@ ax.set_title('Edit size distribution') # Maybe don't really need that axis to be so long: # ax.set_xlim([0, 200000]) - plt.show() + +## Challenge : A 'mega-user' is a user with more than 1000 edits. 
+# Plot a bar chart with the maximum edit size for each of the mega-users diff --git a/load_hp_data.py b/load_hp_data.py index 0fe78c5..55d497c 100644 --- a/load_hp_data.py +++ b/load_hp_data.py @@ -22,7 +22,7 @@ for row in reader: row['size'] = int(row['size']) rows.append(row) -# Sort these things, so that they give you nice time-series +# Sort these things, so that they give you nice ordered time-series sort_rows = sorted(rows, key=lambda row: row['timestamp'], reverse=False) rows = sort_rows