cleaned up the wikibios project file so that it's more general
[matplotlib-cdsw] / edits.py
1 """Plot the number of edits vs. article creation date, for male and female bios."""
2 import wikibios
3 from matplotlib import pyplot
4 from operator import itemgetter
5
def _binned_mean_edits(sorted_rows, bin_size):
    """Summarise rows in consecutive, non-overlapping bins of bin_size rows.

    sorted_rows is a sequence of mappings, each providing 'firstedit' and
    'edits' entries, already ordered by 'firstedit'.

    Returns a pair of parallel lists (medians, means): for every full bin,
    the 'firstedit' value of the bin's middle row and the mean of the bin's
    'edits' values. Rows left over after the last full bin are ignored, so
    fewer than bin_size rows yields two empty lists.
    """
    medians = []
    means = []
    # The stop value makes the final start index the beginning of the last
    # *full* bin; a trailing partial bin is deliberately skipped.
    for start in range(0, len(sorted_rows) - bin_size + 1, bin_size):
        chunk = sorted_rows[start:start + bin_size]

        # Floor division keeps the index an integer; plain `/` yields a
        # float on Python 3, which cannot be used to index a list.
        medians.append(chunk[bin_size // 2]['firstedit'])

        # Start the sum at 0.0 so the division below is always a float
        # division, even when the edit counts are integers.
        total_edits = sum((row['edits'] for row in chunk), 0.0)
        means.append(total_edits / bin_size)
    return medians, means


figure = pyplot.figure()

# Number of biographies averaged together into each plotted point.
N = 1000

rows_female_by_firstedit = sorted(wikibios.rows_female, key=itemgetter('firstedit'))
firstedit_female_medians, mean_edits_female_by_firstedit = _binned_mean_edits(
    rows_female_by_firstedit, N)

rows_male_by_firstedit = sorted(wikibios.rows_male, key=itemgetter('firstedit'))
firstedit_male_medians, mean_edits_male_by_firstedit = _binned_mean_edits(
    rows_male_by_firstedit, N)

# One line per group: x is the middle 'firstedit' of each bin, y is the
# bin's mean edit count.
axes = figure.gca()
axes.plot(firstedit_female_medians, mean_edits_female_by_firstedit, label='Female')
axes.plot(firstedit_male_medians, mean_edits_male_by_firstedit, label='Male')
axes.set_xlabel('First Edit Date')
axes.set_ylabel('Mean Number of Edits')
axes.legend()

figure.savefig('edits.pdf')

Benjamin Mako Hill || Want to submit a patch?