initial versions of solutions to harry potter challenges
[harrypotter-wikipedia-cdsw-solutions] / solution3.py
1 # Q: Create graphs in a spreadsheet of the trend lines (i.e., edits per day over time) for the three most active editors?
2
3 from csv import DictReader
4
# STEP 1: read in the input file and count edits per editor
def count_edits_by_editor(path="hp_wiki.tsv"):
    """Count how many rows of a tab-separated file belong to each user.

    The file must have a header row containing a 'user' column.

    Returns a dict mapping each 'user' value to its number of rows. Keys
    appear in the order the users are first seen in the file.
    """
    edits_by_editor = {}
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation recommends; the with-block closes the file even if a
    # row raises.
    with open(path, "r", encoding="utf-8", newline="") as input_file:
        for row in DictReader(input_file, delimiter="\t"):
            user = row["user"]
            edits_by_editor[user] = edits_by_editor.get(user, 0) + 1
    return edits_by_editor


# STEP 2: find the list of the top editors
def find_top_editors(edits_by_editor, how_many=3):
    """Return the `how_many` users with the highest counts, busiest first.

    sorted() is stable, so users with equal counts keep the order in which
    they appear in `edits_by_editor`. Fewer users are returned when the
    dict holds fewer than `how_many` entries.
    """
    ranked = sorted(edits_by_editor, key=edits_by_editor.get, reverse=True)
    return ranked[:max(how_many, 0)]


# STEP 3: go back through the original data and, this time, count the
# edits per day for just the selected editors
def count_edits_by_day(top_editors, path="hp_wiki.tsv"):
    """Count rows per day for each user in `top_editors`.

    The file must have 'user' and 'timestamp' columns. The day is the
    first 10 characters of 'timestamp' (presumably YYYY-MM-DD; confirm
    against the data file).

    Returns a dict of dicts: user -> {day -> number of rows}. Every user
    in `top_editors` gets an entry, even if it stays empty.
    """
    # one (initially empty) per-day dictionary per selected user
    user_edits_by_day = {user: {} for user in top_editors}

    with open(path, "r", encoding="utf-8", newline="") as input_file:
        for row in DictReader(input_file, delimiter="\t"):
            user = row["user"]

            # skip everybody who is not one of the selected editors
            if user not in user_edits_by_day:
                continue

            day = row["timestamp"][0:10]
            edits_by_day = user_edits_by_day[user]
            edits_by_day[day] = edits_by_day.get(day, 0) + 1

    return user_edits_by_day


# STEP 4: print it all out
def write_edits_by_day(top_editors, user_edits_by_day,
                       path="hp_edits_by_day_top3_users.tsv"):
    """Write one 'user<TAB>day<TAB>edits' line per user and day to `path`.

    A header line is written first. Users are written in the order given
    by `top_editors`; each user's days are written in the order they were
    inserted into that user's dictionary.
    """
    with open(path, "w", encoding="utf-8") as output_file:
        # write a header
        output_file.write("user\tday\tedits\n")

        # iterate through every day and print out data into the file
        for user in top_editors:
            for day, edits in user_edits_by_day[user].items():
                output_file.write("\t".join([user, day, str(edits)]) + "\n")


def main():
    """Run all four steps on hp_wiki.tsv for the three most active editors."""
    edits_by_editor = count_edits_by_editor("hp_wiki.tsv")
    top_editors = find_top_editors(edits_by_editor, 3)
    user_edits_by_day = count_edits_by_day(top_editors, "hp_wiki.tsv")
    write_edits_by_day(top_editors, user_edits_by_day,
                       "hp_edits_by_day_top3_users.tsv")


# Only do the file I/O when executed as a script, not when imported.
if __name__ == "__main__":
    main()
66
67 # Example of interactive graph in Google Docs:
68 # http://mako.cc/go/0g

Benjamin Mako Hill || Want to submit a patch?