# Q: Create graphs in a spreadsheet of the trend lines (i.e., edits per day over time) for the three most popular articles?
"""Write a per-day edit-count table for the most-edited articles.

The script reads a tab-separated edit log twice: once to find the articles
with the most edits, and once more to count, for each day, how many edits
each of those articles received.  The result is written as a TSV file with
one row per day and one column per article, ready to be graphed in a
spreadsheet.

NOTE(review): this file was reconstructed from a copy whose indentation and
several lines (branch keywords and a few assignments) were missing; confirm
the reconstructed pieces flagged below against the upstream script.
"""

from collections import Counter
from csv import DictReader

INPUT_PATH = "hp_wiki.tsv"
OUTPUT_PATH = "hp_edits_by_day_top3_articles.tsv"
TOP_N = 3

# NOTE(review): the statement that assigned `title` was not visible in the
# copy this was rebuilt from; the column name "title" is assumed -- confirm
# against the header row of the input file.
TITLE_FIELD = "title"
TIMESTAMP_FIELD = "timestamp"

# The day is taken as the first 10 characters of the timestamp
# (presumably "YYYY-MM-DD" -- verify against the data).
DAY_LENGTH = 10


def _count_edits_by_article(input_path):
    """Return a Counter mapping each article title to its number of rows."""
    # newline="" is what the csv module asks for when it is handed a file.
    with open(input_path, "r", encoding="utf-8", newline="") as input_file:
        reader = DictReader(input_file, delimiter="\t")
        return Counter(row[TITLE_FIELD] for row in reader)


def _pick_top_articles(edits_by_article, top_n):
    """Return the `top_n` titles with the highest counts, most edited first.

    The sort is stable, so titles with equal counts keep the order in which
    they were first seen in the input.
    """
    ranked = sorted(edits_by_article, key=edits_by_article.get, reverse=True)
    return ranked[:top_n]


def _count_edits_by_day(input_path, top_articles):
    """Return {day: {title: count}} restricted to `top_articles`.

    Every day that appears has an entry for every top article, so articles
    that were not edited on that day are reported as 0 rather than missing.
    """
    wanted = set(top_articles)
    article_edits_by_day = {}
    with open(input_path, "r", encoding="utf-8", newline="") as input_file:
        for row in DictReader(input_file, delimiter="\t"):
            title = row[TITLE_FIELD]
            if title not in wanted:
                continue

            day = row[TIMESTAMP_FIELD][0:DAY_LENGTH]
            if day not in article_edits_by_day:
                # Start every top article at zero for a newly seen day.
                article_edits_by_day[day] = dict.fromkeys(top_articles, 0)
            article_edits_by_day[day][title] += 1
    return article_edits_by_day


def _write_edits_by_day(output_path, top_articles, article_edits_by_day):
    """Write the header row and one tab-separated row of counts per day."""
    with open(output_path, "w", encoding="utf-8") as output_file:
        output_file.write("day\t" + "\t".join(top_articles) + "\n")

        # Emit days in sorted order so the rows form a usable trend line;
        # for ISO-style dates, string order is chronological order.
        for day in sorted(article_edits_by_day):
            counts = article_edits_by_day[day]
            title_values_string = "\t".join(
                str(counts[title]) for title in top_articles
            )
            output_file.write("\t".join([day, title_values_string]) + "\n")


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, top_n=TOP_N):
    """Build the edits-per-day table for the `top_n` most-edited articles.

    Args:
        input_path: tab-separated input file with a header row.
        output_path: file to write; it is created or overwritten.
        top_n: how many of the most-edited articles to include.
    """
    # STEP 1: read in the input file and count by article
    edits_by_article = _count_edits_by_article(input_path)

    # STEP 2: find the list of the top articles
    top_articles = _pick_top_articles(edits_by_article, top_n)

    # STEP 3: go back through the original data and this time count, per
    # day, only the edits to those top articles
    article_edits_by_day = _count_edits_by_day(input_path, top_articles)

    # STEP 4: print it all out
    _write_edits_by_day(output_path, top_articles, article_edits_by_day)


if __name__ == "__main__":
    main()

# Example of interactive graph in Google Docs:
# http://mako.cc/go/0h