From: Benjamin Mako Hill
Date: Tue, 28 Apr 2015 00:10:23 +0000 (-0700)
Subject: added two example programs plus gitignore file
X-Git-Url: https://projects.mako.cc/source/harrypotter-wikipedia-cdsw/commitdiff_plain/fcf662671099db62bdfd5607f9fe51c31747a999?ds=sidebyside

added two example programs plus gitignore file
---

Review note: the two scripts below were revised relative to the original
commit (anonymous-edit test corrected, files closed via with-blocks, empty
input handled, per-day output sorted). The "index" blob ids are those of the
original commit and do not describe the revised contents.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..72699f7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/*.csv
diff --git a/hpwp-minor.py b/hpwp-minor.py
new file mode 100644
index 0000000..4e173f2
--- /dev/null
+++ b/hpwp-minor.py
@@ -0,0 +1,25 @@
+# Count the edits in hp_wiki.csv and report the proportion made anonymously.
+from csv import DictReader
+
+num_edits = 0
+num_anon = 0
+
+# the with-block closes the input file even if reading stops early
+with open("hp_wiki.csv", 'r') as input_file:
+    for row in DictReader(input_file):
+        num_edits = num_edits + 1
+        # count rows flagged as anonymous; comparing against "False" here
+        # would count the logged-in edits instead (assumes the column
+        # holds the strings "True"/"False" -- confirm against the data)
+        if row["anon"] == "True":
+            num_anon = num_anon + 1
+
+# an input file with no rows has no edits, so do not divide by zero
+if num_edits > 0:
+    prop_anon = num_anon / num_edits
+else:
+    prop_anon = 0.0
+
+print("total edits: %s" % num_edits)
+print("anon edits: %s" % num_anon)
+print("proportion anon: %s" % prop_anon)
diff --git a/hwpw-trend.py b/hwpw-trend.py
new file mode 100644
index 0000000..939406e
--- /dev/null
+++ b/hwpw-trend.py
@@ -0,0 +1,19 @@
+from csv import DictReader
+
+# read in the input file and count by day
+edits_by_day = {}
+with open("hp_wiki.csv", 'r') as input_file:
+    for row in DictReader(input_file):
+        # the first ten characters of the timestamp are used as the day key
+        day_string = row['timestamp'][0:10]
+        edits_by_day[day_string] = edits_by_day.get(day_string, 0) + 1
+
+# output the counts by day
+with open("hp_edits_by_day.csv", "w") as output_file:
+    # write a header
+    output_file.write("date,edits\n")
+
+    # write one row per day, sorted so the output order does not depend
+    # on the order in which days were first seen in the input
+    for day_string in sorted(edits_by_day):
+        output_file.write("%s,%s\n" % (day_string, edits_by_day[day_string]))