X-Git-Url: https://projects.mako.cc/source/harrypotter-wikipedia-cdsw/blobdiff_plain/c4b1a145625537c7ac8131b133e6d85c587ff203..ce5c13c094d659125fe85d59b9bc0e4c2bf40072:/hpwp-trend.py diff --git a/hpwp-trend.py b/hpwp-trend.py index 939406e..b9d1c7b 100644 --- a/hpwp-trend.py +++ b/hpwp-trend.py @@ -1,10 +1,12 @@ +import encoding_fix + from csv import DictReader # read in the input file and count by day -input_file = open("hp_wiki.csv", 'r') +input_file = open("hp_wiki.tsv", 'r', encoding="utf-8") edits_by_day = {} -for row in DictReader(input_file): +for row in DictReader(input_file, delimiter="\t"): day_string = row['timestamp'][0:10] if day_string in edits_by_day: @@ -15,13 +17,13 @@ for row in DictReader(input_file): input_file.close() # output the counts by day -output_file = open("hp_edits_by_day.csv", "w") +output_file = open("hp_edits_by_day.tsv", "w", encoding='utf-8') # write a header -output_file.write("date,edits\n") +output_file.write("date\tedits\n") # iterate through every day and print out data into the file -for day_string in edits_by_day: - output_file.write(",".join([day_string, str(edits_by_day[day_string])]) + "\n") +for day_string in edits_by_day.keys(): + output_file.write("\t".join([day_string, str(edits_by_day[day_string])]) + "\n") output_file.close()