projects.mako.cc - harrypotter-wikipedia-cdsw/blob - hpwp-trend.py

   1 from csv import DictReader
   2
   3 # read in the input file and count by day
   4 input_file = open("hp_wiki.tsv", 'r', encoding="utf-8")
   5
   6 edits_by_day = {}
   7 for row in DictReader(input_file, delimiter="\t"):
   8     day_string = row['timestamp'][0:10]
   9
  10     if day_string in edits_by_day:
  11         edits_by_day[day_string] = edits_by_day[day_string] + 1
  12     else:
  13         edits_by_day[day_string] = 1
  14
  15 input_file.close()
  16
  17 # output the counts by day
  18 output_file = open("hp_edits_by_day.tsv", "w", encoding='utf-8')
  19
  20 # write a header
  21 output_file.write("date\tedits\n")
  22
  23 # iterate through every day and print out data into the file
  24 for day_string in edits_by_day.keys():
  25     output_file.write("\t".join([day_string, str(edits_by_day[day_string])]) + "\n")
  26
  27 output_file.close()