articles = articles_json["*"][0]["a"]["*"]
# open a file to write all the output to
-output = open("hp_wiki.csv", "w")
-output.write(",".join(["title", "user", "timestamp", "size", "anon", "minor", "revid"]) + "\n")
+output = open("hp_wiki.tsv", "w", encoding="utf-8")
+output.write("\t".join(["title", "user", "timestamp", "size", "anon", "minor", "revid"]) + "\n")
# for every article
for article in articles:
# get the list of revisions from our function, then iterate through it, writing each one out
revisions = get_article_revisions(title)
for rev in revisions:
- output.write(",".join(['"' + rev["title"] + '"', '"' + rev["user"] + '"',
+ output.write("\t".join(['"' + rev["title"] + '"', '"' + rev["user"] + '"',
rev["timestamp"], str(rev["size"]), str(rev["anon"]),
str(rev["minor"]), str(rev["revid"])]) + "\n")
from csv import DictReader
-input_file = open("hp_wiki.csv", 'r')
+input_file = open("hp_wiki.tsv", 'r', encoding="utf-8")
num_edits = 0
num_anon = 0
-for row in DictReader(input_file):
+for row in DictReader(input_file, delimiter="\t"):
num_edits = num_edits + 1
- if row["anon"] == "False":
+ if row["anon"] == "True":
num_anon = num_anon + 1
prop_anon = num_anon / num_edits
from csv import DictReader
# read in the input file and count by day
-input_file = open("hp_wiki.csv", 'r')
+input_file = open("hp_wiki.tsv", 'r', encoding="utf-8")
edits_by_day = {}
-for row in DictReader(input_file):
+for row in DictReader(input_file, delimiter="\t"):
day_string = row['timestamp'][0:10]
if day_string in edits_by_day:
input_file.close()
# output the counts by day
-output_file = open("hp_edits_by_day.csv", "w")
+output_file = open("hp_edits_by_day.tsv", "w", encoding='utf-8')
# write a header
-output_file.write("date,edits\n")
+output_file.write("date\tedits\n")
# iterate through every day and print out data into the file
-for day_string in edits_by_day:
- output_file.write(",".join([day_string, str(edits_by_day[day_string])]) + "\n")
+for day_string in edits_by_day.keys():
+ output_file.write("\t".join([day_string, str(edits_by_day[day_string])]) + "\n")
output_file.close()