projects.mako.cc - harrypotter-wikipedia-cdsw/blob - harrypotter_edit_trend.ipynb
13d3d1118e0d9c023374550ae81599aa97defe72
[harrypotter-wikipedia-cdsw] / harrypotter_edit_trend.ipynb
1 {
2  "cells": [
3   {
4    "cell_type": "code",
5    "execution_count": null,
6    "metadata": {},
7    "outputs": [],
8    "source": [
9     "from csv import DictReader"
10    ]
11   },
12   {
13    "cell_type": "code",
14    "execution_count": null,
15    "metadata": {},
16    "outputs": [],
17    "source": [
18     "# open the input file; newline='' leaves line-ending handling to the csv reader\n",
19     "input_file = open(\"hp_wiki.tsv\", 'r', encoding=\"utf-8\", newline='')"
20    ]
21   },
22   {
23    "cell_type": "code",
24    "execution_count": null,
25    "metadata": {},
26    "outputs": [],
27    "source": [
28     "# tally edits per day; the tab-separated input is read one row at a time\n",
29     "# and each row is keyed by the first ten characters of its timestamp\n",
30     "edits_by_day = {}\n",
31     "for row in DictReader(input_file, delimiter=\"\\t\"):\n",
32     "    day_string = row['timestamp'][:10]\n",
33     "\n",
34     "    # a day that has not been seen yet starts from zero\n",
35     "    edits_by_day[day_string] = edits_by_day.get(day_string, 0) + 1"
36    ]
37   },
38   {
39    "cell_type": "code",
40    "execution_count": null,
41    "metadata": {},
42    "outputs": [],
43    "source": [
44     "input_file.close()"
45    ]
46   },
47   {
48    "cell_type": "code",
49    "execution_count": null,
50    "metadata": {},
51    "outputs": [],
52    "source": [
53     "# create the file that will receive the per-day counts\n",
54     "output_file = open(\"hp_edits_by_day.tsv\", mode=\"w\", encoding=\"utf-8\")\n",
55     "\n",
56     "# the first line of the file names the two tab-separated columns\n",
57     "print(\"date\\tedits\", file=output_file)"
58    ]
59   },
60   {
61    "cell_type": "code",
62    "execution_count": null,
63    "metadata": {},
64    "outputs": [],
65    "source": [
66     "# write one tab-separated line per day: the day, then its edit count\n",
67     "for day_string in edits_by_day:\n",
68     "    print(day_string, edits_by_day[day_string], sep=\"\\t\", file=output_file)\n",
69     "\n",
70     "output_file.close()"
71    ]
72   },
73   {
74    "cell_type": "code",
75    "execution_count": null,
76    "metadata": {},
77    "outputs": [],
78    "source": []
79   }
80  ],
81  "metadata": {
82   "kernelspec": {
83    "display_name": "Python 3",
84    "language": "python",
85    "name": "python3"
86   },
87   "language_info": {
88    "codemirror_mode": {
89     "name": "ipython",
90     "version": 3
91    },
92    "file_extension": ".py",
93    "mimetype": "text/x-python",
94    "name": "python",
95    "nbconvert_exporter": "python",
96    "pygments_lexer": "ipython3",
97    "version": "3.7.3"
98   }
99  },
100  "nbformat": 4,
101  "nbformat_minor": 2
102 }

Benjamin Mako Hill || Want to submit a patch?