added example code that uses the urls
author Benjamin Mako Hill <mako@atdot.cc>
Sat, 26 Apr 2014 02:01:57 +0000 (19:01 -0700)
committer Benjamin Mako Hill <mako@atdot.cc>
Sat, 26 Apr 2014 02:01:57 +0000 (19:01 -0700)
wikipedia-raw1.py [new file with mode: 0644]
wikipedia-raw2.py [new file with mode: 0644]
wikipedia1-mwc1.py [new file with mode: 0644]
wikipedia2-mwc2.py [new file with mode: 0644]

diff --git a/wikipedia-raw1.py b/wikipedia-raw1.py
new file mode 100644 (file)
index 0000000..95e554d
--- /dev/null
@@ -0,0 +1,24 @@
+# Print one "title<TAB>user<TAB>timestamp" line for each of up to 100
+# revisions of the English Wikipedia Main_Page, using the raw web API.
+import simplejson as json
+from urllib2 import urlopen
+
+wp_call = urlopen('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json')
+try:
+    response = json.loads(wp_call.read())
+finally:
+    # release the connection even when the body is not valid JSON
+    wp_call.close()
+
+# iterate the page records directly; the page id keys are not needed
+for page in response["query"]["pages"].values():
+    page_title = page["title"]
+    # pages the API reports as missing carry no "revisions" list
+    revisions = page.get("revisions", [])
+
+    for rev in revisions:
+        # "user" is absent when the username has been hidden from view
+        fields = [page_title, rev.get("user", u""), rev["timestamp"]]
+        # encode explicitly: Python 2 cannot print non-ASCII unicode to a pipe
+        print u"\t".join(fields).encode("utf-8")
+
diff --git a/wikipedia-raw2.py b/wikipedia-raw2.py
new file mode 100644 (file)
index 0000000..c7eca1b
--- /dev/null
@@ -0,0 +1,35 @@
+# For each title in pages, print one "title<TAB>user<TAB>timestamp" line for
+# each of up to 100 revisions, pausing between requests to the web API.
+import time
+import simplejson as json
+from urllib import quote
+from urllib2 import urlopen
+
+url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json'
+
+pages = ["Benjamin_Mako_Hill", "Python", "Data_science"]
+
+for requested_title in pages:
+
+    # percent-encode the title so spaces, "&" or "|" cannot alter the query
+    wp_call = urlopen(url_base % quote(requested_title))
+    try:
+        response = json.loads(wp_call.read())
+    finally:
+        # release the connection even when the body is not valid JSON
+        wp_call.close()
+
+    for page in response["query"]["pages"].values():
+        # title as reported by the API; kept apart from the loop variable
+        page_title = page["title"]
+        # pages the API reports as missing carry no "revisions" list
+        revisions = page.get("revisions", [])
+
+        for rev in revisions:
+            # "user" is absent when the username has been hidden from view
+            fields = [page_title, rev.get("user", u""), rev["timestamp"]]
+            # encode explicitly: Python 2 cannot print non-ASCII unicode to a pipe
+            print u"\t".join(fields).encode("utf-8")
+
+    # wait three seconds before issuing the next request
+    time.sleep(3)
diff --git a/wikipedia1-mwc1.py b/wikipedia1-mwc1.py
new file mode 100644 (file)
index 0000000..7671c84
--- /dev/null
@@ -0,0 +1,20 @@
+# Print "user<TAB>timestamp" for each revision that page.revisions() yields
+# for the "Data science" page, fetched through the mwclient library.
+import time
+import simplejson as json
+import mwclient
+
+def format_time(t):
+    """Return t formatted as 'YYYY-MM-DD HH:MM:SS' via time.strftime."""
+    return time.strftime('%Y-%m-%d %H:%M:%S', t)
+
+site = mwclient.Site('en.wikipedia.org')
+
+page = site.Pages["Data science"]
+
+for revision in page.revisions():
+    # "user" may be absent (e.g. a hidden username) -- TODO confirm with mwclient
+    user = revision.get("user", u"")
+    line = user + u"\t" + format_time(revision['timestamp'])
+    # encode explicitly: Python 2 cannot print non-ASCII unicode to a pipe
+    print line.encode("utf-8")
diff --git a/wikipedia2-mwc2.py b/wikipedia2-mwc2.py
new file mode 100644 (file)
index 0000000..0fee364
--- /dev/null
@@ -0,0 +1,8 @@
+# Print the name of each page yielded by iterating a category, via mwclient.
+import mwclient
+site = mwclient.Site('en.wikipedia.org')
+
+category = site.Pages['Category:University of Washington']
+for page in category:
+    # encode explicitly: Python 2 cannot print non-ASCII unicode to a pipe
+    print page.name.encode("utf-8")

Benjamin Mako Hill || Want to submit a patch?