projects.mako.cc - wikipedia-api-cdsw/commitdiff
two quick updates to the raw files
author: Benjamin Mako Hill <mako@atdot.cc>
Mon, 20 Apr 2015 22:26:35 +0000 (15:26 -0700)
committer: Benjamin Mako Hill <mako@atdot.cc>
Mon, 20 Apr 2015 22:29:46 +0000 (15:29 -0700)
- move to Python 3 (i.e., print()'ify stuff)
- requests can do JSON natively using .json() on the response object, which
  solves the problem with requests returning bytes

wikipedia-raw1.py
wikipedia-raw2.py

index fef3a582647dacd2c5f2209e698f0f5ae3edaabd..05222a2ffd55b3ae59ae2c17f344dc3f5e869207 100644 (file)
@@ -1,14 +1,13 @@
-import json
 import requests
 
 wp_call = requests.get('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json')
 
-response = json.loads(wp_call.content)
+response = wp_call.json()
 
 for page_id in response["query"]["pages"].keys():
     page_title = response["query"]["pages"][page_id]["title"]
     revisions = response["query"]["pages"][page_id]["revisions"]
 
     for rev in revisions:
-        print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
+        print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"])
 
index d84b378a770f5606a4318583c501d8306fc2f28c..a8efd34cadcf8bed96163be045b6efc2b66c34bc 100644 (file)
@@ -1,5 +1,4 @@
 import time
-import json
 import requests
 
 url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json'
@@ -9,14 +8,13 @@ pages = ["Benjamin_Mako_Hill", "Python", "Data_science"]
 for page_title in pages:
     
     wp_call = requests.get(url_base % page_title)
-    response = json.loads(wp_call.content)
+    response = wp_call.json()
 
     for page_id in response["query"]["pages"].keys():
         page_title = response["query"]["pages"][page_id]["title"]
         revisions = response["query"]["pages"][page_id]["revisions"]
 
         for rev in revisions:
-            print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
-
+            print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"])
 
     time.sleep(3)

Benjamin Mako Hill || Want to submit a patch?