From: Benjamin Mako Hill Date: Mon, 20 Apr 2015 22:26:35 +0000 (-0700) Subject: two quick updates to the raw files X-Git-Url: https://projects.mako.cc/source/wikipedia-api-cdsw/commitdiff_plain/f17f0a3f63dd03d70cdc693da0bda53a1e85671b?ds=inline;hp=f2892ae64a4eb963904669373c148d9dbdfa3b1b two quick updates to the raw files - move to Python 3 (i.e., print()'ify stuff) - requests can do JSON natively using .json() on the response object, which solves the problem with requests returning bytes --- diff --git a/wikipedia-raw1.py b/wikipedia-raw1.py index fef3a58..05222a2 100644 --- a/wikipedia-raw1.py +++ b/wikipedia-raw1.py @@ -1,14 +1,13 @@ -import json import requests wp_call = requests.get('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json') -response = json.loads(wp_call.content) +response = wp_call.json() for page_id in response["query"]["pages"].keys(): page_title = response["query"]["pages"][page_id]["title"] revisions = response["query"]["pages"][page_id]["revisions"] for rev in revisions: - print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"] + print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]) diff --git a/wikipedia-raw2.py b/wikipedia-raw2.py index d84b378..a8efd34 100644 --- a/wikipedia-raw2.py +++ b/wikipedia-raw2.py @@ -1,5 +1,4 @@ import time -import json import requests url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json' @@ -9,14 +8,13 @@ pages = ["Benjamin_Mako_Hill", "Python", "Data_science"] for page_title in pages: wp_call = requests.get(url_base % page_title) - response = json.loads(wp_call.content) + response = wp_call.json() for page_id in response["query"]["pages"].keys(): page_title = response["query"]["pages"][page_id]["title"] revisions = response["query"]["pages"][page_id]["revisions"] for rev in revisions: - print page_title + "\t" + rev["user"] + "\t" + 
rev["timestamp"] - + print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]) time.sleep(3)