From f17f0a3f63dd03d70cdc693da0bda53a1e85671b Mon Sep 17 00:00:00 2001 From: Benjamin Mako Hill Date: Mon, 20 Apr 2015 15:26:35 -0700 Subject: [PATCH] two quick updates to the raw files - move to Python 3 (i.e., print()'ify stuff) - requests can do JSON natively using .json() on the response object which solves the problem with requests returning bytes --- wikipedia-raw1.py | 5 ++--- wikipedia-raw2.py | 6 ++---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/wikipedia-raw1.py b/wikipedia-raw1.py index fef3a58..05222a2 100644 --- a/wikipedia-raw1.py +++ b/wikipedia-raw1.py @@ -1,14 +1,13 @@ -import json import requests wp_call = requests.get('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json') -response = json.loads(wp_call.content) +response = wp_call.json() for page_id in response["query"]["pages"].keys(): page_title = response["query"]["pages"][page_id]["title"] revisions = response["query"]["pages"][page_id]["revisions"] for rev in revisions: - print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"] + print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]) diff --git a/wikipedia-raw2.py b/wikipedia-raw2.py index d84b378..a8efd34 100644 --- a/wikipedia-raw2.py +++ b/wikipedia-raw2.py @@ -1,5 +1,4 @@ import time -import json import requests url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json' @@ -9,14 +8,13 @@ pages = ["Benjamin_Mako_Hill", "Python", "Data_science"] for page_title in pages: wp_call = requests.get(url_base % page_title) - response = json.loads(wp_call.content) + response = wp_call.json() for page_id in response["query"]["pages"].keys(): page_title = response["query"]["pages"][page_id]["title"] revisions = response["query"]["pages"][page_id]["revisions"] for rev in revisions: - print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"] - + print(page_title + 
"\t" + rev["user"] + "\t" + rev["timestamp"]) time.sleep(3) -- 2.39.5