two quick updates to the raw files
[wikipedia-api-cdsw] / wikipedia-raw2.py
import time
import requests

# MediaWiki API query for one page's revisions: at most 100 of them, with only
# the timestamp and user of each, returned as JSON.  The %s placeholder is
# filled in with the page title.
url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json'

pages = ["Benjamin_Mako_Hill", "Python", "Data_science"]

for requested_title in pages:

    # A timeout keeps the script from hanging forever on a stalled connection.
    wp_call = requests.get(url_base % requested_title, timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    wp_call.raise_for_status()
    response = wp_call.json()

    # The result is keyed by page id; only the page records themselves are
    # needed here.
    for page in response["query"]["pages"].values():
        # Print the title as reported by the API rather than the one we asked
        # for (kept in a separate variable so the loop variable is not
        # overwritten).
        page_title = page["title"]
        # A page without a "revisions" entry (e.g. a title that does not
        # exist) is skipped instead of aborting the run with a KeyError.
        revisions = page.get("revisions", [])

        # One tab-separated line per revision: title, user, timestamp.
        for rev in revisions:
            print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"])

    # Wait three seconds before requesting the next page.
    time.sleep(3)

Benjamin Mako Hill || Want to submit a patch?