From 4f9f564e2fac44ff1330a1d4d5bb4c7424791306 Mon Sep 17 00:00:00 2001
From: Benjamin Mako Hill
Date: Fri, 25 Apr 2014 19:01:57 -0700
Subject: [PATCH] added example code that uses the urls

---
 wikipedia-raw1.py  | 14 ++++++++++++++
 wikipedia-raw2.py  | 22 ++++++++++++++++++++++
 wikipedia1-mwc1.py | 13 +++++++++++++
 wikipedia2-mwc2.py |  6 ++++++
 4 files changed, 55 insertions(+)
 create mode 100644 wikipedia-raw1.py
 create mode 100644 wikipedia-raw2.py
 create mode 100644 wikipedia1-mwc1.py
 create mode 100644 wikipedia2-mwc2.py

diff --git a/wikipedia-raw1.py b/wikipedia-raw1.py
new file mode 100644
index 0000000..95e554d
--- /dev/null
+++ b/wikipedia-raw1.py
@@ -0,0 +1,14 @@
+import simplejson as json
+from urllib2 import urlopen
+
+wp_call = urlopen('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json')
+
+response = json.loads(wp_call.read())
+
+for page_id in response["query"]["pages"].keys():
+    page_title = response["query"]["pages"][page_id]["title"]
+    revisions = response["query"]["pages"][page_id]["revisions"]
+
+    for rev in revisions:
+        print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
+
diff --git a/wikipedia-raw2.py b/wikipedia-raw2.py
new file mode 100644
index 0000000..c7eca1b
--- /dev/null
+++ b/wikipedia-raw2.py
@@ -0,0 +1,22 @@
+import time
+import simplejson as json
+from urllib2 import urlopen
+
+url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json'
+
+pages = ["Benjamin_Mako_Hill", "Python", "Data_science"]
+
+for page_title in pages:
+
+    wp_call = urlopen(url_base % page_title)
+    response = json.loads(wp_call.read())
+
+    for page_id in response["query"]["pages"].keys():
+        page_title = response["query"]["pages"][page_id]["title"]
+        revisions = response["query"]["pages"][page_id]["revisions"]
+
+        for rev in revisions:
+            print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
+
+
+    time.sleep(3)
diff --git a/wikipedia1-mwc1.py b/wikipedia1-mwc1.py
new file mode 100644
index 0000000..7671c84
--- /dev/null
+++ b/wikipedia1-mwc1.py
@@ -0,0 +1,13 @@
+import time
+import simplejson as json
+import mwclient
+
+def format_time(t):
+    return(time.strftime('%Y-%m-%d %H:%M:%S', t))
+
+site = mwclient.Site('en.wikipedia.org')
+
+page = site.Pages["Data science"]
+
+for revision in page.revisions():
+    print revision["user"] + "\t" + format_time(revision['timestamp'])
diff --git a/wikipedia2-mwc2.py b/wikipedia2-mwc2.py
new file mode 100644
index 0000000..0fee364
--- /dev/null
+++ b/wikipedia2-mwc2.py
@@ -0,0 +1,6 @@
+import mwclient
+site = mwclient.Site('en.wikipedia.org')
+
+category = site.Pages['Category:University of Washington']
+for page in category:
+    print page.name
-- 
2.30.2