From: Benjamin Mako Hill
Date: Sat, 26 Apr 2014 02:01:57 +0000 (-0700)
Subject: added example code that uses the urls
X-Git-Url: https://projects.mako.cc/source/wikipedia-api-cdsw/commitdiff_plain/4f9f564e2fac44ff1330a1d4d5bb4c7424791306

added example code that uses the urls
---

NOTE(review): commentary only; this text sits between the three-dash line
and the first file diff, so it is neither commit message nor patch content.

NOTE(review): all four files below are newly created (each has
"new file mode 100644" and a single hunk starting at -0,0). The scripts use
Python 2 syntax (print statements, urllib2).

NOTE(review): per-file summary, taken from the hunks as shown:
  * wikipedia-raw1.py  - one request to the English Wikipedia api.php for
    the revisions of Main_Page (rvlimit=100, rvprop=timestamp|user,
    format=json); prints title, user and timestamp separated by tabs.
  * wikipedia-raw2.py  - the same query with the title substituted through
    "%s" for three page names. The inner loop rebinds page_title (the outer
    loop variable) to the title returned in the response.
  * wikipedia1-mwc1.py - uses mwclient to walk page.revisions() of
    "Data science" and prints user and a strftime-formatted timestamp.
    simplejson is imported here but never referenced in this file.
    Presumably revision['timestamp'] is a time.struct_time, since it is
    passed straight to time.strftime - TODO confirm against mwclient.
  * wikipedia2-mwc2.py - iterates site.Pages['Category:University of
    Washington'] and prints each page.name.

NOTE(review): leading indentation was not recoverable from the dump this was
restored from; it is reconstructed from Python block syntax at four spaces
per level. The placement of time.sleep(3) at the level of the outer
"for page_title in pages" loop (one pause per requested page) is an
assumption - TODO confirm against the repository.

diff --git a/wikipedia-raw1.py b/wikipedia-raw1.py
new file mode 100644
index 0000000..95e554d
--- /dev/null
+++ b/wikipedia-raw1.py
@@ -0,0 +1,14 @@
+import simplejson as json
+from urllib2 import urlopen
+
+wp_call = urlopen('https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=Main_Page&rvlimit=100&rvprop=timestamp|user&format=json')
+
+response = json.loads(wp_call.read())
+
+for page_id in response["query"]["pages"].keys():
+    page_title = response["query"]["pages"][page_id]["title"]
+    revisions = response["query"]["pages"][page_id]["revisions"]
+
+    for rev in revisions:
+        print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
+
diff --git a/wikipedia-raw2.py b/wikipedia-raw2.py
new file mode 100644
index 0000000..c7eca1b
--- /dev/null
+++ b/wikipedia-raw2.py
@@ -0,0 +1,22 @@
+import time
+import simplejson as json
+from urllib2 import urlopen
+
+url_base = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=%s&rvlimit=100&rvprop=timestamp|user&format=json'
+
+pages = ["Benjamin_Mako_Hill", "Python", "Data_science"]
+
+for page_title in pages:
+
+    wp_call = urlopen(url_base % page_title)
+    response = json.loads(wp_call.read())
+
+    for page_id in response["query"]["pages"].keys():
+        page_title = response["query"]["pages"][page_id]["title"]
+        revisions = response["query"]["pages"][page_id]["revisions"]
+
+        for rev in revisions:
+            print page_title + "\t" + rev["user"] + "\t" + rev["timestamp"]
+
+
+    time.sleep(3)
diff --git a/wikipedia1-mwc1.py b/wikipedia1-mwc1.py
new file mode 100644
index 0000000..7671c84
--- /dev/null
+++ b/wikipedia1-mwc1.py
@@ -0,0 +1,13 @@
+import time
+import simplejson as json
+import mwclient
+
+def format_time(t):
+    return(time.strftime('%Y-%m-%d %H:%M:%S', t))
+
+site = mwclient.Site('en.wikipedia.org')
+
+page = site.Pages["Data science"]
+
+for revision in page.revisions():
+    print revision["user"] + "\t" + format_time(revision['timestamp'])
diff --git a/wikipedia2-mwc2.py b/wikipedia2-mwc2.py
new file mode 100644
index 0000000..0fee364
--- /dev/null
+++ b/wikipedia2-mwc2.py
@@ -0,0 +1,6 @@
+import mwclient
+site = mwclient.Site('en.wikipedia.org')
+
+category = site.Pages['Category:University of Washington']
+for page in category:
+    print page.name