--- /dev/null
+## Questions
+
+1. Save the revision metadata printed in wikipedia1-2.py to a file called "wikipedia_revisions.tsv".
+2. Print out the revision ids and edit summaries (i.e., comment) of each revision for the article on Python.
+3. Find out what other data or metadata you can print out for a revision for an article.
+4. Which article is in more categories? Python (programming language) or R (programming language)?
+5. Find out how many revisions to the article on "Python (programming language)" were made by user "Peterl"? How about "Hfastedge"?
+6. How would you use the API to find out how many revisions/edits the user "Benjamin Mako Hill" has made to Wikipedia?
+7. Can you build a list of all of the articles edited by "Benjamin Mako Hill"? What is the article with the longest title that user "Benjamin Mako Hill" has edited?
+8. How many edits to the article "Python (programming language)" were made in 2014?
+
+## About
+
+Answers to the wikipedia-data-examples project created for the Community Data Science Workshop.
+
+Answers were created by: Benjamin Mako Hill <makohill@uw.edu>
+
+All answers are written for Python 3.
--- /dev/null
+# 1. Save the revision metadata printed in wikipedia1-2.py to a file called "wikipedia_revisions.tsv".
+
+
+import requests
+
+# raw string:
+# ?action=query&prop=revisions&titles=Python_(programming_language)&rvlimit=500&rvprop=timestamp|user&format=json')
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'prop' : 'revisions',
+ 'titles' : 'Python (programming language)',
+ 'rvlimit' : 500,
+ 'rvprop' : "timestamp|user",
+ 'format' : 'json',
+ 'continue' : ''}
+
+output_file = open("wikipedia_revisions.tsv", 'w')
+
+# run a "while True" loop
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ page_title = response["query"]["pages"][page_id]["title"]
+ revisions = response["query"]["pages"][page_id]["revisions"]
+
+ for rev in revisions:
+ print(page_title + "\t" + rev["user"] + "\t" + rev["timestamp"], file=output_file)
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+
+output_file.close()
--- /dev/null
+# 2. Print out the revision ids and edit summaries (i.e., comment) of each revision for the article on Python.
+
+import requests
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'prop' : 'revisions',
+ 'titles' : 'Python (programming language)',
+ 'rvlimit' : 100,
+ # changed this line to add ids|comment
+ 'rvprop' : "ids|comment",
+ 'format' : 'json',
+ 'continue' : ''}
+
+# run a "while True" loop
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ revisions = response["query"]["pages"][page_id]["revisions"]
+
+ for rev in revisions:
+ # changed this line to add revid and comment
+ print(str(rev["revid"]) + "\t" + rev["comment"])
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
--- /dev/null
+# 3. Find out what other data or metadata you can print out for a
+# revision for an article.
+#
+# The answer to this question comes down to the possible options you
+# can give to the "rvprop" parameter of the request. You can find that
+# list here: https://www.mediawiki.org/wiki/API:Revisions#Parameters
+#
+# Here's the list at the time of writing:
+#
+# rvprop: Which properties to get for each revision
+# ids: Get both of these IDs: revid, parentid (default) MW 1.11+
+# flags: Whether the revision was a minor edit (default) MW 1.11+
+# timestamp: The date and time the revision was made (default)
+# user: The user who made the revision, as well as userhidden and anon flags (default) MW 1.8
+# userid: User id of revision creator, as well as userhidden and anon flags MW 1.17+
+# sha1: SHA-1 (base 16) of the revision MW 1.19+
+# contentmodel: Content model id of the revision MW 1.21+
+# comment: The edit comment (default)
+# parsedcomment: The edit/log comment in HTML format with wikilinks and section references expanded into hyperlinks MW 1.16
+# size: The size of the revision text in bytes MW 1.11+
+# content: The revision content. If set, the maximum limit will be 10 times as low
+# tags: Any tags for this revision, such as those added by AbuseFilter MW 1.16+
+
--- /dev/null
+# 4. Which article is in more categories? Python (programming language) or R (programming language)?
+
+import requests
+
+article_list = ["Python (programming language)", "R (programming language)"]
+
+for article in article_list:
+ # Get the list of categories
+ parameters = {'action' : 'query',
+ 'titles' : article,
+ 'prop' : 'categories',
+ 'format' : 'json',
+ 'continue' : ''}
+
+ # reset the counter to zero once per article
+ counter = 0
+
+ # run a "while True" loop
+ while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ for category in response["query"]["pages"][page_id]['categories']:
+ counter = counter + 1
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+ output_line = "%s: %s categories" % (article, counter)
+ print(output_line)
--- /dev/null
+# 5. Find out how many revisions to the article on "Python
+# (programming language)" were made by user "Peterl"? How about
+# "Hfastedge"?
+
+import requests
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'prop' : 'revisions',
+ 'titles' : 'Python (programming language)',
+ 'rvlimit' : 500,
+ 'rvprop' : "ids|user",
+ 'format' : 'json',
+ 'continue' : ''}
+
+user_counts = {}
+
+# run a "while True" loop
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ revisions = response["query"]["pages"][page_id]["revisions"]
+
+ for rev in revisions:
+ current_user = rev['user']
+
+ if current_user in user_counts:
+ user_counts[current_user] = user_counts[current_user] + 1
+ else:
+ user_counts[current_user] = 1
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+
+# now that we've built up the dictionary, lets print it out
+for editor in user_counts.keys():
+ print("%s made %s edits" % (editor, user_counts[editor]))
+
--- /dev/null
+# 5. Find out how many revisions to the article on "Python
+# (programming language)" were made by user "Peterl"? How about
+# "Hfastedge"?
+
+import requests
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'prop' : 'revisions',
+ 'titles' : 'Python (programming language)',
+ 'rvlimit' : 500,
+ 'rvprop' : "ids|user",
+ 'format' : 'json',
+ 'continue' : ''}
+
+user_list = ['Peterl', 'Hfastedge']
+
+counter_peterl = 0
+counter_hfastedge = 0
+
+# run a "while True" loop
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ revisions = response["query"]["pages"][page_id]["revisions"]
+
+ for rev in revisions:
+ if rev['user'] == "Peterl":
+ counter_peterl = counter_peterl + 1
+ if rev['user'] == "Hfastedge":
+ counter_hfastedge = counter_hfastedge + 1
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+
+print("Peterl made %s edits" % counter_peterl)
+print("Hfastedge made %s edits" % counter_hfastedge)
--- /dev/null
+# 6. How would you use the API to find out how many revisions/edits the user "Benjamin Mako Hill" has made to Wikipedia?
+
+# I found documentation here: https://www.mediawiki.org/wiki/API:Users
+
+import requests
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'list' : 'users',
+ 'ususers' : 'Benjamin Mako Hill',
+ 'usprop' : 'editcount',
+ 'format' : 'json' }
+
+wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+response = wp_call.json()
+
+print(response['query']['users'][0]['editcount'])
--- /dev/null
+# 7. Can you build a list of all of the articles edited by "Benjamin
+# Mako Hill"? What is the article with the longest title that user
+# Benjamin Mako Hill has edited?
+
+# Step 1: Searching around on Google, I found this documentation which
+# seemed like the right way to answer this question:
+# https://www.mediawiki.org/wiki/API:Usercontribs
+
+import requests
+
+edited_pages = []
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'list' : 'usercontribs',
+ 'ucuser' : 'Benjamin Mako Hill',
+ 'uclimit' : 500,
+ 'ucprop' : 'title',
+ 'format' : 'json',
+ 'continue' : ''}
+
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ contribs = response['query']['usercontribs']
+
+ for contrib in contribs:
+ if contrib['title'] not in edited_pages:
+ edited_pages.append(contrib['title'])
+
+ # keep looping if we need to continue
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+# print out the list of pages
+counter = 0
+for page in edited_pages:
+ counter = counter + 1
+ print(page)
+
+print("TOTAL PAGES: %s" % counter)
--- /dev/null
+# 8. How many edits to the article "Python (programming language)"
+# were made in 2014?
+
+# Useful page is here: https://www.mediawiki.org/wiki/API:Revisions
+
+import requests
+
+
+# parameter version which makes a little more sense
+parameters = {'action' : 'query',
+ 'prop' : 'revisions',
+ 'titles' : 'Python (programming language)',
+ 'rvlimit' : 500,
+ 'rvprop' : "timestamp",
+ 'format' : 'json',
+ 'continue' : ''}
+
+edits_in_2014 = 0
+
+# run a "while True" loop
+while True:
+ wp_call = requests.get('https://en.wikipedia.org/w/api.php', params=parameters)
+ response = wp_call.json()
+
+ for page_id in response["query"]["pages"].keys():
+ revisions = response["query"]["pages"][page_id]["revisions"]
+
+ for rev in revisions:
+ if rev['timestamp'][0:4] == "2014":
+ edits_in_2014 = edits_in_2014 + 1
+
+ if 'continue' in response:
+ parameters.update(response['continue'])
+ else:
+ break
+
+print(edits_in_2014)