\item \e{2968} Wikipedia-related publications in the Scopus database
as of November 2013
- \item \e{160} recent publications reviewed or mentioned in the 12 issues
- of the Wikimedia Research Newsletter August 2013-July 2014.
+ \item \e{191} recent publications reviewed or mentioned in the 12 issues
+ of the Wikimedia Research Newsletter from July 2014 to June 2015.
\end{itemize}
\end{frame}
\item Represent \e{important themes} from Wikipedia in the last year.
\item Research that is likely to be of \e{interest} to Wikimedians.
\item Research by people who are \e{not at Wikimania}.
+ \item \ldots with a bias towards \e{peer-reviewed} publications
\end{itemize}
\note{This is my disclaimer slide...
\section{Paper Summaries}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% \subsection{Event Prediction}
+\begin{frame}
+ \centertext{6em}{Wikipedia as a Source of Data}
+
+ \note{Mako}
+\end{frame}
+
+\begin{frame}
+
+ \frametitle{Wikipedia as a source of data}
+
+ \larger \larger Ronen, S., Gonçalves, B., Hu, K. Z., Vespignani, A.,
+ Pinker, S., \& Hidalgo, C. A. (2014). \e{Links that speak: The
+ global language network and its association with global
+ fame}. Proceedings of the National Academy of Sciences, 111(52),
+ E5616--E5622. \href{http://doi.org/10.1073/pnas.1410931111}{doi:10.1073/pnas.1410931111}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{How to measure the global influence of languages?}
+
+ \larger \larger
+
+ \e{Traditional} methods rely on:
+
+ \begin{itemize}
+ \larger \larger
+ \item \e{Population} of speakers
+ \item \e{Income} or political power of speakers
+ \end{itemize}
+
+ Paper presents \e{new network method} based on measuring
+ \e{co-speakers} of languages in several data sources including
+ Wikipedia.
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Wikipedia as a source of data: Ronen et al.}
+
+ \includegraphics[width=\textwidth]{figures/ronen_fig1.png}
+
+ \note{Two languages are connected when users that edit an article in
+ one Wikipedia language edition are significantly more likely to
+ also edit an article in the edition of the other language.
+
+ If an editor of Spanish is also likely to edit Galician, we'll
+ call those languages connected.}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Wikipedia as a source of data: Ronen et al.}
+
+ \includegraphics[width=\textwidth]{figures/ronen_people.png}
+
+ \note{\begin{itemize}
+ \item The number of people per language (born 1800–1950) with
+ articles in at least 26 Wikipedia language editions as a
+ function of their language’s eigenvector centrality.
+ \item The bottom row shows the number of people per language (born
+ 1800–1950) listed in \emph{Human Accomplishment} (a book by
+ Charles Murray) as a function of their language’s eigenvector
+ centrality.
+ \end{itemize}}
+\end{frame}
+
+
+\subsection{Community and Organization}
+
+\begin{frame}
+ \centertext{6em}{Community and Organization}
+
+ \note{Mako}
+\end{frame}
+
+\begin{frame}
+
+ \frametitle{Community and organization}
+
+ \larger \larger Warncke-Wang, M., Ranjan, V., Terveen, L., \& Hecht,
+ B. (2015). \e{Misalignment Between Supply and Demand of Quality Content
+ in Peer Production Communities}. In Ninth International AAAI
+ Conference on Web and Social Media (ICWSM).
+
+ % Retrieved from \href{http://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10591}{http://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10591}
+
+\end{frame}
+
+\begin{frame}
+ \frametitle{Community and organization: Warncke-Wang et al.}
+
+ \larger \larger
+ \e{Perfect Alignment Hypothesis (PAH)}: There is an exact match
+ between the supply of high-quality content and the demand for it.
+
+ \bigskip
+
+ \includegraphics[width=\textwidth]{figures/warncke-english_confusion.pdf}
+
+ \note{\e{Quality}: Stub, Start, C, B, Good Article, A, Featured Article
+
+ \e{Popularity}: equivalently sized buckets}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Community and organization: Warncke-Wang et al.}
-% \begin{frame}
-% \centertext{6em}{Event Prediction}
+ Measure of the degree of misalignment can be used to build lists of
+ categories that are relatively \e{``overproduced''} and
+ \e{``underproduced''}:
-% \note{Mako
+ \bigskip
-% This was the year that studies of readership of Wikipedia really
-% blossomed. People figured out how to use the view data. Much of
-% what they used it for was prediction.}
-% \end{frame}
+ \includegraphics[width=\textwidth]{figures/warncke-english_overunder.pdf}
+\end{frame}
+
+\subsection{Content Quality}
-% \begin{frame}
+\begin{frame}
+ \centertext{6em}{Content Quality}
+
+ \note{Tilman
+
+ A decade after the landmark ``Nature'' study, there still aren't too
+ many systematic evaluations of the accuracy of Wikipedia's content.
+ Health articles continue to receive scrutiny, though. With good
+ reason: Wikipedia is ``the most frequently consulted online health
+ care resource globally'' [NEJM article].}
+\end{frame}
+
+\begin{frame}
-% \frametitle{Wikipedia Viewership and Flu Prediction}
+\frametitle{Quality of drug articles}
+
+ \larger \larger
+ Hwang et al., ``\e{Drug Safety in the Digital Age}.''
+ N Engl J Med 2014; 370:2460--2462 June 26, 2014
+ \href{http://dx.doi.org/10.1056/NEJMp1401767}{doi: 10.1056/NEJMp1401767}.
+ \bigskip
+
+ Kräenbring et al., \e{Accuracy and completeness of drug
+ information in Wikipedia: a comparison with standard textbooks of
+ pharmacology}. PLoS One 9 (9): e106930.
+ \href{http://dx.doi.org/10.1371/journal.pone.0106930}
+ {doi:10.1371/journal.pone.0106930}
+
+
+ \note{Tilman
+
+ We selected two papers that evaluated drug articles, with
+ different approaches. The first one is a short article in the
+ extremely prestigious NEJM.}
+\end{frame}
-% \larger \larger McIver, David J., and John
-% S. Brownstein. ``\e{Wikipedia Usage Estimates Prevalence of
-% Influenza-Like Illness in the United States in Near Real-Time}.''
-% PLoS Comput Biol 10, no. 4 (April 17, 2014):
-% e1003581. \href{http://dx.doi.org/10.1371/journal.pcbi.1003581}{doi:10.1371/journal.pcbi.1003581}.
+\begin{frame}
+
+\frametitle{Quality of drug articles: NEJM}
-% \end{frame}
+ \includegraphics[width=0.49\textwidth]{figures/Pradaxa_tweet_FDAMedWach.png}
+ % from https://twitter.com/FDAMedWatch/status/281547908095041536
+ % = first one in the list at http://www.nejm.org/doi/suppl/10.1056/NEJMp1401767/suppl_file/nejmp1401767_appendix.pdf
+ \includegraphics[width=0.49\textwidth]{figures/Dabitragan_Contraindications_WP_FDA_warning}
+
+ \tikz{\node [yshift=1.5cm,xshift=-0.4cm] at (current page.center) {\includegraphics[width=1.5cm]{figures/long-arrow-right.png}};}
+ \begin{itemize}
+ \larger \larger
+ \item The US Food and Drug Administration (\e{FDA}) frequently
+ issues safety warnings about prescription drugs. How long does it
+ take until these are reflected on English Wikipedia?
+ \item 41\% updated within two weeks (58\% for high-prevalence
+ diseases), but 36\% still unchanged after more than a year.
+ \end{itemize}
+
+ \note{Tilman
+
+ Articles about drugs used to treat high-prevalence diseases (affecting
+ more than 1 million Americans per year) were updated faster.\\
+ But the result still caused concern.\\
+ Authors find ``there may be a benefit to enabling the FDA to update or
+ automatically feed new safety communications to Wikipedia pages, as
+ it does with WebMD''. The paper raised awareness among WikiProject
+ Medicine editors, but there's no systematic updating mechanism yet.}
-% \begin{frame}
-% \frametitle{Wikipedia Viewership and Flu Prediction: Motivation}
+\end{frame}
-% \begin{itemize}
-% \larger \larger
-% \item \e{Google Flu Trends} uses search engine queries to try to
-% predict influenza epidemics more quickly than traditional methods.
-% \item ..but it has been criticized as being biased (e.g., by media coverage).
-% \item WP is freely available and viewership data is free, unlike
-% Google which is proprietary.
+\begin{frame}
-% \end{itemize}
+\frametitle{Quality of drug articles: PLoS One}
-% \note{2009 H1N1 Swine Flu broke GFT.}
-% \end{frame}
+ \begin{itemize}
+ \larger \larger \larger
+ \item Selected 100 drugs from German undergrad curriculum in pharmacology
+ \item Extracted information from two standard textbooks
+ \item ``Accuracy of drug information in [German] Wikipedia was 99.7\%±0.2\% when compared to the textbook data.'' Similar results for English Wikipedia
+ \end{itemize}
+
+\end{frame}
+
+
+\begin{frame}
+
+\frametitle{Quality of drug articles: PLoS One}
+
+ \begin{itemize}
+ \larger \larger \larger
+ \item Completeness (as compared to the textbooks):
+ \begin{itemize} \larger \larger
+ \item 83.8\% (of 224 statements) for German WP
+ \item 87.2\% for English WP
+ \end{itemize}
+ \item Completeness of contraindications information was 100\% in the En WP sample.
+ \item English WP cited academic publications more often than German WP.
+ \item Quality ``significantly improved'' in drug articles assessed
+ in a 2010 study.
+ \end{itemize}
+
+ \note{Tilman
+
+ The majority of the missing information (62.5\%) on German WP
+ was judged non-relevant for undergrad students.
+
+ The result on completeness of contraindications information is
+ somewhat in contrast with the NEJM study. Then again, the
+ textbooks were probably not perfectly up-to-date either.}
+\end{frame}
-% \begin{frame}
-% \frametitle{Wikipedia Viewership and Flu Prediction: Methods}
-% \begin{itemize}
-% \larger \larger \larger
-% \item Measure traffic to flu related articles on Wikipedia
-% \item Compare to the ``gold standard'' data from the Center for
-% Disease Control (CDC)
-% \end{itemize}
+\begin{frame}
+ \centertext{6em}{Automation in Wikipedia}
-% \end{frame}
+ \note{Tilman
+
+ Starting to see more practical applications of AI methods to editing.
-% \begin{frame}
-% \frametitle{Wikipedia Viewership and Flu Prediction: Results}
+ Bots have been writing Wikipedia articles ever since 2002, when
+ User:Rambot covered US municipalities using US census data.
+
+ Picked these two related papers for their somewhat unusual approach}
+\end{frame}
-% \centering
-% \includegraphics[width=\textwidth]{figures/flu.png}
-% \note{\begin{itemize}
-% \larger \larger
+\begin{frame}
+ \frametitle{Automation in Wikipedia}
-% \item Wikipedia better than Google at predicting peak flu weeks.
-% \item Wikipedia better at predicting relative influenza rates.
-% \end{itemize}}
+ \larger \larger
+ Banerjee et al., \e{Playscript Classification and Automatic Wikipedia
+ Play Articles Generation}.
+ 2014 22nd International Conference on Pattern Recognition (ICPR).
+ pp. 3630–3635.
+ \href{http://dx.doi.org/10.1109/ICPR.2014.624}
+ {DOI:10.1109/ICPR.2014.624}
+ \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy}
+
+\end{frame}
+
+
+\begin{frame}
+
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
+
+ \begin{itemize}
+ \larger \larger \larger
+ \item Bot searches for playscripts and related documents on the web
+ \bigskip
+ \item Extract key information from them, e.g.
+ \begin{itemize} \larger
+ \item The play's main characters
+ \item Relevant sentences from online synopses of the play
+ \item Mentions in Google Books and Google News (as evidence that
+ the play satisfies Wikipedia's notability criteria)
+
+ \end{itemize}
+
+ \item Some heuristics to exclude non-encyclopedic sentences, e.g.
+ first person statements
+
+ \end{itemize}
+
+ \note{Tilman
+
+ NB: Most article creation bots work from well-defined databases
+ (e.g. species, census data, geographical databases).
+
+ This bot finds article topics and online references itself,
+ using an elaborate classifier algorithm to distinguish scripts
+ from non-scripts.}
+\end{frame}
+
+\begin{frame}
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
+
+ \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png}
+ \begin{itemize}
+ \larger \larger \larger
+ \item 15 articles submitted at Articles for Creation. Two accepted
+ by Wikipedia editors. One of them without major changes.
+
+
+ \end{itemize}
+
+ \note{Tilman
+
+ Editors were unaware the articles had been automatically generated.
+
+
+ Related paper by some of the same authors:
+
+ Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs
+ Automatically}. \href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf}
+ {preprint}, accepted paper at ACL2015
+
+ Elaborate classifier method to find suitable web resources for
+ expanding stubs - but copying sentences wholesale from these into
+ articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive\#2015}{contributor copyright investigation}\ldots
+ }
+\end{frame}
+
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Gender on Wikipedia}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+ \centertext{6em}{Gender Beyond the Gap}
+
+
+ \note{Aaron:
+
+ Research focused on understanding gender dynamics in Wikipedia
+ and their impact is another area of research that has continued
+ to expand this year. A number of high quality papers came out,
+ several of which analyzed how gender figures in the content of
+ the encyclopedias.
+ }
+ \end{frame}
+
+ \begin{frame}
+ \frametitle{It's a Man's Wikipedia?}
+ \larger \larger Wagner, Claudia; David Garcia; Mohsen Jadidi; and Markus
+Strohmaier. 2015. \href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10585}{``\e{It's a Man's Wikipedia? Assessing
+ Gender Inequality in an Online Encyclopedia}.''} Ninth International
+AAAI Conference on Web and Social Media (ICWSM).
+
+ \end{frame}
+
+ \begin{frame}
+
+ \frametitle{It's a Man's Wikipedia: Motivation}
+
+ \begin{itemize}
+ \larger \larger \larger
+ \item We know there's a gender gap.
+ \item Need for more multidimensional analysis of \e{how gender is
+ represented in content of articles across Wikipedias}.
+ \end{itemize}
+
+ \note{
+}
+ \end{frame}
+
+ \begin{frame}
+
+ \frametitle{It's a Man's Wikipedia: Methods}
+
+ \begin{itemize}
+ \larger \larger \larger
+ \item Use data from three sources (Freebase, ``Human
+ Accomplishment,'' and Pantheon) as baselines for comparison with
+ six Wikipedias (EN, ES, DE, FR, IT, RU).
+ \item Examine multiple potential forms of bias: coverage,
+ structure, lexical characteristics, visibility.
+ \end{itemize}
+
+ \end{frame}
+
+ \begin{frame}
+ \frametitle{It's a Man's Wikipedia: Results}
+
+ \centering
+ \visible<2->{
+ \includegraphics[width=\textwidth]{figures/mans_wikipedia_fig.pdf}
+ }
+ \note{
+ \e{Some key findings:}
+ \begin{itemize}
+ \larger \larger
+ \item 1: Coverage of women (\# articles, length) in WPs is generally
+ better than other sources.
+ \item 2: Articles about women tend to be less centrally connected in
+ the network of articles than articles about men (Smurfette!)
+ \item 3: (\e{viz}) Content of articles about women uses different words
+ than those about men. Much higher incidence of language related to
+ family, gender, and relationships.
+ \end{itemize}
+}
+ \end{frame}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Using Wikipedia in Education} % TODO Fix title
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+\begin{frame}
+ \centertext{6em}{Adopting Wikipedia as a Teaching Tool}
+
+ \note{Aaron:
+
+ Research focused on understanding how Wikipedia and related
+ resources are adopted for classroom teaching. Growing area of
+ work, still somewhat preliminary findings. Nonetheless, some of
+ the papers in this domain make for entertaining reading...
+ }
+ \end{frame}
+
+ \begin{frame}
+ \frametitle{WP and the Wisdom of Crowds}
+
+ \larger \larger Barnhisel, Greg and Marcia
+ Rapchak. 2014. \href{http://ur1.ca/n5919}{``\e{Wikipedia and the Wisdom of Crowds: A Student
+ Project}.''} Communications in Information Literacy 8(1):
+ 145--159. doi:10.7548/cil.v8i1.249.
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{WP and the Wisdom of Crowds: Motivation}
+ \begin{itemize}
+ \larger \larger
+ \item Students use Wikipedia uncritically. Don't understand how low
+ quality much of the information may be or how it may be
+ manipulated.
+ \item Professor (author) believes that WP is full of dubious
+ information. Wants to unmask that for his students.
+ \item Through more in-depth exposure, students may understand the
+ limitations of collaborative, open systems of knowledge production.
+ \end{itemize}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{WP and the Wisdom of Crowds: Methods}
+
+ \begin{itemize}
+ \larger \larger
+ \item Require a Senior (college) composition class to work on
+ editing WP articles (together and individually) throughout the
+ semester.
+ \item Incorporate assignments to help students learn about the
+ history of WP as well as how to use it.
+ \item Require students to reflect on their experiences in writing.
+ \item Require students to analyze the pros/cons of open
+ collaborative writing in their final projects.
+ \end{itemize}
+
+\note{
+ This is all sort of fabulously in line with exactly what the WikiEd
+ Foundation recommends instructors do (!).
+}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{WP and the Wisdom of Crowds: Results}
+
+\centering
+\begin{quote}
+ \e{Both sources [crowds and experts] have different merits... My
+ life experience since class pulls me in favor of the wisdom of the
+ crowd. In my recent studies, I have found that I can learn much
+ more from a group of my peers than from a single expert.\\ \hfill
+ --- Student 1}
+\end{quote}
+
+\note{ Not exactly what the instructor expected. Essentially, both he
+ and the students came away with much more nuanced, and positive,
+ views of the relative merits, possibilities, and limitations of open
+ collaborative knowledge production. A happy ending :) }
+\end{frame}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Conclusion}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Meta-Analyses}
+
+\begin{frame}
+
+ \frametitle{Meta-Analyses}
+
+ \begin{itemize}
+
+ \larger \larger
+ \item Mesgari, Mostafa and Okoli, Chitu and Mehdi, Mohamad and Nielsen, Finn Årup and Lanamäki, Arto. 2014. \href{http://spectrum.library.concordia.ca/978652/}{``\,`The sum of all human knowledge': A systematic review of scholarly research on the content of Wikipedia''}. Journal of the Association for Information Science and Technology.
+
+ \item Miquel-Ribé,
+ Marc. 2015. \href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10645}{``User
+ Engagement on Wikipedia, A Review of Studies of Readers and
+ Editors.''} Ninth International AAAI Conference on Web and
+ Social Media (ICWSM).
+
+ \end{itemize}
+
+\end{frame}
+
%% SLIDE: Other Resources
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{More Resources}
\begin{itemize}
\larger \larger
- \item \e{Wikimedia Research Newsletter} [[:meta:Research:Newsletter]]
- \item \e{WikiSym} (Later this month in Berlin!)
+ \item \e{Wikimedia Research Newsletter} [[:meta:Research:Newsletter]] / @WikiResearch
+ \item \e{WikiSym/OpenSym} (This August in San Francisco!)
\item \e{WikiPapers Repository} [http://wikipapers.referata.com]
\item \e{Much More}
\end{itemize}
\end{frame}
-\subsection{Meta-Analyses}
-
-\begin{frame}
-
- \frametitle{Meta-Analyses}
-
- \begin{itemize}
-
- \larger \larger
-
- \item Okoli et al.,
- \href{https://spectrum.library.concordia.ca/978618/}{``The sum of
- all human knowledge'': a systematic review of scholarly research
- on the content of Wikipedia}.
-
- \item Bar-Ilan and Aharony,
- \href{http://dl.acm.org/citation.cfm?doid=2615569.2615643}{Twelve
- years of Wikipedia research}.
-
- \item Taraborelli. \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/August\#Keynote\_on\_applicable\_Wikipedia\_research}{Keynote
- on Wikipedia Research}. OpenSym 2013. Hong Kong.
-
- \item Benkler, Shaw, and Hill,
- \href{http://mako.cc/academic/benkler\_shaw\_hill-peer\_production\_ci.pdf}{Peer
- Production: A Modality of Collective Intelligence}.
-
- \end{itemize}
-
-\end{frame}
\end{document}
+
+% LocalWords: xshift yshift makopurple Tilman wikipedia Scopus Hu
+% LocalWords: Ronen Gonçalves Vespignani Hidalgo al Galician Ranjan
+% LocalWords: eigenvector Warncke Terveen Hecht underproduced NEJM
+% LocalWords: Hwang Engl doi Kräenbring WebMD WikiProject Mohsen
+% LocalWords: Jadidi Markus Strohmaier Wikipedias WPs Smurfette
+% LocalWords: Barnhisel Rapchak WikiEd Mesgari Mostafa Okoli Chitu
+% LocalWords: Mehdi Mohamad Årup Lanamäki Arto Miquel Ribé OpenSym
+% LocalWords: WikiPapers