X-Git-Url: https://projects.mako.cc/source/state_of_wikimedia_research_2015/blobdiff_plain/cf79c51c1cffe95eed4fe82aa967fb08f992cf5a..7cd2a85d1c8bc2d5189e0ad63b44756dc3b24b3d:/20150717-wikimania_research.tex diff --git a/20150717-wikimania_research.tex b/20150717-wikimania_research.tex index 6da9b9b..8150bad 100644 --- a/20150717-wikimania_research.tex +++ b/20150717-wikimania_research.tex @@ -286,8 +286,8 @@ \item \e{2968} Wikipedia-related publications in the Scopus database as of November 2013 - \item \e{160} recent publications reviewed or mentioned in the 12 issues - of the Wikimedia Research Newsletter August 2013-July 2014. + \item \e{191} recent publications reviewed or mentioned in the 12 issues + of the Wikimedia Research Newsletter from July 2014 to June 2015. \end{itemize} \end{frame} @@ -319,80 +319,516 @@ \section{Paper Summaries} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \subsection{Event Prediction} +\begin{frame} + \centertext{6em}{Wikipedia as a Source of Data} + + \note{Mako} +\end{frame} + +\begin{frame} + + \frametitle{Wikipedia as a source of data} + + \larger \larger Ronen, S., Gonçalves, B., Hu, K. Z., Vespignani, A., + Pinker, S., \& Hidalgo, C. A. (2014). \e{Links that speak: The + global language network and its association with global + fame}. Proceedings of the National Academy of Sciences, 111(52), + E5616—E5622. \href{http://doi.org/10.1073/pnas.1410931111}{doi:10.1073/pnas.1410931111} + +\end{frame} + +\begin{frame} + \frametitle{How to measure the global influence of languages?} + + \larger \larger + + \e{Traditional} methods rely on: + + \begin{itemize} + \larger \larger + \item \e{Population} of speakers + \item \e{Income} or political power of speakers + \end{itemize} + + Paper presents \e{new network method} based on measuring + \e{co-speakers} of languages in several data sources including + Wikipedia. 
+ +\end{frame} + +\begin{frame} + \frametitle{Wikipedia as a source of data: Ronen et al.} + + \includegraphics[width=\textwidth]{figures/ronen_fig1.png} + + \note{Two languages are connected when users that edit an article in + one Wikipedia language edition are significantly more likely to + also edit an article in the edition of the other language. + + If an editor of Spanish is also likely to edit Galician, we'll + call those languages connected.} +\end{frame} + +\begin{frame} + \frametitle{Wikipedia as a source of data: Ronen et al.} + + \includegraphics[width=\textwidth]{figures/ronen_people.png} + + \note{\begin{itemize} + \item The number of people per language (born 1800–1950) with + articles in at least 26 Wikipedia language editions as a + function of their language’s eigenvector centrality. + \item The bottom row shows the number of people per language (born + 1800–1950) listed in \emph{Human Accomplishment} (a book by + Charles Murray) as a function of their language’s eigenvector + centrality. + \end{itemize}} +\end{frame} + + +\subsection{Community and Organization} + +\begin{frame} + \centertext{6em}{Community and Organization} + + \note{Mako} +\end{frame} + +\begin{frame} + + \frametitle{Community and organization} + + \larger \larger Warncke-Wang, M., Ranjan, V., Terveen, L., \& Hecht, + B. (2015). \e{Misalignment Between Supply and Demand of Quality Content + in Peer Production Communities}. In Ninth International AAAI + Conference on Web and Social Media (ICWSM). + + % Retrieved from \href{http://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10591}{http://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10591} + +\end{frame} + +\begin{frame} + \frametitle{Community and organization: Warncke-Wang et al.} + + \larger \larger + \e{Perfect Alignment Hypothesis (PAH)}: There is an exact match + between the supply of high-quality content and the demand for it. 
+ + \bigskip + + \includegraphics[width=\textwidth]{figures/warncke-english_confusion.pdf} + + \note{\e{Quality}: Stub, Start, C, B, Good Article, A, Featured Article + + \e{Popularity}: equivalently sized buckets} +\end{frame} + +\begin{frame} + \frametitle{Community and organization: Warncke-Wang et al.} -% \begin{frame} -% \centertext{6em}{Event Prediction} + Measure of the degree of misalignment can be used to build lists of + categories that are relatively \e{``overproduced''} and + \e{``underproduced''}: -% \note{Mako + \bigskip -% This was the year that studies of readership of Wikipedia really -% blossomed. People figured out how to use the view data. Much of -% what they used it for was prediction.} -% \end{frame} + \includegraphics[width=\textwidth]{figures/warncke-english_overunder.pdf} +\end{frame} + +\subsection{Content Quality} -% \begin{frame} +\begin{frame} + \centertext{6em}{Content quality} + + \note{Tilman + + A decade after the landmark "Nature" study, there still aren't too + many systematic evaluations of the accuracy of Wikipedia's content. + Health articles continue to receive scrutiny, though. With good + reason: Wikipedia is "the most frequently consulted online health + care resource globally" [NEJM article].} +\end{frame} + +\begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction} +\frametitle{Quality of drug articles} + + \larger \larger + Hwang et al., ``\e{Drug Safety in the Digital Age}.'' + N Engl J Med 2014; 370:2460-2462 June 26, 2014 + \href{http://dx.doi.org/10.1056/NEJMp1401767}{doi: 10.1056/NEJMp1401767}. + \bigskip + + Kräenbring et al., \e{Accuracy and completeness of drug + information in Wikipedia: a comparison with standard textbooks of + pharmacology}. PLoS One 9 (9): e106930. + \href{http://dx.doi.org/10.1371/journal.pone.0106930} + {doi:10.1371/journal.pone.0106930} + + + \note{Tilman + + We selected two papers that evaluated drug articles, with + different approaches. 
The first one is a short article in the + extremely prestigious NEJM.} +\end{frame} -% \larger \larger McIver, David J., and John -% S. Brownstein. ``\e{Wikipedia Usage Estimates Prevalence of -% Influenza-Like Illness in the United States in Near Real-Time}.'' -% PLoS Comput Biol 10, no. 4 (April 17, 2014): -% e1003581. \href{http://dx.doi.org/10.1371/journal.pcbi.1003581}{doi:10.1371/journal.pcbi.1003581}. +\begin{frame} + +\frametitle{Quality of drug articles: NEJM} -% \end{frame} + \includegraphics[width=0.49\textwidth]{figures/Pradaxa_tweet_FDAMedWach.png} + % from https://twitter.com/FDAMedWatch/status/281547908095041536 + % = first one in the list at http://www.nejm.org/doi/suppl/10.1056/NEJMp1401767/suppl_file/nejmp1401767_appendix.pdf + \includegraphics[width=0.49\textwidth]{figures/Dabitragan_Contraindications_WP_FDA_warning} + + \tikz{\node [yshift=1.5cm,xshift=-0.4cm] at (current page.center) {\includegraphics[width=1.5cm]{figures/long-arrow-right.png}};} + \begin{itemize} + \larger \larger + \item The US Food and Drug Administration (\e{FDA}) frequently + issues safety warnings about prescription drugs. How long does it + take until these are reflected on English Wikipedia? + \item 41\% updated within two weeks (58\% for high-prevalence + diseases), but 36\% still unchanged after more than a year. + \end{itemize} + + \note{Tilman + + Articles about drugs used to treat high-prevalence diseases (affecting + $>$ 1 million Americans per year) were updated faster.\\ + But the result still caused concern.\\ + Authors find ``there may be a benefit to enabling the FDA to update or + automatically feed new safety communications to Wikipedia pages, as + it does with WebMD''. 
The paper raised awareness among WikiProject + Medicine editors, but there's no systematic updating mechanism yet.} -% \begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction: Motivation} +\end{frame} -% \begin{itemize} -% \larger \larger -% \item \e{Google Flu Trends} uses search engine queries to try to -% predict influenza epidemics more quickly than traditional methods. -% \item ..but it has been criticized as being biased (e.g., by media coverage). -% \item WP is freely available and viewership data is free, unlike -% Google which is proprietary. +\begin{frame} -% \end{itemize} +\frametitle{Quality of drug articles: PLoS One} -% \note{2009 H1N1 Swine Flu broke GFT.} -% \end{frame} + \begin{itemize} + \larger \larger \larger + \item Selected 100 drugs from German undergrad curriculum in pharmacology + \item Extracted information from two standard textbooks + \item ``Accuracy of drug information in [German] Wikipedia was 99.7\%±0.2\% when compared to the textbook data.'' Similar results for English Wikipedia + \end{itemize} + +\end{frame} + + +\begin{frame} + +\frametitle{Quality of drug articles: PLoS One} + + \begin{itemize} + \larger \larger \larger + \item Completeness (as compared to the textbooks): + \begin{itemize} \larger \larger + \item 83.8\% (of 224 statements) for German WP + \item 87.2\% for English WP + \end{itemize} + \item Completeness of contraindications information was 100\% in the En WP sample. + \item English WP cited academic publications more often than German WP. + \item Quality ``significantly improved'' in drug articles assessed + in a 2010 study. + \end{itemize} + + \note{Tilman + + The majority of the missing information (62.5\%) on German WP + was judged non-relevant for undergrad students. + + The result on completeness of contraindications information is + somewhat in contrast with the NEJM study. 
Then again, the + textbooks were probably not perfectly up-to-date either.} +\end{frame} -% \begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction: Methods} -% \begin{itemize} -% \larger \larger \larger -% \item Measure traffic to flu related articles on Wikipedia -% \item Compare to the ``gold standard'' data from the Center for -% Disease Control (CDC) -% \end{itemize} +\begin{frame} + \centertext{6em}{Automation in Wikipedia} -% \end{frame} + \note{Tilman + + Starting to see more practical applications of AI methods to editing. -% \begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction: Results} + Bots have been writing Wikipedia articles ever since back in 2002, + User:Rambot covered US municipalities from US census data. + + Picked these two related papers for their somewhat unusual approach} +\end{frame} -% \centering -% \includegraphics[width=\textwidth]{figures/flu.png} -% \note{\begin{itemize} -% \larger \larger +\begin{frame} + \frametitle{Automation in Wikipedia} -% \item Wikipedia better than Google at predicting peak flu weeks. -% \item Wikipedia better at predicting relative influenza rates. -% \end{itemize}} + \larger \larger + Banerjee et al., \e{Playscript Classification and Automatic Wikipedia + Play Articles Generation}. + 2014 22nd International Conference on Pattern Recognition (ICPR). + pp. 3630–3635. + \href{http://dx.doi.org/10.1109/ICPR.2014.624} + {DOI:10.1109/ICPR.2014.624} + \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy} + +\end{frame} + + +\begin{frame} + +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} + + \begin{itemize} + \larger \larger \larger + \item Bot searches for playscripts and related documents on the web + \bigskip + \item Extract key information from them, e.g. 
+ \begin{itemize} \larger + \item The play's main characters + \item Relevant sentences from online synopses of the play + \item Mentions in Google Books and Google News (as evidence that + the play satisfies Wikipedia's notability criteria) + + \end{itemize} + + \item Some heuristics to exclude non-encyclopedic sentences, e.g. + first-person statements + + \end{itemize} + + \note{Tilman + + NB: Most article creation bots work from well-defined databases + (e.g. species, census data, geographical databases). + + This bot finds article topics and online references itself, + using an elaborate classifier algorithm to distinguish scripts + from non-scripts.} +\end{frame} + +\begin{frame} +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} + + \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png} + \begin{itemize} + \larger \larger \larger + \item 15 articles submitted at Articles for Creation. Two accepted + by Wikipedia editors. One of them without major changes. + + + \end{itemize} + + \note{Tilman + + Editors were unaware the articles had been automatically generated. + + + Related paper by some of the same authors: + + Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs + Automatically}. 
\href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf} + {preprint}, accepted paper at ACL2015 + + Elaborate classifier method to find suitable web resources for + expanding stubs---but copying sentences wholesale from these into + articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive\#2015}{contributor copyright investigation}\ldots + } +\end{frame} + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Gender on Wikipedia} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} + \centertext{6em}{Gender Beyond the Gap} + + + \note{Aaron: + + Research focused on understanding gender dynamics in Wikipedia + and their impact is another area of research that has continued + to expand this year. A number of high quality papers came out, + several of which analyzed how gender figures in the content of + the encyclopedias. + } + \end{frame} + + \begin{frame} + \frametitle{It's a Man's Wikipedia?} + \larger \larger Wagner, Claudia; David Garcia; Mohsen Jadidi; and Markus +Strohmaier. 2015. \href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10585}{``\e{It's a Man's Wikipedia? Assessing + Gender Inequality in an Online Encyclopedia}.''} Ninth International +AAAI Conference on Web and Social Media (ICWSM). + + \end{frame} + + \begin{frame} + + \frametitle{It's a Man's Wikipedia: Motivation} + + \begin{itemize} + \larger \larger \larger + \item We know there's a gender gap. + \item Need for more multidimensional analysis of \e{how gender is + represented in content of articles across Wikipedias}. 
+ \end{itemize} + + \note{ +} + \end{frame} + + \begin{frame} + + \frametitle{It's a Man's Wikipedia: Methods} + + \begin{itemize} + \larger \larger \larger + \item Use data from three sources (Freebase, ``Human + Accomplishment,'' and Pantheon) as baselines for comparison with + six Wikipedias (EN, ES, DE, FR, IT, RU). + \item Examine multiple potential forms of bias: coverage, + structure, lexical characteristics, visibility. + \end{itemize} + + \end{frame} + + \begin{frame} + \frametitle{It's a Man's Wikipedia: Results} + + \centering + \visible<2->{ + \includegraphics[width=\textwidth]{figures/mans_wikipedia_fig.pdf} + } + \note{ + \e{Some key findings:} + \begin{itemize} + \larger \larger + \item 1: Coverage of women (\# articles, length) in WPs is generally + better than other sources. + \item 2: Articles about women tend to be less centrally connected in + the network of articles than articles about men (Smurfette!) + \item 3: (\e{viz}) Content of articles about women uses different words + than those about men. Much higher incidence of language related to + family, gender, and relationships. + \end{itemize} +} + \end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Using Wikipedia in Education} % TODO Fix title +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} + \centertext{6em}{Adopting Wikipedia as a Teaching Tool} + + \note{Aaron: + + Research focused on understanding how Wikipedia and related + resources are adopted for classroom teaching. Growing area of + work, still somewhat preliminary findings. Nonetheless, some of + the papers in this domain make for entertaining reading... + } + \end{frame} + + \begin{frame} + \frametitle{WP and the Wisdom of Crowds} + + \larger \larger Barnhisel, Greg and Marcia + Rapchak. 2014. \href{http://ur1.ca/n5919}{``\e{Wikipedia and the Wisdom of Crowds: A Student + Project}.''} Communications in Information Literacy 8(1): + 145-159. 
doi:10.7548/cil.v8i1.249. +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Motivation} + \begin{itemize} + \larger \larger + \item Students use Wikipedia uncritically. Don't understand how low + quality much of the information may be or how it may be + manipulated. + \item Professor (author) believes that WP is full of dubious + information. Wants to unmask that for his students. + \item Through more in-depth exposure, students may understand the + limitations of collaborative, open systems of knowledge production. + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Methods} + + \begin{itemize} + \larger \larger + \item Require a Senior (college) composition class to work on + editing WP articles (together and individually) throughout the + semester. + \item Incorporate assignments to help students learn about the + history of WP as well as how to use it. + \item Require students to reflect on their experiences in writing. + \item Require students to analyze the pros/cons of open + collaborative writing in their final projects. + \end{itemize} + +\note{ + This is all sort of fabulously in-line with exactly what the WikiEd + Foundation recommends instructors do (!). +} +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Results} + +\centering +\begin{quote} + \e{Both sources [crowds and experts] have different merits... My + life experience since class pulls me in favor of the wisdom of the + crowd. In my recent studies, I have found that I can learn much + more from a group of my peers than from a single expert.\\ \hfill + --- Student 1} +\end{quote} + +\note{ Not exactly what the instructor expected. Essentially, both he + and the students came away with much more nuanced, and positive, + views of the relative merits, possibilities, and limitations of open + collaborative knowledge production. 
A happy ending :) } +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Conclusion} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Meta-Analyses} + +\begin{frame} + + \frametitle{Meta-Analyses} + + \begin{itemize} + + \larger \larger + \item Mesgari, Mostafa and Okoli, Chitu and Mehdi, Mohamad and Nielsen, Finn Årup and Lanamäki, Arto. 2014. \href{http://spectrum.library.concordia.ca/978652/}{``\,`The sum of all human knowledge': A systematic review of scholarly research on the content of Wikipedia''}. Journal of the Association for Information Science and Technology. + + \item Miquel-Ribé, + Marc. 2015. \href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10645}{``User + Engagement on Wikipedia, A Review of Studies of Readers and + Editors.''} Ninth International AAAI Conference on Web and + Social Media (ICWSM). + + \end{itemize} + +\end{frame} + %% SLIDE: Other Resources %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{More Resources} @@ -425,35 +861,15 @@ \end{frame} -\subsection{Meta-Analyses} - -\begin{frame} - - \frametitle{Meta-Analyses} - - \begin{itemize} - - \larger \larger - - \item Okoli et al., - \href{https://spectrum.library.concordia.ca/978618/}{``The sum of - all human knowledge'': a systematic review of scholarly research - on the content of Wikipedia}. - - \item Bar-Ilan and Aharony, - \href{http://dl.acm.org/citation.cfm?doid=2615569.2615643}{Twelve - years of Wikipedia research}. - - \item Taraborelli. \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/August\#Keynote\_on\_applicable\_Wikipedia\_research}{Keynote - on Wikipedia Research}. OpenSym 2013. Hong Kong. 
- - \item Benkler, Shaw, and Hill, - \href{http://mako.cc/academic/benkler\_shaw\_hill-peer\_production\_ci.pdf}{Peer - Production: A Modality of Collective Intelligence}. - - \end{itemize} - -\end{frame} \end{document} + +% LocalWords: xshift yshift makopurple Tilman wikipedia Scopus Hu +% LocalWords: Ronen Gonçalves Vespignani Hidalgo al Galician Ranjan +% LocalWords: eigenvector Warncke Terveen Hecht underproduced NEJM +% LocalWords: Hwang Engl doi Kräenbring WebMD WikiProject Mohsen +% LocalWords: Jadidi Markus Strohmaier Wikipedias WPs Smurfette +% LocalWords: Barnhisel Rapchak WikiEd Mesgari Mostafa Okoli Chitu +% LocalWords: Mehdi Mohamad Årup Lanamäki Arto Miquel Ribé OpenSym +% LocalWords: WikiPapers