X-Git-Url: https://projects.mako.cc/source/state_of_wikimedia_research_2015/blobdiff_plain/3fc3ed7fb7e392a6dc9441d958ada9c257d587ea..7cd2a85d1c8bc2d5189e0ad63b44756dc3b24b3d:/20150717-wikimania_research.tex diff --git a/20150717-wikimania_research.tex b/20150717-wikimania_research.tex index 967244d..8150bad 100644 --- a/20150717-wikimania_research.tex +++ b/20150717-wikimania_research.tex @@ -286,8 +286,8 @@ \item \e{2968} Wikipedia-related publications in the Scopus database as of November 2013 - \item \e{160} recent publications reviewed or mentioned in the 12 issues - of the Wikimedia Research Newsletter August 2013-July 2014. + \item \e{191} recent publications reviewed or mentioned in the 12 issues + of the Wikimedia Research Newsletter from July 2014 to June 2015. \end{itemize} \end{frame} @@ -337,12 +337,55 @@ \end{frame} +\begin{frame} + \frametitle{How to measure the global influence of languages?} + + \larger \larger + + \e{Traditional} methods rely on: + + \begin{itemize} + \larger \larger + \item \e{Population} of speakers + \item \e{Income} or political power of speakers + \end{itemize} + + Paper presents \e{new network method} based on measuring + \e{co-speakers} of languages in several data sources including + Wikipedia. + +\end{frame} + \begin{frame} \frametitle{Wikipedia as a source of data: Ronen et al.} \includegraphics[width=\textwidth]{figures/ronen_fig1.png} + + \note{Two languages are connected when users that edit an article in + one Wikipedia language edition are significantly more likely to + also edit an article in the edition of the other language. 
+ + If an editor of Spanish is also likely to edit Galician, we'll + call those languages connected.} +\end{frame} + +\begin{frame} + \frametitle{Wikipedia as a source of data: Ronen et al.} + + \includegraphics[width=\textwidth]{figures/ronen_people.png} + + \note{\begin{itemize} + \item The number of people per language (born 1800–1950) with + articles in at least 26 Wikipedia language editions as a + function of their language’s eigenvector centrality. + \item The bottom row shows the number of people per language (born + 1800–1950) listed in \emph{Human Accomplishment} (a book by + Charles Murray) as a function of their language’s eigenvector + centrality. + \end{itemize}} \end{frame} + \subsection{Community and Organization} \begin{frame} @@ -366,13 +409,29 @@ \begin{frame} \frametitle{Community and organization: Warncke-Wang et al.} + + \larger \larger + \e{Perfect Alignment Hypothesis (PAH)}: There is an exact match + between the supply of high-quality content and the demand for it. + + \bigskip \includegraphics[width=\textwidth]{figures/warncke-english_confusion.pdf} + + \note{\e{Quality}: Stub, Start, C, B, Good Article, A, Featured Article + + \e{Popularity}: equivalently sized buckets} \end{frame} \begin{frame} \frametitle{Community and organization: Warncke-Wang et al.} + Measure of the degree of misalignment can be used to build lists of + categories that are relatively \e{``overproduced''} and + \e{``underproduced''}: + + \bigskip + \includegraphics[width=\textwidth]{figures/warncke-english_overunder.pdf} \end{frame} @@ -489,26 +548,287 @@ -% \begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction: Results} +\begin{frame} + \centertext{6em}{Automation in Wikipedia} + + \note{Tilman + + Starting to see more practical applications of AI methods to editing. + + Bots have been writing Wikipedia articles ever since 2002, when + User:Rambot covered US municipalities from US census data. 
+ + Picked these two related papers for their somewhat unusual approach} +\end{frame} + + +\begin{frame} + \frametitle{Automation in Wikipedia} -% \centering -% \includegraphics[width=\textwidth]{figures/flu.png} + \larger \larger + Banerjee et al., \e{Playscript Classification and Automatic Wikipedia + Play Articles Generation}. + 2014 22nd International Conference on Pattern Recognition (ICPR). + pp. 3630–3635. + \href{http://dx.doi.org/10.1109/ICPR.2014.624} + {DOI:10.1109/ICPR.2014.624} + \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy} -% \note{\begin{itemize} -% \larger \larger +\end{frame} -% \item Wikipedia better than Google at predicting peak flu weeks. -% \item Wikipedia better at predicting relative influenza rates. -% \end{itemize}} -% \end{frame} +\begin{frame} + +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} + + \begin{itemize} + \larger \larger \larger + \item Bot searches for playscripts and related documents on the web + \bigskip + \item Extract key information from them, e.g. + \begin{itemize} \larger + \item The play's main characters + \item Relevant sentences from online synopses of the play + \item Mentions in Google Books and Google News (as evidence that + the play satisfies Wikipedia's notability criteria) + + \end{itemize} + + \item Some heuristics to exclude non-encyclopedic sentences, e.g. + first person statements + + \end{itemize} + + \note{Tilman + + NB: Most article creation bots work from well-defined databases + (e.g. species, census data, geographical databases). 
+ + This bot finds article topics and online references itself, + using an elaborate classifier algorithm to distinguish scripts + from non-scripts.} +\end{frame} + +\begin{frame} +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} + + \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png} + \begin{itemize} + \larger \larger \larger + \item 15 articles submitted at Articles for Creation. Two accepted + by Wikipedia editors. One of them without major changes. + + + \end{itemize} + + \note{Tilman + + Editors were unaware the articles had been automatically generated. + + + Related paper by some of the same authors: + + Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs + Automatically}. \href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf} + {preprint}, accepted paper at ACL2015 + + Elaborate classifier method to find suitable web resources for + expanding stubs - but copying sentences wholesale from these into + articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive#2015}{contributor copyright investigation}\ldots + } +\end{frame} + + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Gender on Wikipedia} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} + \centertext{6em}{Gender Beyond the Gap} + + + \note{Aaron: + + Research focused on understanding gender dynamics in Wikipedia + and their impact is another area of research that has continued + to expand this year. A number of high quality papers came out, + several of which analyzed how gender figures in the content of + the encyclopedias. + } + \end{frame} + + \begin{frame} + \frametitle{It's a Man's Wikipedia?} + \larger \larger Wagner, Claudia; David Garcia; Mohsen Jadidi; and Markus +Strohmaier. 2015. 
\href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10585}{``\e{It's a Man's Wikipedia? Assessing + Gender Inequality in an Online Encyclopedia}.''} Ninth International +AAAI Conference on Web and Social Media (ICWSM). + + \end{frame} + + \begin{frame} + + \frametitle{It's a Man's Wikipedia: Motivation} + + \begin{itemize} + \larger \larger \larger + \item We know there's a gender gap. + \item Need for more multidimensional analysis of \e{how gender is + represented in content of articles across Wikipedias}. + \end{itemize} + + \note{ +} + \end{frame} + + \begin{frame} + + \frametitle{It's a Man's Wikipedia: Methods} + + \begin{itemize} + \larger \larger \larger + \item Use data from three sources (Freebase, ``Human + Accomplishment,'' and Pantheon) as baselines for comparison with + six Wikipedias (EN, ES, DE, FR, IT, RU). + \item Examine multiple potential forms of bias: coverage, + structure, lexical characteristics, visibility. + \end{itemize} + + \end{frame} + + \begin{frame} + \frametitle{It's a Man's Wikipedia: Results} + + \centering + \visible<2->{ + \includegraphics[width=\textwidth]{figures/mans_wikipedia_fig.pdf} + } + \note{ + \e{Some key findings:} + \begin{itemize} + \larger \larger + \item 1: Coverage of women (\# articles, length) in WPs is generally + better than other sources. + \item 2: Articles about women tend to be less centrally connected in + the network of articles than articles about men (Smurfette!) + \item 3: (\e{viz}) Content of articles about women uses different words + than those about men. Much higher incidence of language related to + family, gender, and relationships. 
+ \end{itemize} +} + \end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Using Wikipedia in Education} % TODO Fix title +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{frame} + \centertext{6em}{Adopting Wikipedia as a Teaching Tool} + + \note{Aaron: + + Research focused on understanding how Wikipedia and related + resources are adopted for classroom teaching. Growing area of + work, still somewhat preliminary findings. Nonetheless, some of + the papers in this domain make for entertaining reading... + } + \end{frame} + + \begin{frame} + \frametitle{WP and the Wisdom of Crowds} + + \larger \larger Barnhisel, Greg and Marcia + Rapchak. 2014. \href{http://ur1.ca/n5919}{``\e{Wikipedia and the Wisdom of Crowds: A Student + Project}.''} Communications in Information Literacy 8(1): + 145-159. doi:10.7548/cil.v8i1.249. +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Motivation} + \begin{itemize} + \larger \larger + \item Students use Wikipedia uncritically. Don't understand how low + quality much of the information may be or how it may be + manipulated. + \item Professor (author) believes that WP is full of dubious + information. Wants to unmask that for his students. + \item Through more in-depth exposure, students may understand the + limitations of collaborative, open systems of knowledge production. + \end{itemize} +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Methods} + + \begin{itemize} + \larger \larger + \item Require a Senior (college) composition class to work on + editing WP articles (together and individually) throughout the + semester. + \item Incorporate assignments to help students learn about the + history of WP as well as how to use it. + \item Require students to reflect on their experiences in writing. + \item Require students to analyze the pros/cons of open + collaborative writing in their final projects. 
+ \end{itemize} + +\note{ + This is all sort of fabulously in-line with exactly what the WikiEd + Foundation recommends instructors do (!). +} +\end{frame} + + +\begin{frame} + \frametitle{WP and the Wisdom of Crowds: Results} + +\centering +\begin{quote} + \e{Both sources [crowds and experts] have different merits... My + life experience since class pulls me in favor of the wisdom of the + crowd. In my recent studies, I have found that I can learn much + more from a group of my peers than from a single expert.\\ \hfill + --- Student 1} +\end{quote} + +\note{ Not exactly what the instructor expected. Essentially, both he + and the students came away with much more nuanced, and positive, + views of the relative merits, possibilities, and limitations of open + collaborative knowledge production. A happy ending :) } +\end{frame} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Conclusion} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Meta-Analyses} + +\begin{frame} + + \frametitle{Meta-Analyses} + + \begin{itemize} + + \larger \larger + \item Mesgari, Mostafa and Okoli, Chitu and Mehdi, Mohamad and Nielsen, Finn Årup and Lanamäki, Arto. 2014. \href{http://spectrum.library.concordia.ca/978652/}{``\,`The sum of all human knowledge': A systematic review of scholarly research on the content of Wikipedia''}. Journal of the Association for Information Science and Technology. + + \item Miquel-Ribé, + Marc. 2015. \href{https://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10645}{``User + Engagement on Wikipedia, A Review of Studies of Readers and + Editors.''} Ninth International AAAI Conference on Web and + Social Media (ICWSM). 
+ + \end{itemize} + +\end{frame} + %% SLIDE: Other Resources %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{More Resources} @@ -541,35 +861,15 @@ \end{frame} -\subsection{Meta-Analyses} - -\begin{frame} - - \frametitle{Meta-Analyses} - - \begin{itemize} - - \larger \larger - - \item Okoli et al., - \href{https://spectrum.library.concordia.ca/978618/}{``The sum of - all human knowledge'': a systematic review of scholarly research - on the content of Wikipedia}. - - \item Bar-Ilan and Aharony, - \href{http://dl.acm.org/citation.cfm?doid=2615569.2615643}{Twelve - years of Wikipedia research}. - - \item Taraborelli. \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/August\#Keynote\_on\_applicable\_Wikipedia\_research}{Keynote - on Wikipedia Research}. OpenSym 2013. Hong Kong. - - \item Benkler, Shaw, and Hill, - \href{http://mako.cc/academic/benkler\_shaw\_hill-peer\_production\_ci.pdf}{Peer - Production: A Modality of Collective Intelligence}. - - \end{itemize} - -\end{frame} \end{document} + +% LocalWords: xshift yshift makopurple Tilman wikipedia Scopus Hu +% LocalWords: Ronen Gonçalves Vespignani Hidalgo al Galician Ranjan +% LocalWords: eigenvector Warncke Terveen Hecht underproduced NEJM +% LocalWords: Hwang Engl doi Kräenbring WebMD WikiProject Mohsen +% LocalWords: Jadidi Markus Strohmaier Wikipedias WPs Smurfette +% LocalWords: Barnhisel Rapchak WikiEd Mesgari Mostafa Okoli Chitu +% LocalWords: Mehdi Mohamad Årup Lanamäki Arto Miquel Ribé OpenSym +% LocalWords: WikiPapers