From: Benjamin Mako Hill Date: Fri, 8 Aug 2014 14:17:51 +0000 (-0700) Subject: imported the talk given at wikimania X-Git-Url: https://projects.mako.cc/source/state_of_wikimedia_research_2014/commitdiff_plain/72fa64cae2d562a0d23de1a60d048a5cd44814ee?ds=sidebyside imported the talk given at wikimania --- 72fa64cae2d562a0d23de1a60d048a5cd44814ee diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..852b108 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +auto/ +/*.aux +/*.log +/*.nav +/*.out +/*.pdf +/*.snm +/*.toc +/notes.config +/vc diff --git a/20140808-wikimania_research.tex b/20140808-wikimania_research.tex new file mode 100644 index 0000000..4ac3a26 --- /dev/null +++ b/20140808-wikimania_research.tex @@ -0,0 +1,921 @@ +\documentclass[xcolor=dvipsnames]{beamer} + +% set up the file to create notes in the output PDFs +\usepackage{pgfpages} +\input{notes.config} + +\renewcommand{\rmdefault}{ugm} +\usepackage[garamond]{mathdesign} + +\renewcommand{\sfdefault}{phv} +\usepackage{relsize} + +\usepackage{ucs} +\usepackage[utf8x]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{textcomp} + +% add tikz and a bunch of tikz foo +\usepackage{tikz} +\usetikzlibrary{shapes,shapes.misc,backgrounds,fit,positioning} +\tikzstyle{every picture}+=[overlay,remember picture] + +% add functions to circle parts of slides (e.g., in tables) +\newcommand\marktopleft[1]{% + \tikz[overlay,remember picture] + \node (marker-#1-a) at (0,1.5ex) {};% +} +\newcommand\markbottomright[1]{% + \tikz[overlay,remember picture] + \node (marker-#1-b) at (0,0) {};% + \tikz[overlay,remember picture,dashed,inner sep=3pt] + \node[violet!75,ultra thick,draw,rounded rectangle,fit=(marker-#1-a.center) (marker-#1-b.center)] {};% +} + +% DEPRECATED function to build a huge centered dropshadow +\newcommand\dropshadow[3]{% + \node[black!30!white] at (#1+0.1,#2-0.1) { + \scalebox{2}{\Huge \textbf{#3}} + }; + \node at (#1,#2) { + \scalebox{2}{\Huge \e{#3}} + }; +} + +% create an empty quotetxt so 
we can reuse it +\newcommand{\quotetxt}{} + +% more flexible non-tikz alternative with no dropshadow: +% \centertext{size}{text} typesets text centered, in \e style, at font size #1 with a baseline skip of 0.96 x #1 +\newlength{\centertxtlen} +\newcommand\centertext[2]{% + \setlength{\centertxtlen}{#1}% + \setlength{\centertxtlen}{0.48\centertxtlen}% + {\centering + \fontsize{#1}{2\centertxtlen}\selectfont + \e{#2} + + } +} + +% add functions to stop numbering appendix slides: \backupbegin saves the current frame number and \backupend resets framenumber to that saved value; the helper counter is allocated once, outside the macros, so \backupbegin can be used more than once without a "counter already defined" error +\newcounter{framenumberappendix} +\newcommand{\backupbegin}{% + \setcounter{framenumberappendix}{\value{framenumber}}% +} +\newcommand{\backupend}{% + \addtocounter{framenumberappendix}{-\value{framenumber}}% + \addtocounter{framenumber}{\value{framenumberappendix}}% +} + +% packages i use in essentially every document +\usepackage{graphicx} +\usepackage{url} +% \usepackage{dcolumn} +% \usepackage{booktabs} + +% replace footnotes with symbols instead of numbers +\renewcommand*{\thefootnote}{\fnsymbol{footnote}} +\usepackage{perpage} +\MakePerPage{footnote} + +%% BEAMER THEME STUFF +\usetheme[pageofpages=/,% String used between the current page and the + % total page count. + bullet=default,% Use circles instead of squares for bullets. + titleline=false,% Show a line below the frame title. + alternativetitlepage=true,% Use the fancy title page. + titlepagelogo=figures/logo.pdf,% Logo for the first page. + %watermark=watermark-polito,% Watermark used in every page. + watermarkheight=100px,% Height of the watermark. + watermarkheightmult=4,% The watermark image is 4 times bigger + % than watermarkheight. 
+ ]{Torino} + +\usecolortheme{mako} +\useinnertheme{rectangles} +%\setbeamertemplate{blocks}[rounded][] +\setbeamercolor{block title}{bg=makopurple3, fg=White} + +\setbeamertemplate{items}[default] +\setbeamertemplate{blocks}[shadow=true] + +\usepackage{tcolorbox} +% These options will be applied to all `tcolorboxes` +\tcbset{% + noparskip, + colback=makopurple5, %background color of the box + colframe=makopurple1, %color of frame and title background + coltext=black, %color of body text + coltitle=white, %color of title text + arc=0em, + left=0.1em, + right=0.1em, + fonttitle=\bfseries, + alerted/.style={coltitle=red, + colframe=gray!40}, + example/.style={coltitle=black, + colframe=green!20, + colback=green!5}, + } + + +%\useoutertheme{infolines} + + +\hypersetup{colorlinks=true, linkcolor=Black, citecolor=Black, filecolor=makopurple1, + urlcolor=Plum, unicode=true} + +% set the frame titles and the presentation title in boldface +\setbeamerfont{frametitle}{series=\bfseries} +\setbeamerfont{title}{series=\bfseries} + +% tweak the beamer fonts to make the lists a bit smaller +\setbeamerfont*{itemize/enumerate body}{size=\small} +\setbeamerfont*{itemize/enumerate subbody}{size=\footnotesize} +\setbeamerfont*{itemize/enumerate subsubbody}{size=\footnotesize} + +% indent the margins of the itemize lists a little bit +\setlength{\leftmargin}{0pt} +\setlength{\leftmargini}{0.7cm} +\setlength{\leftmarginii}{0.7cm} + +% create a new \e{} command to make things purple and bold +\newcommand{\e}[1]{\textcolor{makopurple1}{\textbf{#1}}} + +% remove the navigation symbols +\setbeamertemplate{navigation symbols}{} + +\title{Presentation Title} +% \subtitle{Presentation Subtitle} +\author[Benj. 
Mako Hill]{\textbf{Benjamin Mako Hill}\\ mako@mit.edu} + +\institute[MIT/Harvard]{\textbf{Massachusetts Institute of Technology}\\ + Sloan School of Management\\ + MIT Media Lab\\ + \hspace{1pt}\\ + \textbf{Harvard University}\\ + Berkman Center for Internet and Society} + +\date{December 2, 1980} + +\begin{document} + +% remove some of the space in the itemize to make it quite compact +\let\olditemize\itemize +\renewcommand\itemize{\olditemize\itemsep-1pt} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Introduction} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% SLIDE: Title Slide +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}[plain] + + \begin{tikzpicture} + \node at (current page.center) [xshift=-3.5cm, yshift=0.5cm, opacity=0.4] + {\includegraphics[height=\paperheight]{figures/wikimedia_projects.png}}; + + + \node at (current page.south east) + [anchor=south east,text width=1.8\paperwidth,align=right,color=black] + { + {\spaceskip 0.3em% + \fontsize{2.5em}{2.5em} + \selectfont {\bf \color{makopurple4} The State of Wikimedia\\ + Research: 2013-2014} \par} + + \vspace{1em} + + {\spaceskip 0.3em% + \fontsize{2.0em}{2.1em} + \selectfont {\bf \color{black} Benjamin Mako Hill\\ + Tilman Bayer\\ + Aaron Shaw\\ + Wikimania 2014, London\\ + August 8, 2014} \par} + + }; + \end{tikzpicture} + + \input{vc} + + \tikz[overlay,shift=(current page.south west)]{\node [xshift=5.6em,yshift=0.5em]{\colorbox{makopurple1}{\color{white} \tt \smaller \smaller \smaller revision:\ \VCRevision\ (\VCDateTEX)}};} + + \note{I've been doing this for many years. I started in 2008 and + have done this almost every single year since. 
+ + This began as an excuse for me to make sure I was up to date on + Wikimedia Research.} + +\end{frame} + +%% SLIDE: Anecdote from Wikimania 2008 +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\renewcommand{\quotetxt}{``This talk will try to [provide] a quick + tour – a literature review in the scholarly parlance – of the last + year's academic landscape around Wikimedia and its projects geared + at non-academic editors and readers. It will try to categorize, + distill, and describe, from a birds eye view, the academic landscape + as it is shaping up around + our project.''\\ + \hfill – \e{From my Wikimania 2008 Submission}} + +\begin{frame} + + {\smaller \quotetxt} + + \pause + \includegraphics[width=\textwidth]{figures/google_scholar_result.png} + + \pause + \tikz{\draw (current page.center) [xshift=-2.1cm, yshift=0.9cm, color=red] + ellipse (1.5cm and 0.5cm);} + + \note<1>{Back in Wikimania 2008, I set out to run a session at + Wikimania that would provide a comprehensive literature review of + articles about Wikipedia published in the last year. + + \begin{quote} + \quotetxt + \end{quote} + + Then, about two weeks before Wikimania, I did the scholar search + so I could build the literature.} + + \note<2->{I tried to import the whole list into Zotero and managed + to get banned for abusing Google Scholar because they thought + that no human being could realistically consume the amount of + material published on Wikipedia that year. + + So anyway, I had a 45 minute talk so it worked out to 3.45 seconds + per paper... + + And believe it or not, this year is even bigger. 
+ + And my talk is even shorter.} + +\end{frame} + +%% SLIDE: Citations Per Year +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame} + + \includegraphics[width=\textwidth]{figures/citations_by_year.pdf} + + \centering + + {\smaller \emph{Number of citations, per year, with the term + “wikipedia” in the title.\\ + (Source: Google Scholar results. Accessed: 2013-08-06)}} + + \note{Academics have written \e{a lot} of papers about + Wikipedia. There are more than 500 papers published about + Wikipedia each year and although we've reached and moved past a + peak it seems, it's not slowing by much.} + +\end{frame} + +\begin{frame} + + \begin{itemize} + \larger \larger + \item \e{2968} Wikipedia-related publications in the Scopus database + as of November 2013 + + \item \e{160} recent publications reviewed or mentioned in the 12 issues + of the Wikimedia Research Newsletter August 2013--July 2014. + \end{itemize} + +\end{frame} + +%% SLIDE: My Scope Conditions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame} + + \includegraphics[width=\textwidth]{figures/multiple_issues.png} + + \larger \larger + In selecting papers for this session, the goal is always to choose + examples of work that: + + \begin{itemize} + \larger \larger + \item Represent \e{important themes} from Wikipedia in the last year. + \item Research that is likely to be of \e{interest} to Wikimedians. + \item Research by people who are \e{not at Wikimania}. + \end{itemize} + + \note{This is my disclaimer slide... 
+ + Within these goals, the selections are \e{incomplete}, and \e{wrong}.} +\end{frame} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Paper Summaries} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\subsection{Event Prediction} + +\begin{frame} + \centertext{6em}{Event Prediction} + + \note{Mako + + This was the year that studies of readership of Wikipedia really + blossomed. People figured out how to use the view data. Much of + what they used it for was prediction.} +\end{frame} + +\begin{frame} + + \frametitle{Wikipedia Viewership and Flu Prediction} + + \larger \larger McIver, David J., and John + S. Brownstein. ``\e{Wikipedia Usage Estimates Prevalence of + Influenza-Like Illness in the United States in Near Real-Time}.'' + PLoS Comput Biol 10, no. 4 (April 17, 2014): + e1003581. \href{http://dx.doi.org/10.1371/journal.pcbi.1003581}{doi:10.1371/journal.pcbi.1003581}. + +\end{frame} + +\begin{frame} + + \frametitle{Wikipedia Viewership and Flu Prediction: Motivation} + + \begin{itemize} + \larger \larger + \item \e{Google Flu Trends} uses search engine queries to try to + predict influenza epidemics more quickly than traditional methods. + \item ..but it has been criticized as being biased (e.g., by media coverage). + \item WP is freely available and viewership data is free, unlike + Google which is proprietary. 
+ + \end{itemize} + + \note{2009 H1N1 Swine Flu broke GFT.} +\end{frame} + +\begin{frame} + + \frametitle{Wikipedia Viewership and Flu Prediction: Methods} + + \begin{itemize} + \larger \larger \larger + \item Measure traffic to flu-related articles on Wikipedia + \item Compare to the ``gold standard'' data from the Centers for + Disease Control and Prevention (CDC) + \end{itemize} + +\end{frame} + +\begin{frame} + \frametitle{Wikipedia Viewership and Flu Prediction: Results} + + \centering + \includegraphics[width=\textwidth]{figures/flu.png} + +\note{\begin{itemize} + \larger \larger + + \item Wikipedia better than Google at predicting peak flu weeks. + \item Wikipedia better at predicting relative influenza rates. + \end{itemize}} + +\end{frame} + +\begin{frame} + + \frametitle{Other things people have tried to predict include...} + \begin{itemize} + + \larger \larger + \item \href{http://arxiv.org/abs/1405.3612}{Global disease forecasting} + + \item \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2014/June\#.22Prediction_of_Foreign_Box_Office_Revenues_Based_on_Wikipedia_Page_Activity.22}{Box office revenue based on films} + \item \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/December\#Attempt_to_use_Wikipedia_pageviews_to_predict_election_results_in_Iran.2C_Germany_and_the_UK}{Election results in Iran, Germany and the UK} + \item \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2014/April\#cite_ref-10}{Breaking news stories} + \item Trending topics, general zeitgeist. 
+ \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2014/June\#cite_ref-13}{[1]} + \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/August\#Collective_memories_in_Wikipedia}{[2]} + \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/November\#Twitter_activity_leads_Wikipedia_activity_by_an_hour}{[3]} + \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/September\#Identifying_trending_topics_of_yesteryear}{[4]} + + \end{itemize} + +\end{frame} + +\subsection{Wikimedia as a Corpus} + +\begin{frame} + + \centertext{5em}{Wikimedia as a Corpus} + + \note{\e{Aaron} + + From the Bar-Ilan lit review: 48\% of them about Wikipedia per se, + 52\% are just using Wikipedia (e.g. as a text corpus) + + Wiktionary, non-Wikipedia projects. + + Wiktionary as a source of data, not the substance/object of + analysis. Projects as amazing multilingual corpora of natural + language.} + +\end{frame} + +\begin{frame} + + \frametitle{Multi-Lingual Dictionary from Wiktionary: Methods} + + \larger \larger Ács, Judit. ``\e{Pivot-Based Multilingual Dictionary + Building Using Wiktionary}.'' In Proceedings of the Ninth + International Conference on Language Resources and Evaluation + (LREC'14). Reykjavik, Iceland, + 2014. \href{http://www.lrec-conf.org/proceedings/lrec2014/pdf/864\_Paper.pdf}{http://www.lrec-conf.org/proceedings/lrec2014/pdf/864\_Paper.pdf}. + + \note{\e{Methods}: This paper uses a machine classifier to take + advantage of ``pivots'' --- words with common translations across + more than one language --- to automatically construct a + multilingual dictionary via triangulation! 
They then manually + evaluate the precision of this multilingual dictionary on a small + subset of words.} + +\end{frame} + +\begin{frame} + + \frametitle{Multi-Lingual Dictionary from Wiktionary: Results} + + + \begin{columns} + \column{0.5\textwidth} + \includegraphics[height=0.85\textheight]{figures/Acs-fig5.pdf} + + \column{0.5\textwidth} + + \larger \e{Figure:} Translation graph with many pivots. The edge + labels denote the source Wiktionary and article of the translation + pair. + + \end{columns} + + \note{\textbf{Finding:} So, kind of incredibly, this sort of + works. The author succeeds in constructing the multilingual + dictionary, but finds that problems like polysemy (one word + meaning multiple things) limit the precision of the resulting + output. Using the multilingual pivots offered by Wiktionary, + however, performs much better than using ``triangles'' between three + languages alone.} + +\end{frame} + +\subsection{Content Quality} +\begin{frame} + + \centertext{6em}{Content Quality} + + \note{Mako + + Mostly focused on sub-areas. There was one this year we considered + discussion on hematology. Or information on infectious diseases.} + +\end{frame} + +\begin{frame} + + \frametitle{Comparing Wikipedia Quality to Britannica} + + \larger \larger Nifrário Rodrigues, Fernando + Silvério. ``\e{Colaboração Em Massa Ou Amadorismo Em Massa? Um + Estudo Comparativo Da Qualidade Da Informação Científica Produzida + Utilizando Os Conceitos E Ferramentas Wiki}.'' Universidade de + Évora, + 2012. \href{http://massamateurism.blogspot.co.uk/p/synopsis.html}{http://massamateurism.blogspot.co.uk/p/synopsis.html}. + + \note{A Portuguese-language dissertation at the Universidade de + Évora, titled "Colaboração em Massa ou Amadorismo em Massa?" 
+ ("Mass collaboration or mass amateurism?")} + +\end{frame} + +\begin{frame} + + \frametitle{Comparing Wikipedia Quality to Britannica: Methods} + + \begin{itemize} + \larger \larger + \item Random sample of 245 article pairs from both encyclopedias. + \item Graded by an expert in its subject area using a five-point scale. + \item Experts asked, ``to concentrate only on some [...] intrinsic + aspects of the articles' quality, namely accuracy and objectivity, + and discard the contextual, representational and accessibility + aspects.'' + \item Experts were mostly university teachers. + \end{itemize} + + \note{Compared the quality of English Wikipedia with that of + Encyclopedia Britannica} +\end{frame} + +\begin{frame} + \frametitle{Comparing Wikipedia Quality to Britannica: Results} + + \includegraphics[width=\textwidth]{figures/synopsis4.jpg} + + \note{They rated "\e{90\%} of the Wikipedia articles ... as having + \e{equivalent or better quality} than their Britannica + counterparts".} +\end{frame} + +\begin{frame} + + \frametitle{Comparing Wikipedia Quality to Britannica: Results} + + \includegraphics[width=\textwidth]{figures/synopsis2.jpg} + +\end{frame} + +% Other examples: +% - https://meta.wikimedia.org/wiki/Research:Newsletter/2014/April#cite_ref-17 +% - https://meta.wikimedia.org/wiki/Research:Newsletter/2014/February#.22World.E2.80.99s_largest_study_on_Wikipedia:_Better_than_its_reputation.22 (Finnish WP) +% - https://meta.wikimedia.org/wiki/Research:Newsletter/2014/March#.22Risk_factors_and_control_of_hospital_acquired_infections:_a_comparison_between_Wikipedia_and_scientific_literature.22 +% - https://meta.wikimedia.org/wiki/Research:Newsletter/2013/December#.22Evaluation_of_gastroenterology_and_hepatology_articles_on_Wikipedia.22 + +\subsection{Controversy and Conflict} +\begin{frame} + + \centertext{7em}{Controversy and Conflict} + + \note{Tilman} + +\end{frame} + +\begin{frame} + \frametitle{Conflict, Consensus and Quality in Wikipedia} + + \larger 
\larger Osman, Kim. ``\e{The Role of Conflict in Determining + Consensus on Quality in Wikipedia Articles}.'' In Proceedings of + the 9th International Symposium on Open Collaboration, + 12:1–12:6. WikiSym ’13. New York, NY, USA: ACM, + 2013. \href{http://dx.doi.org/10.1145/2491055.2491067}{doi:10.1145/2491055.2491067}. + +\end{frame} + +\begin{frame} + + \frametitle{Conflict, Consensus and Quality in Wikipedia: Methods} + + \begin{itemize} + \larger \larger \larger + \item Literature review of research on online communities + \item Discourse analysis of [[Talk:Australia]] (+ 17 archive pages), + coding 147 threads (156,112 words) in a grounded theory approach. + \end{itemize} + + \note{Very simplified: Grounded theory is an approach in social + sciences where one starts from empirical data first and develops + hypotheses by coding... } + +\end{frame} + +\begin{frame} + + \frametitle{Conflict, Consensus and Quality in Wikipedia: Results} + + \larger \larger + + ``\e{Conflict} was significantly more prevalent \dots\ than \e{collaboration}.'' + + \begin{itemize} + \larger + + \item However: Personal attacks are rare (as opposed to + disagreement). + + \item ``The four main themes that emerged as cause for + debate among the editorial community were \e{sources}, \e{wording}, + \e{structure} and \e{content accuracy}''. + + \item In e.g. sourcing debates, ``conflict ... had a role in developing a + mechanism to ensure the accuracy of information by prompting + participants to properly source and reference material.'' + + \item Conflict is not always bad. ``\e{Generative friction}'' + benefits Wikipedia quality. + \end{itemize} + + \note{i.e. 
editors generally play the ball, not the man.} + +\end{frame} + +\begin{frame} + + \frametitle{Conflict, Consensus and Quality in Wikipedia: Results} + + \includegraphics[width=0.85\textwidth]{figures/osman-fig.png} + + \begin{itemize} + \larger + \item Talk page discussions frequently contain references to + Wikipedia policy, both formal (directly linking a policy) and + informal (mentioning or quoting it). + + \item 86\% of policy references are informal + + \end{itemize} + + \note{Bear in mind that while it was a sizable corpus, it still + pertained to only one article on the English Wikipedia. + + Osman: ``More than being a set of isolated rules for the community, + policies are part of the fabric of the culture of the talk + pages. They regulate both behavior and the production process and + manage conflict so that it remains a generative friction'', e.g. by + discouraging personal attacks.} + +\end{frame} + +\subsection{WikiProjects} + +\begin{frame} + + \centertext{7em}{WikiProjects} + + \note{Another area of research focuses on understanding wikis and + other peer production communities as organizations. Some of the + most interesting work in this area compares many projects in order + to better understand the characteristics that might lead them to + grow and attract contributors.} +\end{frame} + +\begin{frame} + \frametitle{Critical Mass in WikiProjects} + + \larger \larger Solomon, Jacob, and Rick Wash. “\e{Critical Mass of + What? Exploring Community Growth in WikiProjects}.” In Eighth + International AAAI Conference on Weblogs and Social Media, + 2014. \href{http://www.aaai.org/ocs/index.php/ICWSM/ICWSM14/paper/view/8104}{[1]}. + + \note{\e{Methods:} Analyzed data on 1069 EN:WP WikiProjects. Fit + models to describe the projects' growth curves in terms of editors + and project edits. 
They then use these growth curves to estimate + the relationship between adding editors or edits (at an early + stage) and subsequent project growth.} + +\end{frame} + +\begin{frame} + \frametitle{Critical Mass in WikiProjects: Results} + + \includegraphics[width=\textwidth]{figures/Solomon_Walsh-2014-critical_mass_wikiprojects-FIG1.pdf} + + \note{Results: The authors find that projects with more contributors + are more likely to experience subsequent growth in contributions + and contributors. They also find that contributions from both + ``power users'' and more casual, one-off contributors predict + subsequent growth. These findings (like others in this area) have + important implications for project leaders and designers.} + +\end{frame} + +\subsection{Vandalism} +\begin{frame} + + \centertext{7em}{Vandalism} + +\end{frame} + +\begin{frame} + + \frametitle{Edit Patterns and Vandalism Detection} + + \larger \larger Sethi, Deepika. \e{A Large Scale Study of Edit + Patterns in Wikipedia and its Applications to Vandalism + Detection}. M. Sc. thesis, University of Georgia. Submitted + December 2012 + +\end{frame} + +\begin{frame} + + \frametitle{Edit Patterns and Vandalism Detection: Methods} + + \larger \larger Used the \e{PAN Wikipedia vandalism corpus 2010} of + 32,452 edits, classified as vandalism (2,391 edits) or non-vandalism + by Mechanical Turk workers. + + \note{Corpus was created to train vandalism detectors and + formed the basis of several competitions.} + + Identified vandalism based on: + + \begin{itemize} + \larger \larger + + \item Editor country + \item Time of day + + \item \e{Article content domain} using 12 ``classes'' from DBpedia + (e.g., 1. Person, 2. Work, 3. Sports, 4. Places, 5. Food ...) 
+ \item Content of edits + + \end{itemize} + +\end{frame} + +\begin{frame} + \frametitle{Edit Patterns and Vandalism Detection: Results} + + \larger \larger \larger ``\e{Vandalism occurs the most during office + hours} while \e{non-vandalism occurs the most during late + evenings}.'' + +\end{frame} + +\begin{frame} + \frametitle{Edit Patterns and Vandalism Detection: Results} + + \larger ``Hostilities among the countries are one major cause of + vandalism.'' + + \medskip + + \includegraphics[width=\textwidth]{figures/sethi-countries.pdf} + \note{\e{Country} where the vandal is based. + + Methodology: Get Geolocation of IP editors to articles about + countries Result: image : Figure 20: India’s Vandal Contributions + (caption: Where Indians vandalize most) + + [[India]] most frequently vandalized from India, Bahrain, Bangladesh, Pakistan, ... + + [[Pakistan]] most frequently vandalized from India, Pakistan, Poland, Sweden} + + +\note{Other example: [[Taiwan]] from Taiwan, China, Germany ... / [[China]] from Ireland, Germany, Poland, ... } + +\end{frame} + +\begin{frame} + \frametitle{Edit Patterns and Vandalism Detection: Results} + +% \includegraphics[width=\textwidth]{figures/sethi-46-figure.pdf} + + \larger \larger \larger \e{Common Vandalism Words} + + Ball, chicken, British, woman, hole, handicap, meat, kiss, play, + old, love, death, course, kick, American, bomb + + \note{Methodology: Words most frequently occurring in vandalized versions + + Result: Ball, chicken, British, woman, hole, handicap, meat, kiss, + play, old, love, death, course, kick, American, bomb, ...} + + + %\note{Intriguing... 
A vandal who only added words might aim to get visibility for them; a vandal who only deleted words might dislike a particular statement} + + +\end{frame} + +\subsection{Editor Motivation} + +\begin{frame} + + \centertext{7em}{Editor Motivation} + +\end{frame} + +\begin{frame} + + \frametitle{When do barnstars increase edits?} + + \larger \larger Restivo, Michael, and Arnout van de Rijt. “\e{No Praise + without Effort: Experimental Evidence on How Rewards Affect + Wikipedia’s Contributor Community}.” Information, Communication \& + Society 0, no. 0 (0): + 1–12. \href{http://dx.doi.org/10.1080/1369118X.2014.888459}{doi:10.1080/1369118X.2014.888459}. + +\end{frame} + +\begin{frame} + + \frametitle{When do barnstars increase edits? Methods} + + \larger \larger Prior work by the authors showed that randomly given + barnstars to very active editors (top 1\% in a month) resulted in + more edits and a positive feedback loop in English Wikipedia. + + \begin{itemize} + \larger \larger + \item Like before, gives barnstars to \e{randomly} selected users -- + but this time to editors of varying activity level (e.g., + 91--95$^{\mathrm{th}}$, 96--99$^{\mathrm{th}}$, and 100$^{\mathrm{th}}$ + percentile of editing in the month). Plus a ``control'' group of + other editors who do not receive the award. + \item Follow post-award activity on Wikipedia. + \end{itemize} + +\end{frame} + +\begin{frame} + + \frametitle{Do barnstars encourage editing? Results for Edits} + + \includegraphics[width=\textwidth]{figures/barnstars-1.pdf} + + \note{Positive effect only in the case of the very active -- top 1\% + of editors. No significant difference in the other two groups.} + +\end{frame} + +\begin{frame} + + \frametitle{Do barnstars encourage editing? Results for Retention} + + \includegraphics[width=\textwidth]{figures/barnstars-2.pdf} + + \note{Lower retention among award recipients in the less active + group! 
No significant difference in the other two groups.} +\end{frame} + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Conclusion} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% SLIDE: Other Resources +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{frame}{More Resources} + + \begin{itemize} + \larger \larger + \item \e{Wikimedia Research Newsletter} [[:meta:Research:Newsletter]] + \item \e{WikiSym} (Later this month in Berlin!) + \item \e{WikiPapers Repository} [http://wikipapers.referata.com] + \item \e{Much More} + \end{itemize} + + {\centering + \includegraphics[width=0.25\textwidth]{figures/Wikimedia_Research_Newsletter_Logo.png} + + } + + \note{Those are my six exemplary studies from the past year. + + There has been just tons and tons of work in this area. Trying to + talk about this in 20 minutes strikes me as increasingly crazy + every year I try to do it. + + The most important source, now going for a couple years, is the + Wikimedia Research Newsletter which is published monthly in the (English) + Signpost and syndicated on the Wikimedia Research. + + But there are other resources as well. And I encourage you to get + involved.} + +\end{frame} + +\subsection{Meta-Analyses} + +\begin{frame} + + \frametitle{Meta-Analyses} + + \begin{itemize} + + \larger \larger + + \item Okoli et al., + \href{https://spectrum.library.concordia.ca/978618/}{``The sum of + all human knowledge'': a systematic review of scholarly research + on the content of Wikipedia}. + + \item Bar-Ilan and Aharony, + \href{http://dl.acm.org/citation.cfm?doid=2615569.2615643}{Twelve + years of Wikipedia research}. + + \item Taraborelli. \href{https://meta.wikimedia.org/wiki/Research:Newsletter/2013/August\#Keynote\_on\_applicable\_Wikipedia\_research}{Keynote + on Wikipedia Research}. OpenSym 2013. Hong Kong. 
+ + \item Benkler, Shaw, and Hill, + \href{http://mako.cc/academic/benkler\_shaw\_hill-peer\_production\_ci.pdf}{Peer + Production: A Modality of Collective Intelligence}. + + \end{itemize} + +\end{frame} + + +\end{document} + +% LocalWords: xshift yshift makopurple Tilman Wikimedians Okoli al +% LocalWords: Ilan Aharony Taraborelli OpenSym Hong shaw ci pdf GFT +% LocalWords: McIver Brownstein Comput Wiktionary Acs Judit LREC Ou +% LocalWords: Rekyjavik Multi polysemy Nifrário Rodrigues Silvério +% LocalWords: Colaboração Massa Amadorismo Estudo Comparativo Da ou +% LocalWords: Qualidade Informação Científica Produzida Utilizando +% LocalWords: Conceitos Ferramentas Universidade Évora WikiProjects +% LocalWords: Weblogs Sethi Deepika DBpedia Restivo Arnout Rijt th +% LocalWords: WikiPapers diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ac30864 --- /dev/null +++ b/Makefile @@ -0,0 +1,42 @@ +#!/usr/bin/make + +all: notes.config vc $(patsubst %.tex,%.pdf,$(wildcard *.tex)) +pdf: all + +%.pdf: %.tex + rubber -fd $< + +# by default, we produce combined notes/slides output +notes.config: + echo '\setbeameroption{show notes on second screen}' > notes.config + +# rules for generating notesonly +notesonly-config: + echo '\setbeameroption{show only notes}' > notes.config +notesonly: notesonly-config pdf + +# rules for generating slides only +slides-config: + echo '' > notes.config +slides: slides-config pdf + +viewpdf: all + evince *.pdf + +pdfpc: notes.config + pdfpc -n right *pdf + +spell: + aspell -c -t --tex-check-comments -b text.tex + +vc: + vc-git + +clean: + rubber -d --clean *.tex + rm -f *.tmp + rm -f vc + rm -f notes.config + rm -f *.pdfpc + +.PHONY: clean all notesonly-config notesonly slides-config slides viewpdf pdfpc spell diff --git a/beamercolorthememako.sty b/beamercolorthememako.sty new file mode 100644 index 0000000..d6e5fce --- /dev/null +++ b/beamercolorthememako.sty @@ -0,0 +1,30 @@ +% Copyright 2007 by Marco Barisione +% +% This 
file may be distributed and/or modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. + +\mode<presentation> % theme code below is only processed in presentation modes + +\definecolor{makopurple1}{RGB}{116,25,189} +\definecolor{makopurple2}{RGB}{178,141,225} +\definecolor{makopurple3}{RGB}{146,48,149} +\definecolor{makopurple4}{RGB}{130,34,110} +\definecolor{makopurple5}{RGB}{238,225,255} + +\setbeamercolor*{palette primary}{fg=white,bg=makopurple2} +\setbeamercolor*{palette secondary}{fg=white,bg=makopurple3} +\setbeamercolor*{palette tertiary}{fg=white,bg=makopurple1} +\setbeamercolor*{palette quaternary}{fg=white,bg=makopurple1} + +\setbeamercolor*{titlelike}{bg=makopurple1} +\setbeamercolor*{frametitle}{bg=white,fg=white} +\setbeamercolor*{part title}{bg=black,fg=black} +\setbeamercolor*{item}{fg=makopurple3} + +\setbeamercolor*{separation line}{} +\setbeamercolor*{fine separation line}{} + +\mode<all> % switch back so whatever follows this theme file is processed in every mode + diff --git a/beamerinnerthemefancy.sty b/beamerinnerthemefancy.sty new file mode 100644 index 0000000..38b8353 --- /dev/null +++ b/beamerinnerthemefancy.sty @@ -0,0 +1,182 @@ +% Copyright 2007 by Marco Barisione +% +% This file may be distributed and/or modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. + +\mode<presentation> + +% Use alternative title page style. +\DeclareOptionBeamer{alternativetitlepage}[true]{\def\beamer@fancy@alternativetitlepage{#1}} + +% Logo to use in the alternative title page. +\def\beamer@fancy@titlepagelogo{} +\DeclareOptionBeamer{titlepagelogo}{\def\beamer@fancy@titlepagelogo{#1}} + +% Bullet shape. +\DeclareOptionBeamer{bullet}{\def\beamer@fancy@bullet{#1}} + +\ExecuteOptionsBeamer{alternativetitlepage=false,bullet=square} +\ProcessOptionsBeamer + +% Colors. +\setbeamercolor*{lineup}{parent=palette primary} +\setbeamercolor*{linemid}{parent=palette secondary} +\setbeamercolor*{linebottom}{parent=palette tertiary} +\setbeamercolor*{title page header}{parent=palette quaternary} + +% Lengths. 
+\newlength{\beamer@fancy@lineup} +\setlength{\beamer@fancy@lineup}{.025\paperheight} +\newlength{\beamer@fancy@linemid} +\setlength{\beamer@fancy@linemid}{.015\paperheight} +\newlength{\beamer@fancy@linebottom} +\setlength{\beamer@fancy@linebottom}{.01\paperheight} + +% Margins. +\newlength{\beamer@fancy@normalmargin} +\setlength{\beamer@fancy@normalmargin}{.06\paperwidth} +\setbeamersize{text margin left=\beamer@fancy@normalmargin} +\setbeamersize{text margin right=\beamer@fancy@normalmargin} +\setlength\leftmargini{.6\beamer@fancy@normalmargin} +\setlength\leftmarginii{.6\beamer@fancy@normalmargin} +\setlength\leftmarginiii{.6\beamer@fancy@normalmargin} + +% Normal title page. +\defbeamertemplate*{title page normal}{fancy theme}[1][] +{ + \vbox{} + \vfill + \begin{centering} + \begin{beamercolorbox}[wd=\paperwidth,sep=8pt,center,#1]{title page header} + \usebeamerfont{title}\inserttitle\par% + \ifx\insertsubtitle\@empty% + \else% + \vskip0.25em% + {\usebeamerfont{subtitle}\usebeamercolor[fg]{subtitle}\insertsubtitle\par}% + \fi% + \end{beamercolorbox}% + \vskip1em\par + \begin{beamercolorbox}[sep=8pt,center,#1]{author} + \usebeamerfont{author}\insertauthor + \end{beamercolorbox} + \begin{beamercolorbox}[sep=8pt,center,#1]{institute} + \usebeamerfont{institute}\insertinstitute + \end{beamercolorbox} + \begin{beamercolorbox}[sep=8pt,center,#1]{date} + \usebeamerfont{date}\insertdate + \end{beamercolorbox}\vskip0.5em + {\usebeamercolor[fg]{titlegraphic}\inserttitlegraphic\par} + \end{centering} + \vfill +} + +% Alternative title page, you should use this in a frame with the [plain] +% option. +\defbeamertemplate*{title page alternative}{fancy theme}[1][] +{ + {\parskip0pt\offinterlineskip% + \hbox{\hskip-\Gm@lmargin\hbox{\vbox{% + \@tempdima=\textwidth\textwidth=\paperwidth\hsize=\textwidth\def\\{,}\vbox{}\vskip-1.5ex% + % Title. 
+ \begin{beamercolorbox}[wd=\paperwidth,ht=.4\paperheight,center,#1]{title page header} + \usebeamerfont{title}\inserttitle\par% + \ifx\insertsubtitle\@empty% + \else% + \vskip0.25em% + {\usebeamerfont{subtitle}\usebeamercolor[fg]{subtitle}\insertsubtitle\par}% + \fi% + \vspace{.125\paperheight}% + \end{beamercolorbox}% + \vbox{}\vskip-\beamer@fancy@lineup% + \vbox{}\vskip-\beamer@fancy@linemid% + % First line. + \hbox{% + \begin{beamercolorbox}[wd=.2\paperwidth,ht=\beamer@fancy@lineup,dp=0pt]{}% + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.8\paperwidth,ht=\beamer@fancy@lineup,dp=0pt]{lineup}% + \end{beamercolorbox}% + }% + \vbox{}\vskip0ex% + % Second line. + \hbox{% + \begin{beamercolorbox}[wd=.1\paperwidth,ht=\beamer@fancy@linemid,dp=0pt]{}% + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.9\paperwidth,ht=\beamer@fancy@linemid,dp=0pt]{linemid}% + \end{beamercolorbox}% + }% + % Third line. + \hbox{% + \begin{beamercolorbox}[wd=.5\paperwidth,ht=\beamer@fancy@linebottom,dp=0pt]{}% + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.5\paperwidth,ht=\beamer@fancy@linebottom,dp=0pt]{linebottom}% + \end{beamercolorbox}% + }% + \vskip0pt% + }}% + \hskip-\Gm@rmargin% + }}\hfil% + % + \begin{columns} + \ifx\beamer@fancy@titlepagelogo\@empty% + \column{\textwidth} + \else + \column{.5\textwidth} + % Logo. 
+ \begin{centering} + \vbox{}\vfill + \includegraphics[height=.4\paperheight]{\beamer@fancy@titlepagelogo} + \vfill + \end{centering} + \column{.5\textwidth} + \fi + % Authors, institute and date + \vskip1em\par + \begin{beamercolorbox}[sep=8pt,center,#1]{author} + \usebeamerfont{author}\insertauthor + \end{beamercolorbox} + \begin{beamercolorbox}[sep=8pt,center,#1]{institute} + \usebeamerfont{institute}\insertinstitute + \end{beamercolorbox} + \begin{beamercolorbox}[sep=8pt,center,#1]{date} + \usebeamerfont{date}\insertdate + \end{beamercolorbox}\vskip0.5em + {\usebeamercolor[fg]{titlegraphic}\inserttitlegraphic\par} + \end{columns} +} + +\defbeamertemplate*{title page}{fancy}[1][] +{ + \def\beamer@fancy@truetext{true}% + \ifx\beamer@fancy@alternativetitlepage\beamer@fancy@truetext% + \usebeamertemplate{title page alternative}% + \else% + \usebeamertemplate{title page normal}% + \fi% +} + +% Items: square and circle bullet templates; circlealt is used only when bullet=circle, squarealt otherwise. +\defbeamertemplate{itemize item}{squarealt}% +{\tiny\raise.5ex\hbox{\donotcoloroutermaths$\blacksquare$}} +\defbeamertemplate{itemize subitem}{squarealt}% +{\tiny\raise.4ex\hbox{\donotcoloroutermaths$\square$}} +\defbeamertemplate{itemize subsubitem}{squarealt}% +{\tiny\raise.3ex\hbox{\donotcoloroutermaths$\blacksquare$}} + +\defbeamertemplate{itemize item}{circlealt}% +{\small\raise.2ex\hbox{\donotcoloroutermaths$\bullet$}} +\defbeamertemplate{itemize subitem}{circlealt}% +{\small\raise.1ex\hbox{\donotcoloroutermaths$\circ$}} +\defbeamertemplate{itemize subsubitem}{circlealt}% +{\scriptsize\raise.1ex\hbox{\donotcoloroutermaths$\bullet$}} + +\def\circletext{circle} +\ifx\beamer@fancy@bullet\circletext + \setbeamertemplate{items}[circlealt] +\else + \setbeamertemplate{items}[squarealt] +\fi + +\mode<all> + diff --git a/beamerouterthemedecolines.sty b/beamerouterthemedecolines.sty new file mode 100644 index 0000000..6979f88 --- /dev/null +++ b/beamerouterthemedecolines.sty @@ -0,0 +1,160 @@ +% Copyright 2007 by Marco Barisione +% +% This file may be distributed and/or 
modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. + +\mode<presentation> + +% String used between the current page and the total page count. +\def\beamer@decolines@pageofpages{/} +\DeclareOptionBeamer{pageofpages}{\def\beamer@decolines@pageofpages{#1}} + +% Show a line below the frame title. +\DeclareOptionBeamer{titleline}[true]{\def\beamer@decolines@titleline{#1}} + +% Image used for the watermark. +\def\beamer@decolines@watermarkorig{} +\DeclareOptionBeamer{watermark}{\def\beamer@decolines@watermarkorig{#1}} + +% Height of the watermark. +\def\beamer@decolines@watermarkheight{100px} +\DeclareOptionBeamer{watermarkheight}{\def\beamer@decolines@watermarkheight{#1}} + +% The original image height is watermarkheightmult * watermarkheight. +\def\beamer@decolines@watermarkheightmult{1} +\DeclareOptionBeamer{watermarkheightmult}{\def\beamer@decolines@watermarkheightmult{#1}} + +\ExecuteOptionsBeamer{titleline=false} +\ProcessOptionsBeamer + +% Enable/disable the watermark. +\def\watermarkon{% + \def\beamer@decolines@watermark{\beamer@decolines@watermarkorig}% +} +\def\watermarkoff{\def\beamer@decolines@watermark{}} + +% Initially enable the watermark. +\watermarkon + +% Colors. +\setbeamercolor*{lineup}{parent=palette primary} +\setbeamercolor*{linemid}{parent=palette secondary} +\setbeamercolor*{linebottom}{parent=palette tertiary} +\setbeamercolor*{page header}{parent=titlelike} + +% Lengths +\newlength{\headerheight} +\setlength{\headerheight}{.045\paperheight} +\newlength{\beamer@decolines@lineup} +\setlength{\beamer@decolines@lineup}{.018\paperheight} +\newlength{\beamer@decolines@linemid} +\setlength{\beamer@decolines@linemid}{.015\paperheight} +\newlength{\beamer@decolines@linebottom} +\setlength{\beamer@decolines@linebottom}{.01\paperheight} + +% The height of the watermark part below the 3 bottom lines. 
+\newlength{\beamer@decolines@watermarkheightbottom} +\addtolength{\beamer@decolines@watermarkheightbottom}{\beamer@decolines@lineup} +\addtolength{\beamer@decolines@watermarkheightbottom}{\beamer@decolines@linemid} +\addtolength{\beamer@decolines@watermarkheightbottom}{\beamer@decolines@linebottom} + +% The height of the watermark part over the 3 bottom lines before shrinking. +\newlength{\beamer@decolines@watermarkheightupperorig} +\setlength{\beamer@decolines@watermarkheightupperorig}{\beamer@decolines@watermarkheight} +\addtolength{\beamer@decolines@watermarkheightupperorig}{-\beamer@decolines@watermarkheightbottom} +\multiply\beamer@decolines@watermarkheightupperorig by \beamer@decolines@watermarkheightmult + +% Footer. +\defbeamertemplate*{footline}{decolines theme} +{ + \leavevmode% + % First line. + \hbox{% + \begin{beamercolorbox}[wd=.2\paperwidth,ht=\beamer@decolines@lineup,dp=1.5pt]{}% + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.8\paperwidth,ht=\beamer@decolines@lineup,dp=1.5pt,right]{lineup}% + \usebeamerfont{palette primary}\insertframenumber{} \beamer@decolines@pageofpages{} \inserttotalframenumber% + \end{beamercolorbox}% + } % + % Second line. + \hbox{% + \begin{beamercolorbox}[wd=\paperwidth,ht=\beamer@decolines@linemid,dp=0pt]{linemid}% + \end{beamercolorbox}% + } % + % Third line. + \hbox{% + \begin{beamercolorbox}[wd=.1\paperwidth,ht=\beamer@decolines@linebottom,dp=0pt]{}% + \end{beamercolorbox}% + \begin{beamercolorbox}[wd=.9\paperwidth,ht=\beamer@decolines@linebottom,dp=0pt]{linebottom}% + \end{beamercolorbox}% + }% + % This seems to fix some alignment problems with the watermark. It has to be + % always applied if you do not want to see the footer moving up and down when + % moving from a page with watermark to a page without or vice versa. + \vskip-.5px% + % Watermark. 
+ \if\beamer@decolines@watermark\@empty\else% + \vskip-\beamer@decolines@watermarkheightbottom% + \llap{\includegraphics[height=\beamer@decolines@watermarkheightbottom,clip=true,% + trim=0pt 0pt 0pt \beamer@decolines@watermarkheightupperorig]{\beamer@decolines@watermark}\hskip-\paperwidth}% + \fi% +} + +\defbeamertemplate*{headline}{decolines theme}{} +% +%{ +% \leavevmode% +% \hbox{% +% \begin{beamercolorbox}[wd=\paperwidth,ht=\headerheight,dp=0pt]{page header}% +% \end{beamercolorbox}% +% } % +% \vskip0pt% +%} + +\defbeamertemplate*{frametitle}{decolines theme}[1][left] +{ + \ifbeamercolorempty[bg]{frametitle}{}{\nointerlineskip}% + \@tempdima=\textwidth% + \advance\@tempdima by\beamer@leftmargin% + \advance\@tempdima by\beamer@rightmargin% + \vbox{}\vskip-.5\beamer@leftmargin% + \begin{beamercolorbox}[sep=.5\beamer@leftmargin,#1,wd=\the\@tempdima]{page header} + \usebeamerfont{frametitle}\usebeamercolor[bg]{framesubtitle}% + \vbox{}\vskip0ex% + \if@tempswa\else\csname beamer@fte#1\endcsname\fi% + \strut\insertframetitle\strut\par% + {% + \ifx\insertframesubtitle\@empty% + \else% + {\usebeamerfont{framesubtitle}\usebeamercolor[bg]{framesubtitle}\insertframesubtitle\strut\par}% + \fi + }% + \vskip-1ex% + \if@tempswa\else\vskip-\beamer@leftmargin\fi + \end{beamercolorbox}% + \def\beamer@decolines@truetext{true}% + \ifx\beamer@decolines@titleline\beamer@decolines@truetext% + \vskip-.5\beamer@leftmargin% + \begin{beamercolorbox}[wd=\textwidth,ht=.1ex,dp=0ex]{linemid}% + \end{beamercolorbox}% + \fi +} + +% Frame title continuations, default +\defbeamertemplate*{frametitle continuation}{decolines theme}{(\insertcontinuationcount)} + +\defbeamertemplate*{sidebar right}{decolines theme} +{ + \vskip.1\beamer@leftmargin% + \llap{\insertlogo\hskip.5\beamer@leftmargin}% + \vfill% + \if\beamer@decolines@watermark\@empty\else% + \llap{\includegraphics[height=\beamer@decolines@watermarkheight]{\beamer@decolines@watermark}}% + \vskip-\beamer@decolines@watermarkheightbottom% + 
\fi +} + +\mode<all> + diff --git a/beamerthemeTorino.sty b/beamerthemeTorino.sty new file mode 100644 index 0000000..5756c97 --- /dev/null +++ b/beamerthemeTorino.sty @@ -0,0 +1,26 @@ +% Copyright 2007 by Marco Barisione +% +% This file may be distributed and/or modified +% +% 1. under the LaTeX Project Public License and/or +% 2. under the GNU Public License. + +\mode<presentation> + +\DeclareOptionBeamer{alternativetitlepage}[true]{\PassOptionsToPackage{alternativetitlepage=#1}{beamerinnerthemefancy}} +\DeclareOptionBeamer{titlepagelogo}{\PassOptionsToPackage{titlepagelogo=#1}{beamerinnerthemefancy}} +\DeclareOptionBeamer{bullet}{\PassOptionsToPackage{bullet=#1}{beamerinnerthemefancy}} +\DeclareOptionBeamer{pageofpages}{\PassOptionsToPackage{pageofpages=#1}{beamerouterthemedecolines}} +\DeclareOptionBeamer{titleline}[true]{\PassOptionsToPackage{titleline=#1}{beamerouterthemedecolines}} +\DeclareOptionBeamer{watermark}{\PassOptionsToPackage{watermark=#1}{beamerouterthemedecolines}} +\DeclareOptionBeamer{watermarkheight}{\PassOptionsToPackage{watermarkheight=#1}{beamerouterthemedecolines}} +\DeclareOptionBeamer{watermarkheightmult}{\PassOptionsToPackage{watermarkheightmult=#1}{beamerouterthemedecolines}} + +\ProcessOptionsBeamer + +\useinnertheme{fancy} +\useoutertheme{decolines} +\usecolortheme{mako} + +\mode<all> + diff --git a/figures/Acs-fig5.pdf b/figures/Acs-fig5.pdf new file mode 100644 index 0000000..968f247 Binary files /dev/null and b/figures/Acs-fig5.pdf differ diff --git a/figures/Solomon_Walsh-2014-critical_mass_wikiprojects-FIG1.pdf b/figures/Solomon_Walsh-2014-critical_mass_wikiprojects-FIG1.pdf new file mode 100644 index 0000000..b759aa2 Binary files /dev/null and b/figures/Solomon_Walsh-2014-critical_mass_wikiprojects-FIG1.pdf differ diff --git a/figures/Wikimedia_Research_Newsletter_Logo.png b/figures/Wikimedia_Research_Newsletter_Logo.png new file mode 100644 index 0000000..840be88 Binary files /dev/null and b/figures/Wikimedia_Research_Newsletter_Logo.png differ diff 
--git a/figures/Wikipedia publications - Data.csv b/figures/Wikipedia publications - Data.csv new file mode 100644 index 0000000..0dde3db --- /dev/null +++ b/figures/Wikipedia publications - Data.csv @@ -0,0 +1,15 @@ +,wikipedia,corpus,quality,reputation,gender,collaboration,education,,,network,model +2001,18,1,0,0,0,0,0,,,0,0 +2002,8,0,0,0,0,0,0,,,0,0 +2003,12,0,0,0,0,0,0,,,0,0 +2004,47,0,0,0,0,4,0,,,0,0 +2005,213,4,5,1,0,2,5,,,3,0 +2006,354,10,6,7,0,11,7,,,9,0 +2007,570,26,10,6,0,13,7,,,4,5 +2008,634,44,12,10,0,13,16,,,8,8 +2009,721,53,16,8,2,16,19,,,9,13 +2010,754,79,12,10,4,12,20,,,9,12 +2011,692,59,15,18,4,41,29,,,22,15 +2012,674,76,19,7,4,22,25,,,24,15 +2013,435,49,12,3,3,22,29,,,15,14 +2013 to date,255,29,7,2,2,13,17,,,9,8 \ No newline at end of file diff --git a/figures/barnstars-1.pdf b/figures/barnstars-1.pdf new file mode 100644 index 0000000..35d95d8 Binary files /dev/null and b/figures/barnstars-1.pdf differ diff --git a/figures/barnstars-2.pdf b/figures/barnstars-2.pdf new file mode 100644 index 0000000..9e4f628 Binary files /dev/null and b/figures/barnstars-2.pdf differ diff --git a/figures/citations_by_year.pdf b/figures/citations_by_year.pdf new file mode 100644 index 0000000..28d490e Binary files /dev/null and b/figures/citations_by_year.pdf differ diff --git a/figures/cite_graph.R b/figures/cite_graph.R new file mode 100644 index 0000000..74de572 --- /dev/null +++ b/figures/cite_graph.R @@ -0,0 +1,38 @@ +# the last line is projected based on citations to the end of october +# (almost certainly conservative) +library(ggplot2) + +d <- read.csv("wikipedia_citations.txt",header=F) +colnames(d) <- c("year", "citations") + +d <- d[1:(dim(d)[1]-1),] + +# print the total number of citations +sum(d$citations) + +# generate and print a graph +# p <- qplot(year, citations, data=d) + +# geom_line(colour="blue") + geom_point(colour="blue") + +p <- ggplot(d, aes(factor(year), citations)) + geom_bar(stat="identity", fill="darkblue") +p <- p + 
scale_x_discrete("Year") + scale_y_continuous("Number of Papers") + +pdf("citations_by_year.pdf", width=7.5, height=5.3) +print(p) +dev.off() + +## data from dario +########################################################### + +# import data from dario +d <- read.csv("Wikipedia publications - Data.csv") + +# clean up the dates +colnames(d)[1] <- "date" +d <- d[,c(-9,-10)] +d <- d[!d$date == "2013 to date",] +d$date <- as.factor(d$date) + +library(reshape) +qplot(date, value, data=melt(d), group=variable, geom="line") + + aes(colour=variable) + scale_y_log10() diff --git a/figures/flu.png b/figures/flu.png new file mode 100644 index 0000000..29653ad Binary files /dev/null and b/figures/flu.png differ diff --git a/figures/google_scholar_result.png b/figures/google_scholar_result.png new file mode 100644 index 0000000..75ac820 Binary files /dev/null and b/figures/google_scholar_result.png differ diff --git a/figures/multiple_issues.png b/figures/multiple_issues.png new file mode 100644 index 0000000..5f26cf5 Binary files /dev/null and b/figures/multiple_issues.png differ diff --git a/figures/osman-fig.png b/figures/osman-fig.png new file mode 100644 index 0000000..5f7ae70 Binary files /dev/null and b/figures/osman-fig.png differ diff --git a/figures/sethi-46-figure.pdf b/figures/sethi-46-figure.pdf new file mode 100644 index 0000000..fe4d909 Binary files /dev/null and b/figures/sethi-46-figure.pdf differ diff --git a/figures/sethi-countries.pdf b/figures/sethi-countries.pdf new file mode 100644 index 0000000..2c5daa5 Binary files /dev/null and b/figures/sethi-countries.pdf differ diff --git a/figures/synopsis2.jpg b/figures/synopsis2.jpg new file mode 100644 index 0000000..0d7f0d0 Binary files /dev/null and b/figures/synopsis2.jpg differ diff --git a/figures/synopsis4.jpg b/figures/synopsis4.jpg new file mode 100644 index 0000000..da131d9 Binary files /dev/null and b/figures/synopsis4.jpg differ diff --git a/figures/wikimedia_projects.png 
b/figures/wikimedia_projects.png new file mode 100644 index 0000000..67ca39b Binary files /dev/null and b/figures/wikimedia_projects.png differ diff --git a/figures/wikipeda_citations_bytime.png b/figures/wikipeda_citations_bytime.png new file mode 100644 index 0000000..d5377a3 Binary files /dev/null and b/figures/wikipeda_citations_bytime.png differ diff --git a/figures/wikipedia_citations.txt b/figures/wikipedia_citations.txt new file mode 100644 index 0000000..a24d97c --- /dev/null +++ b/figures/wikipedia_citations.txt @@ -0,0 +1,14 @@ +2001,23 +2002,9 +2003,11 +2004,53 +2005,221 +2006,369 +2007,593 +2008,676 +2009,754 +2010,816 +2011,752 +2012,744 +2013,623 +2014,268