From: Benjamin Mako Hill Date: Fri, 17 Jul 2015 15:00:54 +0000 (-0700) Subject: added extra notes from working on the presentation X-Git-Url: https://projects.mako.cc/source/state_of_wikimedia_research_2015/commitdiff_plain/c68a60b6fe8adc33137eac5435f7283b00b3e057 added extra notes from working on the presentation --- diff --git a/20150717-wikimania_research.tex b/20150717-wikimania_research.tex index 854624e..cc75b30 100644 --- a/20150717-wikimania_research.tex +++ b/20150717-wikimania_research.tex @@ -337,12 +337,55 @@ \end{frame} +\begin{frame} + \frametitle{How to measure the global influence of languages?} + + \larger \larger + + \e{Traditional} methods rely on: + + \begin{itemize} + \larger \larger + \item \e{Population} of speakers + \item \e{Income} or political power of speakers + \end{itemize} + + Paper presents \e{new network method} based on measuring + \e{co-speakers} of languages in several data sources including + Wikipedia. + +\end{frame} + \begin{frame} \frametitle{Wikipedia as a source of data: Ronen et al.} \includegraphics[width=\textwidth]{figures/ronen_fig1.png} + + \note{Two languages are connected when users that edit an article in + one Wikipedia language edition are significantly more likely to + also edit an article in the edition of the other language. + + If an editor of Spanish is also likely to edit Galician, we'll + call those languages connected.} \end{frame} +\begin{frame} + \frametitle{Wikipedia as a source of data: Ronen et al.} + + \includegraphics[width=\textwidth]{figures/ronen_people.png} + + \note{\begin{itemize} + \item The number of people per language (born 1800–1950) with + articles in at least 26 Wikipedia language editions as a + function of their language’s eigenvector centrality. + \item The bottom row shows the number of people per language (born + 1800–1950) listed in \emph{Human Accomplishment} (a book by + Charles Murray) as a function of their language’s eigenvector + centrality. + \end{itemize}} +\end{frame} + + \subsection{Community and Organization} \begin{frame} @@ -366,13 +409,29 @@ \begin{frame} \frametitle{Community and organization: Warncke-Wang et al.} + + \larger \larger + \e{Perfect Alignment Hypothesis (PAH)}: There is an exact match + between the supply of high-quality content and the demand for it. + + \bigskip \includegraphics[width=\textwidth]{figures/warncke-english_confusion.pdf} + + \note{\e{Quality}: Stub, Start, C, B, Good Article, A, Featured Article + + \e{Popularity}: equivalently sized buckets} \end{frame} \begin{frame} \frametitle{Community and organization: Warncke-Wang et al.} + Measure of the degree of misalignment can be used to build lists of + categories that are relatively \e{``overproduced''} and + \e{``underproduced''}: + + \bigskip + \includegraphics[width=\textwidth]{figures/warncke-english_overunder.pdf} \end{frame} @@ -578,7 +637,7 @@ AAAI Conference on Web and Social Media (ICWSM). \item 2: Articles about women tend to be less centrally connected in the network of articles than articles about men (Smurfette!) \item 3: (\e{viz}) Content of articles about women uses different words - than those about men. Much igher incidence of language related to + than those about men. Much higher incidence of language related to family, gender, and relationships. \end{itemize} } @@ -728,3 +787,12 @@ AAAI Conference on Web and Social Media (ICWSM). \end{document} + +% LocalWords: xshift yshift makopurple Tilman wikipedia Scopus Hu +% LocalWords: Ronen Gonçalves Vespignani Hidalgo al Galician Ranjan +% LocalWords: eigenvector Warncke Terveen Hecht underproduced NEJM +% LocalWords: Hwang Engl doi Kräenbring WebMD WikiProject Mohsen +% LocalWords: Jadidi Markus Strohmaier Wikipedias WPs Smurfette +% LocalWords: Barnhisel Rapchak WikiEd Mesgari Mostafa Okoli Chitu +% LocalWords: Mehdi Mohamad Årup Lanamäki Arto Miquel Ribé OpenSym +% LocalWords: WikiPapers diff --git a/figures/ronen_people.png b/figures/ronen_people.png new file mode 100644 index 0000000..8919720 Binary files /dev/null and b/figures/ronen_people.png differ