\end{frame}
+\begin{frame}
+ \frametitle{How to measure the global influence of languages?}
+
+ \larger \larger
+
+ \e{Traditional} methods rely on:
+
+ \begin{itemize}
+ \larger \larger
+ \item \e{Population} of speakers
+ \item \e{Income} or political power of speakers
+ \end{itemize}
+
+ Paper presents \e{new network method} based on measuring
+ \e{co-speakers} of languages in several data sources including
+ Wikipedia.
+
+\end{frame}
+
\begin{frame}
\frametitle{Wikipedia as a source of data: Ronen et al.}
\includegraphics[width=\textwidth]{figures/ronen_fig1.png}
+
+ \note{Two languages are connected when users that edit an article in
+ one Wikipedia language edition are significantly more likely to
+ also edit an article in the edition of the other language.
+
+ If an editor of Spanish is also likely to edit Galician, we'll
+ call those languages connected.}
+\end{frame}
+
+\begin{frame}
+ \frametitle{Wikipedia as a source of data: Ronen et al.}
+
+ \includegraphics[width=\textwidth]{figures/ronen_people.png}
+
+ \note{\begin{itemize}
+ \item The top row shows the number of people per language (born 1800–1950) with
+ articles in at least 26 Wikipedia language editions as a
+ function of their language’s eigenvector centrality.
+ \item The bottom row shows the number of people per language (born
+ 1800–1950) listed in \emph{Human Accomplishment} (a book by
+ Charles Murray) as a function of their language’s eigenvector
+ centrality.
+ \end{itemize}}
\end{frame}
+
\subsection{Community and Organization}
\begin{frame}
\begin{frame}
\frametitle{Community and organization: Warncke-Wang et al.}
+
+ \larger \larger
+ \e{Perfect Alignment Hypothesis (PAH)}: There is an exact match
+ between the supply of high-quality content and the demand for it.
+
+ \bigskip
\includegraphics[width=\textwidth]{figures/warncke-english_confusion.pdf}
+
+ \note{\e{Quality}: Stub, Start, C, B, Good Article, A, Featured Article
+
+ \e{Popularity}: equivalently sized buckets}
\end{frame}
\begin{frame}
\frametitle{Community and organization: Warncke-Wang et al.}
+ A measure of the degree of misalignment can be used to build lists of
+ categories that are relatively \e{``overproduced''} and
+ \e{``underproduced''}:
+
+ \bigskip
+
\includegraphics[width=\textwidth]{figures/warncke-english_overunder.pdf}
\end{frame}
-% \begin{frame}
-% \frametitle{Wikipedia Viewership and Flu Prediction: Results}
+\begin{frame}
+ \centertext{6em}{Automation in Wikipedia}
+
+ \note{Tilman
+
+ Starting to see more practical applications of AI methods to editing.
+
+ Bots have been writing Wikipedia articles since 2002, when
+ User:Rambot covered US municipalities from US census data.
+
+ Picked these two related papers for their somewhat unusual approach}
+\end{frame}
+
+
+\begin{frame}
+ \frametitle{Automation in Wikipedia}
+
+ \larger \larger
+ Banerjee et al., \e{Playscript Classification and Automatic Wikipedia
+ Play Articles Generation}.
+ 2014 22nd International Conference on Pattern Recognition (ICPR).
+ pp.~3630–3635.
+ \href{http://dx.doi.org/10.1109/ICPR.2014.624}
+ {DOI:10.1109/ICPR.2014.624}
+ \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy}
+
+\end{frame}
+
+
+\begin{frame}
+
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
+
+ \begin{itemize}
+ \larger \larger \larger
+ \item Bot searches for playscripts and related documents on the web
+ \bigskip
+ \item Extract key information from them, e.g.
+ \begin{itemize} \larger
+ \item The play's main characters
+ \item Relevant sentences from online synopses of the play
+ \item Mentions in Google Books and Google News (as evidence that
+ the play satisfies Wikipedia's notability criteria)
+
+ \end{itemize}
+
+ \item Some heuristics to exclude non-encyclopedic sentences, e.g.
+ first-person statements
+
+ \end{itemize}
+
+ \note{Tilman
+
+ NB: Most article creation bots work from well-defined databases
+ (e.g. species, census data, geographical databases).
+
+ This bot finds article topics and online references itself,
+ using an elaborate classifier algorithm to distinguish scripts
+ from non-scripts.}
+\end{frame}
+
+\begin{frame}
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
-% \centering
-% \includegraphics[width=\textwidth]{figures/flu.png}
+ \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png}
+ \begin{itemize}
+ \larger \larger \larger
+ \item 15 articles submitted at Articles for Creation. Two accepted
+ by Wikipedia editors. One of them without major changes.
+
+
+ \end{itemize}
-% \note{\begin{itemize}
-% \larger \larger
+ \note{Tilman
+
+ Editors were unaware the articles had been automatically generated.
+
+
+ Related paper by some of the same authors:
+
+ Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs
+ Automatically}. \href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf}
+ {preprint}, accepted paper at ACL2015
+
+ Elaborate classifier method to find suitable web resources for
+ expanding stubs---but copying sentences wholesale from these into
+ articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive#2015}{contributor copyright investigation}\ldots
+ }
+\end{frame}
-% \item Wikipedia better than Google at predicting peak flu weeks.
-% \item Wikipedia better at predicting relative influenza rates.
-% \end{itemize}}
-% \end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item 2: Articles about women tend to be less centrally connected in
the network of articles than articles about men (Smurfette!)
\item 3: (\e{viz}) Content of articles about women uses different words
- than those about men. Much igher incidence of language related to
+ than those about men. Much higher incidence of language related to
family, gender, and relationships.
\end{itemize}
}
\end{document}
+
+% LocalWords: xshift yshift makopurple Tilman wikipedia Scopus Hu
+% LocalWords: Ronen Gonçalves Vespignani Hidalgo al Galician Ranjan
+% LocalWords: eigenvector Warncke Terveen Hecht underproduced NEJM
+% LocalWords: Hwang Engl doi Kräenbring WebMD WikiProject Mohsen
+% LocalWords: Jadidi Markus Strohmaier Wikipedias WPs Smurfette
+% LocalWords: Barnhisel Rapchak WikiEd Mesgari Mostafa Okoli Chitu
+% LocalWords: Mehdi Mohamad Årup Lanamäki Arto Miquel Ribé OpenSym
+% LocalWords: WikiPapers