move one Automation piece to notes, as the deck is already on the long side
[state_of_wikimedia_research_2015] / 20150717-wikimania_research.tex
index cc75b308c0b88087d3b8ecd2d6917efbc5c43e76..8150bad8b94902e7f9af0d0a471ed746dbd15244 100644
 
 
 
-% \begin{frame}
-%   \frametitle{Wikipedia Viewership and Flu Prediction: Results}
+\begin{frame}
+   \centertext{6em}{Automation in Wikipedia}
+
+   \note{Tilman
+   
+     Starting to see more practical applications of AI methods to editing.
+
+     Bots have been writing Wikipedia articles since 2002, when
+     User:Rambot covered US municipalities from US census data.
+     
+     Picked these two related papers for their somewhat unusual approach}
+\end{frame}
+
+
+\begin{frame}
+  \frametitle{Automation in Wikipedia}
+
+  \larger \larger
+  Banerjee et al., \e{Playscript Classification and Automatic Wikipedia 
+  Play Articles Generation}. 
+  2014 22nd International Conference on Pattern Recognition (ICPR). 
+  pp. 3630–3635. 
+  \href{http://dx.doi.org/10.1109/ICPR.2014.624}
+  {DOI:10.1109/ICPR.2014.624}
+  \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy}
+
+\end{frame}
+
+
+\begin{frame}
+
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
+
+  \begin{itemize}
+   \larger \larger \larger
+   \item Bot searches for playscripts and related documents on the web
+     \bigskip
+   \item Extracts key information from them, e.g.
+     \begin{itemize} \larger
+      \item The play's main characters
+      \item Relevant sentences from online synopses of the play
+      \item Mentions in Google Books and Google News (as evidence that 
+      the play satisfies Wikipedia's notability criteria)
+      
+      \end{itemize}
+      
+      \item Some heuristics to exclude non-encyclopedic sentences, e.g. 
+       first-person statements
+      
+   \end{itemize}
+
+    \note{Tilman
+    
+          NB: Most article creation bots work from well-defined databases
+          (e.g. species, census data, geographical databases). 
+    
+       This bot finds article topics and online references itself, 
+       using an elaborate classifier algorithm to distinguish scripts 
+       from non-scripts.}
+\end{frame}
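
As a purely illustrative aside (not from Banerjee et al.'s paper), the last bullet on the frame above can be pictured with a minimal Python sketch of one such heuristic: dropping sentences that contain first-person pronouns. The pronoun list and the keep_encyclopedic helper are hypothetical simplifications.

\begin{verbatim}
import re

# Toy stand-in for the "exclude first-person statements" heuristic on
# the slide; the paper's actual pipeline is a trained classifier plus
# several filters, none of which are reproduced here.
FIRST_PERSON = re.compile(r"\b(I|[Ww]e|[Mm]y|[Oo]ur)\b")

def keep_encyclopedic(sentences):
    """Keep only sentences with no first-person pronouns."""
    return [s for s in sentences if not FIRST_PERSON.search(s)]

print(keep_encyclopedic([
    "The play premiered at the Royal Court Theatre in 2003.",
    "I really enjoyed the second act.",   # filtered out
    "Its two main characters are estranged brothers.",
]))
\end{verbatim}

In the paper's own terms this is only the smallest final step; topic discovery and playscript classification carry the real weight.
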
+
+\begin{frame}
+\frametitle{Automation in Wikipedia: Bot-written theatre play articles}
 
-%   \centering
-%   \includegraphics[width=\textwidth]{figures/flu.png}
+  \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png}
+  \begin{itemize}
+   \larger \larger \larger
+   \item 15 articles were submitted at Articles for Creation. Two were 
+   accepted by Wikipedia editors, one of them without major changes.
+   
+      
+   \end{itemize}
 
-% \note{\begin{itemize}
-%   \larger \larger
+    \note{Tilman
+    
+    Editors were unaware the articles had been automatically generated.
+    
+    
+    Related paper by some of the same authors:
+  
+    Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs 
+    Automatically}. \href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf}
+    {preprint}, accepted paper at ACL 2015.
+    
+    Elaborate classifier method to find suitable web resources for 
+    expanding stubs. However, copying sentences wholesale from these into
+    articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive#2015}{contributor copyright investigation}\ldots
+    }
+\end{frame}
 
-%   \item Wikipedia better than Google at predicting peak flu weeks.
-%   \item Wikipedia better at predicting relative influenza rates.
-%   \end{itemize}}
 
-% \end{frame}
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
