From 7cd2a85d1c8bc2d5189e0ad63b44756dc3b24b3d Mon Sep 17 00:00:00 2001 From: "T. Bayer" Date: Fri, 17 Jul 2015 08:53:18 -0700 Subject: [PATCH] move one Automation piece to notes, as the deck is already on the long side --- 20150717-wikimania_research.tex | 97 +++++++++++++++++++++++++++++---- 1 file changed, 87 insertions(+), 10 deletions(-) diff --git a/20150717-wikimania_research.tex b/20150717-wikimania_research.tex index cc75b30..8150bad 100644 --- a/20150717-wikimania_research.tex +++ b/20150717-wikimania_research.tex @@ -548,20 +548,97 @@ -% \begin{frame} -% \frametitle{Wikipedia Viewership and Flu Prediction: Results} +\begin{frame} + \centertext{6em}{Automation in Wikipedia} + + \note{Tilman + + Starting to see more practical applications of AI methods to editing. + + Bots have been writing Wikipedia articles ever since 2002, when + User:Rambot covered US municipalities from US census data. + + Picked these two related papers for their somewhat unusual approach} +\end{frame} + + +\begin{frame} + \frametitle{Automation in Wikipedia} + + \larger \larger + Banerjee et al., \e{Playscript Classification and Automatic Wikipedia + Play Articles Generation}. + 2014 22nd International Conference on Pattern Recognition (ICPR). + pp. 3630–3635. + \href{http://dx.doi.org/10.1109/ICPR.2014.624} + {DOI:10.1109/ICPR.2014.624} + \href{http://www.cse.unt.edu/~ccaragea/papers/icpr14.pdf}{Author's copy} + +\end{frame} + + +\begin{frame} + +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} + + \begin{itemize} + \larger \larger \larger + \item Bot searches for playscripts and related documents on the web + \bigskip + \item Extract key information from them, e.g.
+ \begin{itemize} \larger + \item The play's main characters + \item Relevant sentences from online synopses of the play + \item Mentions in Google Books and Google News (as evidence that + the play satisfies Wikipedia's notability criteria) + + \end{itemize} + + \item Some heuristics to exclude non-encyclopedic sentences, e.g. + first-person statements + + \end{itemize} + + \note{Tilman + + NB: Most article creation bots work from well-defined databases + (e.g. species, census data, geographical databases). + + This bot finds article topics and online references itself, + using an elaborate classifier algorithm to distinguish scripts + from non-scripts.} +\end{frame} + +\begin{frame} +\frametitle{Automation in Wikipedia: Bot-written theatre play articles} -% \centering -% \includegraphics[width=\textwidth]{figures/flu.png} + \includegraphics[width=0.3\textwidth]{figures/Fourteen_submission.png} + \begin{itemize} + \larger \larger \larger + \item 15 articles submitted at Articles for Creation. Two accepted + by Wikipedia editors. One of them without major changes. + + + \end{itemize} -% \note{\begin{itemize} -% \larger \larger + \note{Tilman + + Editors were unaware the articles had been automatically generated. + + + Related paper by some of the same authors: + + Banerjee et al., \e{WikiKreator: Improving Wikipedia Stubs + Automatically}. \href{https://siddbanpsu.github.io/publications/acl2015-banerjee-preprint.pdf} + {preprint}, accepted paper at ACL2015 + + Elaborate classifier method to find suitable web resources for + expanding stubs - but copying sentences wholesale from these into + articles landed the bot (User:MightyPepper) in a \href{https://en.wikipedia.org/wiki/Wikipedia:Contributor_copyright_investigations/Archive\#2015}{contributor copyright investigation}\ldots + } +\end{frame} -% \item Wikipedia better than Google at predicting peak flu weeks. -% \item Wikipedia better at predicting relative influenza rates. 
-% \end{itemize}} -% \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -- 2.39.5