finished version of talk for presentation
[state_of_wikimedia_research_2013] / figures / cite_graph.R
# Plot the number of papers per year as a bar chart.
#
# Input:  wikipedia_citations.txt -- headerless CSV whose two columns are
#         named (year, citations) below.
# Output: citations_by_year.pdf, plus the total citation count printed to
#         the console.
#
# the last line is projected based on citations to the end of october
# (almost certainly conservative)
library(ggplot2)

d <- read.csv("wikipedia_citations.txt", header = FALSE)
colnames(d) <- c("year", "citations")

# print the total number of citations; print() is explicit so the total is
# also shown when the script is run through source()
print(sum(d$citations))

# generate and print a graph
# p <- qplot(year, citations, data=d) +
#  geom_line(colour="blue") + geom_point(colour="blue")

# The counts are already aggregated per year, so the bars are drawn with
# stat = "identity": the default counting stat of geom_bar() does not accept
# a y aesthetic in current ggplot2. factor(year) gives one discrete bar per
# year.
p <- ggplot(d, aes(x = factor(year), y = citations)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  scale_x_discrete("Year") +
  scale_y_continuous("Number of Papers")

# write the figure to a PDF (dimensions are in inches)
pdf("citations_by_year.pdf", width = 7.5, height = 5.3)
print(p)
dev.off()
21
## data from dario
##########################################################

# Plot every measured series in dario's spreadsheet as one line per variable,
# by date, on a log10 y axis.
library(ggplot2)
library(reshape)

# import data from dario
d <- read.csv("Wikipedia publications - Data.csv")

# clean up the dates
colnames(d)[1] <- "date"
# drop columns 9 and 10
# NOTE(review): their contents are not visible here -- confirm they are not
# needed for the plot
d <- d[, -c(9, 10)]
# drop the "2013 to date" row (presumably a partial year -- confirm)
d <- d[d$date != "2013 to date", ]
d$date <- as.factor(d$date)

# melt() is called without explicit id variables, so reshape picks the
# factor/character columns as ids and treats the rest as measured values.
# print() is explicit so the plot is also drawn when run through source().
print(
  ggplot(melt(d), aes(x = date, y = value,
                      group = variable, colour = variable)) +
    geom_line() +
    scale_y_log10()
)

Benjamin Mako Hill || Want to submit a patch?