imported the talk given at wikimania
[state_of_wikimedia_research_2014] / figures / cite_graph.R
# Bar chart of the yearly counts stored in wikipedia_citations.txt.
#
# the last line of the input is projected based on citations to the end of
# october (almost certainly conservative); the code below removes that last
# row before summing and plotting (presumably for that reason -- confirm).
library(ggplot2)

# The file has no header row; its two columns are named right after reading.
d <- read.csv("wikipedia_citations.txt", header = FALSE)
colnames(d) <- c("year", "citations")

# Drop the final row. head(d, -1) also behaves for a one-row file, where
# 1:(nrow(d) - 1) would count down as c(1, 0) and silently keep that row.
d <- head(d, -1)

# print the total number of citations (explicit print() so the value is also
# shown when the script is run through source())
print(sum(d$citations))

# generate and print a graph
# Earlier line/point variant, kept for reference:
# p <- qplot(year, citations, data=d) +
#  geom_line(colour="blue") + geom_point(colour="blue")

# A single bar layer drawn from the values themselves (stat = "identity").
# The previous qplot(geom = "bar") + geom_bar() form stacked a second,
# count-based bar layer underneath, which current ggplot2 rejects when a y
# aesthetic is supplied; qplot() itself is deprecated.
p <- ggplot(d, aes(x = factor(year), y = citations)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  scale_x_discrete("Year") +
  scale_y_continuous("Number of Papers")

# Write the chart to a 7.5 x 5.3 inch PDF.
pdf("citations_by_year.pdf", width = 7.5, height = 5.3)
print(p)
dev.off()
23
## data from dario
##########################################################

library(ggplot2)
library(reshape)

# import data from dario
d <- read.csv("Wikipedia publications - Data.csv")

# clean up the dates
colnames(d)[1] <- "date"

# Drop columns 9 and 10 (what they contain is not visible from this script).
d <- d[, -c(9, 10)]

# Remove the "2013 to date" row. %in% never yields NA, so a row whose date is
# missing is kept as-is instead of being turned into an all-NA row, which is
# what indexing with `!d$date == "..."` does.
d <- d[!(d$date %in% "2013 to date"), ]
d$date <- factor(d$date)

# melt() is called without id.vars, so reshape picks the factor/character
# columns as identifiers and stacks the remaining columns into
# `variable`/`value`.
# NOTE(review): this presumably leaves `date` as the only id column -- confirm
# against the CSV.
long <- melt(d)

# One line per melted variable, on a log10 y axis. The explicit print() makes
# the plot appear under source() as well as Rscript.
p <- ggplot(long, aes(x = date, y = value, group = variable,
                      colour = variable)) +
  geom_line() +
  scale_y_log10()
print(p)

Benjamin Mako Hill || Want to submit a patch?