fixed errors (x-png > x.png | missing 2nd png(~~~) | non-hardcoded input)
[noahs_yelp_analysis] / analysis.R
# Ask for the base name of the input file (typed without the ".txt"
# extension) and load the first three columns of that CSV file.
cat("\n","Enter textfile name","\n") # prompt
y <- readLines(n = 1)

# readLines() gives character(0) at end of input and "" for a bare RETURN;
# stop with a clear message instead of going on to look for ".txt".
if (length(y) != 1 || !nzchar(trimws(y))) {
  stop("no textfile name was entered", call. = FALSE)
}
y <- trimws(y)

x <- paste0(y, ".txt")
d <- read.csv(x)

# Only the first three columns are used; check they exist so the failure
# names the file instead of reporting "undefined columns selected".
if (ncol(d) < 3) {
  stop("expected at least 3 columns in ", x, ", found ", ncol(d),
       call. = FALSE)
}
d <- d[, c(1, 2, 3)]

# Name the three columns and convert them to the types used below.
colnames(d) <- c("rating", "day.of.week", "day.in.review")

# Turn the "True"/"False" flag into a logical. read.csv() already converts
# a column made up only of True/False to logical, and a logical compared
# with "True" is never equal ("TRUE" != "True"), so normalise the case
# before comparing. NA stays NA.
d$day.in.review <- toupper(trimws(as.character(d$day.in.review))) == "TRUE"

# Translate the weekday codes 0-6 into names. Values that are not one of
# the codes are left untouched here; anything that is not a weekday name
# becomes NA in the factor() call below.
day.names <- c("0" = "Sunday", "1" = "Monday", "2" = "Tuesday",
               "3" = "Wednesday", "4" = "Thursday", "5" = "Friday",
               "6" = "Saturday")
day.chr <- as.character(d$day.of.week)
is.code <- day.chr %in% names(day.names)
day.chr[is.code] <- unname(day.names[day.chr[is.code]])

# Levels are listed Monday-first so the graphs show the week in that order.
d$day.of.week <- factor(day.chr,
                        levels = c("Monday", "Tuesday", "Wednesday",
                                   "Thursday", "Friday", "Saturday",
                                   "Sunday"))

#t <- table(d$day.of.week, as.factor(d$rating))
#t2 <- prop.table(t,2)

# Chi-squared test of independence between weekday and star rating.
# Wrapped in print() so the result is also shown when the script is run
# through source(), which does not auto-print top-level values.
print(chisq.test(d$day.of.week, as.factor(d$rating)))

# Per-row plotting weights: each review gets the reciprocal of its
# weekday's share of all reviews, so the weighted bars of every weekday
# reach the same height and show within-day proportions. The weights are
# joined back onto d so both graphs can use the same data.frame.
day.share <- prop.table(table(d$day.of.week))
w <- data.frame(1 / day.share)
names(w) <- c("day.of.week", "plot.weight")
d <- merge(x = d, y = w, by = "day.of.week", all.x = TRUE, all.y = FALSE)

# draw two graphs
library(ggplot2)

# generate the unscaled count graph: one stacked bar per weekday, filled
# by star rating, written to "<name>-count.png"
filename.count <- paste0(y, "-count.png")

# Built with ggplot() + geom_bar() because qplot() is deprecated in
# current ggplot2 releases. The plot is assembled before the device is
# opened, so an error while building it does not leave a device open.
p <- ggplot(d, aes(x = day.of.week, fill = as.factor(rating))) +
  geom_bar() +
  ggtitle(y) +
  scale_x_discrete("Day of Week") +
  scale_y_continuous("yelp # star review") +
  scale_fill_discrete("", breaks = as.character(5:1))

# "units" is spelled out in full rather than relying on partial matching
png(filename.count, width = 8, height = 6, units = "in", res = 200)
print(p)
dev.off()

# generate the scaled proportion graph: the same stacked bars, weighted by
# plot.weight so every weekday's bar has the same height, written to
# "<name>-proportion.png"
filename.prop <- paste0(y, "-proportion.png")

# Every weekday that has reviews has weights adding up to the same total,
# which is the bar height that stands for 100%. Take the maximum over all
# weekdays rather than the first entry: tapply() returns NA for a weekday
# without reviews, and an NA here would wipe out the axis breaks.
max.value <- max(tapply(d$plot.weight, d$day.of.week, sum), na.rm = TRUE)

# Built with ggplot() + geom_bar() because qplot() is deprecated in
# current ggplot2 releases. The y axis is relabelled from summed weights
# to 0-100 in steps of 25.
p <- ggplot(d, aes(x = day.of.week, fill = as.factor(rating),
                   weight = plot.weight)) +
  geom_bar() +
  ggtitle(y) +
  scale_x_discrete("Day of Week") +
  scale_y_continuous("percentage of reviews",
                     breaks = (0:4) * max.value / 4,
                     labels = seq(0, 100, 25)) +
  scale_fill_discrete("", breaks = as.character(5:1))

# "units" is spelled out in full rather than relying on partial matching
png(filename.prop, width = 8, height = 6, units = "in", res = 200)
print(p)
dev.off()


Benjamin Mako Hill || Want to submit a patch?