# Day-of-week review analysis.
#
# Prompts for the base name of a CSV file (entered without the ".txt"
# extension), reads its first three columns as rating / day of week /
# day-in-review flag, runs a chi-squared test of day of week against rating,
# and writes two stacked bar charts next to the input file name:
#   <name>-count.png       raw review counts per day of week
#   <name>-proportion.png  every day rescaled to the same height, so the bars
#                          show the share of each rating within a day

library(ggplot2)

# Render `plot` to an 8 x 6 inch, 200 dpi PNG at `path`.
# The device is closed through on.exit() so it is released even when
# printing the plot fails. Returns `path` invisibly.
save_png <- function(plot, path) {
  png(path, width = 8, height = 6, units = "in", res = 200)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(path)
}

# ---- Read input ----

cat("\n", "Enter textfile name", "\n") # prompt
y <- readLines(n = 1)
if (length(y) == 0 || !nzchar(y)) {
  stop("No textfile name was entered.", call. = FALSE)
}

x <- paste0(y, ".txt")
if (!file.exists(x)) {
  stop("Input file not found: ", x, call. = FALSE)
}

d <- read.csv(x)[, 1:3]
colnames(d) <- c("rating", "day.of.week", "day.in.review")

# read.csv() can already parse a "True"/"False" column as logical; a direct
# comparison with the string "True" would then be FALSE for every row
# (TRUE is coerced to "TRUE"). Comparing the upper-cased text handles both
# the character and the logical form.
d$day.in.review <- toupper(as.character(d$day.in.review)) == "TRUE"

# ---- Recode day of week ----

# Numeric codes 0-6 map to day names starting at Sunday.
day.names <- c(
  "0" = "Sunday", "1" = "Monday", "2" = "Tuesday", "3" = "Wednesday",
  "4" = "Thursday", "5" = "Friday", "6" = "Saturday"
)
day.chr <- as.character(d$day.of.week)
is.code <- day.chr %in% names(day.names)
day.chr[is.code] <- unname(day.names[day.chr[is.code]])

# The factor levels fix the plotting order (Monday first). Any value that is
# neither a 0-6 code nor one of these names becomes NA.
d$day.of.week <- factor(
  day.chr,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)

# t <- table(d$day.of.week, as.factor(d$rating))
# t2 <- prop.table(t, 2)

# print() explicitly so the result is also shown when the script is run
# through source(), which does not auto-print top-level values.
print(chisq.test(d$day.of.week, as.factor(d$rating)))

# ---- Per-day plotting weights ----

# compute and add a set of weights so that we can graph proportions
# using the same data.frame: each row is weighted by
# (total reviews / reviews on that day), so every day's bar sums to the same
# height.
day.counts <- table(d$day.of.week)
w <- data.frame(sum(day.counts) / day.counts)
colnames(w) <- c("day.of.week", "plot.weight")
d <- merge(d, w, by = "day.of.week", all.x = TRUE, all.y = FALSE)

# ---- Draw two graphs ----

# generate the unscaled count graph
filename.count <- paste0(y, "-count.png")
p <- ggplot(d, aes(x = day.of.week, fill = as.factor(rating))) +
  geom_bar() +
  ggtitle(y) +
  scale_x_discrete("Day of Week") +
  scale_y_continuous("yelp # star review") +
  scale_fill_discrete("", breaks = as.character(rev(1:5)))
save_png(p, filename.count)

# generate the scaled proportion graph
filename.prop <- paste0(y, "-proportion.png")

# Height shared by all weighted bars. Taking the maximum over the days that
# actually have reviews (instead of the first level, Monday) keeps the axis
# breaks valid when Monday has no reviews and its total is NA.
max.value <- max(tapply(d$plot.weight, d$day.of.week, sum), na.rm = TRUE)

p <- ggplot(
  d,
  aes(x = day.of.week, fill = as.factor(rating), weight = plot.weight)
) +
  geom_bar() +
  ggtitle(y) +
  scale_x_discrete("Day of Week") +
  scale_y_continuous(
    "percentage of reviews",
    breaks = (0:4 * max.value / 4),
    labels = seq(0, 100, 25)
  ) +
  scale_fill_discrete("", breaks = as.character(rev(1:5)))
save_png(p, filename.prop)