-cat("\n","Enter textfile name","\n") # prompt
+cat("\n","Enter textfile name","\n") # prompt for the name used to label the outputs
y<-readLines(n=1)
-x <- paste("lord-hobo-cambridge.txt",sep="")
+# NOTE: the input path is hard-coded; y is only used for plot titles and output filenames
+x <- "lord-hobo-cambridge.txt"
d <- read.csv(x)[,c(1,2,3)]
colnames(d) <- c("rating", "day.of.week", "day.in.review")
d$day.of.week[d$day.of.week == "4"] <- "Thursday"
d$day.of.week[d$day.of.week == "5"] <- "Friday"
d$day.of.week[d$day.of.week == "6"] <- "Saturday"
-d$day.of.week <- factor(d$day.of.week, levels = c("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"))
+d$day.of.week <- factor(d$day.of.week, levels = c("Monday","Tuesday","Wednesday",
+ "Thursday","Friday","Saturday","Sunday"))
d$day.of.week <- as.factor(d$day.of.week)
t2 <- prop.table(t,2)
chisq.test(d$day.of.week, as.factor(d$rating))
-# draw a graph
+# weight every review by the inverse of its day's share of all reviews, so that
+# the same data.frame can be graphed as per-day proportions (each day's weights sum to one common total)
+w <- data.frame(1 / (table(d$day.of.week)/sum(table(d$day.of.week))))
+colnames(w) <- c("day.of.week", "plot.weight")
+d <- merge(d, w, by="day.of.week", all.x=TRUE, all.y=FALSE) # keep every review row; drop weights for days with no reviews
+
+# draw two graphs
library(ggplot2)
-v <- paste(y,"-baaaarplot.png",sep="")
-png(v, width=8, height=6, unit="in", res=200)
-p <- qplot(day.of.week, data=d, fill=as.factor(rating), main=y)
-#p <- qplot(levels(d$day.of.week),as.factor(names(d$rating)), t, data=data.frame(t,levels(d$day.of.week)))
-p <- p + scale_x_discrete("Day of Week") + scale_y_continuous("yelp # star review") + scale_fill_discrete("")
+
+# generate the unscaled count graph: one stacked bar per day, filled by rating
+filename.count <- paste(y,"-count.png",sep="")
+# open an 8x6 inch, 200 dpi PNG device; it is closed by dev.off() after printing
+png(filename.count, width=8, height=6, units="in", res=200)
+p <- qplot(day.of.week, data=d, fill=as.factor(rating), geom="bar", main=y)
+p <- p + scale_x_discrete("Day of Week") +
+ scale_y_continuous("yelp # star review") +
+ scale_fill_discrete("", breaks=as.character(rev(1:5)) ) # legend entries listed from 5 down to 1
+print(p)
+dev.off()
+
+# generate the scaled proportion graph
+filename.prop <- paste(y,"-proportion.png",sep="")
+# each non-empty day's weights sum to the same total, which is the 100% mark; max() skips empty days (NA)
+max.value <- max(tapply(d$plot.weight, d$day.of.week, sum), na.rm=TRUE)
+png(filename.prop, width=8, height=6, units="in", res=200) # open the device so print(p) below writes this file
+p <- qplot(day.of.week, data=d, fill=as.factor(rating), geom="bar",
+ weight=plot.weight, main=y)
+p <- p + scale_x_discrete("Day of Week") +
+ scale_y_continuous("percentage of reviews",
+ breaks=(0:4 * max.value/4), labels=seq(0,100, 25)) +
+ scale_fill_discrete("", breaks=as.character(rev(1:5)))
print(p)
dev.off()
+