# load up libraries and data
#######################################################################

library(ggplot2)
library(reshape2)
library(parallel)

d <- read.delim("gitdata_1week.tsv", stringsAsFactors = FALSE)
d$url <- NULL

# Replace every "T" in the raw date strings with a space before parsing
# (presumably the ISO 8601 date/time separator -- confirm against the data).
d$date <- gsub("T", " ", d$date)
d$date <- as.POSIXct(d$date)

# crop the "Event" suffix off the end of the type names
d$type <- gsub("Event$", "", d$type)

# Frequency summaries, most common first. They are printed explicitly so the
# output also appears when the script is run through source(), which does not
# auto-print top-level values.
print(rev(sort(table(d$type))))
print(head(rev(sort(table(d$name)))))
print(head(rev(sort(table(d$actor)))))

# Bucket every row into its minute and its hour.
d$min <- cut(d$date, breaks = "min")
d$hour <- cut(d$date, breaks = "hour")

# helpers for the phase diagrams
######################################################################

# Build the hourly data set behind a phase diagram.
#
# Args:
#   d: data frame with an `hour` column holding the hourly bins created by
#      cut() above.
#
# Returns a data frame with one row per hourly bin, excluding the first and
# the last bin, with the columns:
#   date          start of the bin (POSIXct)
#   freq          number of rows of `d` that fall in the bin
#   freq2         `freq` of the preceding bin
#   diff          freq2 - freq
#   hour.of.week  running index of the retained bins, starting at 1
#   hour          clock hour of `date` (0-23)
build.phase.diagram.dataset <- function (d) {
  grid.tmp <- as.data.frame(table(d$hour))
  colnames(grid.tmp) <- c("date", "freq")
  grid.tmp$date <- as.POSIXct(grid.tmp$date)

  n <- nrow(grid.tmp)

  # Lag the counts by one bin; the first bin has no predecessor.
  grid.tmp$freq2 <- c(NA, head(grid.tmp$freq, -1))[seq_len(n)]

  # NOTE(review): this is previous minus current, i.e. the negation of the
  # usual first difference -- confirm the sign is intended.
  grid.tmp$diff <- grid.tmp$freq2 - grid.tmp$freq

  # Drop the first bin (its lag is NA) and the last bin (presumably because it
  # is only partially covered -- confirm). Indexing through seq_len() keeps
  # this safe when there are fewer than three bins.
  grid.tmp <- grid.tmp[seq_len(n)[-c(1, n)], ]

  # The bins come out of table() in chronological order, so a running index
  # numbers them consecutively.
  grid.tmp$hour.of.week <- seq_len(nrow(grid.tmp))

  # Take the hour of day from the timestamp itself rather than from the row
  # index modulo 24, which is only right when the data start at midnight.
  grid.tmp$hour <- as.POSIXlt(grid.tmp$date)$hour

  grid.tmp
}

# Draw one phase diagram (diff against freq2, path sized and coloured by hour)
# from a data set produced by build.phase.diagram.dataset().
draw.phase.diagram <- function (grid, title) {
  ggplot(grid) +
    aes(x = diff, y = freq2, size = hour, colour = hour) +
    geom_path() +
    ggtitle(title)
}

# Phase diagram for a single event type.
#
# Args:
#   subset: event type to keep (matched against d$type, which has the "Event"
#           suffix removed).
#
# Reads the global data frame `d`. Returns the ggplot object; callers must
# print() it to render a page.
phase.diagram <- function (subset) {
  # which() drops rows whose type is NA instead of turning them into all-NA
  # rows.
  grid.tmp <- build.phase.diagram.dataset(d[which(d$type == subset), ])
  draw.phase.diagram(grid.tmp, paste("Phase Diagram:", subset))
}

# generate graphs
######################################################################

pdf("github_graphs.pdf", width = 10, height = 7)

# Every plot is print()ed explicitly: ggplot objects are only rendered when
# printed, and source() does not auto-print top-level values. The finally
# clause closes the PDF device even if one of the plots fails.
tryCatch({
  # graph all together
  grid.tmp <- as.data.frame(table(d$hour))
  colnames(grid.tmp) <- c("date", "freq")
  grid.tmp$date <- as.POSIXct(grid.tmp$date)
  print(ggplot(grid.tmp) + aes(x = date, y = freq) + geom_line())

  # just types of events: count each type within each hour, then melt the
  # nested list into long form (one row per hour/type pair).
  # NOTE(review): the `L2~L1` argument is kept exactly as in the original;
  # confirm that melt() actually makes use of it.
  grid.tmp <- melt(lapply(tapply(d$type, d$hour, table), as.list), L2~L1)
  colnames(grid.tmp) <- c("value", "event", "date")
  grid.tmp$date <- as.POSIXct(grid.tmp$date)

  print(
    ggplot(data = grid.tmp) +
      aes(x = date, y = value, group = event, color = event) +
      geom_line()
  )
  print(
    ggplot(data = grid.tmp) +
      aes(x = date, y = value, group = event, color = event) +
      geom_bar(stat = "identity")
  )
  print(
    ggplot(data = grid.tmp) +
      aes(x = date, y = value, group = event, color = event, size = value) +
      geom_line() +
      facet_grid(event ~ ., scales = "free_y")
  )

  # phase diagrams built from the first differences
  grid.tmp <- build.phase.diagram.dataset(d)
  print(draw.phase.diagram(grid.tmp, "Phase Diagram: All Activity"))

  print(phase.diagram("Push"))
  print(phase.diagram("Watch"))
  print(phase.diagram("Fork"))
  print(phase.diagram("PullRequest"))
}, finally = dev.off())