1 # load up libraries and data
2 #######################################################################
# Read the tab-separated event dump; character columns stay as plain strings.
d <- read.delim("gitdata_1week.tsv", stringsAsFactors = FALSE)

# Replace every "T" in the timestamp strings with a space (presumably the
# ISO 8601 date/time separator -- confirm against the data), then convert
# the result to POSIXct in a single step.
d$date <- as.POSIXct(gsub("T", " ", d$date, fixed = TRUE))

# Strip a trailing "Event" from the event type names.
d$type <- sub("Event$", "", d$type)

# Frequency tables, most common first: every event type, then the six most
# frequent values of `name` and of `actor`.
rev(sort(table(d$type)))
head(rev(sort(table(d$name))))
head(rev(sort(table(d$actor))))

# Bin every event into its minute and its hour; both columns are factors
# whose levels are the bin start times.
d$min <- cut(d$date, breaks = "min")
d$hour <- cut(d$date, breaks = "hour")
24 ######################################################################
# Every plot printed below is written to one multi-page PDF.
# ggplot2 and a melt() that accepts lists (reshape / reshape2) must already
# be attached; no library() call is visible in this section.
# NOTE(review): the device is not closed in this section -- confirm that
# dev.off() is called after the last plot of the script.
pdf("github_graphs.pdf", width = 10, height = 7)

# Overall activity: number of events in each hour bin.
grid.tmp <- as.data.frame(table(d$hour))
colnames(grid.tmp) <- c("date", "freq")
grid.tmp$date <- as.POSIXct(grid.tmp$date)

# Plots are print()ed explicitly: top-level auto-printing does not happen
# when the script is run through source(), which would leave the PDF empty.
print(ggplot(grid.tmp, aes(x = date, y = freq)) + geom_line())

# just types of events
# Per-hour table of event types, melted to long form. The renaming below
# assumes melt() yields the columns in the order value, L2 (event type),
# L1 (hour bin) -- TODO confirm against the melt() implementation in use.
grid.tmp <- melt(lapply(tapply(d$type, d$hour, table), as.list), L2~L1)
colnames(grid.tmp) <- c("value", "event", "date")
grid.tmp$date <- as.POSIXct(grid.tmp$date)

# One line per event type.
print(
  ggplot(data = grid.tmp) +
    aes(x = date, y = value, group = event, color = event) +
    geom_line()
)

# Same data drawn as bars.
print(
  ggplot(data = grid.tmp) +
    aes(x = date, y = value, group = event, color = event) +
    geom_bar(stat = "identity")
)

# One panel per event type, each with its own y scale. The argument is
# spelled `scales`; the former `scale` only worked via partial matching.
print(
  ggplot(data = grid.tmp) +
    aes(x = date, y = value, group = event, color = event, size = value) +
    geom_line() +
    facet_grid(event ~ ., scales = "free_y")
)
50 # create first differences
# Build the hourly dataset behind the phase diagrams.
#
# Args:
#   d: data frame with an `hour` column whose tabulated labels can be parsed
#      by as.POSIXct() (e.g. the factor produced by cut(date, "hour")).
#
# Returns:
#   A data frame with one row per hour bin, excluding the first and the last
#   bin, with columns:
#     date          bin label converted to POSIXct
#     freq          number of rows of `d` in this bin
#     freq2         `freq` of the preceding bin
#     diff          freq2 - freq
#     hour.of.week  1-based position of the row among the retained bins
#     hour          hour.of.week %% 24
#   Inputs with fewer than three bins yield a zero-row data frame.
build.phase.diagram.dataset <- function (d) {
  grid.tmp <- as.data.frame(table(d$hour))
  colnames(grid.tmp) <- c("date", "freq")
  grid.tmp$date <- as.POSIXct(grid.tmp$date)

  n.bins <- nrow(grid.tmp)

  # Lag the counts by one bin. Indexing with seq_len() keeps this valid for
  # zero or one bin, where 1:(n - 1) would count backwards.
  grid.tmp$freq2 <- c(NA, grid.tmp$freq)[seq_len(n.bins)]
  grid.tmp$diff <- grid.tmp$freq2 - grid.tmp$freq

  # Keep only the interior bins (drop the first and the last one). With
  # fewer than three bins there is no interior, so select nothing instead of
  # letting 2:(n - 1) run in reverse.
  interior <- if (n.bins > 2) 2:(n.bins - 1) else integer(0)
  grid.tmp <- grid.tmp[interior, ]

  # Rank of each retained bin among the distinct bin times.
  # NOTE(review): `hour` is this position modulo 24, not the clock hour read
  # from `date`; the two agree only when the first retained bin starts at
  # 01:00 -- confirm which one is intended.
  position <- as.numeric(as.factor(grid.tmp$date))
  grid.tmp$hour.of.week <- position
  grid.tmp$hour <- position %% 24

  grid.tmp
}
# Build the phase diagram for a single event type.
#
# Reads the global data frame `d` (it is not passed in), keeps the rows whose
# `type` equals `subset`, and plots the previous hour's count (`freq2`)
# against the hour-to-hour change (`diff`) as a path, with line size and
# colour mapped to `hour`.
#
# Args:
#   subset: event type to keep, compared with d$type (e.g. "Watch").
#
# Returns:
#   The ggplot object; the caller is responsible for printing it.
phase.diagram <- function (subset) {
  # which() discards NA comparisons, so rows with a missing type are simply
  # excluded instead of being turned into all-NA rows by logical indexing.
  events <- d[which(d$type == subset), ]
  grid.tmp <- build.phase.diagram.dataset(events)

  ggplot(grid.tmp) +
    aes(x = diff, y = freq2, size = hour, colour = hour) +
    geom_path() +
    ggtitle(paste("Phase Diagram:", subset))
}
# Phase diagram over all event types combined. The plot is print()ed
# explicitly because top-level auto-printing does not happen when the script
# is run through source(), which would leave this page out of the output.
grid.tmp <- build.phase.diagram.dataset(d)
print(
  ggplot(grid.tmp) +
    aes(x = diff, y = freq2, size = hour, colour = hour) +
    geom_path() +
    ggtitle("Phase Diagram: All Activity")
)

# Phase diagrams for individual event types.
print(phase.diagram("Watch"))

print(phase.diagram("PullRequest"))