#!/usr/bin/env Rscript --vanilla # # Generates plots from CAF profiler output. # # The script takes a profiler log file on standard input and generates one plot # according to the command line options given. In general, the script generates # plots at the granularity of worker threads as well as individual actors. suppressPackageStartupMessages(library(colorspace)) suppressPackageStartupMessages(library(ggplot2)) suppressPackageStartupMessages(library(plyr)) suppressPackageStartupMessages(library(dplyr)) suppressPackageStartupMessages(library(reshape2)) suppressPackageStartupMessages(library(grid)) suppressPackageStartupMessages(library(scales)) # pretty_breaks suppressPackageStartupMessages(library(optparse)) # ---------------------------------------------------------------------------- # Helper Functions # ---------------------------------------------------------------------------- # Generates a diverging color palette of a given size. # - http://tools.medialab.sciences-po.fr/iwanthue # - https://gist.github.com/johnbaums/45b49da5e260a9fc1cd7 iwanthue <- function(n, hmin=0, hmax=360, cmin=0, cmax=180, lmin=0, lmax=100) { stopifnot(hmin >= 0, cmin >= 0, lmin >= 0, hmax <= 360, cmax <= 180, lmax <= 100, hmin <= hmax, cmin <= cmax, lmin <= lmax, n > 0) lab <- LAB(as.matrix(expand.grid(seq(0, 100, 1), seq(-100, 100, 5), seq(-110, 100, 5)))) if (any((hmin != 0 || cmin != 0 || lmin != 0 || hmax != 360 || cmax != 180 || lmax != 100))) { hcl <- as(lab, 'polarLUV') hcl_coords <- coords(hcl) hcl <- hcl[which(hcl_coords[, 'H'] <= hmax & hcl_coords[, 'H'] >= hmin & hcl_coords[, 'C'] <= cmax & hcl_coords[, 'C'] >= cmin & hcl_coords[, 'L'] <= lmax & hcl_coords[, 'L'] >= lmin), ] #hcl <- hcl[-which(is.na(coords(hcl)[, 2]))] lab <- as(hcl, 'LAB') } lab <- lab[which(!is.na(hex(lab))), ] clus <- kmeans(coords(lab), n, iter.max=100, algorithm="MacQueen") hex(LAB(clus$centers)) } # Generates a palette with equally spaced hues around the color wheel. color_hue <- function(n, l=65) { hues <- seq(15, 375, length=n+1) hcl(h=hues, l=l, c=100)[1:n] } # Generates labels microsecond ticks. make_usec_labels <- function(ticks=10^(0:9), sep="") { fuse <- function(x, u) paste(x, u, sep=sep) unitize <- function(us) { if (us < 1e3) return(fuse(round(us), "us")) else if (us < 1e6) return(fuse(round(us / 1e3, 1), "ms")) else if (us < 60 * 1e6) return(fuse(round(us / 1e6, 1), "s")) else if (us < 60 * 60 * 1e6) return(fuse(round(us / (60 * 1e6), 1), "m")) else return(fuse(round(us / (60 * 60 * 1e6), 1), "h")) } sapply(ticks, unitize) } # FIXME: crappy hack to get a log transformation that keeps 0s as 0s instead of # setting them to -Inf. This only "makes sense" because we have almost no # values in [0,1]. The only reason why we need such a thing is to get prettier # scatterplots where the points don't hang directly on top of the axes lines. # Another annoying artifact of this hack is that annotation logticks # between 0 and the first order of magnitude are wrong. # Possible solution: coord_trans() log_magic_trans <- function(base=10) { magic <- 1.000000042 trans <- function(x) { stopifnot(! magic %in% x) r <- x r[r == 1] <- magic r <- log(r, base) r[is.infinite(r)] <- 0 r } inv <- function(x) { r <- x r[r == base^magic] <- 1 r <- base^x r[r == 1] <- 0 r } trans_new(paste0("log-magic-", format(base)), trans, inv, log_breaks(base=base), domain=c(0, Inf)) } # Dynamic time scale that flips to logarithmic if our data is more than two # orders of magnitude apart. scale_time <- function(.data, fun) { trans <- NULL breaks <- NULL if (log10(diff(range(.data))) > 2) { #trans <- log10_trans() #breaks <- 10^(0:10) trans <- log_magic_trans() breaks <- c(0, 10^(1:10)) } else { trans <- identity_trans() breaks <- pretty_breaks(10)(.data) } fun(breaks=breaks, labels=make_usec_labels(breaks), trans=trans) } scale_x_time <- function(.data) scale_time(.data, fun=scale_x_continuous) scale_y_time <- function(.data) scale_time(.data, fun=scale_y_continuous) # Adds a geom_point to an existing plot for a profile. If the the profile has # labels, the function adds one geom_point per label, with (1) layers sorted by # decreasing number of points and (2) alpha scaled inversely proportional to # the number of points. add_points <- function(p, .data) { r <- p # Add one layer per label, sorted by per-label point frequency to avoid # hiding smaller point groups behind big point clouds. f <- table(.data$label) o <- names(rev(sort(f))) # order of layers gp <- function(x) { force(x); geom_point(data=subset(.data, label == x)) } r <- Reduce("+", lapply(o, gp), r) # Give each layer its own color and assign it it custom alpha value # inversely proportional to the point frequency. n <- length(f) stopifnot(n > 0) # Scales a vector to the interval [amin, 1] inversely proportional to the # point values. make_alpha <- function(x, amin=0.4) { stopifnot(amin > 0, amin < 1) 1 - log(x) / (max(log(x)) + amin * max(log(x))) } r <- r + aes(color=label, alpha=label) + scale_color_manual(name="ID", values=as.vector(iwanthue(n))) + guides(color=guide_legend(override.aes=list(size=4))) # Turn each point into a shape and slightly increase alpha. if (n > 25) { write("** ignoring shapification: more than 25 unique labels", stderr()) r <- r + scale_alpha_manual(values=as.list(make_alpha(f)), guide='none') } else { r <- r + aes(shape=label) + scale_shape_manual(name="ID", values=rev(order(f))) + scale_alpha_manual(values=as.list(make_alpha(f, 0.5)), guide='none') } r } # ---------------------------------------------------------------------------- # Plot Functions # ---------------------------------------------------------------------------- # Plots system and user CPU utilization over time. plot_utilization_time <- function(.data, color.sys="red", color.usr="blue") { ggplot(.data) + aes(x=clock) + geom_point(aes(y=usr/time), shape=4, color=color.usr) + geom_point(aes(y=sys/time), shape=1, color=color.sys) + geom_line(aes(y=usr/time), color=color.usr) + geom_line(aes(y=sys/time), color=color.sys) + scale_y_continuous(breaks=seq(0,1,length=5)) + labs(x="Time", y="CPU utilization") + theme(axis.ticks=element_blank(), axis.text.x=element_blank()) + facet_wrap(~ id, ncol=4) } # Plots the total CPU utilization in a scatterplot, where the axes denote user # and system CPU utilization. The point radius is scaled logarithmically to the # runtime. plot_utilization_scatter <- function(.data) { max.xy <- with(.data, max(usr/time, sys/time)) p <- ggplot(.data, aes(x=usr/time, y=sys/time, size=time)) + scale_size(name="ID", range=c(2, 10), guide='none') + xlim(0, max.xy) + ylim(0, max.xy) + labs(x="User CPU utilization", y="System CPU utilization") add_points(p, .data) } # Plots the CPU time in a scatterplot, where axes denote absolute runtime for # CPU time in user and system mode. The point radius is scaled to utilization. plot_time_scatter <- function(.data) { p <- ggplot(.data, aes(x=usr, y=sys, size=(usr+sys)/time)) + scale_size(name="Utilization", range=c(1,6)) + #scale_size_area(name="Utilization", max_size=6) + scale_x_time(.data$usr) + scale_y_time(.data$sys) + # TODO: re-enable when fixing the magic transformation. # annotation_logticks(color="grey") + xlab("User CPU time") + ylab("System CPU time") add_points(p, .data) } # Plots CPU time as boxplot per label. plot_time_boxplot <- function(.data) { totals <- .data %>% group_by(label) %>% summarize(sort=sum(usr+sys)) lvls <- rev(totals[order(totals$sort), ]$label) xs <- .data %>% group_by(label) %>% mutate(util=sum(cpu)/sum(time)) # Do not drop zeros on log transformation. xs$cpu <- mapvalues(xs$cpu, 0, 1, warn_missing=FALSE) xs$label <- factor(xs$label, levels=lvls) ggplot(xs) + aes(x=label, y=cpu, fill=util) + # TODO: re-enable when fixing the magic transformation. # annotation_logticks(sides="lr", color="grey") + geom_boxplot(outlier.colour="grey") + labs(x="ID", y="CPU time") + scale_y_time(xs$cpu) + scale_fill_gradient(name="Utilization", low="red", high="green", limits=c(0, 1)) + theme(axis.text.x=element_text(angle=90, vjust=.5, hjust=1)) } # Plots CPU utilization as boxplot per label. plot_utilization_boxplot <- function(.data) { totals <- .data %>% group_by(label) %>% summarize(sort=sum(cpu)) lvls <- rev(totals[order(totals$sort), ]$label) xs <- .data %>% group_by(label) %>% mutate(dom=sum(usr-sys)/sum(usr+sys)) xs$label <- factor(xs$label, levels=lvls) ggplot(xs) + aes(x=label, y=util, fill=dom) + geom_boxplot() + labs(x="ID", y="CPU utilization") + scale_fill_gradient2(name="Domination", low="red", mid="white", high="green", breaks=c(-.5, .5), labels=c("System", "User"), limits=c(-1, 1)) + theme(axis.text.x=element_text(angle=90, vjust=.5, hjust=1)) } # Plots the total runtime where each bar consists of system (bottom) and user # (top) CPU time. plot_time_barplot <- function(.data) { sums <- .data %>% group_by(label) %>% summarize(usr=sum(usr), sys=sum(sys)) molten <- melt(sums, .(label)) molten <- molten[order(molten$variable, decreasing=TRUE),] # sys at bottom lvls <- rev(sums[order(sums$usr + sums$sys), ]$label) molten$label <- factor(molten$label, levels=lvls) # highest bar on the left ticks <- pretty_breaks(10)(sums$usr + sums$sys) ggplot(molten) + aes(x=label, y=value, fill=variable) + geom_bar(stat="identity") + xlab("ID") + scale_y_continuous(name="CPU time", breaks=ticks, labels=make_usec_labels(ticks)) + scale_fill_manual(name="CPU time", values=rev(color_hue(2)), labels=c("User", "System")) + theme(axis.text.x=element_text(angle=90, vjust=.5, hjust=1), legend.justification=c(1,1), legend.position=c(1,1)) } # ---------------------------------------------------------------------------- # Command Line Parsing # ---------------------------------------------------------------------------- option_list <- list( make_option(c("-a", "--actors"), action="store_true", default=F, help="generate plots involving actors"), make_option(c("-w", "--workers"), action="store_true", default=F, help="generate plots involving workers"), make_option(c("-l", "--labels"), default=NULL, help="two-column file mapping IDs to labels"), make_option(c("-f", "--font-size"), type="integer", default=12, metavar="number", help="font base size [%default]"), make_option(c("-s", "--squeeze"), action="store_true", default=F, help="make plot edages as small as possible [%default]"), make_option(c("-o", "--output"), default="png", help="the image format of the output [%default]"), make_option(c("-r", "--read"), default="stdin", help="read CAF profile from file [-]"), make_option(c("-h", "--help"), action="store_true", default=F, help="display this help and exit") ) opt <- parse_args(OptionParser(option_list=option_list, add_help_option=F)) # If neither --actors nor --workers given, set 'em both. if (! opt$actors && ! opt$workers) { opt$actors <- TRUE opt$workers <- TRUE } # ---------------------------------------------------------------------------- # Plot Generation # ---------------------------------------------------------------------------- # Setup theme. options(scipen=1000) theme_set(theme_bw(base_size=opt$`font-size`)) theme_update(legend.key=element_rect(colour="white")) if (opt$squeeze) theme_update(legend.key.width=unit(3, "lines"), plot.margin=unit(rep(0, 4), "lines")) record <- function(name, fun, ...) { filename <- paste("plot", name, sep="-") filename <- paste(filename, opt$output, sep=".") write(paste("-- generating", filename), stderr()) ggsave(filename, fun(...), height=10, width=10) } make_zero <- function(x) mapvalues(x, NaN, 0, warn_missing=FALSE) make_profile <- function(filename) { read.table(filename, header=T) %>% filter(time > 0) %>% group_by(id, type) %>% arrange(time) %>% mutate(cpu=usr+sys, util=cpu/time, dom=make_zero((usr-sys)/cpu)) } make_workers <- function(prof) { x <- prof %>% filter(type == "worker") x$label <- factor(x$id) x } make_actors <- function(prof, labels=NULL) { x <- prof %>% filter(type == "actor") if (is.null(labels)) { x$label <- factor(x$id) } else { ls <- read.table(labels, col.names=c("id", "label")) x <- left_join(x, ls, "id") x$label <- droplevels(x$label) x$label <- addNA(x$label) levels(x$label) <- mapvalues(levels(x$label), NA, "OTHER") } x } summarize.prof <- function(x) { summarize(x, usr=sum(usr), sys=sum(sys), cpu=sum(cpu), time=sum(time), util=sum(cpu)/sum(time), dom=make_zero(sum(usr-sys)/sum(cpu))) } prof <- make_profile(file(opt$read)) if (opt$workers) { workers <- make_workers(prof) workers.by_id <- workers %>% group_by(id, label) %>% summarize.prof record("worker-time-bar", plot_time_barplot, workers) record("worker-time-facets", plot_utilization_time, workers) record("worker-time-scatter", plot_time_scatter, workers) record("worker-time-scatter-id", plot_time_scatter, workers.by_id) record("worker-util-box", plot_utilization_boxplot, workers) record("worker-util-scatter", plot_utilization_scatter, workers) record("worker-util-scatter-id", plot_utilization_scatter, workers.by_id) } if (opt$actors) { actors <- make_actors(prof, opt$labels) actors.by_id <- actors %>% group_by(id, label) %>% summarize.prof actors.by_label <- actors.by_id %>% group_by(label) %>% summarize.prof record("actor-time-bar", plot_time_barplot, actors.by_label) record("actor-time-scatter", plot_time_scatter, actors) record("actor-time-scatter-id", plot_time_scatter, actors.by_id) record("actor-time-scatter-label", plot_time_scatter, actors.by_label) record("actor-time-box", plot_time_boxplot, actors) record("actor-time-box-id", plot_time_boxplot, actors.by_id) record("actor-util-box", plot_utilization_boxplot, actors) record("actor-util-box-id", plot_utilization_boxplot, actors.by_id) record("actor-util-scatter-id", plot_utilization_scatter, actors.by_id) record("actor-util-scatter-label", plot_utilization_scatter, actors.by_label) }