diff --git a/metrics/report/README.md b/metrics/report/README.md new file mode 100644 index 00000000..0986db2d --- /dev/null +++ b/metrics/report/README.md @@ -0,0 +1,73 @@ +* [cloud-native-setup metrics report generator](#cloud-native-setup-metrics-report-generator) + * [Data gathering](#data-gathering) + * [Report generation](#report-generation) + * [Debugging and development](#debugging-and-development) + +# cloud-native-setup metrics report generator + +The files within this directory can be used to generate a 'metrics report' +for Kubernetes. + +The primary workflow consists of two stages: + +1) Run the provided report metrics data gathering scripts on the system(s) you wish +to analyze. +2) Run the provided report generation script to analyze the data and generate a +report file. + +## Data gathering + +Data gathering is provided by the `grabdata.sh` script. When run, this script +executes a set of tests from the `cloud-native-setup/metrics` directory. The JSON results files +will be placed into the `cloud-native-setup/metrics/results` directory. + +Once the results are generated, create a suitably named subdirectory of +`cloud-native-setup/metrics/results`, and move the JSON files into it. + +Repeat this process if you want to compare multiple sets of results. Note, the +report generation scripts process all subfolders of `cloud-native-setup/metrics/results` when +generating the report. + +You can restrict the subset of tests run by `grabdata.sh` via its commandline parameters: + +| Option | Description | +| ------ | ----------- | +| -a | Run all tests (default) | +| -s | Run the scaling tests | +| -h | Print this help | + +## Report generation + +Report generation is provided by the `makereport.sh` script. By default this script +processes all subdirectories of the `cloud-native-setup/metrics/results` directory to generate the report. 
+To run in the default mode, execute the following: + +```sh +$ ./makereport.sh +``` + +The report generation tool uses [Rmarkdown](https://github.com/rstudio/rmarkdown), +[R](https://www.r-project.org/about.html) and [pandoc](https://pandoc.org/) to produce +a PDF report. To avoid the need for all users to set up a working environment +with all the necessary tooling, the `makereport.sh` script utilises a `Dockerfile` with +the environment pre-defined in order to produce the report. Thus, you need to +have Docker installed on your system in order to run the report generation. + +The resulting `metrics_report.pdf` is generated into the `output` subdir of the `report` +directory. + +## Debugging and development + +To aid in script development and debugging, the `makereport.sh` script offers a debug +facility via the `-d` command line option. Using this option will place you into a `bash` +shell within the running `Dockerfile` image used to generate the report. From there you +can examine the Docker image environment, and execute the generation scripts. E.g., to +test the `scaling.R` script, you can execute: + +```bash +$ makereport.sh -d +# R +> source('/inputdir/Env.R') +> source('/scripts/scaling.R') +``` + diff --git a/metrics/report/grabdata.sh b/metrics/report/grabdata.sh new file mode 100755 index 00000000..b67f8842 --- /dev/null +++ b/metrics/report/grabdata.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Run a set of the metrics tests to gather data to be used with the report +# generator. The general idea is to have the tests configured to generate +# useful, meaningful and repeatable (stable, with minimised variance) results. +# If the tests have to be run more or longer to achieve that, then generally +# that is fine - this test is not intended to be quick, it is intended to +# be repeatable. 
+ +# Note - no 'set -e' in this file - if one of the metrics tests fails +# then we wish to continue to try the rest. +# Finally at the end, in some situations, we explicitly exit with a +# failure code if necessary. + +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_DIR}/../lib/common.bash" +RESULTS_DIR=${SCRIPT_DIR}/../results + +# By default we run all the tests +RUN_ALL=1 + +help() { + usage=$(cat << EOF +Usage: $0 [-h] [options] + Description: + This script gathers a number of metrics for use in the + report generation script. Which tests are run can be + configured on the commandline. Specifically enabling + individual tests will disable the 'all' option, unless + 'all' is also specified last. + Options: + -a, Run all tests (default). + -h, Print this help. + -s, Run the scaling tests. +EOF +) + echo "$usage" +} + +# Set up the initial state +init() { + metrics_onetime_init + + local OPTIND + while getopts "ahs" opt;do + case ${opt} in + a) + RUN_ALL=1 + ;; + h) + help + exit 0; + ;; + s) + RUN_SCALING=1 + RUN_ALL= + ;; + ?) + # parse failure + help + die "Failed to parse arguments" + ;; + esac + done + shift $((OPTIND-1)) +} + +run_scaling() { + echo "Running scaling tests" + + (cd scaling; ./k8s_scale.sh) +} + +# Execute metrics scripts +run() { + pushd "$SCRIPT_DIR/.." + + if [ -n "$RUN_ALL" ] || [ -n "$RUN_SCALING" ]; then + run_scaling + fi + + popd +} + +finish() { + echo "Now please create a suitably descriptively named subdirectory in" + echo "$RESULTS_DIR and copy the .json results files into it before running" + echo "this script again." 
+} + +init "$@" +run +finish + diff --git a/metrics/report/makereport.sh b/metrics/report/makereport.sh new file mode 100755 index 00000000..70900799 --- /dev/null +++ b/metrics/report/makereport.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright (c) 2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Take the data found in subdirectories of the metrics 'results' directory, +# and turn them into a PDF report. Use a Dockerfile containing all the tooling +# and scripts we need to do that. + +set -e + +SCRIPT_PATH=$(dirname "$(readlink -f "$0")") +source "${SCRIPT_PATH}/../lib/common.bash" + +IMAGE="${IMAGE:-metrics-report}" +DOCKERFILE="${SCRIPT_PATH}/report_dockerfile/Dockerfile" + +HOSTINPUTDIR="${SCRIPT_PATH}/../results" +RENVFILE="${HOSTINPUTDIR}/Env.R" +HOSTOUTPUTDIR="${SCRIPT_PATH}/output" + +GUESTINPUTDIR="/inputdir/" +GUESTOUTPUTDIR="/outputdir/" + +setup() { + echo "Checking subdirectories" + check_subdir="$(ls -dx ${HOSTINPUTDIR}/*/ 2> /dev/null | wc -l)" + if [ $check_subdir -eq 0 ]; then + die "No subdirs in [${HOSTINPUTDIR}] to read results from." 
+ fi + + echo "Checking Dockerfile" + check_dockerfiles_images "$IMAGE" "$DOCKERFILE" + + mkdir -p "$HOSTOUTPUTDIR" && true + + echo "inputdir=\"${GUESTINPUTDIR}\"" > ${RENVFILE} + echo "outputdir=\"${GUESTOUTPUTDIR}\"" >> ${RENVFILE} + + # A bit of a hack to get an R syntax'd list of dirs to process + # Also, need it as not host-side dir path - so short relative names + resultdirs="$(cd ${HOSTINPUTDIR}; ls -dx */)" + resultdirslist=$(echo ${resultdirs} | sed 's/ \+/", "/g') + echo "resultdirs=c(" >> ${RENVFILE} + echo " \"${resultdirslist}\"" >> ${RENVFILE} + echo ")" >> ${RENVFILE} +} + +run() { + docker run -ti --rm -v ${HOSTINPUTDIR}:${GUESTINPUTDIR} -v ${HOSTOUTPUTDIR}:${GUESTOUTPUTDIR} ${IMAGE} ${extra_command} + ls -la ${HOSTOUTPUTDIR}/* +} + +main() { + + local OPTIND + while getopts "d" opt;do + case ${opt} in + d) + # In debug mode, run a shell instead of the default report generation + extra_command="bash" + ;; + esac + done + shift $((OPTIND-1)) + + setup + run +} + +main "$@" + diff --git a/metrics/report/report_dockerfile/Dockerfile b/metrics/report/report_dockerfile/Dockerfile new file mode 100755 index 00000000..bf1c1717 --- /dev/null +++ b/metrics/report/report_dockerfile/Dockerfile @@ -0,0 +1,40 @@ +# Copyright (c) 2018-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Set up an Ubuntu image with the components needed to generate a +# metrics report. That includes: +# - R +# - The R 'tidyverse' +# - pandoc +# - The report generation R files and helper scripts + +# Start with the base rocker tidyverse. +# We would have used the 'verse' base, that already has some of the docs processing +# installed, but I could not figure out how to add in the extra bits we needed to +# the lite tex version is uses. +FROM rocker/tidyverse + +# Version of the Dockerfile +LABEL DOCKERFILE_VERSION="1.0" + +# Without this some of the package installs stop to try and ask questions... 
+ENV DEBIAN_FRONTEND=noninteractive + +# Install the extra doc processing parts we need for our Rmarkdown PDF flow. +RUN apt-get update -qq && \ + apt-get install -y \ + texlive-latex-base \ + texlive-fonts-recommended \ + latex-xcolor + +# Install the extra R packages we need. +RUN install2.r --error --deps TRUE \ + gridExtra \ + ggpubr + +# Pull in our actual worker scripts +COPY . /scripts + +# By default generate the report +CMD ["/scripts/genreport.sh"] diff --git a/metrics/report/report_dockerfile/genreport.sh b/metrics/report/report_dockerfile/genreport.sh new file mode 100755 index 00000000..c27379b1 --- /dev/null +++ b/metrics/report/report_dockerfile/genreport.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Copyright (c) 2018-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +REPORTNAME="metrics_report.pdf" + +cd scripts + +Rscript --slave -e "library(knitr);knit('metrics_report.Rmd')" +Rscript --slave -e "library(knitr);pandoc('metrics_report.md', format='latex')" + +cp /scripts/${REPORTNAME} /outputdir +echo "The report, named ${REPORTNAME}, can be found in the output directory" diff --git a/metrics/report/report_dockerfile/metrics_report.Rmd b/metrics/report/report_dockerfile/metrics_report.Rmd new file mode 100644 index 00000000..950bbc1e --- /dev/null +++ b/metrics/report/report_dockerfile/metrics_report.Rmd @@ -0,0 +1,40 @@ +--- +# Copyright (c) 2018-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# +title: "Kubernetes metrics report" +author: "Auto generated" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + pdf_document: +urlcolor: blue +--- + +```{r setup, include=FALSE} +#Set these opts to get pdf images which fit into beamer slides better +opts_chunk$set(dev = 'pdf') +# Pick up any env set by the invoking script, such as the root dir of the +# results data tree +source("/inputdir/Env.R") +``` +\pagebreak + +# Introduction +This report compares the metrics between multiple sets of data generated from +the 
[cloud-native-setup report generation scripts](https://github.com/clearlinux/cloud-native-setup/metrics/report/README.md). + +This report was generated using the data from the **`r resultdirs`** results directories. + +\pagebreak + +# Runtime scaling +This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s_scale.sh) +measures the system memory 'free' reduction, CPU idle % and pod boot time as it launches more +and more idle `busybox` pods on a single node Kubernetes cluster. + +> Note: CPU % is measured as a system whole - 100% represents *all* CPUs on the node. + +```{r, echo=FALSE, fig.cap="K8S scaling"} +source('scaling.R') +``` diff --git a/metrics/report/report_dockerfile/scaling.R b/metrics/report/report_dockerfile/scaling.R new file mode 100755 index 00000000..d93a2111 --- /dev/null +++ b/metrics/report/report_dockerfile/scaling.R @@ -0,0 +1,198 @@ +#!/usr/bin/env Rscript +# Copyright (c) 2018-2019 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Show system memory reduction, and hence container 'density', by analysing the +# scaling footprint data results and the 'system free' memory. + +suppressMessages(suppressWarnings(library(ggplot2))) # ability to plot nicely. + # So we can plot multiple graphs +library(gridExtra) # together. +suppressMessages(suppressWarnings(library(ggpubr))) # for ggtexttable. +suppressMessages(library(jsonlite)) # to load the data. +suppressMessages(library(scales)) # For de-science notation of axis + +testnames=c( + "k8s-scaling.*" +) + +data=c() +stats=c() +rstats=c() +rstats_names=c() +cstats=c() +cstats_names=c() + +skip_points=0 # Shall we draw the points as well as lines on the graphs. + +# FIXME GRAHAM - bomb if there are no source dirs?! 
# Walk every results directory and load each matching JSON results file.
# Side effects on globals: appends per-sample rows to `data`, per-run summary
# rows to `stats`, and formatted table rows to `rstats`/`cstats`; sets
# `skip_points` when any run has more than 20 samples (points would be noise).
for (currentdir in resultdirs) {
  for (testname in testnames) {
    matchdir <- paste(inputdir, currentdir, sep = "")
    matchfile <- paste(testname, "\\.json", sep = "")
    files <- list.files(matchdir, pattern = matchfile)
    if (length(files) == 0) {
      #warning(paste("Pattern [", matchdir, "/", matchfile, "] matched nothing"))
    }
    for (ffound in files) {
      fname <- paste(inputdir, currentdir, ffound, sep = "")
      if (!file.exists(fname)) {
        warning(paste("Skipping non-existent file: ", fname))
        next
      }

      # Derive the name from the test result dirname
      datasetname <- basename(currentdir)

      # Import the data
      fdata <- fromJSON(fname)
      # De-nest the test name specific data
      shortname <- substr(ffound, 1, nchar(ffound) - nchar(".json"))
      fdata <- fdata[[shortname]]

      # Plots and tables are labelled by the dataset (directory) name rather
      # than the test file name.
      testname <- datasetname

      # Per-sample frame: free memory (Gb), CPU idle (%), pod boot time (s).
      cdata <- data.frame(avail_gb = as.numeric(fdata$BootResults$mem_free$Result) / (1024 * 1024))
      cdata <- cbind(cdata, cpu_idle = as.numeric(fdata$BootResults$cpu_idle$Result))
      # convert ms to seconds
      cdata <- cbind(cdata, boot_time = as.numeric(fdata$BootResults$launch_time$Result) / 1000)

      # FIXME - we should seq from 0 index
      nsamples <- length(cdata[, "avail_gb"])
      if (nsamples > 20) {
        # Too many samples to usefully draw individual points on the graphs.
        skip_points <- 1
      }

      cdata <- cbind(cdata, count = seq_len(nsamples))
      cdata <- cbind(cdata, testname = rep(testname, nsamples))
      cdata <- cbind(cdata, dataset = rep(datasetname, nsamples))

      # Gather our statistics
      # '-1' containers, as the first entry should be a data capture of before
      # the first container was run.
      # FIXME - once the test starts to store a stats baseline in slot 0, then
      # we should re-enable the '-1'
      #sdata <- data.frame(num_containers = nsamples - 1)
      sdata <- data.frame(num_containers = nsamples)
      # Work out memory reduction by subtracting last (most consumed) from
      # first (which should be 0-containers)
      sdata <- cbind(sdata, mem_consumed = cdata[, "avail_gb"][1] -
        cdata[, "avail_gb"][nsamples])
      sdata <- cbind(sdata, cpu_consumed = cdata[, "cpu_idle"][1] -
        cdata[, "cpu_idle"][nsamples])
      sdata <- cbind(sdata, boot_time = cdata[, "boot_time"][nsamples])
      sdata <- cbind(sdata, avg_gb_per_c = sdata$mem_consumed / sdata$num_containers)
      sdata <- cbind(sdata, runtime = testname)

      # Store away as a single set
      data <- rbind(data, cdata)
      stats <- rbind(stats, sdata)

      # Row for the memory statistics summary table.
      ms <- c(
        "Test" = testname,
        "n" = sdata$num_containers,
        "size" = round((sdata$mem_consumed), 3),
        "gb/n" = round(sdata$avg_gb_per_c, digits = 4),
        "n/Gb" = round(1 / sdata$avg_gb_per_c, digits = 2)
      )

      # Row for the CPU statistics summary table.
      cs <- c(
        "Test" = testname,
        "n" = sdata$num_containers,
        "cpu" = round(sdata$cpu_consumed, digits = 3),
        "cpu/n" = round((sdata$cpu_consumed / sdata$num_containers), digits = 4)
      )

      rstats <- rbind(rstats, ms)
      cstats <- rbind(cstats, cs)
    }
  }
}

#FIXME - if we end up with no data here, we should probably abort cleanly, or we
# end up spewing errors for trying to access empty arrays etc.
# Set up the text table headers
colnames(rstats) <- c("Test", "n", "Tot_Gb", "avg_Gb", "n_per_Gb")
colnames(cstats) <- c("Test", "n", "Tot_CPU", "avg_CPU")

# Build us a text table of numerical memory results
mem_stats_plot <- suppressWarnings(ggtexttable(data.frame(rstats),
  theme = ttheme(base_size = 10),
  rows = NULL
))

# Plot how the free-memory samples varied over 'time' (pod count)
mem_line_plot <- ggplot() +
  geom_line(data = data, aes(count, avail_gb, colour = testname, group = dataset), alpha = 0.2) +
  geom_smooth(data = data, aes(count, avail_gb, colour = testname, group = dataset), se = FALSE, method = "loess", size = 0.3) +
  xlab("Pods") +
  ylab("System Avail (Gb)") +
  scale_y_continuous(labels = comma) +
  ggtitle("System Memory free") +
  #ylim(0, NA) + # For big machines, better to not 0-index
  theme(axis.text.x = element_text(angle = 90))

# If we only have relatively few samples, add points to the plot. Otherwise, skip as
# the plot becomes far too noisy
if (skip_points == 0) {
  mem_line_plot <- mem_line_plot + geom_point(data = data, aes(count, avail_gb, colour = testname, group = dataset), alpha = 0.3)
}

# Build us a text table of numerical CPU results
cpu_stats_plot <- suppressWarnings(ggtexttable(data.frame(cstats),
  theme = ttheme(base_size = 10),
  rows = NULL
))

# Plot how the CPU-idle samples varied over 'time' (pod count)
cpu_line_plot <- ggplot() +
  geom_line(data = data, aes(count, cpu_idle, colour = testname, group = dataset), alpha = 0.2) +
  geom_smooth(data = data, aes(count, cpu_idle, colour = testname, group = dataset), se = FALSE, method = "loess", size = 0.3) +
  xlab("Pods") +
  ylab("System CPU Idle (%)") +
  ggtitle("System CPU usage") +
  #ylim(0, NA) + # For big machines, better to not 0-index
  theme(axis.text.x = element_text(angle = 90))

if (skip_points == 0) {
  cpu_line_plot <- cpu_line_plot + geom_point(data = data, aes(count, cpu_idle, colour = testname, group = dataset), alpha = 0.3)
}

# Show how boot time changed
boot_line_plot <- ggplot() +
  geom_line(data = data, aes(count, boot_time, colour = testname, group = dataset), alpha = 0.2) +
  geom_smooth(data = data, aes(count, boot_time, colour = testname, group = dataset), se = FALSE, method = "loess", size = 0.3) +
  xlab("pods") +
  ylab("Boot time (s)") +
  ggtitle("Pod boot time") +
  #ylim(0, NA) + # For big machines, better to not 0-index
  theme(axis.text.x = element_text(angle = 90))

# Bug fix: the points belong on the boot-time plot; the original added
# boot_time points to cpu_line_plot (copy-paste error), leaving the boot
# plot without points and polluting the CPU plot.
if (skip_points == 0) {
  boot_line_plot <- boot_line_plot + geom_point(data = data, aes(count, boot_time, colour = testname, group = dataset), alpha = 0.3)
}

mem_text <- paste("Footprint density statistics")
mem_text.p <- ggparagraph(text = mem_text, face = "italic", size = "10", color = "black")

cpu_text <- paste("System CPU consumption statistics")
cpu_text.p <- ggparagraph(text = cpu_text, face = "italic", size = "10", color = "black")

# See https://www.r-bloggers.com/ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/ for
# excellent examples
master_plot <- grid.arrange(
  mem_line_plot,
  cpu_line_plot,
  mem_stats_plot,
  cpu_stats_plot,
  mem_text.p,
  cpu_text.p,
  boot_line_plot,
  zeroGrob(),
  nrow = 4,
  ncol = 2,
  heights = c(1, 0.8, 0.1, 1))