Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial commit of metrics for scaling #119

Merged
merged 2 commits into from
Jul 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions metrics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Metric testing for scaling on Kubernetes.

The usage is

```bash
cd metrics/scaling
./k8s-scale.sh
```

The default container runtime is not Kata containers, but Kata can be specified
setting the environment variable:

```bash
use_kata_runtime=yes
```

These tests currently only support single node deployments, but PRs to add
multi-node support will follow shortly.
310 changes: 310 additions & 0 deletions metrics/lib/common.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,310 @@
#!/bin/bash
#
# Copyright (c) 2017,2018,2019 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

THIS_FILE=$(readlink -f ${BASH_SOURCE[0]})
LIB_DIR=${THIS_FILE%/*}
RESULT_DIR="${LIB_DIR}/../results"

source ${LIB_DIR}/kata-common.bash
source ${LIB_DIR}/json.bash
source /etc/os-release || source /usr/lib/os-release

# Set variables to reasonable defaults if unset or empty
DOCKER_EXE="${DOCKER_EXE:-docker}"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are going to eradicate all docker stuff from this code, yes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

punted on this for now, will work on once this is merged

RUNTIME="${RUNTIME:-kata-runtime}"

KSM_BASE="/sys/kernel/mm/ksm"
KSM_ENABLE_FILE="${KSM_BASE}/run"
KSM_PAGES_FILE="${KSM_BASE}/pages_to_scan"
KSM_SLEEP_FILE="${KSM_BASE}/sleep_millisecs"

# The settings we use for an 'aggresive' KSM setup
# Scan 1000 pages every 50ms - 20,000 pages/s
KSM_AGGRESIVE_PAGES=1000
KSM_AGGRESIVE_SLEEP=50

# This function checks existence of commands.
# They can be received standalone or as an array, e.g.
#
# cmds=(“cmd1” “cmd2”)
# check_cmds "${cmds[@]}"
check_cmds()
{
local cmd req_cmds=( "$@" )
for cmd in "${req_cmds[@]}"; do
if ! command -v "$cmd" > /dev/null 2>&1; then
die "command $cmd not available"
fi
echo "command: $cmd: yes"
done
}

# This function performs a docker pull on the image names
# passed in (notionally as 'about to be used'), to ensure
# - that we have the most upto date images
# - that any pull/refresh time (for a first pull) does not
# happen during the test itself.
#
# The image list can be received standalone or as an array, e.g.
#
# images=(“img1” “img2”)
# check_imgs "${images[@]}"
check_images()
{
local img req_images=( "$@" )
for img in "${req_images[@]}"; do
echo "docker pull'ing: $img"
if ! docker pull "$img"; then
die "Failed to docker pull image $img"
fi
echo "docker pull'd: $img"
done
}

# This function performs a docker build on the image names
# passed in, to ensure that we have the latest changes from
# the dockerfiles
build_dockerfile_image()
{
local image="$1"
local dockerfile_path="$2"
local dockerfile_dir=${2%/*}

echo "docker building $image"
if ! docker build --label "$image" --tag "${image}" -f "$dockerfile_path" "$dockerfile_dir"; then
die "Failed to docker build image $image"
fi
}

# This function verifies that the dockerfile version is
# equal to the test version in order to build the image or
# just run the test
check_dockerfiles_images()
{
local image="$1"
local dockerfile_path="$2"

if [ -z "$image" ] || [ -z "$dockerfile_path" ]; then
die "Missing image or dockerfile path variable"
fi

# Verify that dockerfile version is equal to test version
check_image=$(docker images "$image" -q)
if [ -n "$check_image" ]; then
# Check image label
check_image_version=$(docker image inspect $image | grep -w DOCKERFILE_VERSION | head -1 | cut -d '"' -f4)
if [ -n "$check_image_version" ]; then
echo "$image is not updated"
build_dockerfile_image "$image" "$dockerfile_path"
else
# Check dockerfile label
dockerfile_version=$(grep DOCKERFILE_VERSION $dockerfile_path | cut -d '"' -f2)
if [ "$dockerfile_version" != "$check_image_version" ]; then
echo "$dockerfile_version is not equal to $check_image_version"
build_dockerfile_image "$image" "$dockerfile_path"
fi
fi
else
build_dockerfile_image "$image" "$dockerfile_path"
fi
}

# A one time (per uber test cycle) init that tries to get the
# system to a 'known state' as much as possible
metrics_onetime_init()
{
# The onetime init must be called once, and only once
if [ ! -z "$onetime_init_done" ]; then
die "onetime_init() called more than once"
fi

# Restart services
sudo systemctl restart docker

# We want this to be seen in sub shells as well...
# otherwise init_env() cannot check us
export onetime_init_done=1
}

# Print a banner to the logs noting clearly which test
# we are about to run
test_banner()
{
echo -e "\n===== starting test [$1] ====="
}

# Initialization/verification environment. This function makes
# minimal steps for metrics/tests execution.
init_env()
{
test_banner "${TEST_NAME}"

cmd=("docker")

# check dependencies
check_cmds "${cmd[@]}"

# Remove all stopped containers
clean_env

# This clean up is more aggressive, this is in order to
# decrease the factors that could affect the metrics results.
kill_processes_before_start
}

# This function checks if there are containers or
# shim/proxy/hypervisor processes up, if found, they are
# killed to start test with clean environment.
kill_processes_before_start() {
DOCKER_PROCS=$(${DOCKER_EXE} ps -q)
[[ -n "${DOCKER_PROCS}" ]] && clean_env
check_processes
}

# Generate a random name - generally used when creating containers, but can
# be used for any other appropriate purpose
random_name() {
mktemp -u kata-XXXXXX
}

# Dump diagnostics about our current system state.
# Very useful for diagnosing if we have failed a sanity check
show_system_state() {
echo "Showing system state:"
echo " --Docker ps--"
${DOCKER_EXE} ps -a
echo " --${RUNTIME} list--"
local RPATH=$(command -v ${RUNTIME})
sudo ${RPATH} list

local processes="kata-proxy kata-shim kata-runtime qemu"

for p in ${processes}; do
echo " --pgrep ${p}--"
pgrep -a ${p}
done
}

common_init(){

# If we are running a kata runtime, go extract its environment
# for later use.
local iskata=$(is_a_kata_runtime "$RUNTIME")

if [ "$iskata" == "1" ]; then
extract_kata_env
else
# We know we have nothing to do for runc
if [ "$RUNTIME" != "runc" ]; then
warning "Unrecognised runtime ${RUNTIME}"
fi
fi
}


# Save the current KSM settings so we can restore them later
save_ksm_settings(){
echo "saving KSM settings"
ksm_stored_run=$(cat ${KSM_ENABLE_FILE})
ksm_stored_pages=$(cat ${KSM_ENABLE_FILE})
ksm_stored_sleep=$(cat ${KSM_ENABLE_FILE})
}

set_ksm_aggressive(){
echo "setting KSM to aggressive mode"
# Flip the run off/on to ensure a restart/rescan
sudo bash -c "echo 0 > ${KSM_ENABLE_FILE}"
sudo bash -c "echo ${KSM_AGGRESIVE_PAGES} > ${KSM_PAGES_FILE}"
sudo bash -c "echo ${KSM_AGGRESIVE_SLEEP} > ${KSM_SLEEP_FILE}"
sudo bash -c "echo 1 > ${KSM_ENABLE_FILE}"
}

restore_ksm_settings(){
echo "restoring KSM settings"
# First turn off the run to ensure if we are then re-enabling
# that any changes take effect
sudo bash -c "echo 0 > ${KSM_ENABLE_FILE}"
sudo bash -c "echo ${ksm_stored_pages} > ${KSM_PAGES_FILE}"
sudo bash -c "echo ${ksm_stored_sleep} > ${KSM_SLEEP_FILE}"
sudo bash -c "echo ${ksm_stored_run} > ${KSM_ENABLE_FILE}"
}

disable_ksm(){
echo "disabling KSM"
sudo bash -c "echo 0 > ${KSM_ENABLE_FILE}"
}

# See if KSM is enabled.
# If so, amend the test name to reflect that
check_for_ksm(){
if [ ! -f ${KSM_ENABLE_FILE} ]; then
return
fi

ksm_on=$(< ${KSM_ENABLE_FILE})

if [ $ksm_on == "1" ]; then
TEST_NAME="${TEST_NAME} ksm"
fi
}

# Wait for KSM to settle down, or timeout waiting
# The basic algorithm is to look at the pages_shared value
# at the end of every 'full scan', and if the value
# has changed very little, then we are done (because we presume
# a full scan has managed to do few new merges)
#
# arg1 - timeout in seconds
wait_ksm_settle(){
[[ "$RUNTIME" == "runc" ]] || [[ "$RUNTIME" == "kata-fc" ]] && return
local t pcnt
local oldscan=-1 newscan
local oldpages=-1 newpages

oldscan=$(cat /sys/kernel/mm/ksm/full_scans)

# Go around the loop until either we see a small % change
# between two full_scans, or we timeout
for ((t=0; t<$1; t++)); do

newscan=$(cat /sys/kernel/mm/ksm/full_scans)
newpages=$(cat /sys/kernel/mm/ksm/pages_shared)
[[ "$newpages" -eq 0 ]] && echo "No need to wait for KSM to settle" && return

if (( newscan != oldscan )); then
echo -e "\nnew full_scan ($oldscan to $newscan)"

# Do we have a previous scan to compare with
echo "check pages $oldpages to $newpages"

if (( oldpages != -1 )); then
# avoid divide by zero problems
if (( $oldpages > 0 )); then
pcnt=$(( 100 - ((newpages * 100) / oldpages) ))
# abs()
pcnt=$(( $pcnt * -1 ))

echo "$oldpages to $newpages is ${pcnt}%"

if (( $pcnt <= 5 )); then
echo "KSM stabilised at ${t}s"
return
fi
else
echo "$oldpages KSM pages... waiting"
fi
fi
oldscan=$newscan
oldpages=$newpages
else
echo -n "."
fi
sleep 1
done
echo "Timed out after ${1}s waiting for KSM to settle"
}

common_init
Loading