diff --git a/tools/workload/benchmark_velox/README.md b/tools/workload/benchmark_velox/README.md
new file mode 100644
index 000000000000..5f080077f606
--- /dev/null
+++ b/tools/workload/benchmark_velox/README.md
@@ -0,0 +1,38 @@
+# Setup, Build and Benchmark Spark/Gluten with Jupyter Notebook
+
+This guide provides notebooks and scripts for performance testing in Gluten. The standard approach is to set up the test environment on a bare-metal machine or cloud instance and run performance tests with TPC-H/TPC-DS workloads. These scripts enable users to reproduce our performance results in their own environment.
+
+## Environment Setup
+
+The recommended OS is Ubuntu 22.04 with kernel 5.15. To prepare the environment, run [initialize.ipynb](./initialize.ipynb), which will:
+
+- Install system dependencies and set up Jupyter Notebook
+- Configure Hadoop and Spark
+- Configure kernel parameters
+- Build Gluten using Docker
+- Generate TPC-H/TPC-DS tables
+
+## Running TPC-H/TPC-DS Benchmarks
+
+To run TPC-H/TPC-DS benchmarks, use [tpc_workload.ipynb](./tpc_workload.ipynb). You can create a copy of the notebook and modify its parameters to run different workloads. However, creating and modifying a copy each time you change workloads can be inconvenient. Instead, it's recommended to use Papermill to pass parameters via the command line for greater flexibility.
+
+The required parameters are specified in [params.yaml.template](./params.yaml.template). To use it, create your own YAML file by copying and modifying the template. The command to run the notebook is:
+
+```bash
+papermill tpc_workload.ipynb --inject-output-path -f params.yaml gluten_tpch.ipynb
+```
+After execution, the output notebook will be saved as `gluten_tpch.ipynb`.
+
+If you want to use different parameters, you can specify them via the `-p` option, which overrides the corresponding parameters defined in `params.yaml`. For example, to switch to the TPC-DS workload, run:
+
+```bash
+papermill tpc_workload.ipynb --inject-output-path -f params.yaml -p workload tpcds gluten_tpcds.ipynb
+```
+
+Please refer to the Papermill documentation for additional usage details.
+
+We also provide a wrapper script, [run_tpc_workload.sh](./run_tpc_workload.sh). It runs the Papermill command and automatically renames the output notebook with a timestamp and application ID to prevent overwriting existing output files.
+
+## Analyzing Performance Results
+
+After execution, check the **Show Performance** section in the output notebook. It reports the CPU utilization per query and plots charts for CPU utilization, memory throughput, disk throughput/utilization, network throughput and page faults.
diff --git a/tools/workload/benchmark_velox/init_disks.py b/tools/workload/benchmark_velox/init_disks.py
new file mode 100644
index 000000000000..8e47e16e3078
--- /dev/null
+++ b/tools/workload/benchmark_velox/init_disks.py
@@ -0,0 +1,103 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# To set up the virtual environment required to run this script, +# refer to the `Format and mount disks` subsection under `System Setup` in initialize.ipynb. +import sys +import subprocess +import questionary +import json + +def yes_or_no(question): + while True: + user_input = input(question + '(yes/no/quit): ') + if user_input.lower() == 'yes': + return True + elif user_input.lower() == 'no': + return False + elif user_input.lower() == 'quit': + sys.exit(1) + else: + continue + +def filter_empty_str(l): + return [x for x in l if x] + +def run_and_log(cmd): + # Print command in yellow + print('\033[93m' + '>>> Running command: ' + repr(cmd) + '\033[0m') + result = subprocess.run(cmd, check=True, shell=True, capture_output=True, text=True) + # Print stdout in green + print('\033[92m' + '==========stdout==========' + '\033[0m') + print(result.stdout) + # Print stderr in red + print('\033[91m' + '==========stderr==========' + '\033[0m') + print(result.stderr) + +def init_disks(): + all_disks = filter_empty_str(subprocess.run("lsblk -I 7,8,259 -npd --output NAME".split(' '), capture_output=True, text=True).stdout.split('\n')) + if not all_disks: + print("No disks found on system. Exit.") + sys.exit(0) + + answer = False + disks = [] + while not answer: + disks = questionary.checkbox('Select disks to initialize:', choices=all_disks).ask() + answer = yes_or_no('Confirm selected:\n' + '\n'.join(disks) + '\n') + + if not disks: + print('No disks are selected.') + return + + for d in disks: + print('Initializing {} ...'.format(d)) + run_and_log('wipefs -a {}'.format(d)) + run_and_log('echo "g\nw\n" | fdisk {}'.format(d)) + run_and_log('echo "n\n\n\n\nw\n" | fdisk {}'.format(d)) + run_and_log('mkfs.ext4 {}p1'.format(d)) + +def mount_partitions(): + subprocess.run('lsblk -pf --json > lsblk.json', shell=True) + partitions = [] + with open('lsblk.json', 'r') as f: + data = json.load(f) + for d in data['blockdevices']: + if 'children' in d: + for c in d['children']: + if c['fstype'] == 'ext4': + partitions.append(c['name']) + answer = False + while not answer: + partitions = questionary.checkbox('Select partitions to create mount points:', choices=partitions).ask() + answer = yes_or_no('Confirm selected:\n' + '\n'.join(partitions) + '\n') + + for i, p in enumerate(partitions): + d = 'data{}'.format(i) + run_and_log('e2label {} ""'.format(p)) + run_and_log('e2label {} {}'.format(p, d)) + run_and_log('mkdir -p /{}'.format(d)) + run_and_log('mount -L {} /{}'.format(d, d)) + +def choose(): + choice = questionary.select('Select operation:', choices=['Format disks', 'Mount partitions']).ask() + print(choice) + if choice == 'Format disks': + init_disks() + elif choice == 'Mount partitions': + mount_partitions() + +if __name__ == '__main__': + choose() diff --git a/tools/workload/benchmark_velox/initialize.ipynb b/tools/workload/benchmark_velox/initialize.ipynb new file mode 100644 index 000000000000..cbbc27686951 --- /dev/null +++ b/tools/workload/benchmark_velox/initialize.ipynb @@ -0,0 +1,2918 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + 
"source": [ + "# System Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "**1. Install system dependencies and python packages. Prepare the environment.**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "First, install all dependencies and python packages as `root`. Run commands and make sure the installations are successful.\n", + "\n", + "```bash\n", + "apt update\n", + "\n", + "apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison flex\n", + "\n", + "python3 -m pip install notebook==6.5.2\n", + "python3 -m pip install jupyter_server==1.23.4\n", + "python3 -m pip install jupyter_highlight_selected_word\n", + "python3 -m pip install jupyter_contrib_nbextensions\n", + "python3 -m pip install virtualenv==20.21.1\n", + "python3 -m pip uninstall -y ipython\n", + "python3 -m pip install ipython==8.21.0\n", + "python3 -m pip uninstall -y traitlets\n", + "python3 -m pip install traitlets==5.9.0\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "***Required for Ubuntu***\n", + "\n", + "Check that there isn't an entry for your hostname mapped to 127.0.0.1 or 127.0.1.1 in /etc/hosts (Ubuntu is notorious for this). If there is, delete it.\n", + "Then add `` and `` for master and worker nodes.\n", + "\n", + "Example /etc/hosts:\n", + " \n", + "```\n", + "127.0.0.1 localhost\n", + "\n", + "# The following lines are desirable for IPv6 capable hosts\n", + "::1 ip6-localhost ip6-loopback\n", + "fe00::0 ip6-localnet\n", + "ff00::0 ip6-mcastprefix\n", + "ff02::1 ip6-allnodes\n", + "ff02::2 ip6-allrouters\n", + "\n", + "10.0.0.117 sr217\n", + "10.0.0.113 sr213\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "**2. Format and mount disks**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Create a python virtual environment to finish the system setup process:\n", + "\n", + "```bash\n", + "virtualenv -p python3 -v venv\n", + "source venv/bin/activate\n", + "```\n", + "\n", + "And install packages under `venv`:\n", + "```bash\n", + "(venv) python3 -m pip install questionary\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Run script [init_disks.py](./init_disks.py) to format and mount disks. **Be careful when choosing the disks to format.** If you see errors like `device or resource busy`, perhaps the partition has been mounted, you should unmount it first. If you still see this error, reboot the system and try again." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Exit `venv`:\n", + "```bash\n", + "(venv) deactivate\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "**3. Create user `sparkuser`**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Create user `sparkuser` without password and with sudo priviledge. 
It's recommended to use one of the disks as the home directory instead of the system drive.\n", + "\n", + "```bash\n", + "mkdir -p /data0/home/sparkuser\n", + "ln -s /data0/home/sparkuser /home/sparkuser\n", + "cp -r /etc/skel/. /home/sparkuser/\n", + "adduser --home /home/sparkuser --disabled-password --gecos \"\" sparkuser\n", + "\n", + "chown -R sparkuser:sparkuser /data*\n", + "\n", + "echo 'sparkuser ALL=(ALL:ALL) NOPASSWD:ALL' | EDITOR='tee -a' visudo\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Generate ssh keys for `sparkuser`\n", + "\n", + "```bashrc\n", + "su - sparkuser\n", + "```\n", + "\n", + "```bashrc\n", + "rm -rf ~/.ssh\n", + "ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa <</dev/null 2>&1\n", + "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys\n", + "\n", + "exit\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Generate ssh keys for `root`, and enable no password ssh from `sparkuser`\n", + "\n", + "```bash\n", + "rm -rf /root/.ssh\n", + "ssh-keygen -q -t rsa -N '' -f ~/.ssh/id_rsa <</dev/null 2>&1\n", + "cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys\n", + "cat /home/sparkuser/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Login to `sparkuser` and run the first-time ssh to the `root`\n", + "\n", + "```bash\n", + "su - sparkuser\n", + "```\n", + "\n", + "```bash\n", + "ssh -o StrictHostKeyChecking=no root@localhost ls\n", + "ssh -o StrictHostKeyChecking=no root@127.0.0.1 ls\n", + "ssh -o StrictHostKeyChecking=no root@`hostname` ls\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "***Required for Ubuntu***\n", + "\n", + "Run below command to comment out lines starting from `If not running interactively, don't do anything` in ~/.bashrc\n", + "\n", + "```bash\n", + "sed -i '5,9 s/^/# /' ~/.bashrc\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "**4. Configure jupyter notebook**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "As `sparkuser`, install python packages\n", + "\n", + "```bash\n", + "cd /home/sparkuser/.local/lib/ && rm -rf python*\n", + "\n", + "python3 -m pip install --upgrade jsonschema\n", + "python3 -m pip install jsonschema[format]\n", + "python3 -m pip install sqlalchemy==1.4.46\n", + "python3 -m pip install papermill Black\n", + "python3 -m pip install NotebookScripter\n", + "python3 -m pip install findspark spylon-kernel matplotlib pandasql pyhdfs\n", + "python3 -m pip install ipywidgets jupyter_nbextensions_configurator ipyparallel\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Configure jupyter notebook. 
Setup password when it prompts\n", + "\n", + "```bash\n", + "jupyter notebook --generate-config\n", + "\n", + "jupyter notebook password\n", + "\n", + "mkdir -p ~/.jupyter/custom/\n", + "\n", + "echo '.container { width:100% !important; }' >> ~/.jupyter/custom/custom.css\n", + "\n", + "echo 'div.output_stderr { background: #ffdd; display: none; }' >> ~/.jupyter/custom/custom.css\n", + "\n", + "jupyter nbextension install --py jupyter_highlight_selected_word --user\n", + "\n", + "jupyter nbextension enable highlight_selected_word/main\n", + "\n", + "jupyter nbextension install --py widgetsnbextension --user\n", + "\n", + "jupyter contrib nbextension install --user\n", + "\n", + "jupyter nbextension enable codefolding/main\n", + "\n", + "jupyter nbextension enable code_prettify/code_prettify\n", + "\n", + "jupyter nbextension enable codefolding/edit\n", + "\n", + "jupyter nbextension enable code_font_size/code_font_size\n", + "\n", + "jupyter nbextension enable collapsible_headings/main\n", + "\n", + "jupyter nbextension enable highlight_selected_word/main\n", + "\n", + "jupyter nbextension enable ipyparallel/main\n", + "\n", + "jupyter nbextension enable move_selected_cells/main\n", + "\n", + "jupyter nbextension enable nbTranslate/main\n", + "\n", + "jupyter nbextension enable scratchpad/main\n", + "\n", + "jupyter nbextension enable tree-filter/index\n", + "\n", + "jupyter nbextension enable comment-uncomment/main\n", + "\n", + "jupyter nbextension enable export_embedded/main\n", + "\n", + "jupyter nbextension enable hide_header/main\n", + "\n", + "jupyter nbextension enable highlighter/highlighter\n", + "\n", + "jupyter nbextension enable scroll_down/main\n", + "\n", + "jupyter nbextension enable snippets/main\n", + "\n", + "jupyter nbextension enable toc2/main\n", + "\n", + "jupyter nbextension enable varInspector/main\n", + "\n", + "jupyter nbextension enable codefolding/edit\n", + "\n", + "jupyter nbextension enable contrib_nbextensions_help_item/main\n", + "\n", + "jupyter nbextension enable freeze/main\n", + "\n", + "jupyter nbextension enable hide_input/main\n", + "\n", + "jupyter nbextension enable jupyter-js-widgets/extension\n", + "\n", + "jupyter nbextension enable snippets_menu/main\n", + "\n", + "jupyter nbextension enable table_beautifier/main\n", + "\n", + "jupyter nbextension enable hide_input_all/main\n", + "\n", + "jupyter nbextension enable spellchecker/main\n", + "\n", + "jupyter nbextension enable toggle_all_line_numbers/main\n", + "\n", + "jupyter nbextensions_configurator enable --user\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Clone Gluten\n", + "\n", + "```bash\n", + "cd ~\n", + "git clone https://github.com/apache/incubator-gluten.git gluten\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Start jupyter notebook\n", + "\n", + "```bash\n", + "mkdir -p ~/ipython\n", + "cd ~/ipython\n", + "\n", + "nohup jupyter notebook --ip=0.0.0.0 --port=8888 &\n", + "\n", + "cp ~/gluten/tools/workload/benchmark_velox ~/ipython/\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Initialize\n", + " Run this section after notebook restart! " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Specify datadir. The directories are used for spark.local.dirs and hadoop namenode/datanode." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "datadir=[f'/data{i}' for i in range(0, 8)]\n", + "datadir" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Specify clients(workers). Leave it empty if the cluster is setup on the local machine." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "clients=''''''.split()\n", + "print(clients)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Specify JAVA_HOME" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "java_home = '/usr/lib/jvm/java-8-openjdk-amd64'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "import socket\n", + "import platform\n", + "\n", + "user=os.getenv('USER')\n", + "print(f\"user: {user}\")\n", + "print()\n", + "\n", + "masterip=socket.gethostbyname(socket.gethostname())\n", + "hostname=socket.gethostname() \n", + "print(f\"masterip: {masterip} hostname: {hostname}\")\n", + "print()\n", + "\n", + "hclients=clients.copy()\n", + "hclients.append(hostname)\n", + "print(f\"master and workers: {hclients}\")\n", + "print()\n", + "\n", + "\n", + "if clients:\n", + " cmd = f\"ssh {clients[0]} \" + \"\\\"lscpu | grep '^CPU(s)'\\\"\" + \" | awk '{print $2}'\"\n", + " client_cpu = !{cmd}\n", + " cpu_num = client_cpu[0]\n", + "\n", + " cmd = f\"ssh {clients[0]} \" + \"\\\"cat /proc/meminfo | grep MemTotal\\\"\" + \" | awk '{print $2}'\"\n", + " totalmemory = !{cmd}\n", + " totalmemory = int(totalmemory[0])\n", + "else:\n", + " cpu_num = os.cpu_count()\n", + " totalmemory = !cat /proc/meminfo | grep MemTotal | awk '{print $2}'\n", + " totalmemory = int(totalmemory[0])\n", + " \n", + "print(f\"cpu_num: {cpu_num}\")\n", + "print()\n", + "\n", + "print(\"total memory: \", totalmemory, \"KB\")\n", + "print()\n", + "\n", + "mem_mib = int(totalmemory/1024)-1024\n", + "print(f\"mem_mib: {mem_mib}\")\n", + "print()\n", + "\n", + "is_arm = platform.machine() == 'aarch64'\n", + "print(\"is_arm: \",is_arm)\n", + "print()\n", + "\n", + "sparklocals=\",\".join([f'{l}/{user}/yarn/local' for l in datadir])\n", + "print(f\"SPARK_LOCAL_DIR={sparklocals}\")\n", + "print()\n", + "\n", + "%cd ~" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Set up clients\n", + " SKIP for single node " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Install dependencies" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Manually configure ssh login without password to all clients\n", + "\n", + "```bash\n", + "ssh-copy-id -o StrictHostKeyChecking=no root@\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh root@{l} apt update > /dev/null 2>&1\n", + " !ssh root@{l} apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison 
flex > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Create user" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh -o StrictHostKeyChecking=no root@{l} ls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh root@{l} adduser --disabled-password --gecos '\"\"' {user}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh root@{l} cp -r .ssh /home/{user}/\n", + " !ssh root@{l} chown -R {user}:{user} /home/{user}/.ssh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh root@{l} \"echo -e 'sparkuser ALL=(ALL:ALL) NOPASSWD:ALL' | EDITOR='tee -a' visudo\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "***Required for Ubuntu***\n", + "\n", + "Run below command to comment out lines starting from If not running interactively, don't do anything in ~/.bashrc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh {l} sed -i \"'5,9 s/^/# /'\" ~/.bashrc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Use /etc/hosts on master node" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !scp /etc/hosts root@{l}:/etc/hosts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Setup disks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !ssh root@{l} apt update > /dev/null 2>&1\n", + " !ssh root@{l} apt install -y pip > /dev/null 2>&1\n", + " !ssh root@{l} python3 -m pip install virtualenv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Manually run **2. 
Format and mount disks** section under [System Setup](#System-Setup)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Configure Spark, Hadoop" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Download packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz > /dev/null 2>&1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.4/hadoop-3.2.4.tar.gz > /dev/null 2>&1\n", + "# backup url: !wget https://dlcdn.apache.org/hadoop/common/hadoop-3.2.4/hadoop-3.2.4.tar.gz > /dev/null 2>&1\n", + "if is_arm:\n", + " # download both versions\n", + " !wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5-aarch64.tar.gz > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Create directories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "cmd=\";\".join([f\"chown -R {user}:{user} \" + l for l in datadir])\n", + "for l in hclients:\n", + " !ssh root@{l} '{cmd}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "cmd=\";\".join([f\"rm -rf {l}/tmp; mkdir -p {l}/tmp\" for l in datadir])\n", + "for l in hclients:\n", + " !ssh {l} '{cmd}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "cmd=\";\".join([f\"mkdir -p {l}/{user}/hdfs/data; mkdir -p {l}/{user}/yarn/local\" for l in datadir])\n", + "for l in hclients:\n", + " !ssh {l} '{cmd}'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!mkdir -p {datadir[0]}/{user}/hdfs/name\n", + "!mkdir -p {datadir[0]}/{user}/hdfs/namesecondary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !scp hadoop-3.2.4.tar.gz {l}:~/\n", + " !scp spark-3.3.1-bin-hadoop3.tgz {l}:~/\n", + " !ssh {l} \"mv -f hadoop hadoop.bak; mv -f spark spark.bak\"\n", + " !ssh {l} \"tar zxvf hadoop-3.2.4.tar.gz > /dev/null 2>&1\"\n", + " !ssh {l} \"tar -zxvf spark-3.3.1-bin-hadoop3.tgz > /dev/null 2>&1\"\n", + " !ssh root@{l} \"apt install -y openjdk-8-jdk > /dev/null 2>&1\"\n", + " !ssh {l} \"ln -s hadoop-3.2.4 hadoop; ln -s spark-3.3.1-bin-hadoop3 spark\"\n", + " if is_arm:\n", + " !ssh {l} \"tar zxvf hadoop-3.3.5-aarch64.tar.gz > /dev/null 2>&1\"\n", + " !ssh {l} \"cd hadoop && mv lib lib.bak && cp -rf ~/hadoop-3.3.5/lib ~/hadoop\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Configure bashrc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "\n", + "cfg=f'''export HADOOP_HOME=~/hadoop\n", + "export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin\n", + "\n", + "export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop\n", + "export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop\n", + "\n", + "export SPARK_HOME=~/spark\n", + "export 
PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip:$PYTHONPATH\n", + "export PATH=$SPARK_HOME/bin:$PATH\n", + "\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "if is_arm:\n", + " cfg += 'export CPU_TARGET=\"aarch64\"\\nexport JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64\\nexport PATH=$JAVA_HOME/bin:$PATH\\n'\n", + "else:\n", + " cfg += f'export JAVA_HOME={java_home}\\nexport PATH=$JAVA_HOME/bin:$PATH\\n'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "with open(\"tmpcfg\",'w') as f:\n", + " f.writelines(cfg)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !scp tmpcfg {l}:~/tmpcfg.in\n", + " !ssh {l} \"cat ~/tmpcfg.in >> ~/.bashrc\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} tail -n10 ~/.bashrc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Configure Hadoop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh root@{l} \"apt install -y libiberty-dev libxml2-dev libkrb5-dev libgsasl7-dev libuuid1 uuid-dev > /dev/null 2>&1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### setup short-circuit " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh root@{l} \"mkdir -p /var/lib/hadoop-hdfs/\"\n", + " !ssh root@{l} 'chown {user}:{user} /var/lib/hadoop-hdfs/'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### enable security.authorization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "coresite='''\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " fs.default.name\n", + " hdfs://{:s}:8020\n", + " true\n", + " \n", + " \n", + " hadoop.security.authentication\n", + " simple\n", + " \n", + " \n", + " hadoop.security.authorization\n", + " true\n", + " \n", + "\n", + "'''.format(hostname)\n", + "\n", + "with open(f'/home/{user}/hadoop/etc/hadoop/core-site.xml','w') as f:\n", + " f.writelines(coresite)\n", + " \n", + "for l in clients:\n", + " !scp ~/hadoop/etc/hadoop/core-site.xml {l}:~/hadoop/etc/hadoop/core-site.xml >/dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### set IP check, note the command \", \".join" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "hadooppolicy='''\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " security.service.authorization.default.hosts\n", + " {:s}\n", + " \n", + " \n", + " security.service.authorization.default.acl\n", + " {:s} {:s}\n", + " \n", + " \n", + " \n", + " \n", + " security.client.protocol.acl\n", + " *\n", + " ACL for ClientProtocol, which is used by user code\n", + " via the DistributedFileSystem.\n", + " The ACL is a comma-separated list of user and group names. 
The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.client.datanode.protocol.acl\n", + " *\n", + " ACL for ClientDatanodeProtocol, the client-to-datanode protocol\n", + " for block recovery.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.datanode.protocol.acl\n", + " *\n", + " ACL for DatanodeProtocol, which is used by datanodes to\n", + " communicate with the namenode.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.inter.datanode.protocol.acl\n", + " *\n", + " ACL for InterDatanodeProtocol, the inter-datanode protocol\n", + " for updating generation timestamp.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.namenode.protocol.acl\n", + " *\n", + " ACL for NamenodeProtocol, the protocol used by the secondary\n", + " namenode to communicate with the namenode.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.admin.operations.protocol.acl\n", + " *\n", + " ACL for AdminOperationsProtocol. Used for admin commands.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.refresh.user.mappings.protocol.acl\n", + " *\n", + " ACL for RefreshUserMappingsProtocol. Used to refresh\n", + " users mappings. The ACL is a comma-separated list of user and\n", + " group names. The user and group list is separated by a blank. For\n", + " e.g. \"alice,bob users,wheel\". A special value of \"*\" means all\n", + " users are allowed.\n", + " \n", + "\n", + " \n", + " security.refresh.policy.protocol.acl\n", + " *\n", + " ACL for RefreshAuthorizationPolicyProtocol, used by the\n", + " dfsadmin and mradmin commands to refresh the security policy in-effect.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. 
\"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.ha.service.protocol.acl\n", + " *\n", + " ACL for HAService protocol used by HAAdmin to manage the\n", + " active and stand-by states of namenode.\n", + " \n", + "\n", + " \n", + " security.zkfc.protocol.acl\n", + " *\n", + " ACL for access to the ZK Failover Controller\n", + " \n", + " \n", + "\n", + " \n", + " security.qjournal.service.protocol.acl\n", + " *\n", + " ACL for QJournalProtocol, used by the NN to communicate with\n", + " JNs when using the QuorumJournalManager for edit logs.\n", + " \n", + "\n", + " \n", + " security.mrhs.client.protocol.acl\n", + " *\n", + " ACL for HSClientProtocol, used by job clients to\n", + " communciate with the MR History Server job status etc.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + "\n", + " \n", + " security.resourcetracker.protocol.acl\n", + " *\n", + " ACL for ResourceTrackerProtocol, used by the\n", + " ResourceManager and NodeManager to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.resourcemanager-administration.protocol.acl\n", + " *\n", + " ACL for ResourceManagerAdministrationProtocol, for admin commands.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.applicationclient.protocol.acl\n", + " *\n", + " ACL for ApplicationClientProtocol, used by the ResourceManager\n", + " and applications submission clients to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.applicationmaster.protocol.acl\n", + " *\n", + " ACL for ApplicationMasterProtocol, used by the ResourceManager\n", + " and ApplicationMasters to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.containermanagement.protocol.acl\n", + " *\n", + " ACL for ContainerManagementProtocol protocol, used by the NodeManager\n", + " and ApplicationMasters to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.resourcelocalizer.protocol.acl\n", + " *\n", + " ACL for ResourceLocalizer protocol, used by the NodeManager\n", + " and ResourceLocalizer to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. 
The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.job.task.protocol.acl\n", + " *\n", + " ACL for TaskUmbilicalProtocol, used by the map and reduce\n", + " tasks to communicate with the parent tasktracker.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.job.client.protocol.acl\n", + " *\n", + " ACL for MRClientProtocol, used by job clients to\n", + " communciate with the MR ApplicationMaster to query job status etc.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " security.applicationhistory.protocol.acl\n", + " *\n", + " ACL for ApplicationHistoryProtocol, used by the timeline\n", + " server and the generic history service client to communicate with each other.\n", + " The ACL is a comma-separated list of user and group names. The user and\n", + " group list is separated by a blank. For e.g. \"alice,bob users,wheel\".\n", + " A special value of \"*\" means all users are allowed.\n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "'''.format((\",\").join(hclients),user,user)\n", + "\n", + "with open(f'/home/{user}/hadoop/etc/hadoop/hadoop-policy.xml','w') as f:\n", + " f.writelines(hadooppolicy)\n", + " \n", + "for l in clients:\n", + " !scp ~/hadoop/etc/hadoop/hadoop-policy.xml {l}:~/hadoop/etc/hadoop/hadoop-policy.xml >/dev/null 2>&1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### hdfs config, set replication to 1 to cache all the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "hdfs_data=\",\".join([f'{l}/{user}/hdfs/data' for l in datadir])\n", + "\n", + "hdfs_site=f'''\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " dfs.namenode.secondary.http-address\n", + " {hostname}:50090\n", + " \n", + " \n", + " dfs.namenode.name.dir\n", + " {datadir[0]}/{user}/hdfs/name\n", + " true\n", + " \n", + "\n", + " \n", + " dfs.datanode.data.dir\n", + " {hdfs_data}\n", + " true\n", + " \n", + "\n", + " \n", + " dfs.namenode.checkpoint.dir\n", + " {datadir[0]}/{user}/hdfs/namesecondary\n", + " true\n", + " \n", + " \n", + " dfs.name.handler.count\n", + " 100\n", + " \n", + " \n", + " dfs.blocksize\n", + " 128m\n", + "\n", + " \n", + " dfs.replication\n", + " 1\n", + "\n", + "\n", + "\n", + " dfs.client.read.shortcircuit\n", + " true\n", + "\n", + "\n", + "\n", + " dfs.domain.socket.path\n", + " /var/lib/hadoop-hdfs/dn_socket\n", + "\n", + "\n", + "\n", + "'''\n", + "\n", + "\n", + "with open(f'/home/{user}/hadoop/etc/hadoop/hdfs-site.xml','w') as f:\n", + " f.writelines(hdfs_site)\n", + " \n", + "for l in clients:\n", + " !scp ~/hadoop/etc/hadoop/hdfs-site.xml {l}:~/hadoop/etc/hadoop/hdfs-site.xml >/dev/null 2>&1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### mapreduce config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + 
"source": [ + "mapreduce='''\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + " mapreduce.framework.name\n", + " yarn\n", + " \n", + "\n", + " \n", + " mapreduce.job.maps\n", + " 288\n", + " \n", + " \n", + " mapreduce.job.reduces\n", + " 64\n", + " \n", + "\n", + " \n", + " mapreduce.map.java.opts\n", + " -Xmx5120M -DpreferIPv4Stack=true\n", + " \n", + " \n", + " mapreduce.map.memory.mb\n", + " 6144\n", + " \n", + "\n", + " \n", + " mapreduce.reduce.java.opts\n", + " -Xmx5120M -DpreferIPv4Stack=true\n", + " \n", + " \n", + " mapreduce.reduce.memory.mb\n", + " 6144\n", + " \n", + " \n", + " yarn.app.mapreduce.am.staging-dir\n", + " /user\n", + " \n", + " \n", + " mapreduce.task.io.sort.mb\n", + " 2000\n", + " \n", + " \n", + " mapreduce.task.timeout\n", + " 3600000\n", + " \n", + "\n", + "\n", + " mapreduce.jobhistory.address\n", + " {:s}:10020\n", + "\n", + "\n", + "\n", + "'''.format(hostname)\n", + "\n", + "\n", + "with open(f'/home/{user}/hadoop/etc/hadoop/mapred-site.xml','w') as f:\n", + " f.writelines(mapreduce)\n", + " \n", + "for l in clients:\n", + " !scp ~/hadoop/etc/hadoop/mapred-site.xml {l}:~/hadoop/etc/hadoop/mapred-site.xml >/dev/null 2>&1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### yarn config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "yarn_site=f'''\n", + "\n", + "\n", + " \n", + " yarn.resourcemanager.hostname\n", + " {hostname}\n", + " \n", + " \n", + " yarn.resourcemanager.address\n", + " {hostname}:8032\n", + " \n", + " \n", + " yarn.resourcemanager.webapp.address\n", + " {hostname}:8088\n", + " \n", + " \n", + " yarn.nodemanager.resource.memory-mb\n", + " {mem_mib}\n", + " \n", + " \n", + " yarn.nodemanager.resource.cpu-vcores\n", + " {cpu_num}\n", + " \n", + " \n", + " yarn.nodemanager.pmem-check-enabled\n", + " false\n", + " \n", + "\n", + " \n", + " yarn.nodemanager.vmem-check-enabled\n", + " false\n", + " \n", + " \n", + " yarn.nodemanager.vmem-pmem-ratio\n", + " 4.1\n", + " \n", + " \n", + " yarn.nodemanager.aux-services\n", + " mapreduce_shuffle,spark_shuffle\n", + " \n", + "\n", + " \n", + " yarn.scheduler.minimum-allocation-mb\n", + " 1024\n", + " \n", + " \n", + " yarn.scheduler.maximum-allocation-mb\n", + " {mem_mib}\n", + " \n", + " \n", + " yarn.scheduler.minimum-allocation-vcores\n", + " 1\n", + " \n", + " \n", + " yarn.scheduler.maximum-allocation-vcores\n", + " {cpu_num}\n", + " \n", + "\n", + " \n", + " yarn.log-aggregation-enable\n", + " false\n", + " \n", + " \n", + " yarn.nodemanager.log.retain-seconds\n", + " 36000\n", + " \n", + " \n", + " yarn.nodemanager.delete.debug-delay-sec\n", + " 3600\n", + " \n", + " \n", + " yarn.log.server.url\n", + " http://{hostname}:19888/jobhistory/logs/\n", + " \n", + "\n", + " \n", + " yarn.nodemanager.log-dirs\n", + " /home/{user}/hadoop/logs/userlogs\n", + " \n", + " \n", + " yarn.nodemanager.local-dirs\n", + " {sparklocals}\n", + " \n", + " \n", + " \n", + " yarn.nodemanager.aux-services.spark_shuffle.class\n", + " org.apache.spark.network.yarn.YarnShuffleService\n", + " \n", + "\n", + "'''\n", + "\n", + "\n", + "with open(f'/home/{user}/hadoop/etc/hadoop/yarn-site.xml','w') as f:\n", + " f.writelines(yarn_site)\n", + " \n", + "for l in clients:\n", + " !scp ~/hadoop/etc/hadoop/yarn-site.xml {l}:~/hadoop/etc/hadoop/yarn-site.xml >/dev/null 2>&1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + 
"source": [ + "### hadoop-env" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "#config java home\n", + "if is_arm:\n", + " !echo \"export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-arm64\" >> ~/hadoop/etc/hadoop/hadoop-env.sh\n", + "else:\n", + " !echo \"export JAVA_HOME={java_home}\" >> ~/hadoop/etc/hadoop/hadoop-env.sh\n", + "\n", + "for l in clients:\n", + " !scp hadoop/etc/hadoop/hadoop-env.sh {l}:~/hadoop/etc/hadoop/ >/dev/null 2>&1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### workers config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "if clients:\n", + " with open(f'/home/{user}/hadoop/etc/hadoop/workers','w') as f:\n", + " f.writelines(\"\\n\".join(clients))\n", + " for l in clients:\n", + " !scp hadoop/etc/hadoop/workers {l}:~/hadoop/etc/hadoop/ >/dev/null 2>&1\n", + "else:\n", + " !echo {hostname} > ~/hadoop/etc/hadoop/workers" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "### Copy jar from Spark for external shuffle service" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !scp spark/yarn/spark-3.3.1-yarn-shuffle.jar {l}:~/hadoop/share/hadoop/common/lib/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Configure Spark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "eventlog_dir=f'hdfs://{hostname}:8020/tmp/sparkEventLog'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "sparkconf=f'''\n", + "spark.eventLog.enabled true\n", + "spark.eventLog.dir {eventlog_dir}\n", + "spark.history.fs.logDirectory {eventlog_dir}\n", + "'''\n", + "\n", + "with open(f'/home/{user}/spark/conf/spark-defaults.conf','w+') as f:\n", + " f.writelines(sparkconf)\n", + " \n", + "for l in clients:\n", + " !scp ~/spark/conf/spark-defaults.conf {l}:~/spark/conf/spark-defaults.conf >/dev/null 2>&1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "sparkenv = f'export SPARK_LOCAL_DIRS={sparklocals}\\n'\n", + "with open(f'/home/{user}/.bashrc', 'a+') as f:\n", + " f.writelines(sparkenv)\n", + "for l in clients:\n", + " !scp ~/.bashrc {l}:~/.bashrc >/dev/null 2>&1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} tail -n10 ~/.bashrc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Configure startup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "startup=f'''#!/bin/bash\n", + "echo -1 > /proc/sys/kernel/perf_event_paranoid\n", + "echo 0 > /proc/sys/kernel/kptr_restrict\n", + "echo madvise >/sys/kernel/mm/transparent_hugepage/enabled\n", + "echo 1 > /proc/sys/kernel/numa_balancing\n", + "end=$(($(nproc) - 1))\n", + "for i in $(seq 0 $end); do echo performance > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor; done\n", + "for file in $(find 
/sys/devices/system/cpu/cpu*/power/energy_perf_bias); do echo \"0\" > $file; done\n", + "'''\n", + "\n", + "with open('/tmp/tmpstartup', 'w') as f:\n", + " f.writelines(startup)\n", + "\n", + "startup_service=f'''[Unit]\n", + "Description=Configure Transparent Hugepage, Auto NUMA Balancing, CPU Freq Scaling Governor\n", + "\n", + "[Service]\n", + "ExecStart=/usr/local/bin/mystartup.sh\n", + "\n", + "[Install]\n", + "WantedBy=multi-user.target\n", + "'''\n", + "\n", + "with open('/tmp/tmpstartup_service', 'w') as f:\n", + " f.writelines(startup_service)\n", + " \n", + "for l in hclients:\n", + " !scp /tmp/tmpstartup $l:/tmp/tmpstartup\n", + " !scp /tmp/tmpstartup_service $l:/tmp/tmpstartup_service\n", + " !ssh root@$l \"cat /tmp/tmpstartup > /usr/local/bin/mystartup.sh\"\n", + " !ssh root@$l \"chmod +x /usr/local/bin/mystartup.sh\"\n", + " !ssh root@$l \"cat /tmp/tmpstartup_service > /etc/systemd/system/mystartup.service\"\n", + " !ssh $l \"sudo systemctl enable mystartup.service\"\n", + " !ssh $l \"sudo systemctl start mystartup.service\"\n", + " !ssh $l \"sudo systemctl status mystartup.service\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "deletable": false, + "editable": false, + "heading_collapsed": true, + "hidden": true, + "run_control": { + "frozen": true + } + }, + "source": [ + "## Inspect CPU Freq & HT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "hidden": true, + "run_control": { + "frozen": true + } + }, + "outputs": [], + "source": [ + "if is_arm:\n", + " t = r'''\n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include //used for parsing the command line arguments\n", + " #include //used for opening the memory device file\n", + " #include //used for rounding functions\n", + " #include \n", + " #include \n", + " #include \n", + "\n", + " static inline uint64_t GetTickCount()\n", + " {//Return ns counts\n", + " struct timeval tp;\n", + " gettimeofday(&tp,NULL);\n", + " return tp.tv_sec*1000+tp.tv_usec/1000;\n", + " }\n", + "\n", + " uint64_t CNT=CNT_DEF;\n", + "\n", + " int main()\n", + " {\n", + "\n", + " uint64_t start, end;\n", + " start=end=GetTickCount();\n", + "\n", + " asm volatile (\n", + " \"1:\\n\"\n", + " \"SUBS %0,%0,#1\\n\"\n", + " \"bne 1b\\n\"\n", + " ::\"r\"(CNT)\n", + " );\n", + "\n", + " end=GetTickCount();\n", + "\n", + " printf(\" total time = %lu, freq = %lu \\n\", end-start, CNT/(end-start)/1000);\n", + "\n", + " return 0;\n", + " }\n", + " '''\n", + "else:\n", + " t=r'''\n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include \n", + " #include //used for parsing the command line arguments\n", + " #include //used for opening the memory device file\n", + " #include //used for rounding functions\n", + " #include \n", + " #include \n", + " #include \n", + "\n", + " static inline uint64_t GetTickCount()\n", + " {//Return ns counts\n", + " struct timeval tp;\n", + " gettimeofday(&tp,NULL);\n", + " return tp.tv_sec*1000+tp.tv_usec/1000;\n", + " }\n", + "\n", + " uint64_t CNT=CNT_DEF;\n", + "\n", + " int main()\n", + " {\n", + "\n", + " uint64_t start, end;\n", + " start=end=GetTickCount();\n", + "\n", + " asm volatile (\n", + " \"1:\\n\"\n", + " \"dec %0\\n\"\n", + " \"jnz 1b\\n\"\n", + " ::\"r\"(CNT)\n", + " );\n", + "\n", + " end=GetTickCount();\n", + "\n", + " printf(\" total 
time = %lu, freq = %lu \\n\", end-start, CNT/(end-start)/1000);\n", + "\n", + " return 0;\n", + " }\n", + " '''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "hidden": true, + "run_control": { + "frozen": true + } + }, + "outputs": [], + "source": [ + "%cd ~\n", + "with open(\"t.c\", 'w') as f:\n", + " f.writelines(t)\n", + "!gcc -O3 -DCNT_DEF=10000000000LL -o t t.c; gcc -O3 -DCNT_DEF=1000000000000LL -o t.delay t.c;\n", + "!for j in `seq 1 $(nproc)`; do echo -n $j; (for i in `seq 1 $j`; do taskset -c $i ./t.delay & done); sleep 1; ./t; killall t.delay; sleep 2; done" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Shutdown Jupyter; source ~/.bashrc; reboot Jupyter; run section [Initialize](#Initialize)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Build gluten" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Install docker" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "# Instructions from https://docs.docker.com/engine/install/ubuntu/\n", + "\n", + "# Add Docker's official GPG key:\n", + "!sudo -E apt-get update\n", + "!sudo -E apt-get install ca-certificates curl\n", + "!sudo -E install -m 0755 -d /etc/apt/keyrings\n", + "!sudo -E curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc\n", + "!sudo chmod a+r /etc/apt/keyrings/docker.asc\n", + "\n", + "# Add the repository to Apt sources:\n", + "!echo \\\n", + " \"deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \\\n", + " $(. 
/etc/os-release && echo \"$VERSION_CODENAME\") stable\" | \\\n", + " sudo -E tee /etc/apt/sources.list.d/docker.list > /dev/null\n", + "!sudo -E apt-get update" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo -E apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin >/dev/null 2>&1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "http_proxy=os.getenv('http_proxy')\n", + "https_proxy=os.getenv('https_proxy')\n", + "\n", + "if http_proxy or https_proxy:\n", + " !sudo mkdir -p /etc/systemd/system/docker.service.d\n", + " with open('/tmp/http-proxy.conf', 'w') as f:\n", + " s = '''\n", + "[Service]\n", + "{}\n", + "{}\n", + "'''.format(f'Environment=\"HTTP_PROXY={http_proxy}\"' if http_proxy else '', f'Environment=\"HTTPS_PROXY={https_proxy}\"' if https_proxy else '')\n", + " f.writelines(s)\n", + " !sudo cp /tmp/http-proxy.conf /etc/systemd/system/docker.service.d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!mkdir -p ~/.m2\n", + "\n", + "def get_proxy(proxy):\n", + " pos0 = proxy.rfind('/')\n", + " pos = proxy.rfind(':')\n", + " host = http_proxy[pos0+1:pos]\n", + " port = http_proxy[pos+1:]\n", + " return host, port\n", + "\n", + "if http_proxy or https_proxy:\n", + " with open(f\"/home/{user}/.m2/settings.xml\",\"w+\") as f:\n", + " f.write('''\n", + "\n", + " ''')\n", + " if http_proxy:\n", + " host, port = get_proxy(http_proxy)\n", + " f.write(f'''\n", + " \n", + " http_proxy\n", + " true\n", + " http\n", + " {host}\n", + " {port}\n", + " ''')\n", + " if https_proxy:\n", + " host, port = get_proxy(http_proxy)\n", + " f.write(f'''\n", + " \n", + " https_proxy\n", + " true\n", + " https\n", + " {host}\n", + " {port}\n", + " ''')\n", + " f.write('''\n", + " \n", + "\n", + "''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo systemctl daemon-reload" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo systemctl restart docker.service" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Build gluten" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo docker pull apache/gluten:vcpkg-centos-7" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "http_proxy=os.getenv('http_proxy')\n", + "https_proxy=os.getenv('https_proxy')\n", + "\n", + "container=!sudo docker run -e http_proxy={http_proxy} -e https_proxy={https_proxy} -itd apache/gluten:vcpkg-centos-7\n", + "containerid = container[0]\n", + "containerid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo docker exec {containerid} bash -c \"cd /opt && git clone https://github.com/apache/incubator-gluten.git gluten\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo docker exec {containerid} bash -c \"cd /opt && source 
/opt/rh/devtoolset-9/enable && cd gluten && ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --enable_hdfs=ON > build.log\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "if os.path.exists(f'/home/{user}/.m2/settings.xml'):\n", + " !sudo docker exec {containerid} bash -c \"mkdir -p ~/.m2\"\n", + " !sudo docker cp /home/{user}/.m2/settings.xml {containerid}:/root/.m2/settings.xml" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo docker exec {containerid} bash -c \"cd /opt/gluten && mvn clean package -DskipTests -Pspark-3.3 -Pbackends-velox\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!sudo docker cp {containerid}:/opt/gluten/package/target/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar ~/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !scp ~/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar {l}:~/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Generate data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Build spark-sql-perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!echo \"deb https://repo.scala-sbt.org/scalasbt/debian all main\" | sudo tee /etc/apt/sources.list.d/sbt.list\n", + "!echo \"deb https://repo.scala-sbt.org/scalasbt/debian /\" | sudo tee /etc/apt/sources.list.d/sbt_old.list\n", + "!curl -sL \"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823\" | sudo apt-key add\n", + "!sudo -E apt-get update > /dev/null 2>&1\n", + "!sudo -E apt-get install sbt > /dev/null 2>&1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "http_proxy=os.getenv('http_proxy')\n", + "https_proxy=os.getenv('https_proxy')\n", + "\n", + "def get_proxy(proxy):\n", + " pos0 = proxy.rfind('/')\n", + " pos = proxy.rfind(':')\n", + " host = http_proxy[pos0+1:pos]\n", + " port = http_proxy[pos+1:]\n", + " return host, port\n", + "\n", + "sbt_opts=''\n", + "\n", + "if http_proxy:\n", + " host, port = get_proxy(http_proxy)\n", + " sbt_opts = f'{sbt_opts} -Dhttp.proxyHost={host} -Dhttp.proxyPort={port}'\n", + "if https_proxy:\n", + " host, port = get_proxy(https_proxy)\n", + " sbt_opts = f'{sbt_opts} -Dhttps.proxyHost={host} -Dhttps.proxyPort={port}'\n", + " \n", + "if sbt_opts:\n", + " %env SBT_OPTS={sbt_opts}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!git clone https://github.com/databricks/spark-sql-perf.git ~/spark-sql-perf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!cd ~/spark-sql-perf && sbt package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!cp ~/spark-sql-perf/target/scala-2.12/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar ~/ipython/" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Start Hadoop/Spark cluster, Spark history server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!~/hadoop/bin/hadoop namenode -format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!~/hadoop/bin/hadoop datanode -format " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!~/hadoop/sbin/start-dfs.sh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!hadoop dfsadmin -safemode leave" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!hadoop fs -mkdir -p /tmp/sparkEventLog" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!cd ~/spark && sbin/start-history-server.sh" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "master=''\n", + "if clients:\n", + " !~/hadoop/sbin/start-yarn.sh\n", + " master='yarn'\n", + "else:\n", + " # If we run on single node, we use standalone mode\n", + " !{os.environ['SPARK_HOME']}/sbin/stop-slave.sh\n", + " !{os.environ['SPARK_HOME']}/sbin/stop-master.sh\n", + " !{os.environ['SPARK_HOME']}/sbin/start-master.sh\n", + " !{os.environ['SPARK_HOME']}/sbin/start-worker.sh spark://{hostname}:7077 -c {cpu_num}\n", + " master=f'spark://{hostname}:7077'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!jps\n", + "for l in clients:\n", + " !ssh {l} jps" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## TPCH" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!rm -rf ~/tpch-dbgen\n", + "!git clone https://github.com/databricks/tpch-dbgen.git ~/tpch-dbgen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !scp -r ~/tpch-dbgen {l}:~/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} cd ~/tpch-dbgen && git checkout 0469309147b42abac8857fa61b4cf69a6d3128a8 && make clean && make OS=LINUX" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "%cd ~/gluten/tools/workload/tpch/gen_data/parquet_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "# Suggest 2x cpu# partitions.\n", + "scaleFactor = 1500\n", + "numPartitions = 2*cpu_num if len(clients)==0 else len(clients)*2*cpu_num\n", + "dataformat = \"parquet\" # data format of data source\n", + "dataSourceCodec = \"snappy\"\n", + "rootDir = f\"/tpch_sf{scaleFactor}_{dataformat}_{dataSourceCodec}\" # root directory of location to create data in." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "# Verify parameters\n", + "print(f'scaleFactor = {scaleFactor}')\n", + "print(f'numPartitions = {numPartitions}')\n", + "print(f'dataformat = {dataformat}')\n", + "print(f'rootDir = {rootDir}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "scala=f'''import com.databricks.spark.sql.perf.tpch._\n", + "\n", + "\n", + "val scaleFactor = \"{scaleFactor}\" // scaleFactor defines the size of the dataset to generate (in GB).\n", + "val numPartitions = {numPartitions} // how many dsdgen partitions to run - number of input tasks.\n", + "\n", + "val format = \"{dataformat}\" // valid spark format like parquet \"parquet\".\n", + "val rootDir = \"{rootDir}\" // root directory of location to create data in.\n", + "val dbgenDir = \"/home/{user}/tpch-dbgen\" // location of dbgen\n", + "\n", + "val tables = new TPCHTables(spark.sqlContext,\n", + " dbgenDir = dbgenDir,\n", + " scaleFactor = scaleFactor,\n", + " useDoubleForDecimal = false, // true to replace DecimalType with DoubleType\n", + " useStringForDate = false) // true to replace DateType with StringType\n", + "\n", + "\n", + "tables.genData(\n", + " location = rootDir,\n", + " format = format,\n", + " overwrite = true, // overwrite the data that is already there\n", + " partitionTables = false, // do not create the partitioned fact tables\n", + " clusterByPartitionColumns = false, // shuffle to get partitions coalesced into single files.\n", + " filterOutNullPartitionValues = false, // true to filter out the partition with NULL key value\n", + " tableFilter = \"\", // \"\" means generate all tables\n", + " numPartitions = numPartitions) // how many dsdgen partitions to run - number of input tasks.\n", + "'''\n", + "\n", + "with open(\"tpch_datagen_parquet.scala\",\"w\") as f:\n", + " f.writelines(scala)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "executor_cores = 8\n", + "num_executors=cpu_num/executor_cores\n", + "executor_memory = (totalmemory - 10*1024*1024)/num_executors - 1*1024*1024\n", + "\n", + "# Verify parameters\n", + "print(f'--master {master}')\n", + "print(f'--num-executors {int(num_executors)}')\n", + "print(f'--executor-cores {int(executor_cores)}')\n", + "print(f'--executor-memory {int(executor_memory)}k')\n", + "print(f'--conf spark.sql.shuffle.partitions={numPartitions}')\n", + "print(f'--conf spark.sql.parquet.compression.codec={dataSourceCodec}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "tpch_datagen_parquet=f'''\n", + "cat tpch_datagen_parquet.scala | {os.environ['SPARK_HOME']}/bin/spark-shell \\\n", + " --master {master} \\\n", + " --name tpch_gen_parquet \\\n", + " --driver-memory 10g \\\n", + " --num-executors {int(num_executors)} \\\n", + " --executor-cores {int(executor_cores)} \\\n", + " --executor-memory {int(executor_memory)}k \\\n", + " --conf spark.executor.memoryOverhead=1g \\\n", + " --conf spark.sql.broadcastTimeout=4800 \\\n", + " --conf spark.driver.maxResultSize=4g \\\n", + " --conf spark.sql.shuffle.partitions={numPartitions} \\\n", + " --conf spark.sql.parquet.compression.codec={dataSourceCodec} \\\n", + " --conf spark.network.timeout=800s \\\n", + " --conf spark.executor.heartbeatInterval=200s \\\n", + " 
--jars /home/{user}/ipython/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar \\\n", + "'''\n", + "\n", + "with open(\"tpch_datagen_parquet.sh\",\"w\") as f:\n", + " f.writelines(tpch_datagen_parquet)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!nohup bash tpch_datagen_parquet.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## TPCDS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!rm -rf ~/tpcds-kit\n", + "!git clone https://github.com/databricks/tpcds-kit.git ~/tpcds-kit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !scp -r ~/tpcds-kit {l}:~/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} \"cd ~/tpcds-kit/tools && make clean && make OS=LINUX CC=gcc-9\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "%cd ~/gluten/tools/workload/tpcds/gen_data/parquet_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "# Suggest 2x cpu# partitions\n", + "scaleFactor = 1500\n", + "numPartitions = 2*cpu_num if len(clients)==0 else len(clients)*2*cpu_num\n", + "dataformat = \"parquet\" # data format of data source\n", + "dataSourceCodec = \"snappy\"\n", + "rootDir = f\"/tpcds_sf{scaleFactor}_{dataformat}_{dataSourceCodec}\" # root directory of location to create data in." 
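+    ,
+    "\n",
+    "\n",
+    "# Added sketch: same sanity check as for TPC-H. Note that the TPC-DS generator\n",
+    "# below writes partitioned fact tables, so the final file count also depends on\n",
+    "# the number of table partitions, not only on numPartitions.\n",
+    "num_nodes = max(1, len(clients))\n",
+    "print(f'{numPartitions} dsdgen partitions in total, ~{numPartitions // num_nodes} per node')"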
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "# Verify parameters\n", + "print(f'scaleFactor = {scaleFactor}')\n", + "print(f'numPartitions = {numPartitions}')\n", + "print(f'dataformat = {dataformat}')\n", + "print(f'rootDir = {rootDir}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "scala=f'''import com.databricks.spark.sql.perf.tpcds._\n", + "\n", + "val scaleFactor = \"{scaleFactor}\" // scaleFactor defines the size of the dataset to generate (in GB).\n", + "val numPartitions = {numPartitions} // how many dsdgen partitions to run - number of input tasks.\n", + "\n", + "val format = \"{dataformat}\" // valid spark format like parquet \"parquet\".\n", + "val rootDir = \"{rootDir}\" // root directory of location to create data in.\n", + "val dsdgenDir = \"/home/{user}/tpcds-kit/tools/\" // location of dbgen\n", + "\n", + "val tables = new TPCDSTables(spark.sqlContext,\n", + " dsdgenDir = dsdgenDir,\n", + " scaleFactor = scaleFactor,\n", + " useDoubleForDecimal = false, // true to replace DecimalType with DoubleType\n", + " useStringForDate = false) // true to replace DateType with StringType\n", + "\n", + "\n", + "tables.genData(\n", + " location = rootDir,\n", + " format = format,\n", + " overwrite = true, // overwrite the data that is already there\n", + " partitionTables = true, // create the partitioned fact tables\n", + " clusterByPartitionColumns = true, // shuffle to get partitions coalesced into single files.\n", + " filterOutNullPartitionValues = false, // true to filter out the partition with NULL key value\n", + " tableFilter = \"\", // \"\" means generate all tables\n", + " numPartitions = numPartitions) // how many dsdgen partitions to run - number of input tasks.\n", + "'''\n", + "\n", + "with open(\"tpcds_datagen_parquet.scala\",\"w\") as f:\n", + " f.writelines(scala)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "executor_cores = 8\n", + "num_executors=cpu_num/executor_cores\n", + "executor_memory = (totalmemory - 10*1024*1024)/num_executors - 1*1024*1024\n", + "\n", + "# Verify parameters\n", + "print(f'--master {master}')\n", + "print(f'--num-executors {int(num_executors)}')\n", + "print(f'--executor-cores {int(executor_cores)}')\n", + "print(f'--executor-memory {int(executor_memory)}k')\n", + "print(f'--conf spark.sql.shuffle.partitions={numPartitions}')\n", + "print(f'--conf spark.sql.parquet.compression.codec={dataSourceCodec}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "tpcds_datagen_parquet=f'''\n", + "cat tpcds_datagen_parquet.scala | {os.environ['SPARK_HOME']}/bin/spark-shell \\\n", + " --master {master} \\\n", + " --name tpcds_gen_parquet \\\n", + " --driver-memory 10g \\\n", + " --num-executors {int(num_executors)} \\\n", + " --executor-cores {int(executor_cores)} \\\n", + " --executor-memory {int(executor_memory)}k \\\n", + " --conf spark.executor.memoryOverhead=1g \\\n", + " --conf spark.sql.broadcastTimeout=4800 \\\n", + " --conf spark.driver.maxResultSize=4g \\\n", + " --conf spark.sql.shuffle.partitions={numPartitions} \\\n", + " --conf spark.sql.parquet.compression.codec={dataSourceCodec} \\\n", + " --conf spark.network.timeout=800s \\\n", + " --conf spark.executor.heartbeatInterval=200s \\\n", + " 
--jars /home/{user}/ipython/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar \\\n", + "'''\n", + "\n", + "with open(\"tpcds_datagen_parquet.sh\",\"w\") as f:\n", + " f.writelines(tpcds_datagen_parquet)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!nohup bash tpcds_datagen_parquet.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Install Trace-Viewer (optional)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Clone the master branch\n", + "```\n", + "cd ~\n", + "git clone https://github.com/catapult-project/catapult.git -b master\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Trace-Viewer requires python version 2.7. Create a virtualenv for python2.7\n", + "```\n", + "sudo apt install -y python2.7\n", + "virtualenv -p /usr/bin/python2.7 py27\n", + "source py27/bin/activate\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Apply patch\n", + "\n", + "```\n", + "cd catapult\n", + "```\n", + "```\n", + "git apply <\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# System Settings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "\n", + "pd.set_option('display.max_rows', None)\n", + "\n", + "# Convert the os.environ object to a dictionary and then to a DataFrame\n", + "env_df = pd.DataFrame(list(dict(os.environ).items()), columns=['Environment Variable', 'Value'])\n", + "\n", + "# Display the DataFrame\n", + "from IPython.display import display\n", + "\n", + "display(env_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import socket\n", + "localhost=socket.gethostname()\n", + "local_ip=socket.gethostbyname(localhost)\n", + "\n", + "print(f'localhost: {localhost}')\n", + "print(f'ip: {local_ip}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark_version=!head -n1 $SPARK_HOME/RELEASE | awk '{print $2}'\n", + "spark_version = spark_version[0]\n", + "\n", + "print(f\"Spark version from SPARK_HOME: {spark_version}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import sys\n", + "\n", + "logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.ERROR, stream=sys.stdout)\n", + "logger = logging.getLogger()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "hdfs_event_dir=''\n", + "local_event_dir=''\n", + "\n", + "def get_spark_eventlog_dir(path):\n", + " eventlog_dir = None\n", + " eventlog_enabled = False\n", + " try:\n", + " with open(path, 'r') as f:\n", + " for line in f:\n", + " if 
line.startswith('spark.eventLog.dir'):\n", + " eventlog_dir = line.split(' ')[-1].strip()\n", + " elif line.startswith('spark.eventLog.enabled'):\n", + " eventlog_enabled = line.split(' ')[-1].strip().lower() == 'true'\n", + " except FileNotFoundError:\n", + " raise SystemExit(f\"'spark-defaults.conf' not found: {path}\")\n", + " if not eventlog_enabled:\n", + " raise SystemExit(\"'spark.eventLog.enabled' must be enabled.\")\n", + " return eventlog_dir\n", + "\n", + "spark_defaults_conf = None\n", + "\n", + "if 'SPARK_CONF_DIR' in os.environ:\n", + " spark_defaults_conf = os.path.join(os.environ['SPARK_CONF_DIR'], 'spark-defaults.conf')\n", + "elif 'SPARK_HOME' in os.environ:\n", + " spark_defaults_conf = os.path.join(os.environ['SPARK_HOME'], 'conf', 'spark-defaults.conf')\n", + "\n", + "if spark_defaults_conf:\n", + " event_log_dir = get_spark_eventlog_dir(spark_defaults_conf)\n", + " if event_log_dir:\n", + " print(f\"spark.eventLog.dir: {event_log_dir}\")\n", + " if event_log_dir[:7] == 'hdfs://':\n", + " hdfs_event_dir = event_log_dir\n", + " elif event_log_dir[:6] == 'file:/':\n", + " local_event_dir = event_log_dir[6:]\n", + " else:\n", + " raise SystemExit(f\"'spark.eventLog.dir' is not configured in {spark_defaults_conf}\")\n", + "else:\n", + " raise SystemExit(\"Cannot get `spark.eventLog.dir`. Neither SPARK_CONF_DIR nor SPARK_HOME defined in envrionment variables.\")\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Monitor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import findspark\n", + "import os\n", + "\n", + "findspark.init(os.environ['SPARK_HOME'])\n", + "os.environ.setdefault('SPARK_SUBMIT_OPTS', '-Dscala.usejavacp=true')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import atexit\n", + "import collections\n", + "import gzip\n", + "import importlib\n", + "import json\n", + "import logging\n", + "import math\n", + "import os\n", + "import pathlib\n", + "import shutil\n", + "import signal\n", + "import subprocess\n", + "import tempfile\n", + "import threading\n", + "import time\n", + "import timeit\n", + "import traceback\n", + "\n", + "import matplotlib\n", + "import matplotlib.colors as colors\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.ticker as mtick\n", + "import numpy as np\n", + "import pandas as pd\n", + "import platform\n", + "import pyspark\n", + "import pyspark.sql.functions as F\n", + "import pyspark.sql.types as T\n", + "import spylon_kernel\n", + "from collections import namedtuple\n", + "from concurrent.futures import ThreadPoolExecutor\n", + "from datetime import date\n", + "from functools import reduce\n", + "from IPython.display import display, HTML\n", + "from matplotlib import rcParams\n", + "from pyspark import SparkConf, SparkContext\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.ml.feature import StringIndexer, VectorAssembler\n", + "from pyspark.sql import SparkSession, SQLContext, Window\n", + "from pyspark.sql.functions import col, floor, lit, rank, to_date\n", + "from pyspark.sql.types import (DoubleType, FloatType, IntegerType,\n", + " StringType, StructField, StructType,\n", + " TimestampType)\n", + "\n", + "from spylon_kernel import register_ipython_magics\n", + "from spylon.spark.utils import SparkJVMHelpers\n", + "\n", + "register_ipython_magics()\n", + "\n", + 
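+    "# Comments added for clarity: register_ipython_magics() from spylon registers the\n",
+    "# %%scala cell magic, and the rcParams below only pick a monospaced font and size\n",
+    "# for the sar charts drawn later by draw_sar().\n",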
"rcParams['font.sans-serif'] = 'Courier New'\n", + "rcParams['font.family'] = 'Courier New'\n", + "rcParams['font.size'] = '12'\n", + "\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "import socket\n", + "import os\n", + "import sys\n", + "\n", + "from pathlib import Path\n", + "home = str(Path.home())\n", + "\n", + "def upload_profile(server, base_dir, appid):\n", + " local_profile_dir = os.path.join(home, 'profile')\n", + " !mkdir -p {local_profile_dir}\n", + " !cd {local_profile_dir}; rm -f {appid}.tar.gz; tar zcvf {appid}.tar.gz {appid} >/dev/null 2>&1\n", + " \n", + " server_local_dir=os.path.join('PAUS', base_dir)\n", + " server_local_profile_dir=os.path.join(server_local_dir, 'profile')\n", + " server_hdfs_dir=f'/{base_dir}/'\n", + "\n", + " !ssh {server} \"mkdir -p {server_local_profile_dir}\"\n", + " !ssh {server} \"cd {server_local_profile_dir}; rm {appid}.tar.gz; rm -r {appid} >/dev/null 2>&1\"\n", + " !scp {local_profile_dir}/{appid}.tar.gz {server}:{server_local_profile_dir}/\n", + " !ssh {server} \"cd {server_local_profile_dir} && tar zxf {appid}.tar.gz\"\n", + " !ssh {server} \"hdfs dfs -mkdir -p {server_hdfs_dir}; hdfs dfs -rm -r {server_hdfs_dir}{appid}; hdfs dfs -put {server_local_profile_dir}/{appid} {server_hdfs_dir}\"\n", + " !ssh {server} \"cd {server_local_profile_dir}; rm {appid}.tar.gz; rm -r {appid}\"\n", + "\n", + "def killsar(clients):\n", + " for l in clients:\n", + " out=!ssh $l \"ps aux | grep -w sar | grep -v grep | tr -s ' ' | cut -d' ' -f2\"\n", + " for x in out:\n", + " !ssh $l \"kill $x > /dev/null 2>&1\"\n", + " for l in clients:\n", + " out=!ssh $l \"ps aux | grep -w pidstat | grep -v grep | tr -s ' ' | cut -d' ' -f2\"\n", + " for x in out:\n", + " !ssh $l \"kill $x > /dev/null 2>&1\"\n", + " for l in clients:\n", + " out=!ssh $l \"ps aux | grep -w perf | grep -v grep | tr -s ' ' | cut -d' ' -f2\"\n", + " for x in out:\n", + " !ssh root@$l \"kill $x > /dev/null 2>&1\"\n", + "\n", + "def killnumactl(clients):\n", + " for l in clients:\n", + " out =!ssh $l \"ps aux | grep numactl | grep bash | tr -s ' ' | cut -d' ' -f2\"\n", + " for x in out:\n", + " !ssh $l \"kill $x > /dev/null 2>&1\"\n", + "\n", + "def startmonitor(clients,appid,**kwargs):\n", + " local_profile_dir=os.path.join(home, 'profile')\n", + " prof=os.path.join(local_profile_dir, appid)\n", + " !mkdir -p {prof}\n", + " \n", + " for l in clients:\n", + " !ssh root@{l} date\n", + " \n", + " killsar(clients)\n", + " \n", + " perfsyscalls=kwargs.get(\"collect_perf_syscall\",None)\n", + " \n", + " for l in clients:\n", + " prof_client=os.path.join(prof, l)\n", + " !mkdir -p {prof_client}\n", + " !ssh {l} mkdir -p {prof_client}\n", + " !ssh {l} \"sar -o {prof_client}/sar.bin -r -u -d -B -n DEV 1 >/dev/null 2>&1 &\"\n", + " !ssh root@{l} \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f 1 | xargs -I % bash -c '(cat /proc/%/status >> {prof_client}/%.stat; cat /proc/%/io >> {prof_client}/%.stat)'\"\n", + " if kwargs.get(\"collect_pid\",False):\n", + " !ssh {l} \"jps | grep CoarseGrainedExecutorBackend | head -n 1 | cut -d' ' -f 1 | xargs -I % pidstat -h -t -p % 1 > {prof_client}/pidstat.out 2>/dev/null &\"\n", + " !ssh root@{l} 'cat /proc/uptime | cut -d\" \" -f 1 | xargs -I ^ date -d \"- ^ seconds\" +%s.%N' > $prof/$l/uptime.txt\n", + " if kwargs.get(\"collect_sched\",False):\n", + " !ssh root@{l} 'perf trace -e \"sched:sched_switch\" -C 8-15 -o {prof_client}/sched.txt -T -- 
sleep 10000 >/dev/null 2>/dev/null &'\n", + " if perfsyscalls is not None:\n", + " !ssh root@{l} \"perf stat -e 'syscalls:sys_exit_poll,syscalls:sys_exit_epoll_wait' -a -I 1000 -o {prof_client}/perfstat.txt >/dev/null 2>&1 & \"\n", + " if kwargs.get(\"collect_hbm\",False):\n", + " hbm_nodes = kwargs.get(\"hbm_nodes\")\n", + " if hbm_nodes is not None:\n", + " print(\"collect_hbm\")\n", + " hbm_nodes = '\\|'.join([\"node \" + str(i) for i in hbm_nodes])\n", + " %env hbm_numa_nodes={hbm_nodes}\n", + " %env hbm_l = {l}\n", + " %env hbm_prof = {prof}\n", + " !ssh $hbm_l \"echo timestamp, size, free > $hbm_prof/$hbm_l/numactl.csv\"\n", + " !ssh $hbm_l \"while :; do echo \\$(numactl -H | grep '$hbm_numa_nodes' | grep 'size' | awk '{ print \\$4 }' | awk '{ s += \\$1 } END { print s }'), \\$(numactl -H | grep '$hbm_numa_nodes' | grep 'free' | awk '{ print \\$4 }' | awk '{ s += \\$1 } END { print s }') | ts '%Y-%m-%d %H:%M:%S,' >> $hbm_prof/$hbm_l/numactl.csv; sleep 1; done >/dev/null 2>&1 &\"\n", + " else:\n", + " print(\"Missing argument: hbm_nodes. e.g. hbm_nodes = list(range(8,16))\")\n", + " return prof\n", + "\n", + "def stopmonitor(clients, sc, appid, **kwargs):\n", + " %cd ~\n", + " \n", + " local_profile_dir=os.path.join(home, 'profile')\n", + " prof=os.path.join(local_profile_dir, appid)\n", + " !mkdir -p {prof}\n", + "\n", + " killsar(clients)\n", + " killnumactl(clients) \n", + " \n", + " with open(f\"{prof}/starttime\",\"w\") as f:\n", + " f.write(\"{:d}\".format(int(time.time()*1000)))\n", + " \n", + " for l in clients:\n", + " prof_client=os.path.join(prof, l)\n", + " !ssh {l} \"sar -f {prof_client}/sar.bin -r > {prof_client}/sar_mem.sar;sar -f {prof_client}/sar.bin -u > {prof_client}/sar_cpu.sar;sar -f {prof_client}/sar.bin -d -p > {prof_client}/sar_disk.sar;sar -f {prof_client}/sar.bin -n DEV > {prof_client}/sar_nic.sar;sar -f {prof_client}/sar.bin -B > {prof_client}/sar_page.sar;\" \n", + " !ssh root@{l} \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f 1 | xargs -I % bash -c '(cat /proc/%/status >> {prof_client}/%.stat; cat /proc/%/io >> {prof_client}/%.stat)'\"\n", + " !ssh {l} \"sar -V \" > {prof_client}/sarv.txt\n", + " !test -f {prof_client}/perfstat.txt && head -n 1 {prof_client}/perfstat.txt > {prof_client}/perfstarttime\n", + " if l!= socket.gethostname():\n", + " !scp -r {l}:{prof_client} {prof}/ > /dev/null 2>&1\n", + " \n", + " if sc is not None:\n", + " sc.stop()\n", + " \n", + " if hdfs_event_dir != '':\n", + " !hadoop fs -copyToLocal {hdfs_event_dir}/{appid} {prof}/app.log\n", + " elif local_event_dir != '':\n", + " !cp {local_event_dir}/{appid} {prof}/app.log" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def pinexecutor_numa(clients):\n", + " cpunum = !ssh {clients[0]} \"grep 'processor' /proc/cpuinfo | wc -l\"\n", + " cpunum = int(cpunum[0])\n", + " \n", + " numanodes=!ssh {clients[0]} \"cat /sys/devices/system/node/node*/cpulist\"\n", + " numanodes = list(filter(lambda x: x != '', numanodes))\n", + " print(numanodes)\n", + " for client in clients:\n", + " pids=!ssh {client} \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f1\"\n", + " print(client,\":\",len(pids),\" \",\"\\t\".join(map(str,pids)))\n", + " \n", + " cpunum_c = !ssh {client} \"grep 'processor' /proc/cpuinfo | wc -l\"\n", + " cpunum_c = int(cpunum_c[0])\n", + " if cpunum_c != cpunum:\n", + " print(f\"client {client} cpunum not match!\")\n", + " return\n", + " numanodes_c=!ssh {client} \"cat 
/sys/devices/system/node/node*/cpulist\"\n", + " numanodes_c = list(filter(lambda x: x != '', numanodes))\n", + " time.sleep(1)\n", + " print(numanodes_c)\n", + " if numanodes_c != numanodes:\n", + " print(f\"client {client} numanodes not match!\")\n", + " return\n", + " \n", + " idx = 0\n", + " nodes=len(numanodes)\n", + " for i in range(nodes):\n", + " cpus = numanodes[i]\n", + " for l in pids[idx:idx+int(len(pids)/nodes)]: # executors on 1 numanode\n", + " print(f\" {cpus} {l}\")\n", + " !ssh {client} \"taskset -a -p -c $cpus $l > /dev/null 2>&1 \"\n", + " idx += int(len(pids)/nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def config_pagecache(clients, run_gluten=True):\n", + " for l in clients:\n", + " if run_gluten:\n", + " !ssh root@$l \"echo 80 > /proc/sys/vm/dirty_ratio\"\n", + " !ssh root@$l \"echo 50 > /proc/sys/vm/dirty_background_ratio\"\n", + " !ssh root@$l \"echo 360000 > /proc/sys/vm/dirty_expire_centisecs\"\n", + " !ssh root@$l \"echo 3000 > /proc/sys/vm/dirty_writeback_centisecs\"\n", + "\n", + " else:\n", + " !ssh root@$l \"echo 10 > /proc/sys/vm/dirty_ratio\"\n", + " !ssh root@$l \"echo 20 > /proc/sys/vm/dirty_background_ratio\"\n", + " !ssh root@$l \"echo 3000 > /proc/sys/vm/dirty_expire_centisecs\"\n", + " !ssh root@$l \"echo 500 > /proc/sys/vm/dirty_writeback_centisecs\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def print_kernel_params(clietns):\n", + " params = {\n", + " 'transparent hugepage': '/sys/kernel/mm/transparent_hugepage/enabled',\n", + " 'auto numa balancing': '/proc/sys/kernel/numa_balancing',\n", + " 'scaling governor': '/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor',\n", + " 'scaling max freq': '/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq',\n", + " 'scaling cur freq': '/sys/devices/system/cpu/cpu*/cpufreq/scaling_cur_freq',\n", + " 'power & perf policy': '/sys/devices/system/cpu/cpu*/power/energy_perf_bias',\n", + " 'dirty_ratio': '/proc/sys/vm/dirty_ratio',\n", + " 'dirty_background_ratio': '/proc/sys/vm/dirty_background_ratio',\n", + " 'dirty_expire_centisecs': '/proc/sys/vm/dirty_expire_centisecs',\n", + " 'dirty_writeback_centisecs': '/proc/sys/vm/dirty_writeback_centisecs'\n", + " }\n", + " for k, param in params.items():\n", + " print()\n", + " print(f'{k} ({param})')\n", + " for l in clients:\n", + " print(l + \": \", end='')\n", + " res = !ssh root@$l \"cat {param}\"\n", + " print(*res)\n", + " # print numactl\n", + " print()\n", + " print(\"numactl -H\")\n", + " for l in clients:\n", + " print(l + \":\")\n", + " res = !ssh $l \"numactl -H\"\n", + " print('\\n'.join(res))\n", + " # print memory freq\n", + " print()\n", + " print(\"Memory Frequency\")\n", + " for l in clients:\n", + " print(l + \":\")\n", + " res= !ssh root@$l \"dmidecode -t memory | grep Speed\"\n", + " print('\\n'.join(res))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "def dropcache(clients):\n", + " for l in clients:\n", + " !ssh root@$l \"sync && echo 3 > /proc/sys/vm/drop_caches; echo 1 >/proc/sys/vm/compact_memory; free -h\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def config_mem_cgroup(clients):\n", + " mem_cgroup = \"\"\"\n", + "CGROUP_ROOT=/sys/fs/cgroup/gluten\n", + "\n", + "if [ ! 
-d $CGROUP_ROOT ] ; then\n", + " sudo mkdir $CGROUP_ROOT\n", + " # enable memory for subtree\n", + " sudo bash -c \"echo '+memory' >> $CGROUP_ROOT/cgroup.subtree_control\"\n", + "fi\n", + "\n", + "# move each process to sub memory group\n", + "index=0\n", + "for pid in `jps | grep Coarse | awk '{print $1}'` ; do\n", + " target_cgroup=$CGROUP_ROOT/mem-${index}\n", + " if [ ! -d $target_cgroup ] ; then\n", + " sudo mkdir $target_cgroup\n", + " fi\n", + " proc_file=$target_cgroup/cgroup.procs\n", + " sudo bash -c \"echo $pid >> $proc_file\"\n", + " index=`expr $index + 1`\n", + "done\n", + " \"\"\"\n", + " with open(f'{home}/mem-cgroup.sh', 'w+') as f:\n", + " f.writelines(mem_cgroup)\n", + " for l in clients:\n", + " !scp {home}/mem-cgroup.sh {l}:{home}/ >/dev/null 2>&1\n", + " !ssh {l} \"bash {home}/mem-cgroup.sh >/dev/null 2>&1 &\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "\n", + "\n", + "from IPython.display import display, HTML\n", + "\n", + "def get_io_stats(appid, client):\n", + " file_path = os.path.join(home,'profile',appid,client)\n", + " statf = [f for f in os.listdir(file_path) if f.endswith('.stat')]\n", + " statmap=[]\n", + " for f in statf:\n", + " statmap.append({'pid':f[:-len(\".stat\")]})\n", + " with open(os.path.join(file_path, f),\"r\") as fi:\n", + " cnts=fi.readlines()\n", + " for l in cnts:\n", + " for fld in ['rchar','wchar','syscr','syscw','read_bytes','write_bytes','cancelled_write_bytes']:\n", + " if l.startswith(fld):\n", + " if not fld in statmap[-1]:\n", + " statmap[-1][fld]=int(l.split(\" \")[-1].strip())\n", + " else:\n", + " statmap[-1][fld]=(int(l.split(\" \")[-1].strip())-statmap[-1][fld])/1024/1024/1024\n", + "\n", + " df = pd.DataFrame(statmap).drop('pid', axis=1).sum().to_frame()\n", + " df.columns = ['sum']\n", + " return df\n", + "\n", + "# Preprocess 'time' column\n", + "def process_time(dataframes):\n", + " for df in dataframes:\n", + " df.columns=['time']+list(df.columns[1:])\n", + " df = df[df.time != 'Average:']\n", + " df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S').dt.time\n", + " df['time'] = df['time'].apply(lambda dt: dt.hour*3600 + dt.minute*60 + dt.second)\n", + "\n", + " offset = 12 * 3600 # half-day seconds\n", + " for i in range(1, len(df)):\n", + " if df['time'].iloc[i] < df['time'].iloc[i-1]: # Detect AM->PM or PM->AM\n", + " for j in range(i, len(df)): # Apply offset until end\n", + " df['time'].iloc[j] += offset\n", + "\n", + " df['time'] = df['time'].astype(int)\n", + " yield df\n", + "\n", + "def draw_sar(appid, qtime=None, disk_dev=None, nic_dev=None, client=None):\n", + " if client is None:\n", + " client = clients[0]\n", + "\n", + " display(HTML('{:s}'.format(client)))\n", + "\n", + " display(get_io_stats(appid, client))\n", + "\n", + " # Read data\n", + " profile_dir = os.path.join(home,'profile',appid,client)\n", + " datafiles = [os.path.join(profile_dir, datafile) for datafile in ['sar_cpu.sar', 'sar_mem.sar', 'sar_disk.sar', 'sar_nic.sar', 'sar_page.sar']]\n", + " dataframes = [pd.read_csv(datafile, header=1, delim_whitespace=True, parse_dates=True) for datafile in datafiles]\n", + " \n", + " num_figs=5\n", + " fig, axs=plt.subplots(num_figs,1,sharex=True,figsize=(30,5*4))\n", + "\n", + " [cpu_df, mem_df, disk_df, nic_df, page_df] = process_time(dataframes)\n", + "\n", + " # CPU usage\n", + " cpu_df['total'] = cpu_df['%user'] + cpu_df['%system'] + 
cpu_df['%iowait']\n", + "\n", + " starttime = cpu_df[cpu_df['total'] > 50]['time'].min() - 1\n", + " cpu_df['time'] -= starttime\n", + "\n", + " axs[4].stackplot(cpu_df['time'], cpu_df['%user'], cpu_df['%system'], cpu_df['%iowait'], labels=['user','system','iowait'])\n", + " axs[4].legend(loc='upper left')\n", + "\n", + " # Memory usage\n", + " mem_df['dirty_cached'] = mem_df['kbdirty'] * mem_df['%memused'] / mem_df['kbmemused']\n", + " mem_df['clean_cached'] = (mem_df['kbcached'] - mem_df['kbdirty']) * mem_df['%memused'] / mem_df['kbmemused']\n", + " mem_df['used'] = mem_df['kbmemused'] * mem_df['%memused'] / mem_df['kbmemused']\n", + "# mem_df['used'] = (mem_df['kbmemused'] - mem_df['kbbuffers'] - mem_df['kbcached'])* mem_df['%memused'] / mem_df['kbmemused']\n", + "\n", + " mem_df['time'] -= starttime\n", + "\n", + " axs[0].stackplot(mem_df['time'], mem_df['used'], mem_df['clean_cached'], mem_df['dirty_cached'], labels=['used','clean cached','dirty cached'])\n", + " axs[0].legend(loc='upper left')\n", + " axs[0].grid(axis = 'y')\n", + "\n", + " # Disk usage\n", + " if disk_dev is not None:\n", + " disk_df = disk_df[disk_df['DEV'].isin(disk_dev)]\n", + " disk_df['rkB/s'] = disk_df['rkB/s'].astype(float)\n", + " disk_df['wkB/s'] = disk_df['wkB/s'].astype(float)\n", + " disk_df['%util'] = disk_df['%util'].astype(float)\n", + "\n", + "\n", + " disk_df = disk_df.groupby('time').agg({'rkB/s': 'sum', 'wkB/s': 'sum', '%util':'mean'}).reset_index()\n", + " disk_df['read'] = disk_df['rkB/s'] / 1024\n", + " disk_df['write'] = disk_df['wkB/s'] / 1024\n", + "\n", + " disk_df['time'] -= starttime\n", + "\n", + " axs[1].stackplot(disk_df['time'], disk_df['read'], disk_df['write'], labels=['read MB/s','write MB/s'])\n", + " axs[1].grid(axis = 'y')\n", + "\n", + " ax2 = axs[1].twinx()\n", + "\n", + " ax2.plot(disk_df['time'], disk_df['%util'],'g-')\n", + " axs[1].legend(loc='upper left')\n", + "\n", + " \n", + " # Nic usage\n", + " if nic_dev is not None:\n", + " nic_df = nic_df[nic_df['IFACE'].isin(nic_dev)]\n", + " nic_df['rxkB/s'] = nic_df['rxkB/s'].astype(float)\n", + " nic_df['txkB/s'] = nic_df['txkB/s'].astype(float)\n", + " \n", + " nic_df = nic_df.groupby('time').agg({'rxkB/s': 'sum', 'txkB/s': \"sum\"}).reset_index()\n", + " nic_df['rx'] = nic_df['rxkB/s'] / 1024\n", + " nic_df['tx'] = nic_df['txkB/s'] / 1024\n", + " \n", + " nic_df['time'] -= starttime\n", + " \n", + " axs[2].stackplot(nic_df['time'], nic_df['rx'], nic_df['tx'], labels=['rx MB/s','tx MB/s'])\n", + " axs[2].legend(loc='upper left')\n", + " axs[2].grid(axis = 'y')\n", + "\n", + " # Pagefaults\n", + " page_df['minflt/s'] = page_df['fault/s'] - page_df['majflt/s']\n", + " \n", + " page_df['time'] -= starttime\n", + "\n", + " axs[3].stackplot(page_df['time'], page_df['minflt/s'], page_df['majflt/s'], labels=['minor_fault/s','major_fault/s'])\n", + " axs[3].legend(loc='upper left')\n", + " axs[3].grid(axis = 'y')\n", + "\n", + " # Add vertical lines and text for qtime, and calculate per query cpu%\n", + " if qtime is not None:\n", + " for ax in axs:\n", + " x = 0\n", + " ax.axvline(x = x, color = 'b')\n", + " for k, v in qtime.items():\n", + " x += v\n", + " ax.axvline(x = x, color = 'b')\n", + "\n", + " tx = 0\n", + " for k, v in qtime.items():\n", + " if v / x > 15 / 772:\n", + " ax.text(tx + v / 2 - 6 * x / 772, ax.get_ylim()[1] * 1.05, k)\n", + " tx += v\n", + "\n", + " x = 0\n", + " qtime_se = {}\n", + " cols = ['%user','%system','%iowait']\n", + " for k, v in qtime.items():\n", + " filtered_df = cpu_df[(cpu_df['time'] >= x) & 
(cpu_df['time'] <= x+v)]\n", + " averages = filtered_df[cols].mean()\n", + " qtime_se[k] = averages.tolist()\n", + " x += v\n", + " if qtime_se:\n", + " perqcpu = pd.DataFrame(qtime_se).T\n", + " perqcpu.columns = cols\n", + " display(perqcpu)\n", + "\n", + " plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def convert_to_etc_gmt(tz_offset=None):\n", + " # Run the 'date +%z' command and get the output\n", + " if not tz_offset:\n", + " tz_offset = !date +%z\n", + " tz_offset = tz_offset[0]\n", + " \n", + " # Extract the sign and the hour/minute offset\n", + " sign = tz_offset[0]\n", + " hours = int(tz_offset[1:3])\n", + " minutes = int(tz_offset[3:])\n", + "\n", + " # Convert the offset to a GMT value\n", + " gmt_offset = hours + (minutes / 60)\n", + " if sign == '+':\n", + " gmt_offset = -gmt_offset\n", + " else:\n", + " gmt_offset = abs(gmt_offset)\n", + "\n", + " # Construct the Etc/GMT string\n", + " etc_gmt = f\"Etc/GMT{int(gmt_offset):+d}\"\n", + " return etc_gmt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TestTPC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import socket\n", + "from dataclasses import dataclass\n", + "from functools import wraps\n", + "from pathlib import Path\n", + "from typing import List \n", + "\n", + "class TestTPC:\n", + " @dataclass\n", + " class query_info:\n", + " tables: List[str]\n", + " sql: List[str]\n", + "\n", + " query_infos = {}\n", + " query_ids =[]\n", + "\n", + " tpctables=[]\n", + " tpc_query_path = ''\n", + " \n", + " def __init__(self, spark, table_dir, run_gluten, workload, server, base_dir, nb_name, data_source = 'parquet'):\n", + " self.spark = spark\n", + " self.sc = spark.sparkSession.sparkContext\n", + " self.appid = self.sc.applicationId\n", + " self.run_gluten = run_gluten\n", + " self.workload = workload\n", + " self.table_dir = table_dir\n", + " self.server = server\n", + " self.base_dir = base_dir\n", + " self.nb_name = nb_name\n", + " self.data_source = data_source\n", + " self.table_loaded = False\n", + " self.result = {}\n", + " self.stopped = False\n", + " self.perf_html = ''\n", + " self.finished_nb = ''\n", + " for l in os.listdir(self.tpc_query_path):\n", + " if (l[-3:] == 'sql'):\n", + " with open(self.tpc_query_path+l,\"r\") as f:\n", + " self.query_infos[l.split(\".\")[0]]=self.query_info(self.tpctables,[\"\\n\".join(f.readlines())])\n", + " self.query_ids = sorted(self.query_infos.keys(), key=lambda x: str(len(x))+x if x[-1] != 'a' and x[-1] != 'b' else str(len(x)-1) + x)\n", + " print(\"http://{}:18080/history/{}/jobs/\".format(local_ip, self.sc.applicationId))\n", + " \n", + " def start_monitor(self, clients, **kw):\n", + " startmonitor(clients, self.appid, **kw)\n", + " \n", + " def stop_monitor(self, clients, **kw):\n", + " if self.stopped:\n", + " return\n", + " stopmonitor(clients, self.sc, self.appid, **kw)\n", + " if self.server:\n", + " output_nb = f'{self.nb_name[:-6]}-{self.appid}.ipynb'\n", + " if output_nb.startswith(home):\n", + " output_nb_name = os.path.relpath(output_nb, home)\n", + " else:\n", + " output_nb_name = output_nb\n", + " output_nb_dir = os.path.dirname(output_nb_name)\n", + " server_nb_dir = os.path.join('PAUS', self.base_dir, output_nb_dir)\n", + " !ssh {self.server} \"mkdir -p {server_nb_dir}\"\n", + " !scp {output_nb} {self.server}:{server_nb_dir}\n", + " 
self.finished_nb = f\"http://{self.server}:8888/tree/{self.base_dir}/{output_nb_name}\"\n", + " self.stopped = True\n", + "\n", + " def run_perf_analysis(self, disk_dev, nic_dev):\n", + " if not self.server:\n", + " return\n", + "\n", + " upload_profile(self.server, self.base_dir, self.appid)\n", + "\n", + " ts=time.strftime(\"%Y_%m_%d_%H%M%S\")\n", + " name=f'{self.workload}_gluten' if self.run_gluten else f'{self.workload}_vanilla'\n", + " run_script=f'{gluten_home}/tools/workload/benchmark_velox/analysis/run_perf_analysis.sh'\n", + " \n", + " disk=','.join(disk_dev)\n", + " nic=','.join(nic_dev)\n", + "\n", + " command =' '.join(['bash', run_script, '--ts', ts, '--base-dir', self.base_dir, '--name', name, '--appid', self.appid, '--disk', disk, '--nic', nic, '--tz', convert_to_etc_gmt()])\n", + " print(command)\n", + "\n", + " # Block if running on local cluster.\n", + " if self.server == localhost:\n", + " !ssh {self.server} \"{command} > /dev/null 2>&1\"\n", + " else:\n", + " !ssh {self.server} \"{command} > /dev/null 2>&1 &\"\n", + "\n", + " self.perf_html=f'http://{self.server}:8888/view/{self.base_dir}/html/{ts}_{name}_{self.appid}.html'\n", + " display(HTML(f'{self.perf_html}'))\n", + " \n", + " def load_table(self, table):\n", + " if type(self.table_dir)==list:\n", + " return self.spark.read.format(self.data_source).load([os.path.join(t, table) for t in self.table_dir])\n", + " else:\n", + " return self.spark.read.format(self.data_source).load(os.path.join(self.table_dir, table))\n", + " \n", + " def load_tables_as_tempview(self, tables):\n", + " for table in tables:\n", + " df = self.load_table(table)\n", + " df.createOrReplaceTempView(table)\n", + " \n", + " def load_all_tables_as_tempview(self):\n", + " print(f\"Loading all tables: {self.tpctables}\")\n", + " self.load_tables_as_tempview(self.tpctables)\n", + " \n", + " def load_query(self, query):\n", + " info = self.query_infos[query]\n", + " return [self.spark.sql(q) for q in info.sql]\n", + " \n", + " def run_query(self, query, explain = False, print_result=False, load_table=True):\n", + " if load_table:\n", + " self.load_all_tables_as_tempview()\n", + " start_time = timeit.default_timer()\n", + " print(\"start query \" + query + \", application id \" + self.sc.applicationId)\n", + " print(\"{} : {}\".format(\"Start time\", start_time))\n", + " self.sc.setJobDescription(query)\n", + "\n", + " queries = self.load_query(query)\n", + " for q in queries:\n", + " if explain: q.explain()\n", + " collect=q.collect()\n", + " end_time = timeit.default_timer()\n", + " duration = end_time - start_time\n", + " display(HTML(('Completed Query. 
Time(sec): {:f}'.format(duration))))\n", + " \n", + " self.result[query] = duration\n", + " if print_result:\n", + " print(collect)\n", + "\n", + " def power_run(self, explain=False, print_result=False, load_table=True):\n", + " if load_table:\n", + " self.load_all_tables_as_tempview()\n", + " for l in self.query_ids:\n", + " self.run_query(l, explain=explain, print_result=print_result, load_table=False)\n", + "\n", + " def print_result(self):\n", + " print(self.result)\n", + " print()\n", + " durations = [float(i) for i in self.result.values()]\n", + " print(\"total duration:\")\n", + " print(sum(durations))\n", + " print()\n", + " if self.server:\n", + " print(self.finished_nb)\n", + " print(f\"http://{self.server}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{self.appid}.json\")\n", + " print(f\"http://{self.server}:18080/history/{self.appid}\")\n", + " print(self.perf_html)\n", + " print(self.appid)\n", + " for i in durations:\n", + " print(i)\n", + " \n", + "class TestTPCH(TestTPC):\n", + " tpctables = ['customer', 'lineitem', 'nation', 'orders', 'part', 'partsupp', 'region', 'supplier']\n", + " tpc_query_path = f'{gluten_home}/tools/gluten-it/common/src/main/resources/tpch-queries/'\n", + " \n", + " def __init__(self, spark, table_dir, run_gluten, server, base_dir, nb_name, data_source = 'parquet'):\n", + " TestTPC.__init__(self,spark, table_dir, run_gluten, 'tpch', server, base_dir, nb_name, data_source)\n", + " \n", + "class TestTPCDS(TestTPC):\n", + " tpctables = [ 'call_center',\n", + " 'catalog_page',\n", + " 'catalog_returns',\n", + " 'catalog_sales',\n", + " 'customer',\n", + " 'customer_address',\n", + " 'customer_demographics',\n", + " 'date_dim',\n", + " 'household_demographics',\n", + " 'income_band',\n", + " 'inventory',\n", + " 'item',\n", + " 'promotion',\n", + " 'reason',\n", + " 'ship_mode',\n", + " 'store',\n", + " 'store_returns',\n", + " 'store_sales',\n", + " 'time_dim',\n", + " 'warehouse',\n", + " 'web_page',\n", + " 'web_returns',\n", + " 'web_sales',\n", + " 'web_site']\n", + " tpc_query_path = f'{gluten_home}/tools/gluten-it/common/src/main/resources/tpcds-queries/'\n", + " \n", + " def __init__(self, spark, table_dir, run_gluten, server, base_dir, nb_name, data_source = 'parquet'):\n", + " TestTPC.__init__(self,spark, table_dir, run_gluten, 'tpcds', server, base_dir, nb_name, data_source)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create SparkContext" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## default config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "def findjemalloc():\n", + " l = clients[0]\n", + " jemallocDir = !ssh $l \"whereis libjemalloc.so.2\"\n", + " libjemalloc = jemallocDir[0].split(' ')\n", + " return libjemalloc[1]\n", + "\n", + "def get_py4jzip():\n", + " spark_home=os.environ['SPARK_HOME']\n", + " py4jzip = !ls {spark_home}/python/lib/py4j*.zip\n", + " return py4jzip[0]\n", + "\n", + "def default_conf(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars='', app_name='', master='yarn', run_gluten=False):\n", + " # Create a temp directory that gets cleaned up on exit\n", + " output_dir = os.path.abspath(tempfile.mkdtemp())\n", + " def cleanup():\n", + " shutil.rmtree(output_dir, True)\n", + " atexit.register(cleanup)\n", + " signal.signal(signal.SIGTERM, cleanup)\n", + "\n", + 
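+    "# Comment added for clarity (rough summary of the sizing logic below, assuming the\n",
+    "# driver may share a node with the executors): per node,\n",
+    "#   executors_per_node * (onheap + offheap + overhead) + driver memory\n",
+    "# must fit in memory_per_node, with offheap = onheap * offheap_ratio; the onheap\n",
+    "# size is then solved from that budget and padded toward YARN's 1g allocation step.\n",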
"##################################################\n", + " def convert_to_bytes(size):\n", + " units = {'k': 1, 'm': 2, 'g': 3}\n", + " size = size.lower()\n", + " if size[-1] in units:\n", + " return int(size[:-1]) * 1024 ** units[size[-1]]\n", + " else:\n", + " return int(size)\n", + "\n", + " def yarn_padding(size):\n", + " min_size = convert_to_bytes('1g')\n", + " step = min_size\n", + " while size > min_size:\n", + " min_size += step\n", + " return min_size - size\n", + " \n", + " num_nodes = len(clients)\n", + " num_executors = num_nodes*executors_per_node\n", + " parallelism = num_executors*cores_per_executor*task_per_core\n", + "\n", + " if run_gluten:\n", + " offheap_ratio = gluten_offheap_ratio\n", + " else:\n", + " offheap_ratio = vanilla_offheap_ratio\n", + " driver_memory = convert_to_bytes('20g')\n", + " executor_memory_overhead = convert_to_bytes('1g')\n", + " \n", + " # Minimun executor memory\n", + " min_memory = convert_to_bytes('1g')\n", + "\n", + " # Calculate executor onheap memory\n", + " num_driver = 1 if localhost in clients else 0\n", + " executor_memory = math.floor((convert_to_bytes(memory_per_node) - (executor_memory_overhead + min_memory)*executors_per_node - (driver_memory + min_memory)*num_driver)/(offheap_ratio*num_driver + (1+offheap_ratio)*executors_per_node))\n", + " executor_memory = max(executor_memory, min_memory)\n", + " # Calculate driver/executor offheap memory in MB\n", + " #offheap_memory_per_node = convert_to_bytes(memory_per_node) - (executor_memory + executor_memory_overhead) * executors_per_node\n", + " if offheap_ratio > 0:\n", + " enable_offheap = True\n", + " offheap_memory = math.floor(executor_memory*offheap_ratio)\n", + " else:\n", + " enable_offheap = False\n", + " offheap_memory = 0\n", + "\n", + " byte_to_mb = lambda x: int(x/(1024 ** 2))\n", + " driver_memory_mb = byte_to_mb(driver_memory)\n", + " executor_memory_overhead_mb = byte_to_mb(executor_memory_overhead)\n", + " executor_memory_mb = byte_to_mb(executor_memory)\n", + " offheap_memory_mb = byte_to_mb(offheap_memory)\n", + " \n", + " executor_totalmem_mb = executor_memory_overhead_mb + executor_memory_mb + offheap_memory_mb\n", + " executor_totalmem_mb = yarn_padding(executor_totalmem_mb)\n", + " if byte_to_mb(convert_to_bytes(memory_per_node)) - executor_totalmem_mb*executors_per_node > executor_totalmem_mb:\n", + " executor_memory_overhead_mb += 1024\n", + " \n", + " print('''\n", + " executors per node: {:d}\n", + " parallelism: {:d}\n", + " executor memory: {:d}m\n", + " offheap memory: {:d}m\n", + " '''.format(executors_per_node, parallelism, executor_memory_mb, offheap_memory_mb))\n", + "\n", + " conf = SparkConf() \\\n", + " .set('spark.app.name', app_name)\\\n", + " .set('spark.master',master)\\\n", + " .set('spark.executor.memory', '{:d}m'.format(executor_memory_mb))\\\n", + " .set('spark.memory.offHeap.enabled', enable_offheap)\\\n", + " .set('spark.memory.offHeap.size','{:d}m'.format(offheap_memory_mb))\\\n", + " .set('spark.sql.shuffle.partitions', parallelism)\\\n", + " .set('spark.executor.instances', '{:d}'.format(num_executors))\\\n", + " .set('spark.executor.cores','{:d}'.format(cores_per_executor))\\\n", + " .set('spark.task.cpus','{:d}'.format(1))\\\n", + " .set('spark.driver.memory', '{:d}m'.format(driver_memory_mb))\\\n", + " .set('spark.executor.memoryOverhead', '{:d}m'.format(executor_memory_overhead_mb))\\\n", + " .set('spark.driver.maxResultSize', '4g')\\\n", + " .set('spark.executor.extraJavaOptions',\\\n", + " f'-XX:+UseParallelOldGC 
-XX:ParallelGCThreads=2 -XX:NewRatio=1 -XX:SurvivorRatio=1 -XX:+UseCompressedOops -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:ErrorFile={home}/logs/java/hs_err_pid%p.log')\\\n", + " .set('spark.driver.extraClassPath', extra_jars) \\\n", + " .set('spark.executor.extraClassPath', extra_jars) \\\n", + " .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}python:{get_py4jzip()}\") \\\n", + " .set(\"spark.repl.class.outputDir\", output_dir) \\\n", + " .set(\"spark.sql.broadcastTimeout\", \"4800\") \\\n", + " .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\\\n", + " .set('spark.kryoserializer.buffer.max','512m')\\\n", + " .set('spark.kryo.unsafe',False)\\\n", + " .set('spark.sql.adaptive.enabled',True)\\\n", + " .set('spark.sql.autoBroadcastJoinThreshold',\"10m\")\\\n", + " .set('spark.sql.catalogImplementation','hive')\\\n", + " .set('spark.sql.optimizer.dynamicPartitionPruning.enabled',True)\\\n", + " .set('spark.cleaner.periodicGC.interval', '10s')\n", + "\n", + " return conf\n", + "\n", + "\n", + "def create_cntx_with_config(conf,conf_overwrite=None):\n", + "\n", + " importlib.reload(pyspark.java_gateway)\n", + "\n", + " def Popen(*args, **kwargs):\n", + " \"\"\"Wraps subprocess.Popen to force stdout and stderr from the child process\n", + " to pipe to this process without buffering.\n", + " \"\"\"\n", + " global spark_jvm_proc\n", + " # Override these in kwargs to avoid duplicate value errors\n", + " # Set streams to unbuffered so that we read whatever bytes are available\n", + " # when ready, https://docs.python.org/3.6/library/subprocess.html#popen-constructor\n", + " kwargs['bufsize'] = 0\n", + " # Capture everything from stdout for display in the notebook\n", + " kwargs['stdout'] = subprocess.PIPE\n", + " print(\"java proc gateway popen\")\n", + " spark_jvm_proc = subprocess.Popen(*args, **kwargs)\n", + " return spark_jvm_proc\n", + " pyspark.java_gateway.Popen = Popen\n", + "\n", + " spylon_kernel.scala_interpreter.scala_intp=None\n", + " \n", + " if conf_overwrite is not None:\n", + " conf=conf_overwrite(conf)\n", + " print(\"spark.serializer: \",conf.get(\"spark.serializer\"))\n", + " print(\"master: \",conf.get(\"spark.master\"))\n", + " \n", + " sc = SparkContext(conf = conf,master=conf.get(\"spark.master\"))\n", + " sc.setLogLevel('ERROR')\n", + " \n", + " sc.addPyFile(f\"{os.environ['SPARK_HOME']}/python/lib/pyspark.zip\")\n", + " sc.addPyFile(get_py4jzip())\n", + " \n", + " spark = SQLContext(sc)\n", + " \n", + " time.sleep(30)\n", + " \n", + " for client in clients:\n", + " pids=!ssh $client \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f1\"\n", + " print(client,\":\",len(pids),\" \",\"\\t\".join(map(str,pids)))\n", + " \n", + " spark_session = SparkSession(sc)\n", + " spark_jvm_helpers = SparkJVMHelpers(spark_session._sc)\n", + " spylon_kernel.scala_interpreter.spark_state = spylon_kernel.scala_interpreter.SparkState(spark_session, spark_jvm_helpers, spark_jvm_proc)\n", + " \n", + " print(\"appid: \",sc.applicationId)\n", + " print(\"SparkConf:\")\n", + "\n", + " df = pd.DataFrame(sc.getConf().getAll(), columns=['key', 'value'])\n", + " display(df)\n", + "\n", + " return sc, spark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vanilla Spark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def vanilla_tpch_conf_overwrite(conf):\n", + " return conf\n", + "\n", + "def vanilla_tpcds_conf_overwrite(conf):\n", + " 
conf.set('spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold', '0')\\\n", + " .set('spark.sql.optimizer.runtime.bloomFilter.enabled', 'true')\n", + " return conf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_cntx_vanilla(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name='', master='yarn', conf_overwrite=None):\n", + " conf = default_conf(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name, master, run_gluten=False)\n", + " conf.set(\"spark.sql.execution.arrow.maxRecordsPerBatch\",20480)\\\n", + " .set(\"spark.sql.parquet.columnarReaderBatchSize\",20480)\\\n", + " .set(\"spark.sql.inMemoryColumnarStorage.batchSize\",20480)\n", + " return create_cntx_with_config(conf,conf_overwrite)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gluten" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def gluten_tpch_conf_overwrite(conf):\n", + " return conf\n", + "\n", + "def gluten_tpcds_conf_overwrite(conf):\n", + " conf.set('spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold', '0')\\\n", + " .set('spark.sql.optimizer.runtime.bloomFilter.enabled', 'true')\\\n", + " .set('spark.gluten.sql.columnar.joinOptimizationLevel', '18')\\\n", + " .set('spark.gluten.sql.columnar.physicalJoinOptimizeEnable', 'true')\\\n", + " .set('spark.gluten.sql.columnar.physicalJoinOptimizationLevel', '18')\\\n", + " .set('spark.gluten.sql.columnar.logicalJoinOptimizeEnable', 'true')\\\n", + " .set('spark.gluten.sql.columnar.logicalJoinOptimizationLevel', '19')\n", + " return conf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_cntx_gluten(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name='', master='yarn', conf_overwrite=None):\n", + " conf = default_conf(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name, master, run_gluten=True)\n", + " conf.set('spark.sql.files.maxPartitionBytes', '4g')\\\n", + " .set('spark.plugins','org.apache.gluten.GlutenPlugin')\\\n", + " .set('spark.shuffle.manager','org.apache.spark.shuffle.sort.ColumnarShuffleManager')\\\n", + " .set('spark.gluten.sql.columnar.backend.lib','velox')\\\n", + " .set('spark.gluten.sql.columnar.maxBatchSize',4096)\\\n", + " .set('spark.gluten.sql.columnar.forceshuffledhashjoin',True)\\\n", + " .set('spark.executorEnv.LD_PRELOAD', findjemalloc())\\\n", + " .set('spark.gluten.sql.columnar.coalesce.batches', 'true')\n", + " \n", + " return create_cntx_with_config(conf,conf_overwrite)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Context" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_cntx(run_gluten=False, workload='tpch', app_conf_overwrite=None, server='', base_dir='', nb_name='tpc_workload.ipynb', app_name=''):\n", + " table_dir=''\n", + " extra_jars = ''\n", + " is_tpch_workload=False\n", + " is_tpcds_workload=False\n", + " workload_conf_overwrite=None\n", + " create_cntx_func=None\n", + " test_tpc=None\n", + "\n", + " if workload.lower() == 'tpch':\n", + " if not app_name:\n", + " app_name = 'tpch_power'\n", + " tabledir = tpch_tabledir\n", + " is_tpch_workload=True\n", + " elif 
workload.lower() == 'tpcds':\n", + " if not app_name:\n", + " app_name = 'tpcds_power'\n", + " tabledir = tpcds_tabledir\n", + " is_tpcds_workload=True\n", + " else:\n", + " raise ValueError(f\"Unknown workload: {workload}\")\n", + "\n", + " lastgit=!git --git-dir {gluten_home}/.git log --format=\"%H\" -n 1\n", + " lastgit = lastgit[0]\n", + " print(f'lastgit: {lastgit}')\n", + "\n", + " nodes=len(clients)\n", + "\n", + " if run_gluten:\n", + " jars_base=f\"{home}/jars/\"+lastgit\n", + " \n", + " for target_jar in gluten_target_jar.split(\",\"):\n", + " !ls -l {target_jar}\n", + " !mkdir -p {jars_base}\n", + " !rm -rf {jars_base}/*\n", + " !cp {target_jar} {jars_base}/\n", + " if target_jar[-4:] != '.jar':\n", + " !cp -f {target_jar} {jars_base}/gluten-{lastgit}.jar\n", + "\n", + " jars=!ls -d {jars_base}/*.jar\n", + " extra_jars=\":\".join([\"file://\"+j for j in jars])\n", + " print(f'extra_jars: {extra_jars}')\n", + "\n", + " for c in clients:\n", + " if c!=localhost:\n", + " !ssh {c} \"rm -rf {jars_base}\"\n", + " !ssh {c} \"mkdir -p {jars_base}\"\n", + " !scp {jars_base}/*.jar {c}:{jars_base} >/dev/null 2>&1\n", + "\n", + " app_name = ' '.join(['gluten', app_name, lastgit[:6]])\n", + " create_cntx_func=create_cntx_gluten\n", + " if is_tpch_workload:\n", + " task_per_core = gluten_tpch_task_per_core\n", + " workload_conf_overwrite = gluten_tpch_conf_overwrite\n", + " elif is_tpcds_workload:\n", + " task_per_core = gluten_tpcds_task_per_core\n", + " workload_conf_overwrite = gluten_tpcds_conf_overwrite\n", + " else:\n", + " app_name = ' '.join(['vanilla', app_name, lastgit[:6]])\n", + " create_cntx_func=create_cntx_vanilla\n", + " if is_tpch_workload:\n", + " task_per_core = vanilla_tpch_task_per_core\n", + " workload_conf_overwrite = vanilla_tpch_conf_overwrite\n", + " elif is_tpcds_workload:\n", + " task_per_core = vanilla_tpcds_task_per_core\n", + " workload_conf_overwrite = vanilla_tpcds_conf_overwrite\n", + " \n", + " conf_overwrite = lambda conf: app_conf_overwrite(workload_conf_overwrite(conf))\n", + " \n", + " sc, spark = create_cntx_func(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name, master, conf_overwrite)\n", + " \n", + " # Pin executors to numa nodes for Gluten\n", + " if run_gluten:\n", + " pinexecutor_numa(clients)\n", + "\n", + " appid = sc.applicationId\n", + " print(\"start run: \", appid)\n", + " \n", + " if is_tpch_workload:\n", + " test_tpc = TestTPCH(spark, tabledir, run_gluten, server, base_dir, nb_name)\n", + " elif is_tpcds_workload:\n", + " test_tpc = TestTPCDS(spark, tabledir, run_gluten, server, base_dir, nb_name)\n", + " \n", + " return sc, spark, appid, test_tpc" + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "nbTranslate": { + "displayLangs": [ + "*" + ], + "hotkey": "alt-t", + "langInMainMenu": true, + "sourceLang": "en", + "targetLang": "fr", + "useGoogleTranslate": true + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "364.469px", + "left": 
"2086.8px", + "top": "150.516px", + "width": "375px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "toc-autonumbering": true, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tools/workload/benchmark_velox/params.yaml.template b/tools/workload/benchmark_velox/params.yaml.template new file mode 100644 index 000000000000..1c70e428bc99 --- /dev/null +++ b/tools/workload/benchmark_velox/params.yaml.template @@ -0,0 +1,71 @@ +# Local path to gluten project. +gluten_home: /home/sparkuser/gluten + +# Local path to gluten jar. +gluten_target_jar: /home/sparkuser/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar + +# Spark app master. +master: yarn + +# List of workers. +clients: + - localhost + +# List of block devices. +disk_dev: + - nvme1n1 + - nvme2n1 + +# List of network devices. +nic_dev: + - ens787f0 + +# Hostname or IP to server for perf analysis. Able to connect via ssh. +server: '' + +# Specify the directory on perf analysis server. Usually a codename for this run. +base_dir: emr + +# Proxy used to connect to server for perf analysis. +proxy: '' + +# Whether to upload profile to perf analysis server and run perf analysis scripts. Only takes effect if server is set. +analyze_perf: True + +# Select workload. Can be either 'tpch' or 'tpcds'. +workload: tpch + +# Run with gluten. If False, run vanilla Spark. +run_gluten: True + +# TPC tables +tpch_tabledir: /tpch_sf3000 +tpcds_tabledir: /tpcds_sf3000 + +# Parallelism +executors_per_node: 32 +cores_per_executor: 8 + +gluten_tpch_task_per_core: 2 +gluten_tpcds_task_per_core: 2 +vanilla_tpch_task_per_core: 4 +vanilla_tpcds_task_per_core: 4 + +# Physical memory on each worker node. +memory_per_node: 1000g + +# Offheap ratio. 0 to disable offheap for vanilla Spark. +# onheap:offheap = 1:2 +vanilla_offheap_ratio: 2.0 +# onheap:offheap = 1:7 +gluten_offheap_ratio: 7.0 + +# spark.io.compression.codec +vanilla_codec: lz4 +# spark.gluten.sql.columnar.shuffle.codec +gluten_codec: lz4 +# spark.gluten.sql.columnar.shuffle.codecBackend +gluten_codec_backend: '' +# spark.gluten.sql.columnar.maxBatchSize +max_batch_size: 4096 + diff --git a/tools/workload/benchmark_velox/run_tpc_workload.sh b/tools/workload/benchmark_velox/run_tpc_workload.sh new file mode 100755 index 000000000000..f6de6ff0f538 --- /dev/null +++ b/tools/workload/benchmark_velox/run_tpc_workload.sh @@ -0,0 +1,86 @@ +#! /bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eu + +PAPERMILL_ARGS=() +OUTPUT_DIR=$PWD + +while [[ $# -gt 0 ]]; do + case $1 in + --notebook) + NOTEBOOK="$2" + shift # past argument + shift # past value + ;; + --output-dir) + OUTPUT_DIR="$2" + shift # past argument + shift # past value + ;; + --output-name) + OUTPUT_NAME="$2" + shift # past argument + shift # past value + ;; + *) + PAPERMILL_ARGS+=("$1") # save positional arg + shift # past argument + ;; + esac +done + +if [ -z ${NOTEBOOK+x} ]; then + echo "Usage: $0 --notebook NOTEBOOK [--output-dir OUTPUT_DIR] [--output-name OUTPUT_NAME] [PAPERMILL_ARGS]" + exit 0 +fi + + +BASEDIR=$(dirname $0) +echo "Script location: ${BASEDIR}" + +nbname=$(basename $NOTEBOOK .ipynb) + +if [ -z ${OUTPUT_NAME+x} ]; then output_name=$nbname; else output_name=$(basename $OUTPUT_NAME .ipynb); fi + +output_dir=$(realpath $OUTPUT_DIR) +mkdir -p $output_dir + +rename_append_appid() { + output_name=$1 + orig_nb=$2 + + output_appid=`grep "appid: " $orig_nb | awk -F' ' '{print $2}' | sed 's/....$//'` + if [ -n "$output_appid" ]; + then + rename_nb=${output_dir}/${output_name}-${output_appid}.ipynb + echo "Rename notebook $orig_nb to $rename_nb" + mv $orig_nb $rename_nb + fi +} + +run() { + output_name=${output_name}-$(date +"%H%M%S") + output_nb=${output_dir}/${output_name}.ipynb + papermill --inject-output-path $NOTEBOOK \ + ${PAPERMILL_ARGS[@]} \ + $output_nb + rename_append_appid $output_name $output_nb +} + +run + diff --git a/tools/workload/benchmark_velox/tpc_workload.ipynb b/tools/workload/benchmark_velox/tpc_workload.ipynb new file mode 100644 index 000000000000..5dcb50a8a066 --- /dev/null +++ b/tools/workload/benchmark_velox/tpc_workload.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# initialization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Local path to gluten project.\n", + "gluten_home='/home/sparkuser/gluten'\n", + "\n", + "# Local path to gluten jar.\n", + "gluten_target_jar='/home/sparkuser/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar'\n", + "\n", + "# Spark app master. e.g. 'yarn'\n", + "master='yarn'\n", + "\n", + "# List of workers.\n", + "clients=['localhost']\n", + "\n", + "# List of block devices. e.g. ['nvme1n1', 'nvme2n1']\n", + "disk_dev=[]\n", + "\n", + "# List of network devices. e.g. ['ens787f0']\n", + "nic_dev=[]\n", + "\n", + "# Hostname or IP to server for perf analysis. Able to connect via ssh.\n", + "server=''\n", + "\n", + "# Specify the directory on perf analysis server. Usually a codename for this run.\n", + "base_dir=''\n", + "\n", + "# Proxy used to connect to server for perf analysis.\n", + "proxy=''\n", + "\n", + "# Whether to upload profile to perf analysis server and run perf analysis scripts. Only takes effect if server is set.\n", + "analyze_perf=True\n", + "\n", + "# Select workload. Can be either 'tpch' or 'tpcds'.\n", + "workload='tpch'\n", + "\n", + "# Run with gluten. 
If False, run vanilla Spark.\n", + "run_gluten=True\n", + "\n", + "# TPC tables\n", + "tpch_tabledir=''\n", + "tpcds_tabledir=''\n", + "\n", + "# Parallelism\n", + "executors_per_node=32\n", + "cores_per_executor=8\n", + "\n", + "gluten_tpch_task_per_core=2\n", + "gluten_tpcds_task_per_core=4\n", + "vanilla_tpch_task_per_core=8\n", + "vanilla_tpcds_task_per_core=8\n", + "\n", + "# Physical memory on each worker node.\n", + "memory_per_node='1000g'\n", + "\n", + "# Offheap ratio. 0 to disable offheap for vanilla Spark.\n", + "# onheap:offheap = 1:2\n", + "vanilla_offheap_ratio=2.0\n", + "# onheap:offheap = 1:7\n", + "gluten_offheap_ratio=7.0\n", + "\n", + "# spark.io.compression.codec\n", + "vanilla_codec='lz4'\n", + "# spark.gluten.sql.columnar.shuffle.codec\n", + "gluten_codec='lz4'\n", + "# spark.gluten.sql.columnar.shuffle.codecBackend\n", + "gluten_codec_backend=''\n", + "# spark.gluten.sql.columnar.maxBatchSize\n", + "max_batch_size=4096" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "initialize_ipynb = !realpath native_sql_initialize.ipynb\n", + "print(f\"Running notebook: {initialize_ipynb[0]}\\n\")\n", + "%run {initialize_ipynb[0]}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "newClients = []\n", + "for l in clients:\n", + " if l == 'localhost':\n", + " newClients.append(localhost)\n", + " else:\n", + " newClients.append(l)\n", + "clients = newClients\n", + "\n", + "if server == 'localhost':\n", + " server = localhost" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%javascript\n", + "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "deletable": false, + "editable": false, + "run_control": { + "frozen": true + } + }, + "outputs": [], + "source": [ + "nb_name=PAPERMILL_OUTPUT_PATH" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Application Level Configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tpch_workload=False\n", + "tpcds_workload=False\n", + "\n", + "if workload.lower() == 'tpch':\n", + " tpch_workload=True\n", + "elif workload.lower() == 'tpcds':\n", + " tpcds_workload=True\n", + "else:\n", + " raise ValueError(f\"Unknown workload: {workload}\")\n", + "\n", + "def gluten_conf_overwrite(conf):\n", + " conf.set('spark.gluten.sql.columnar.shuffle.codec', gluten_codec)\\\n", + " .set('spark.gluten.sql.columnar.shuffle.codecBackend', gluten_codec_backend)\\\n", + " .set('spark.gluten.sql.columnar.maxBatchSize', max_batch_size)\\\n", + " .set('spark.executor.extraJavaOptions',\\\n", + " '-XX:+UseParallelOldGC -XX:ParallelGCThreads=2 -XX:NewRatio=1 -XX:SurvivorRatio=1 -XX:+UseCompressedOops -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:ErrorFile=/home/sparkuser/logs/java/hs_err_pid%p.log')\\\n", + " .set('spark.gluten.memory.overAcquiredMemoryRatio','0')\\\n", + "\n", + " if tpch_workload:\n", + " pass\n", + " elif tpcds_workload:\n", + " pass\n", + " return conf\n", + "\n", + "def vanilla_conf_overwrite(conf):\n", + " conf.set('spark.io.compression.codec', vanilla_codec)\\\n", + " .set('spark.executorEnv.LD_LIBRARY_PATH',f\"{os.getenv('HADOOP_HOME')}/lib/native/\") \\\n", + " 
.set('spark.yarn.appMasterEnv.LD_LIBRARY_PATH',f\"{os.getenv('HADOOP_HOME')}/lib/native/\") \\\n", + "\n", + " if tpch_workload:\n", + " pass\n", + " elif tpcds_workload:\n", + " pass\n", + " return conf\n", + "\n", + "def app_conf_overwrite(conf):\n", + " if run_gluten:\n", + " return gluten_conf_overwrite(conf)\n", + " return vanilla_conf_overwrite(conf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run Workload" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Config and clean pagecache before each run\n", + "config_pagecache(clients, run_gluten)\n", + "dropcache(clients)\n", + "print_kernel_params(clients)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create SparkSession\n", + "sc, spark, appid, test_tpc=create_cntx(run_gluten, workload, app_conf_overwrite, server, base_dir, nb_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if run_gluten:\n", + " config_mem_cgroup(clients)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_tpc.start_monitor(clients)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_tpc.power_run(explain=False, print_result=False, load_table=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_tpc.stop_monitor(clients)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if analyze_perf:\n", + " test_tpc.run_perf_analysis(disk_dev, nic_dev)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Show Performance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_tpc.print_result()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [] + }, + "outputs": [], + "source": [ + "for client in clients:\n", + " draw_sar(appid, qtime=test_tpc.result, disk_dev=disk_dev, nic_dev=nic_dev, client=client)" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "nbTranslate": { + "displayLangs": [ + "*" + ], + "hotkey": "alt-t", + "langInMainMenu": true, + "sourceLang": "en", + "targetLang": "fr", + "useGoogleTranslate": true + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "428.672px", + "left": "1339.91px", + "top": "374.297px", + "width": "456.969px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": 
"print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tools/workload/tpcds/gen_data/parquet_dataset/tpcds_datagen_parquet.sh b/tools/workload/tpcds/gen_data/parquet_dataset/tpcds_datagen_parquet.sh index 8f62560dc168..60f10f094d5b 100644 --- a/tools/workload/tpcds/gen_data/parquet_dataset/tpcds_datagen_parquet.sh +++ b/tools/workload/tpcds/gen_data/parquet_dataset/tpcds_datagen_parquet.sh @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -batchsize=10240 SPARK_HOME=/home/sparkuser/spark/ spark_sql_perf_jar=/PATH/TO/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar cat tpcds_datagen_parquet.scala | ${SPARK_HOME}/bin/spark-shell \ @@ -22,14 +21,13 @@ cat tpcds_datagen_parquet.scala | ${SPARK_HOME}/bin/spark-shell \ --executor-memory 25g \ --executor-cores 8 \ --master yarn \ - --driver-memory 50g \ + --driver-memory 10g \ --deploy-mode client \ --conf spark.executor.memoryOverhead=1g \ - --conf spark.sql.parquet.columnarReaderBatchSize=${batchsize} \ - --conf spark.sql.inMemoryColumnarStorage.batchSize=${batchsize} \ - --conf spark.sql.execution.arrow.maxRecordsPerBatch=${batchsize} \ --conf spark.sql.broadcastTimeout=4800 \ --conf spark.driver.maxResultSize=4g \ - --conf spark.sql.sources.useV1SourceList=avro \ --conf spark.sql.shuffle.partitions=224 \ + --conf spark.sql.parquet.compression.codec=snappy \ + --conf spark.network.timeout=800s \ + --conf spark.executor.heartbeatInterval=200s \ --jars ${spark_sql_perf_jar} diff --git a/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.scala b/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.scala deleted file mode 100644 index 958a98f57020..000000000000 --- a/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -import org.apache.spark.sql.execution.debug._ -import scala.io.Source -import java.io.File -import java.util.Arrays -import org.apache.spark.sql.types.{DoubleType, TimestampType, LongType, IntegerType} - -val parquet_file_path = "/PATH/TO/TPCH_PARQUET_PATH" -val dwrf_file_path = "/PATH/TO/TPCH_DWRF_PATH" - -val lineitem_parquet_path = "file://" + parquet_file_path + "/lineitem" -val customer_parquet_path = "file://" + parquet_file_path + "/customer" -val nation_parquet_path = "file://" + parquet_file_path + "/nation" -val orders_parquet_path = "file://" + parquet_file_path + "/orders" -val part_parquet_path = "file://" + parquet_file_path + "/part" -val partsupp_parquet_path = "file://" + parquet_file_path + "/partsupp" -val region_path_path = "file://" + parquet_file_path + "/region" -val supplier_parquet_path = "file://" + parquet_file_path + "/supplier" - -val lineitem = spark.read.format("parquet").load(lineitem_parquet_path) -val customer = spark.read.format("parquet").load(customer_parquet_path) -val nation = spark.read.format("parquet").load(nation_parquet_path) -val orders = spark.read.format("parquet").load(orders_parquet_path) -val part = spark.read.format("parquet").load(part_parquet_path) -val partsupp = spark.read.format("parquet").load(partsupp_parquet_path) -val region = spark.read.format("parquet").load(region_path_path) -val supplier = spark.read.format("parquet").load(supplier_parquet_path) - -val lineitem_dwrf_path = "file://" + dwrf_file_path + "/lineitem" -val customer_dwrf_path = "file://" + dwrf_file_path + "/customer" -val nation_dwrf_path = "file://" + dwrf_file_path + "/nation" -val orders_dwrf_path = "file://" + dwrf_file_path + "/orders" -val part_dwrf_path = "file://" + dwrf_file_path + "/part" -val partsupp_dwrf_path = "file://" + dwrf_file_path + "/partsupp" -val region_dwrf_path = "file://" + dwrf_file_path + "/region" -val supplier_dwrf_path = "file://" + dwrf_file_path + "/supplier" - -lineitem.write.mode("append").format("dwrf").save(lineitem_dwrf_path) -customer.write.mode("append").format("dwrf").save(customer_dwrf_path) -nation.write.mode("append").format("dwrf").save(nation_dwrf_path) -orders.write.mode("append").format("dwrf").save(orders_dwrf_path) -part.write.mode("append").format("dwrf").save(part_dwrf_path) -partsupp.write.mode("append").format("dwrf").save(partsupp_dwrf_path) -region.write.mode("append").format("dwrf").save(region_dwrf_path) -supplier.write.mode("append").format("dwrf").save(supplier_dwrf_path) - - diff --git a/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.sh b/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.sh deleted file mode 100644 index 2dc05c9aa7dd..000000000000 --- a/tools/workload/tpch/gen_data/dwrf_dataset/tpch_convert_parquet_dwrf.sh +++ /dev/null @@ -1,47 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -batchsize=20480 - -export GLUTEN_HOME=/PATH/TO/gluten/ -#please choose right os system jar -export GLUTEN_JVM_JAR=${GLUTEN_HOME}/package/target/ -SPARK_HOME=/home/sparkuser/spark/ - -cat tpch_convert_parquet_dwrf.scala | ${SPARK_HOME}/bin/spark-shell \ - --name convert_parquet_dwrf \ - --master yarn \ - --deploy-mode client \ - --driver-memory 20g \ - --executor-cores 8 \ - --num-executors 14 \ - --executor-memory 30g \ - --conf spark.plugins=org.apache.gluten.GlutenPlugin \ - --conf spark.driver.extraClassPath=${GLUTEN_JVM_JAR} \ - --conf spark.executor.extraClassPath=${GLUTEN_JVM_JAR} \ - --conf spark.memory.offHeap.size=30g \ - --conf spark.executor.memoryOverhead=5g \ - --conf spark.driver.maxResultSize=32g \ - --conf spark.sql.autoBroadcastJoinThreshold=-1 \ - --conf spark.sql.parquet.columnarReaderBatchSize=${batchsize} \ - --conf spark.sql.inMemoryColumnarStorage.batchSize=${batchsize} \ - --conf spark.sql.execution.arrow.maxRecordsPerBatch=${batchsize} \ - --conf spark.gluten.sql.columnar.forceShuffledHashJoin=true \ - --conf spark.sql.broadcastTimeout=4800 \ - --conf spark.driver.maxResultSize=4g \ - --conf spark.sql.adaptive.enabled=true \ - --conf spark.sql.shuffle.partitions=112 \ - --conf spark.sql.sources.useV1SourceList=avro \ - --conf spark.shuffle.manager=org.apache.spark.shuffle.sort.ColumnarShuffleManager \ - --conf spark.sql.files.maxPartitionBytes=1073741824 \ diff --git a/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.sh b/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.sh index 29512ed80b15..8db9d443331d 100644 --- a/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.sh +++ b/tools/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.sh @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -batchsize=10240 SPARK_HOME=/home/sparkuser/spark/ spark_sql_perf_jar=/PATH/TO/spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar cat tpch_datagen_parquet.scala | ${SPARK_HOME}/bin/spark-shell \ @@ -25,11 +24,10 @@ cat tpch_datagen_parquet.scala | ${SPARK_HOME}/bin/spark-shell \ --driver-memory 50g \ --deploy-mode client \ --conf spark.executor.memoryOverhead=1g \ - --conf spark.sql.parquet.columnarReaderBatchSize=${batchsize} \ - --conf spark.sql.inMemoryColumnarStorage.batchSize=${batchsize} \ - --conf spark.sql.execution.arrow.maxRecordsPerBatch=${batchsize} \ --conf spark.sql.broadcastTimeout=4800 \ --conf spark.driver.maxResultSize=4g \ - --conf spark.sql.sources.useV1SourceList=avro \ --conf spark.sql.shuffle.partitions=224 \ + --conf spark.sql.parquet.compression.codec=snappy \ + --conf spark.network.timeout=800s \ + --conf spark.executor.heartbeatInterval=200s \ --jars ${spark_sql_perf_jar}
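
The `vanilla_offheap_ratio` and `gluten_offheap_ratio` parameters defined earlier (in params.yaml.template and in the parameters cell of tpc_workload.ipynb) describe how each executor's share of `memory_per_node` is split between on-heap and off-heap memory. The helper that actually performs this split lives in the initialization notebook and is not part of this diff, so the sketch below only illustrates the arithmetic implied by the `onheap:offheap = 1:N` comments; the function name and the even per-executor split are assumptions for illustration, not the notebook's real implementation.

```python
# Illustrative sketch only: approximates how the onheap:offheap = 1:N ratio
# in params.yaml.template could translate into per-executor memory sizes.
# Not the initialization notebook's actual default_conf logic.
def split_executor_memory(memory_per_node_gb: float,
                          executors_per_node: int,
                          offheap_ratio: float):
    """Return an approximate (onheap_gb, offheap_gb) pair for one executor."""
    per_executor = memory_per_node_gb / executors_per_node
    if offheap_ratio <= 0:
        # A ratio of 0 disables off-heap memory (the vanilla Spark case).
        return per_executor, 0.0
    onheap = per_executor / (1 + offheap_ratio)
    return onheap, per_executor - onheap

# Template defaults: 1000g per node, 32 executors, gluten_offheap_ratio = 7.0
onheap, offheap = split_executor_memory(1000, 32, 7.0)
print(f"~{onheap:.1f}g on-heap, ~{offheap:.1f}g off-heap per executor")
```

With the template defaults (memory_per_node of 1000g, 32 executors per node, and gluten_offheap_ratio of 7.0), this arithmetic works out to roughly 4g on-heap and 27g off-heap per executor; the notebook may additionally reserve memory for overhead, so treat these numbers as a rough guide rather than the exact values it configures.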