Skip to content

Commit

Permalink
#77: fix permissions setup of mounted volumes
Browse files Browse the repository at this point in the history
change log:
- add docker-entrypoint.sh that dynamically ensures container has read/write permissions at runtime while using a non-root user to run applications
- update Dockerfile to use docker-entrypoint.sh
- update setup.sh to remove redundant setup_permissions and improve env var readability
  • Loading branch information
tstescoTT committed Jan 28, 2025
1 parent a31ac60 commit 02d3b16
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 101 deletions.
72 changes: 72 additions & 0 deletions docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
#
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC

# Docker entry point script:
# ensures CONTAINER_APP_USERNAME has read/write permissions to:
# - CACHE_ROOT
# - /home/${CONTAINER_APP_USERNAME}
#
# This script is run by the container root user at startup; CMD is then
# de-escalated to the non-root user CONTAINER_APP_USERNAME.
# Note: additional run-time volumes that need read/write permissions should be
# mounted as a leaf directory under /home/${CONTAINER_APP_USERNAME}/.

# Abort on the first failing command (-e) and treat a pipeline as failed when
# any of its stages fails (pipefail).
set -eo pipefail

#######################################
# Make a directory tree group-owned by a shared group with mode 2775.
#
# Only the top-level directory is inspected: the recursive chown / chmod is
# skipped when that directory already has the requested group / mode, so the
# contents of an already-prepared tree are not re-checked.
#
# Globals:   none
# Arguments: $1 - directory to process; skipped if it is not an existing
#                 directory
#            $2 - group (name) that should own the tree
# Outputs:   progress message on stdout, errors on stderr
# Returns:   0 on success or when the directory is skipped, 1 when no group
#            is given, otherwise the status of the failing stat/chown/chmod
#######################################
set_group_permissions() {
  local var_dir="${1:-}"
  local shared_group="${2:-}"
  # Keep the probe results local so they do not leak into the caller's scope.
  local current_group current_perms
  echo "setting permissions for ${var_dir} ..."

  # Skip if directory doesn't exist
  if [ ! -d "$var_dir" ]; then
    return 0
  fi

  # `chown -R :""` would silently change nothing while chmod still ran,
  # leaving the tree with the wrong group; refuse instead.
  if [ -z "$shared_group" ]; then
    echo "set_group_permissions: no group given for ${var_dir}" >&2
    return 1
  fi

  # Check current group and permissions of the top-level directory
  current_group=$(stat -c "%G" -- "$var_dir") || return
  current_perms=$(stat -c "%a" -- "$var_dir") || return

  # Set group if needed
  if [ "$current_group" != "$shared_group" ]; then
    chown -R -- ":${shared_group}" "$var_dir" || return
  fi

  # Set permissions if needed. 2775 = rwxrwxr-x plus setgid, so entries
  # created later inside these directories inherit the shared group.
  # NOTE(review): -R applies the same mode to regular files as well, which
  # marks them executable/setgid - confirm this is intended.
  if [ "$current_perms" != "2775" ]; then
    chmod -R 2775 -- "$var_dir" || return
  fi
}

# ---------------------------------------------------------------------------
# Main flow (runs as root): align the container user with the group that owns
# the mounted CACHE_ROOT volume, fix group permissions, then drop privileges
# and exec the container CMD.
# Required environment: CACHE_ROOT, CONTAINER_APP_USERNAME
# ---------------------------------------------------------------------------

# Fail early with a clear message instead of a cryptic stat/usermod error.
: "${CACHE_ROOT:?CACHE_ROOT must be set to the mounted cache volume path}"
: "${CONTAINER_APP_USERNAME:?CONTAINER_APP_USERNAME must be set to the non-root application user}"

APP_HOME="/home/${CONTAINER_APP_USERNAME}"

echo "using CACHE_ROOT: ${CACHE_ROOT}"

if [ ! -e "$CACHE_ROOT" ]; then
  echo "CACHE_ROOT does not exist: ${CACHE_ROOT}" >&2
  exit 1
fi

# Get current ownership of volume
VOLUME_OWNER=$(stat -c '%u' "$CACHE_ROOT")
VOLUME_GROUP=$(stat -c '%g' "$CACHE_ROOT")
echo "Mounted CACHE_ROOT volume is owned by UID:GID - $VOLUME_OWNER:$VOLUME_GROUP"

# Create shared group with host's GID if it doesn't exist
if ! getent group "$VOLUME_GROUP" > /dev/null 2>&1; then
  NEW_GROUP_NAME="sharedgroup"
  # The name may already be taken by a group with a different GID; groupadd
  # would then fail, so fall back to a GID-suffixed name.
  if getent group "$NEW_GROUP_NAME" > /dev/null 2>&1; then
    NEW_GROUP_NAME="sharedgroup_${VOLUME_GROUP}"
  fi
  groupadd -g "$VOLUME_GROUP" "$NEW_GROUP_NAME"
fi

# Get the created/existing group name
SHARED_GROUP_NAME=$(getent group "$VOLUME_GROUP" | cut -d: -f1)

# Add container user to the shared group
usermod -a -G "$SHARED_GROUP_NAME" "${CONTAINER_APP_USERNAME}"

# Ensure new files get group write permissions (in current shell)
umask 0002

# Process CACHE_ROOT separately unless it is the home directory itself or a
# path below it. The match is anchored on a "/" boundary so that a sibling
# such as /home/<user>2/... is not mistaken for a path inside /home/<user>.
if [[ "$CACHE_ROOT" != "$APP_HOME" && "$CACHE_ROOT" != "$APP_HOME"/* ]]; then
  set_group_permissions "$CACHE_ROOT" "$SHARED_GROUP_NAME"
fi
set_group_permissions "$APP_HOME" "$SHARED_GROUP_NAME"
echo "Mounted volume permissions setup completed."

if [ "$#" -eq 0 ]; then
  echo "docker-entrypoint.sh: no command given to run as ${CONTAINER_APP_USERNAME}" >&2
  exit 1
fi

# Execute CMD as CONTAINER_APP_USERNAME user
exec gosu "${CONTAINER_APP_USERNAME}" "$@"
131 changes: 56 additions & 75 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ get_hf_env_vars() {
read -r -p "Enter your HF_HOME [default: $HOME/.cache/huggingface]:" input_hf_home
echo
input_hf_home=${input_hf_home:-"$HOME/.cache/huggingface"}
if [ ! -d "$input_hf_home" ]; then
mkdir -p "$input_hf_home" 2>/dev/null || {
echo "⛔ Failed to create HF_HOME directory. Please check permissions and try again."
echo "Entered input was HF_HOME:= ${input_hf_home}, is this correct for your system?"
exit 1
}
fi
if [ ! -d "$input_hf_home" ] || [ ! -w "$input_hf_home" ]; then
echo "⛔ HF_HOME must be a valid directory and writable by the user. Please try again."
exit 1
Expand All @@ -102,87 +109,87 @@ get_hf_env_vars() {
# Function to set environment variables based on the model selection and write them to .env
setup_model_environment() {
# Set environment variables based on the model selection
# note: MODEL_NAME is the lower cased basename of the HF repo ID
# note: MODEL_NAME is the directory name for the model weights
case "$1" in
"llama-3.3-70b-instruct")
MODEL_NAME="llama-3.3-70b-instruct"
MODEL_NAME="Llama-3.3-70B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.3-70B-Instruct"
META_MODEL_NAME=""
META_DIR_FILTER=""
REPACKED=1
;;
"llama-3.2-11b-vision-instruct")
MODEL_NAME="llama-3.2-11b-vision-instruct"
MODEL_NAME="Llama-3.2-11B-Vision-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.2-11B-Vision-Instruct"
META_MODEL_NAME=""
META_DIR_FILTER=""
REPACKED=0
;;
"llama-3.2-3b-instruct")
MODEL_NAME="llama-3.2-3b-instruct"
MODEL_NAME="Llama-3.2-3B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.2-3B-Instruct"
META_MODEL_NAME=""
META_DIR_FILTER=""
REPACKED=0
;;
"llama-3.2-1b-instruct")
MODEL_NAME="llama-3.2-1b-instruct"
MODEL_NAME="Llama-3.2-1B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.2-1B-Instruct"
META_MODEL_NAME=""
META_DIR_FILTER=""
REPACKED=0
;;
"llama-3.1-70b-instruct")
MODEL_NAME="llama-3.1-70b-instruct"
MODEL_NAME="Llama-3.1-70B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.1-70B-Instruct"
META_MODEL_NAME="Meta-Llama-3.1-70B-Instruct"
META_DIR_FILTER="llama3_1"
REPACKED=1
;;
"llama-3.1-70b")
MODEL_NAME="llama-3.1-70b"
MODEL_NAME="Llama-3.1-70B"
HF_MODEL_REPO_ID="meta-llama/Llama-3.1-70B"
META_MODEL_NAME="Meta-Llama-3.1-70B"
META_DIR_FILTER="llama3_1"
REPACKED=1
;;
"llama-3.1-8b-instruct")
MODEL_NAME="llama-3.1-8b-instruct"
MODEL_NAME="Llama-3.1-8B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3.1-8B-Instruct"
META_MODEL_NAME="Meta-Llama-3.1-8B-Instruct"
META_DIR_FILTER="llama3_1"
REPACKED=0
;;
"llama-3.1-8b")
MODEL_NAME="llama-3.1-8b"
MODEL_NAME="Llama-3.1-8B"
HF_MODEL_REPO_ID="meta-llama/Llama-3.1-8B"
META_MODEL_NAME="Meta-Llama-3.1-8B"
META_DIR_FILTER="llama3_1"
REPACKED=0
;;
"llama-3-70b-instruct")
MODEL_NAME="llama-3-70b-instruct"
MODEL_NAME="Llama-3-70B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3-70B-Instruct"
META_MODEL_NAME="Meta-Llama-3-70B-Instruct"
META_DIR_FILTER="llama3"
REPACKED=1
;;
"llama-3-70b")
MODEL_NAME="llama-3-70b"
MODEL_NAME="Llama-3-70B"
HF_MODEL_REPO_ID="meta-llama/Llama-3-70B"
META_MODEL_NAME="Meta-Llama-3-70B"
META_DIR_FILTER="llama3"
REPACKED=1
;;
"llama-3-8b-instruct")
MODEL_NAME="llama-3-8b-instruct"
MODEL_NAME="Llama-3-8B-Instruct"
HF_MODEL_REPO_ID="meta-llama/Llama-3-8B-Instruct"
META_MODEL_NAME="Meta-Llama-3-8B-Instruct"
META_DIR_FILTER="llama3"
REPACKED=0
;;
"llama-3-8b")
MODEL_NAME="llama-3-8b"
MODEL_NAME="Llama-3-8B"
HF_MODEL_REPO_ID="meta-llama/Llama-3-8B"
META_MODEL_NAME="Meta-Llama-3-8B"
META_DIR_FILTER="llama3"
Expand Down Expand Up @@ -230,7 +237,6 @@ setup_model_environment() {
echo "Using 🤗 Hugging Face Token."
get_hf_env_vars
# default location for HF e.g. ~/.cache/huggingface/models/meta-llama/Llama-3.3-70B-Instruct
# LLAMA_WEIGHTS_DIR=${HF_HOME}/local_dir/${HF_MODEL_REPO_ID}
WEIGHTS_DIR=${PERSISTENT_VOLUME}/model_weights/${MODEL_NAME}
;;
n|N )
Expand All @@ -241,8 +247,8 @@ setup_model_environment() {
# Prompt user for LLAMA_REPO if not already set or use default
read -r -p "Enter the path where you want to clone the Llama model repository [default: ${LLAMA_REPO}]: " INPUT_LLAMA_REPO
LLAMA_REPO=${INPUT_LLAMA_REPO:-$LLAMA_REPO}
LLAMA_DIR=${LLAMA_DIR:-${LLAMA_REPO}/models/${META_DIR_FILTER}}
LLAMA_WEIGHTS_DIR=${LLAMA_WEIGHTS_DIR:-${LLAMA_DIR}/${META_MODEL_NAME}}
LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-${LLAMA_REPO}/models/${META_DIR_FILTER}}
LLAMA_WEIGHTS_DIR=${LLAMA_WEIGHTS_DIR:-${LLAMA_MODELS_DIR}/${META_MODEL_NAME}}
echo # move to a new line after input
;;
* )
Expand All @@ -268,6 +274,10 @@ setup_model_environment() {
REPACKED_STR=""
fi

CONTAINER_APP_USERNAME="container_app_user"
CONTAINER_HOME="/home/${CONTAINER_APP_USERNAME}"
CACHE_ROOT="${CONTAINER_HOME}/cache_root"
MODEL_WEIGHTS_PATH="${CACHE_ROOT}/model_weights/${REPACKED_STR}$MODEL_NAME"
# Write environment variables to .env file
echo "Writing environment variables to ${ENV_FILE} ..."
cat > ${ENV_FILE} <<EOF
Expand All @@ -276,28 +286,30 @@ USE_HF_DOWNLOAD=$choice_use_hf_token
MODEL_NAME=$MODEL_NAME
META_MODEL_NAME=$META_MODEL_NAME
HF_MODEL_REPO_ID=$HF_MODEL_REPO_ID
HOST_HF_HOME=${HF_HOME:-""}
REPACKED=${REPACKED}
REPACKED_STR=${REPACKED_STR}
# model runtime variables
LLAMA_VERSION=llama3
TT_METAL_ASYNC_DEVICE_QUEUE=1
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml
SERVICE_PORT=7000
# host paths
HOST_HF_HOME=${HF_HOME:-""}
LLAMA_REPO=${LLAMA_REPO:-""}
LLAMA_DIR=${LLAMA_DIR:-""}
LLAMA_MODELS_DIR=${LLAMA_MODELS_DIR:-""}
LLAMA_WEIGHTS_DIR=${LLAMA_WEIGHTS_DIR:-""}
PERSISTENT_VOLUME_ROOT=$PERSISTENT_VOLUME_ROOT
PERSISTENT_VOLUME=$PERSISTENT_VOLUME
WEIGHTS_DIR=${WEIGHTS_DIR:-""}
# container paths
REPACKED=${REPACKED}
REPACKED_STR=${REPACKED_STR}
CACHE_ROOT=/home/user/cache_root
HF_HOME=/home/user/cache_root/huggingface
CACHE_ROOT=${CACHE_ROOT}
HF_HOME=${CACHE_ROOT}/huggingface
MODEL_WEIGHTS_ID=id_${REPACKED_STR}$MODEL_NAME
MODEL_WEIGHTS_PATH=/home/user/cache_root/model_weights/${REPACKED_STR}$MODEL_NAME
LLAMA_VERSION=llama3
TT_METAL_ASYNC_DEVICE_QUEUE=1
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml
SERVICE_PORT=7000
LLAMA3_CKPT_DIR=/home/user/cache_root/model_weights/${REPACKED_STR}$MODEL_NAME
LLAMA3_TOKENIZER_PATH=/home/user/cache_root/model_weights/${REPACKED_STR}$MODEL_NAME/tokenizer.model
LLAMA3_CACHE_PATH=/home/user/cache_root/tt_metal_cache/cache_${REPACKED_STR}$MODEL_NAME
MODEL_WEIGHTS_PATH=${MODEL_WEIGHTS_PATH}
LLAMA_DIR=${MODEL_WEIGHTS_PATH}
LLAMA3_CKPT_DIR=${MODEL_WEIGHTS_PATH}
LLAMA3_TOKENIZER_PATH=${MODEL_WEIGHTS_PATH}/tokenizer.model
LLAMA3_CACHE_PATH=${CACHE_ROOT}/tt_metal_cache/cache_${REPACKED_STR}$MODEL_NAME
# These are secrets and must be stored securely for production environments
JWT_SECRET=$JWT_SECRET
HF_TOKEN=${HF_TOKEN:-""}
Expand All @@ -318,46 +330,6 @@ load_env() {
fi
}

# SUDO PORTION: Encapsulated in a function to handle all sudo-requiring tasks
#
# Creates the group 'dockermount', adds the invoking user and the account with
# UID 1000 to it, then recursively hands ${PERSISTENT_VOLUME} to
# 1000:dockermount with mode 775.
# Globals:   USER, PERSISTENT_VOLUME (read);
#            CONTAINER_UID, CONTAINER_USER (written)
# Returns:   1 when PERSISTENT_VOLUME is empty after loading the environment
setup_permissions() {
  # Load environment variables from .env
  load_env

  # Without a target path the mkdir/chown/chmod below would operate on ""
  # and the function would still report success.
  if [ -z "${PERSISTENT_VOLUME:-}" ]; then
    echo "⛔ PERSISTENT_VOLUME is not set, cannot set file permissions." >&2
    return 1
  fi

  echo "Running sudo-required commands..."
  # Create group 'dockermount' if it doesn't exist
  if ! getent group dockermount > /dev/null 2>&1; then
    echo "Creating group 'dockermount' ..."
    sudo groupadd dockermount
  else
    echo "Group 'dockermount' already exists."
  fi

  # Add host user to 'dockermount' group
  echo "Adding user: '$USER' to 'dockermount' group ..."
  sudo usermod -aG dockermount "$USER"

  # Get container user with UID 1000 and add to group
  CONTAINER_UID=1000
  # getent exits non-zero when the UID is unknown; tolerate that so the
  # "No user found" branch below is reached even under `set -e`/pipefail.
  CONTAINER_USER=$(getent passwd "${CONTAINER_UID}" | cut -d: -f1) || true
  if [ -n "$CONTAINER_USER" ]; then
    echo "Adding container user: '$CONTAINER_USER' (UID ${CONTAINER_UID}) to 'dockermount' group ..."
    sudo usermod -aG dockermount "$CONTAINER_USER"
  else
    echo "No user found with UID ${CONTAINER_UID}."
  fi

  # Set file ownership and permissions
  echo "Setting file ownership and permissions for container and host access ..."
  if [ ! -d "${PERSISTENT_VOLUME}" ]; then
    # create the PERSISTENT_VOLUME directory if it does not exist yet
    sudo mkdir -p "${PERSISTENT_VOLUME}"
  fi
  sudo chown -R "${CONTAINER_UID}:dockermount" "${PERSISTENT_VOLUME}"
  sudo chmod -R 775 "${PERSISTENT_VOLUME}"

  echo "✅ setup_permissions completed!"
}

# Shared function for repacking weights
repack_weights() {
local source_dir="$1"
Expand Down Expand Up @@ -478,11 +450,23 @@ setup_weights_huggingface() {
# note: ls -td will sort by modification date descending, potential edge case
# if desired snapshot is not most recent modified or ls sorts differently
MOST_RECENT_SNAPSHOT=$(ls -td -- ${SNAPSHOT_DIR}/* | head -n 1)
echo "create symlink: ${MOST_RECENT_SNAPSHOT}/original/ -> ${WEIGHTS_DIR}"
for item in ${MOST_RECENT_SNAPSHOT}/original/*; do
ln -s "$item" "${WEIGHTS_DIR}"
if [ "${REPACKED}" -eq 1 ]; then
echo "create symlink to: ${item} in ${WEIGHTS_DIR}"
ln -s "$item" "${WEIGHTS_DIR}"
else
# if not repacking, need to make weights accessible in container
echo "copying ${item} to ${WEIGHTS_DIR} ..."
cp "${item}" "${WEIGHTS_DIR}"
fi
done

if [ "${HF_MODEL_REPO_ID}" == "meta-llama/Llama-3.2-11B-Vision-Instruct" ]; then
# tt-metal impl expects models with naming: consolidated.xx.pth
# this convention is followed in all models except Llama-3.2-11B-Vision-Instruct
mv "${WEIGHTS_DIR}/consolidated.pth" "${WEIGHTS_DIR}/consolidated.00.pth"
fi

# Step 6: Process and copy weights
if [ "${REPACKED}" -eq 1 ]; then
REPACKED_WEIGHTS_DIR="${PERSISTENT_VOLUME}/model_weights/${REPACKED_STR}${MODEL_NAME}"
Expand Down Expand Up @@ -557,6 +541,3 @@ fi
MODEL_TYPE=$1
setup_model_environment "$MODEL_TYPE"
setup_weights
# Call the script again with sudo to execute the sudo-required commands
echo "Switching to sudo portion to set file permissions and complete setup."
setup_permissions
Loading

0 comments on commit 02d3b16

Please sign in to comment.