Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clone of D68511145 #2697

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 42 additions & 24 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@


export PYTORCH_CUDA_PKG=""
export CONDA_ENV="build_binary"

conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"

conda run -n build_binary python --version

Expand Down Expand Up @@ -49,41 +50,58 @@ elif [[ ${MATRIX_CHANNEL} = 'release' ]]; then
export PYTORCH_URL="https://download.pytorch.org/whl/${CUDA_VERSION}"
fi


echo "CU_VERSION: ${CUDA_VERSION}"
echo "MATRIX_CHANNEL: ${MATRIX_CHANNEL}"
echo "CONDA_ENV: ${CONDA_ENV}"

# shellcheck disable=SC2155
export CONDA_PREFIX=$(conda run -n "${CONDA_ENV}" printenv CONDA_PREFIX)


# Set LD_LIBRARY_PATH to fix the runtime error with fbgemm_gpu not
# being able to locate libnvrtc.so
# NOTE: The order of the entries in LD_LIBRARY_PATH matters
echo "[NOVA] Setting LD_LIBRARY_PATH ..."
conda env config vars set -n ${CONDA_ENV} \
LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:/usr/local/lib:/usr/lib64:${LD_LIBRARY_PATH}"


# install pytorch
# switch back to conda once torch nightly is fixed
# if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
# export PYTORCH_CUDA_PKG="pytorch-cuda=${MATRIX_GPU_ARCH_VERSION}"
# fi
conda run -n build_binary pip install torch --index-url "$PYTORCH_URL"
conda run -n "${CONDA_ENV}" pip install torch --index-url "$PYTORCH_URL"

# install fbgemm
conda run -n build_binary pip install fbgemm-gpu --index-url "$PYTORCH_URL"
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu --index-url "$PYTORCH_URL"

# install requirements from pypi
conda run -n build_binary pip install torchmetrics==1.0.3
conda run -n "${CONDA_ENV}" pip install torchmetrics==1.0.3

# install torchrec
conda run -n build_binary pip install torchrec --index-url "$PYTORCH_URL"
conda run -n "${CONDA_ENV}" pip install torchrec --index-url "$PYTORCH_URL"

# Run small import test
conda run -n build_binary python -c "import torch; import fbgemm_gpu; import torchrec"
conda run -n "${CONDA_ENV}" python -c "import torch; import fbgemm_gpu; import torchrec"

# check directory
ls -R

# check if cuda available
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"

# check cuda version
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"

# Finally run smoke test
# python 3.11 needs torchx-nightly
conda run -n build_binary pip install torchx-nightly iopath
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath
if [[ ${MATRIX_GPU_ARCH_TYPE} = 'cuda' ]]; then
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
else
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --script test_installation.py -- --cpu_only
fi


Expand All @@ -93,31 +111,31 @@ if [[ ${MATRIX_CHANNEL} != 'release' ]]; then
exit 0
else
# Check version matches only for release binaries
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)

if [ "$torchrec_version" != "$fbgemm_version" ]; then
echo "Error: TorchRec package version does not match FBGEMM package version"
exit 1
fi
fi

conda create -y -n build_binary python="${MATRIX_PYTHON_VERSION}"
conda create -y -n "${CONDA_ENV}" python="${MATRIX_PYTHON_VERSION}"

conda run -n build_binary python --version
conda run -n "${CONDA_ENV}" python --version

if [[ ${MATRIX_GPU_ARCH_VERSION} != '12.4' ]]; then
exit 0
fi

echo "checking pypi release"
conda run -n build_binary pip install torch
conda run -n build_binary pip install fbgemm-gpu
conda run -n build_binary pip install torchrec
conda run -n "${CONDA_ENV}" pip install torch
conda run -n "${CONDA_ENV}" pip install fbgemm-gpu
conda run -n "${CONDA_ENV}" pip install torchrec

# Check version matching again for PyPI
torchrec_version=$(conda run -n build_binary pip show torchrec | grep Version | cut -d' ' -f2)
fbgemm_version=$(conda run -n build_binary pip show fbgemm_gpu | grep Version | cut -d' ' -f2)
torchrec_version=$(conda run -n "${CONDA_ENV}" pip show torchrec | grep Version | cut -d' ' -f2)
fbgemm_version=$(conda run -n "${CONDA_ENV}" pip show fbgemm_gpu | grep Version | cut -d' ' -f2)

if [ "$torchrec_version" != "$fbgemm_version" ]; then
echo "Error: TorchRec package version does not match FBGEMM package version"
Expand All @@ -128,13 +146,13 @@ fi
ls -R

# check if cuda available
conda run -n build_binary python -c "import torch; print(torch.cuda.is_available())"
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.cuda.is_available())"

# check cuda version
conda run -n build_binary python -c "import torch; print(torch.version.cuda)"
conda run -n "${CONDA_ENV}" python -c "import torch; print(torch.version.cuda)"

# python 3.11 needs torchx-nightly
conda run -n build_binary pip install torchx-nightly iopath
conda run -n "${CONDA_ENV}" pip install torchx-nightly iopath

# Finally run smoke test
conda run -n build_binary torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
conda run -n "${CONDA_ENV}" torchx run -s local_cwd dist.ddp -j 1 --gpu 2 --script test_installation.py
Loading