[CI] Adjust how the tests are run in Slurm (#600)
* Adjust how the tests are run in CI

* Fix schedule/auto_parallelize and adapt its test to A100

* Remove pybind11-stubgen from the CI environment because it conflicts with pyproject unless --no-build-isolation is used
roastduck authored Mar 14, 2024 · 1 parent: 2e285e9 · commit: 87bfe21
Showing 6 changed files with 40 additions and 13 deletions.
34 changes: 28 additions & 6 deletions .github/workflows/main.yml
@@ -37,9 +37,7 @@ jobs:
           source /opt/spack/share/spack/setup-env.sh
           spack load [email protected]%[email protected] [email protected]/ehz25ml [email protected]/uopt2y4 [email protected] java@11 [email protected]
           source ci-script/prepare-python-environment.sh
-          # Set OMP_PROC_BIND to make OpenMP happy for 30.schedule/test_auto_fission_fuse.py::test_tune_fission
-          # Setting OMP_NUM_THREADS=256 seems to work around the conflict of PyTorch
-          OMP_NUM_THREADS=256 OMP_PROC_BIND=true srun --exclusive -N 1 -p ja --gres=gpu:v100:1 pytest --color=yes test
+          srun -N 1 -c 64 -p octave --gres=gpu:a100:1 pytest --color=yes -m "not performance_sensitive" test
   build-and-test-gcc-minimal-run_in_tree:
     runs-on: self-hosted
     if: github.event.pull_request.draft == false
@@ -65,8 +63,7 @@ jobs:
           source /opt/spack/share/spack/setup-env.sh
           spack load [email protected]%[email protected] java@11 [email protected]
           source ci-script/prepare-python-environment.sh
-          # Set OMP_PROC_BIND to make OpenMP happy for 30.schedule/test_auto_fission_fuse.py::test_tune_fission
-          OMP_PROC_BIND=true PYTHONPATH=build:python:$PYTHONPATH srun --exclusive -N 1 -p ja pytest --color=yes test
+          PYTHONPATH=build:python:$PYTHONPATH srun -N 1 -c 64 -p ja pytest --color=yes -m "not performance_sensitive" test
   build-and-test-clang-run-in-tree:
     runs-on: self-hosted
     if: github.event.pull_request.draft == false
@@ -92,5 +89,30 @@ jobs:
           source /opt/spack/share/spack/setup-env.sh
           spack load [email protected]%[email protected] java@11 llvm@16%gcc@12
           source ci-script/prepare-python-environment.sh
-          # Set OMP_PROC_BIND to make OpenMP happy for 30.schedule/test_auto_fission_fuse.py::test_tune_fission
-          OMP_PROC_BIND=true PYTHONPATH=build:python:$PYTHONPATH srun --exclusive -N 1 -p ja pytest --color=yes test
+          PYTHONPATH=build:python:$PYTHONPATH srun -N 1 -c 64 -p ja pytest --color=yes -m "not performance_sensitive" test
+  build-and-test-gcc-cuda-mkl-exclusively:
+    runs-on: self-hosted
+    if: github.event.pull_request.draft == false
+    steps:
+      - uses: roastduck/checkout@main
+        with:
+          ssh-key: ${{ secrets.CI }}
+          submodules: true
+          fetch-depth: 0
+      - name: Build ffi module in Release
+        run: |
+          git submodule foreach --recursive git clean -ffdx
+          git submodule foreach --recursive git reset --hard
+          source /opt/spack/share/spack/setup-env.sh
+          spack load [email protected]%[email protected] [email protected]/ehz25ml [email protected]/uopt2y4 [email protected] java@11 [email protected]
+          source ci-script/prepare-python-environment.sh
+          # -C requires a new enough pip
+          pip3 install --upgrade pip
+          pip3 install . -C--local=with-cuda.toml -C--local=ci-script/with-spack-mkl.toml
+      - name: Run PyTest
+        run: |
+          source /opt/spack/share/spack/setup-env.sh
+          spack load [email protected]%[email protected] [email protected]/ehz25ml [email protected]/uopt2y4 [email protected] java@11 [email protected]
+          source ci-script/prepare-python-environment.sh
+          # Set OMP_PROC_BIND to make OpenMP happy for 30.schedule/test_auto_fission_fuse.py::test_tune_fission
+          OMP_PROC_BIND=true srun --exclusive=user -N 1 -c 256 -p ja --gres=gpu:v100:1 pytest --color=yes -m "performance_sensitive" test
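A note on this workflow change (my reading of the diff, not part of the commit message): the ordinary jobs now run on shared Slurm allocations (`-c 64`, no `--exclusive`) and skip tests marked `performance_sensitive`, while the new `build-and-test-gcc-cuda-mkl-exclusively` job runs only those tests under `srun --exclusive=user`, which in Slurm shares the node with at most the same user's jobs. The `-C` options passed to `pip3 install` are pip's shorthand for `--config-settings` (hence the preceding pip upgrade), and the `--local=...toml` values appear to point the py-build-cmake backend at extra config files enabling CUDA and Spack-provided MKL.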
3 changes: 3 additions & 0 deletions pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+markers =
+    performance_sensitive: These tests should be run on an exclusively dedicated node
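For context, the new marker plugs into pytest's `-m` selection: tests decorated with it can be selected or deselected by a marker expression, and registering it here avoids pytest's unknown-marker warning (and an error under --strict-markers). A minimal sketch with hypothetical test names, not from this repository:

    import time

    import pytest


    @pytest.mark.performance_sensitive
    def test_autotune_latency():
        # Timing-sensitive: only meaningful on a quiet, dedicated node.
        start = time.perf_counter()
        sum(range(1_000_000))
        assert time.perf_counter() - start < 1.0


    def test_correctness():
        # Ordinary functional test: safe to run on a shared node.
        assert sum(range(4)) == 6

Running `pytest -m "not performance_sensitive"` collects only the second test, while `pytest -m "performance_sensitive"` collects only the first — the same split the workflow above relies on.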
1 change: 0 additions & 1 deletion requirements.txt
@@ -25,7 +25,6 @@ numpy==1.24.3
 packaging==23.1
 pluggy==1.0.0
 py_build_cmake==0.1.8
-pybind11-stubgen==0.13.0
 Pygments==2.15.1
 pymdown-extensions==10.0
 pytest==7.3.1
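This pin is the conflict called out in the commit message: pip builds the project in an isolated environment resolved from pyproject.toml, so a pybind11-stubgen version pinned here can disagree with the one the build resolves unless `pip install --no-build-isolation` is used. (My paraphrase; the exact conflict is not shown in this diff.)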
2 changes: 1 addition & 1 deletion src/schedule/auto_parallelize.cc
@@ -570,7 +570,7 @@ void Schedule::autoParallelize(const Ref<Target> &target) {
     });

     // III b. Reduction
-    if (!needParRed) {
+    if (localParaAll.size() == localParaNoRed.size() || !needParRed) {
         commitTransaction();
     } else {
         abortTransaction();
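As I read the new condition, the transaction is now also committed when the reduction-aware pass parallelized nothing beyond what the reduction-free pass already covered (`localParaAll.size() == localParaNoRed.size()`), rather than aborting and discarding those schedules; previously it committed only when parallelizing reductions was not requested (`!needParRed`).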
3 changes: 3 additions & 0 deletions test/31.auto_schedule/test_auto_fission_fuse.py
@@ -90,6 +90,7 @@ def test_stmt_in_between_2():
     assert logs == ["swap(L2, S1)", "fuse(L1, L2, true)"]


+@pytest.mark.performance_sensitive
 def test_tune_fuse():
     # We may fuse these loops. But fusing them will make it impossible to parallelize.
     # After tuning, we will end up in not fusing them
@@ -133,6 +134,7 @@ def test_tune_fuse():
     assert "fuse" not in log


+@pytest.mark.performance_sensitive
 def test_tune_fission():
     # The reverse schedule of `test_tune_fuse`

@@ -178,6 +180,7 @@ def test_tune_fission():
     assert "fission" in ", ".join(logs)


+@pytest.mark.performance_sensitive
 @pytest.mark.skipif(not ft.with_cuda(), reason="requires CUDA")
 def test_tune_with_cond():
     # Fuse loops that can parallelize. Don't fuse loops that can't
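These three tests are the ones gated behind the new marker; each runs the autotuner, which measures real execution time to choose a schedule, so results on a busy shared node would be noisy — hence the exclusively dedicated node described in pytest.ini.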
10 changes: 5 additions & 5 deletions test/31.auto_schedule/test_auto_parallelize.py
@@ -51,10 +51,10 @@ def test_3_levels():

 @pytest.mark.skipif(not ft.with_cuda(), reason="requires CUDA")
 def test_gpu_basic_static_small():
-    with ft.VarDef([("x", (10, 10, 2), "int32", "input", "cpu"),
-                    ("y", (10, 10, 2), "int32", "output", "cpu")]) as (x, y):
-        with ft.For("i", 0, 10, label="Li") as i:
-            with ft.For("j", 0, 10, label="Lj") as j:
+    with ft.VarDef([("x", (20, 20, 2), "int32", "input", "cpu"),
+                    ("y", (20, 20, 2), "int32", "output", "cpu")]) as (x, y):
+        with ft.For("i", 0, 20, label="Li") as i:
+            with ft.For("j", 0, 20, label="Lj") as j:
                 y[i, j, 0] = x[i, j, 0] + 1

     device = ft.GPU()
@@ -68,7 +68,7 @@ def test_gpu_basic_static_small():
     logs = list(map(str, s.logs()))
     print(logs)
     assert fnmatch_list(logs, [
-        f'split(Lj, -1, {num_sm // 10}, 0)', 'merge(Li, $split.0{Lj})',
+        f'split(Lj, -1, {num_sm // 20}, 0)', 'merge(Li, $split.0{Lj})',
         'parallelize($merge{Li, $split.0{Lj}}, blockIdx.x, *)',
         'parallelize($split.1{Lj}, threadIdx.y, *)'
     ])
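The doubled loop sizes track the commit message's "adapt its test to A100": the expected split factor becomes `num_sm // 20` instead of `num_sm // 10`, so the schedule the test asserts still divides sensibly on the new runner's SM count. (An A100 has 108 SMs versus 80 on a V100 — those counts are my addition, not stated in the diff.)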