diff --git a/.github/workflows/README.rst b/.github/workflows/README.rst
index 8d98d0981b..89e4b31d67 100644
--- a/.github/workflows/README.rst
+++ b/.github/workflows/README.rst
@@ -9,24 +9,31 @@ DevCloud Runners
 ================
 
-We have 2 self-hosted runners on devcloud. We run in tmux so it will
-continue to run after disconnecting. ``ssh`` to DevCloud::
+We have 2 self-hosted runners on devcloud. We run them in an emacs
+daemon so they continue to run after disconnecting. ``ssh`` to
+DevCloud and start an emacs daemon::
+
+  emacs --daemon
+
+Inside an emacs shell::
 
-  tmux
   cd github/runner-1
   ./run.sh
 
-For the second runner, split the window with ``control-b "`` and start
-the runner::
+Rename the shell buffer to ``devcloud-1``, then start a second emacs
+shell named ``devcloud-2`` for the other runner::
 
   cd github/runner-2
   ./run.sh
 
-To check on a runner ``ssh`` to DevCloud and do::
+If you disconnect, emacs and the runners will continue to run. To
+check on a runner, ``ssh`` to DevCloud and do::
 
-  tmux ls
+  emacsclient -nw
 
-To see the sessions. Usually there is just one name ``0``. Attach to
-the session::
+And visit the ``devcloud-1`` and ``devcloud-2`` buffers.
 
-  tmux attach -t 0
+``tmux`` is an alternative. I do not use it because devcloud allows
+only 4 logins via ``ssh``. Using ``tmux`` with 2 runners consumes 3
+of them even while detached, so you can end up unable to log in
+because ``tmux`` is holding all the logins.
 
diff --git a/include/dr/mhp/algorithms/md_for_each.hpp b/include/dr/mhp/algorithms/md_for_each.hpp
index 6928ab62f2..ccd98d52c2 100644
--- a/include/dr/mhp/algorithms/md_for_each.hpp
+++ b/include/dr/mhp/algorithms/md_for_each.hpp
@@ -142,8 +142,15 @@ void for_each(F op, is_mdspan_view auto &&...drs) {
   auto invoke_index = [=](auto index) {
     // Transform mdspans into references
     auto references = detail::tie_transform(
-        operand_mdspans, [index](auto mdspan) -> decltype(auto) {
-          return mdspan(index[0], index[1]);
+        operand_mdspans, [mdspan0, index](auto mdspan) -> decltype(auto) {
+          static_assert(1 <= mdspan0.rank() && mdspan0.rank() <= 3);
+          if constexpr (mdspan0.rank() == 1) {
+            return mdspan(index[0]);
+          } else if constexpr (mdspan0.rank() == 2) {
+            return mdspan(index[0], index[1]);
+          } else if constexpr (mdspan0.rank() == 3) {
+            return mdspan(index[0], index[1], index[2]);
+          }
         });
     static_assert(
         std::invocable<F, decltype(references)> ||
@@ -160,13 +167,20 @@ void for_each(F op, is_mdspan_view auto &&...drs) {
     }
   };
 
-  // TODO: Extend sycl_utils.hpp to handle ranges > 1D. It uses
-  // ndrange and handles > 32 bits.
-
-  dr::__detail::parallel_for(
-      mhp::sycl_queue(),
-      sycl::range(mdspan0.extent(0), mdspan0.extent(1)), invoke_index)
-      .wait();
+  if constexpr (mdspan0.rank() == 1) {
+    auto range = sycl::range(mdspan0.extent(0));
+    dr::__detail::parallel_for(mhp::sycl_queue(), range, invoke_index)
+        .wait();
+  } else if constexpr (mdspan0.rank() == 2) {
+    auto range = sycl::range(mdspan0.extent(0), mdspan0.extent(1));
+    dr::__detail::parallel_for(mhp::sycl_queue(), range, invoke_index)
+        .wait();
+  } else if constexpr (mdspan0.rank() == 3) {
+    auto range = sycl::range(mdspan0.extent(0), mdspan0.extent(1),
+                             mdspan0.extent(2));
+    dr::__detail::parallel_for(mhp::sycl_queue(), range, invoke_index)
+        .wait();
+  }
 #else
   assert(false);
 #endif
diff --git a/scripts/devcloud-test.sh b/scripts/devcloud-test.sh
index f898dfeabd..87898f39ef 100755
--- a/scripts/devcloud-test.sh
+++ b/scripts/devcloud-test.sh
@@ -26,9 +26,10 @@ echo "::group::SHP GPU Test"
 ONEAPI_DEVICE_SELECTOR=level_zero:0 time ctest --test-dir build -L SHP
 echo "::endgroup::"
 
-echo "::group::SHP CPU Test"
-ONEAPI_DEVICE_SELECTOR=opencl:cpu time ctest --test-dir build -L SHP
-echo "::endgroup::"
+# disabled: very slow or fails when a cryptominer is running on devcloud
+#echo "::group::SHP CPU Test"
+#ONEAPI_DEVICE_SELECTOR=opencl:cpu time ctest --test-dir build -L SHP
+#echo "::endgroup::"
 
 echo "::group::MHP GPU Test"
 ONEAPI_DEVICE_SELECTOR=level_zero:* time ctest --test-dir build -L MHP
diff --git a/test/gtest/mhp/mdstar.cpp b/test/gtest/mhp/mdstar.cpp
index b9dc948469..1d47b0e824 100644
--- a/test/gtest/mhp/mdstar.cpp
+++ b/test/gtest/mhp/mdstar.cpp
@@ -261,6 +261,46 @@ TEST_F(Mdarray, Enumerate) {
   static_assert(dr::distributed_range);
 }
 
+TEST_F(Mdarray, Slabs) {
+  // local_mdspan is not accessible for device memory
+  if (options.count("device-memory")) {
+    return;
+  }
+
+  // leading dimension decomp of 3d array creates slabs
+  xhp::distributed_mdarray<T, 3> mdarray(extents3d);
+  for (auto slab : dr::mhp::local_mdspans(mdarray)) {
+    for (std::size_t i = 0; i < slab.extent(0); i++) {
+      for (std::size_t j = 0; j < slab.extent(1); j++) {
+        for (std::size_t k = 0; k < slab.extent(2); k++) {
+          slab(i, j, k) = 1;
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(mdarray.mdspan()(0, 0, 0), 1);
+  EXPECT_EQ(
+      mdarray.mdspan()(extents3d[0] - 1, extents3d[1] - 1, extents3d[2] - 1),
+      1);
+}
+
+TEST_F(Mdarray, MdForEach3d) {
+  // leading dimension decomp of 3d array creates slabs
+  xhp::distributed_mdarray<T, 3> mdarray(extents3d);
+  std::vector<T> local(extents3d[0] * extents3d[1] * extents3d[2], 0);
+  rng::iota(local, 0);
+
+  auto set = [d1 = extents3d[1], d2 = extents3d[2]](auto index, auto v) {
+    auto &[o] = v;
+    o = index[0] * d1 * d2 + index[1] * d2 + index[2];
+  };
+  dr::mhp::for_each(set, mdarray);
+
+  EXPECT_EQ(xhp::views::take(mdarray.view(), local.size()), local)
+      << mdrange_message(mdarray);
+}
+
 using Submdspan = Mdspan;
 
 TEST_F(Submdspan, StaticAssert) {
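A side note on the rank dispatch added to md_for_each.hpp: the explicit rank-1/2/3 branches in the indexing lambda could, in principle, be collapsed with an index_sequence pack expansion. The sketch below is illustration only and is not part of this change or of the dr:: sources; `subscript`, `subscript_impl`, and `FakeMdspan3` are made-up names, and the mdspan is faked with a plain callable so the example is self-contained.

// Hypothetical sketch: expand idx[0], ..., idx[Rank-1] into one call
// instead of writing one branch per rank.
#include <array>
#include <cassert>
#include <cstddef>
#include <utility>

template <typename Mdspan, std::size_t Rank, std::size_t... Is>
decltype(auto) subscript_impl(Mdspan &&m,
                              const std::array<std::size_t, Rank> &idx,
                              std::index_sequence<Is...>) {
  return m(idx[Is]...); // m(idx[0], idx[1], ..., idx[Rank - 1])
}

template <typename Mdspan, std::size_t Rank>
decltype(auto) subscript(Mdspan &&m,
                         const std::array<std::size_t, Rank> &idx) {
  return subscript_impl(std::forward<Mdspan>(m), idx,
                        std::make_index_sequence<Rank>{});
}

// Stand-in for an mdspan: any callable taking one index per dimension.
struct FakeMdspan3 {
  int operator()(std::size_t i, std::size_t j, std::size_t k) const {
    return static_cast<int>(i * 100 + j * 10 + k);
  }
};

int main() {
  std::array<std::size_t, 3> idx = {1, 2, 3};
  assert(subscript(FakeMdspan3{}, idx) == 123); // same as FakeMdspan3{}(1, 2, 3)
  return 0;
}

The explicit branches used in the diff are arguably the simpler choice here: sycl::range only exists for 1 to 3 dimensions, so a fully generic expansion would not extend the supported ranks, and the constexpr chain keeps the SYCL launch code easy to read.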