From d25e0c3da700dcdba78befe030e12dc46bec1d83 Mon Sep 17 00:00:00 2001
From: Brian Zhao
Date: Thu, 30 Jan 2025 15:45:43 -0500
Subject: [PATCH] async parallelize apply_operator_ functions by splitting the sparsestate

---
Review notes (not part of the commit message):
- std::thread::hardware_concurrency() may return 0. The submitted code then
  split the state into zero chunks and returned an empty state. The operator
  is now applied serially to the whole state in that case.
- split_state() no longer creates empty chunks (and idle tasks) when asked
  for more chunks than the state has elements.
- apply_operator_lin/apply_operator_antiherm share one helper instead of two
  copies of the same body.
- The include hunk is anchored on the blank line and the two "helpers/"
  includes only. The index line still carries the blob ids of the submission.

 forte/sparse_ci/sparse_state_functions.cc | 71 +++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 2 deletions(-)

diff --git a/forte/sparse_ci/sparse_state_functions.cc b/forte/sparse_ci/sparse_state_functions.cc
index a36e96c84..2d8caf361 100644
--- a/forte/sparse_ci/sparse_state_functions.cc
+++ b/forte/sparse_ci/sparse_state_functions.cc
@@ -32,3 +32,5 @@
+#include <future>
+#include <thread>
 
 #include "helpers/helpers.h"
 #include "helpers/timer.h"
@@ -39,6 +41,9 @@
 
 namespace forte {
 
+// Split a state into at most num_chunks pieces (defined after the apply_operator_ functions)
+std::vector<SparseState> split_state(const SparseState& state, size_t num_chunks);
+
 // This is a naive implementation of the operator application that is used for testing
 SparseState apply_operator_impl_naive(bool is_antihermitian, const SparseOperator& sop,
                                       const SparseState& state, double screen_thresh);
@@ -53,12 +58,74 @@ SparseState apply_operator_impl_grouped_string(bool is_antihermitian, const Spar
 
+// Apply the operator in parallel: split the state into chunks, apply the operator to each chunk
+// in its own std::async task, and sum the partial results. This assumes the operator application
+// distributes over a sum of states (NOTE(review): screening is applied per chunk; confirm that
+// this is acceptable for screen_thresh > 0).
+static SparseState apply_operator_parallel(bool is_antihermitian, const SparseOperator& sop,
+                                           const SparseState& state, double screen_thresh) {
+    // hardware_concurrency() is only a hint and returns 0 when it cannot be determined. In that
+    // case, or when there is nothing to split, apply the operator serially to the whole state
+    const size_t num_threads = std::thread::hardware_concurrency();
+    if (num_threads <= 1 || state.size() <= 1) {
+        return apply_operator_impl_grouped_string(is_antihermitian, sop, state, screen_thresh);
+    }
+
+    // chunks is declared before futures, so it outlives every task that references a chunk
+    const auto chunks = split_state(state, num_threads);
+    std::vector<std::future<SparseState>> futures;
+    futures.reserve(chunks.size());
+    for (const auto& chunk : chunks) {
+        futures.emplace_back(
+            std::async(std::launch::async, [is_antihermitian, &sop, &chunk, screen_thresh]() {
+                return apply_operator_impl_grouped_string(is_antihermitian, sop, chunk,
+                                                          screen_thresh);
+            }));
+    }
+
+    SparseState result;
+    for (auto& future : futures) {
+        result += future.get();
+    }
+    return result;
+}
+
 SparseState apply_operator_lin(const SparseOperator& sop, const SparseState& state,
                                double screen_thresh) {
-    return apply_operator_impl_grouped_string(false, sop, state, screen_thresh);
+    return apply_operator_parallel(false, sop, state, screen_thresh);
 }
 
 SparseState apply_operator_antiherm(const SparseOperator& sop, const SparseState& state,
                                     double screen_thresh) {
-    return apply_operator_impl_grouped_string(true, sop, state, screen_thresh);
+    return apply_operator_parallel(true, sop, state, screen_thresh);
 }
+
+// Split a state into at most num_chunks pieces of near-equal size (sizes differ by at most one).
+// Elements are assigned in the iteration order of state.elements(). Returns an empty vector if
+// num_chunks is zero or the state is empty.
+std::vector<SparseState> split_state(const SparseState& state, size_t num_chunks) {
+    const size_t total_elements = state.size();
+    if (num_chunks == 0 || total_elements == 0) {
+        return {};
+    }
+    // never make more chunks than elements, so that no chunk (and no task) is empty
+    if (num_chunks > total_elements) {
+        num_chunks = total_elements;
+    }
+    const size_t chunk_size = total_elements / num_chunks;
+    const size_t remainder = total_elements % num_chunks;
+
+    std::vector<SparseState> chunks;
+    chunks.reserve(num_chunks);
+    auto it = state.elements().begin();
+    for (size_t chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
+        // the first `remainder` chunks take one extra element
+        const size_t this_chunk_size = chunk_size + (chunk_idx < remainder ? 1 : 0);
+        SparseState chunk;
+        for (size_t j = 0; j < this_chunk_size; ++j, ++it) {
+            chunk.insert(it->first, it->second);
+        }
+        chunks.emplace_back(std::move(chunk));
+    }
+    return chunks;
+}
 
 // This is a naive implementation of the operator application that is used for testing