Skip to content

Commit

Permalink
Do a more significant refactor of parallel_for
Browse files (browse the repository at this point in the history)
  • Loading branch information
ckormanyos committed Feb 26, 2023
1 parent 5bcac5f commit 9b517e5
Showing 1 changed file with 47 additions and 43 deletions.
90 changes: 47 additions & 43 deletions concurrency/parallel_for.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,7 @@
callable_function_type parallel_function) -> void
{
// Estimate the number of threads available.
static const auto number_of_threads_hint =
const auto number_of_threads_hint =
static_cast<signed>
(
std::thread::hardware_concurrency()
Expand All @@ -37,60 +37,64 @@
)
);

// Set the size of a slice for the range functions.
const auto n =
static_cast<index_type>
(
static_cast<index_type>(end - start) + static_cast<index_type>(1)
);

const auto slice =
(std::max)
(
static_cast<index_type>(std::round(static_cast<float>(n) / static_cast<float>(number_of_threads))),
static_cast<index_type>(1)
);

// Inner loop.
const auto launch_range =
[&parallel_function](index_type index_lo, index_type index_hi)
{
for(auto i = index_lo; i < index_hi; ++i) // NOLINT(altera-id-dependent-backward-branch)
{
parallel_function(i);
}
};
using thread_vector_type = std::vector<std::thread>;

// Create the thread pool and launch the jobs.
std::vector<std::thread> pool { };

pool.reserve(number_of_threads);
thread_vector_type pool { };

auto i1 = start;
pool.reserve(static_cast<typename thread_vector_type::size_type>(number_of_threads));

{
auto i2 = (std::min)(static_cast<index_type>(start + slice), end);
// Inner loop.
const auto launch_range =
[&parallel_function](index_type index_lo, index_type index_hi)
{
for(auto i = index_lo; i < index_hi; ++i) // NOLINT(altera-id-dependent-backward-branch)
{
parallel_function(i);
}
};

auto i1 = start;

for(auto i = static_cast<index_type>(0U);
static_cast<index_type>(i + static_cast<index_type>(INT8_C(1))) < static_cast<index_type>(number_of_threads);
++i)
{
pool.emplace_back(launch_range, i1, i2);
// Set the size of a slice for the range functions.
const auto n =
static_cast<index_type>
(
static_cast<index_type>(end - start) + static_cast<index_type>(1)
);

const auto slice =
(std::max)
(
static_cast<index_type>(std::round(static_cast<float>(n) / static_cast<float>(number_of_threads))),
static_cast<index_type>(1)
);

auto i2 = (std::min)(static_cast<index_type>(start + slice), end);

for(auto i = static_cast<index_type>(0U);
static_cast<index_type>(i + static_cast<index_type>(INT8_C(1))) < static_cast<index_type>(number_of_threads);
++i)
{
pool.emplace_back(launch_range, i1, i2);

i1 = i2;
i1 = i2;

if(i1 >= end)
{
break;
}
if(i1 >= end)
{
break;
}

i2 = (std::min)(static_cast<index_type>(i2 + slice), end);
i2 = (std::min)(static_cast<index_type>(i2 + slice), end);
}
}
}

if(i1 < end)
{
pool.emplace_back(launch_range, i1, end);
if(i1 < end)
{
pool.emplace_back(launch_range, i1, end);
}
}

// Wait for the jobs to finish.
Expand Down

0 comments on commit 9b517e5

Please sign in to comment.