diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index fa168bea5..a9cc811ce 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -24,66 +24,117 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint8_t *array; + uint32_t *array; size_t arrSize; - size_t logicalSize; - static constexpr size_t arraySize(size_t size) + inline static size_t arraySize(size_t size) { - return (size >> 3) + ((size & 7) > 0); + return (size >> 5) + ((size & 31) > 0); } - static constexpr size_t index(size_t n) + inline static size_t index(size_t n) { - return (n >> 3); + return (n >> 5); + } + + inline static uint32_t getSubindex(size_t n, uint32_t d) + { + return d & uint32_t(uint32_t(0x01) << (n % 32)); + } + + inline void setFalseSubindex(size_t n, uint32_t &d) + { + d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); } public: - explicit BitArray(size_t size) : logicalSize(size) + explicit BitArray(size_t size) : arrSize(size) { - arrSize = (size + 1) / 2; // Only store bits for odd numbers - array = new uint8_t[arraySize(arrSize)]; - // Bits are left at zero default, so no need to initialize them - // std::memset(array, 0x00, arraySize(arrSize)); + array = new uint32_t[arraySize(size)]; + std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0)); } - ~BitArray() { delete[] array; } + ~BitArray() {delete [] array;} - constexpr bool get(size_t n) const + bool get(size_t n) const { - if (n % 2 == 0) - return false; // Even numbers > 2 are not prime - n = n / 2; // Map the actual number to the index in the array - return !(array[index(n)] & (uint8_t(1) << (n % 8))); + return getSubindex(n, array[index(n)]); } - void set(size_t n) + static constexpr uint32_t rol(uint32_t x, uint32_t n) { - n = n / 2; // Map the actual number to the index in the array - array[index(n)] |= (uint8_t(1) << (n % 8)); + return (x<>(32-n)); } - constexpr size_t size() 
const + static constexpr uint32_t buildSkipMask(size_t skip, size_t offset) { - return logicalSize; + uint32_t mask = 0; + for (size_t i = offset; i < 32; i += skip) { + mask |= (1u << i); + } + return ~mask; + } + + void setFlagsFalse(size_t n, size_t skip) + { + if (skip <= 12) { + // For small skips, use pre-built mask approach + size_t word_idx = index(n); + size_t bit_pos = n % 32; + size_t curr_n = n; + + while (curr_n < arrSize) + { + // Build mask for current word starting at bit_pos + uint32_t mask = buildSkipMask(skip, bit_pos); + + // Apply mask to current word + array[word_idx] &= mask; + + // Move to next word + size_t bits_remaining = 32 - bit_pos; + curr_n += ((bits_remaining + skip - 1) / skip) * skip; + + if (curr_n >= arrSize) break; + + word_idx = index(curr_n); + bit_pos = curr_n % 32; + } + } + else + { + // Original implementation for larger skips + auto rolling_mask = ~uint32_t(1 << (n % 32)); + auto roll_bits = skip % 32; + while (n < arrSize) { + array[index(n)] &= rolling_mask; + n += skip; + rolling_mask = rol(rolling_mask, roll_bits); + } + } + } + + inline size_t size() const + { + return arrSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) -// and includes the code needed to eliminate non-primes from its array by calling runSieve. +// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) +// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. 
class prime_sieve { private: - BitArray Bits; // Sieve data, where 0==prime, 1==not + BitArray Bits; // Sieve data, where 1==prime, 0==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default + prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) { } @@ -103,21 +154,15 @@ class prime_sieve while (factor <= q) { - // Find the next prime number - for (; factor <= q; factor += 2) + for (uint64_t num = factor; num < Bits.size(); num += 2) { - if (Bits.get(factor)) + if (Bits.get(num)) { + factor = num; break; } } - - // Mark multiples of the prime number as not prime - uint64_t start = factor * factor; - for (uint64_t num = start; num <= Bits.size(); num += factor * 2) - { - Bits.set(num); - } + Bits.setFlagsFalse(factor * factor, factor + factor); factor += 2; } @@ -129,9 +174,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - if (Bits.get(num)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (int i = 3; i < Bits.size(); i+=2) + if (Bits.get(i)) count++; return count; } @@ -142,24 +187,23 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n == 2) - return true; - if (n < 2 || n % 2 == 0) + if (n & 1) + return Bits.get(n); + else return false; - return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. 
bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -183,8 +227,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num += 2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num+=2) { if (Bits.get(num)) { @@ -203,7 +247,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "Valid : " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -310,7 +354,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -345,8 +389,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::vector threads(cThreads); - std::vector l_passes(cThreads); + std::thread threads[cThreads]; + uint64_t l_passes[cThreads]; for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { @@ -383,4 +427,4 @@ int main(int argc, char **argv) // On success return the count of primes found; on failure, return 0 return (int) result; -} +} \ No newline at end of file diff --git a/PrimeCPP/solution_2a/Dockerfile b/PrimeCPP/solution_2a/Dockerfile new file mode 100644 index 000000000..f6c0ceb2f --- /dev/null +++ b/PrimeCPP/solution_2a/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:22.04 AS build + +RUN apt-get update -qq \ + && apt-get install -y clang + +WORKDIR /opt/app +COPY *.cpp . +RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par + +FROM ubuntu:22.04 +COPY --from=build /opt/app/primes_par /usr/local/bin + +ENTRYPOINT [ "primes_par", "-l", "1000000" ] \ No newline at end of file diff --git a/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp new file mode 100644 index 000000000..017e8220a --- /dev/null +++ b/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp @@ -0,0 +1,379 @@ +// --------------------------------------------------------------------------- +// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve +// Some great ideas taken from Rust's implementation from Michael Barber +// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate) +// --------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace std::chrono; + +const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; 

// BitArray
//
// Packed bit storage used by the sieve. Only odd numbers own a bit: the odd number n is stored at
// bit position n / 2. A bit value of 0 means "still a prime candidate", 1 means "known composite".
// Storage is a std::vector, so the buffer is sized, zeroed and released in one place; the earlier
// hand-rolled buffer was cleared with a *bit* count instead of a *byte* count and overran the heap.

class BitArray {
    std::vector<uint8_t> array;     // Packed bits, eight per byte, all zero after construction
    size_t arrSize;                 // Number of bits held (one per odd number up to logicalSize)
    size_t logicalSize;             // Upper limit the caller asked for

    // Number of bytes needed to hold `size` bits (rounded up).
    static constexpr size_t arraySize(size_t size)
    {
        return (size >> 3) + ((size & 7) > 0);
    }

    // Byte offset that contains bit `n`.
    static constexpr size_t index(size_t n)
    {
        return (n >> 3);
    }

public:
    // Creates storage for every odd number up to `size`, with all bits cleared (all candidates).
    explicit BitArray(size_t size)
        : array(arraySize((size + 1) / 2), uint8_t(0)),
          arrSize((size + 1) / 2),
          logicalSize(size)
    {
    }

    // Returns true when `n` is odd and has not been marked composite. Even numbers always yield
    // false; callers handle 2 separately. `n` must not exceed size().
    bool get(size_t n) const
    {
        if (n % 2 == 0)
            return false;           // Even numbers > 2 are not prime
        n = n / 2;                  // Map the actual number to its bit position
        return !(array[index(n)] & (uint8_t(1) << (n % 8)));
    }

    // Marks the odd number `n` as composite. `n` must not exceed size().
    void set(size_t n)
    {
        n = n / 2;                  // Map the actual number to its bit position
        array[index(n)] |= (uint8_t(1) << (n % 8));
    }

    // Upper limit this array was created for (not the number of bits stored).
    size_t size() const
    {
        return logicalSize;
    }
};


// prime_sieve
//
// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3)
// and includes the code needed to eliminate non-primes from its array by calling runSieve.

class prime_sieve
{
  private:

    BitArray Bits;                  // Sieve data, where 0==prime, 1==not

  public:

    // Builds an untouched sieve for the range [0, n]; every odd number starts out as a candidate.
    prime_sieve(uint64_t n) : Bits(n)
    {
    }

    // runSieve
    //
    // Walks the odd factors from 3 up to sqrt(limit). Each factor that has not been crossed off
    // is prime, so every odd multiple of it, starting at factor * factor, is crossed off.

    void runSieve()
    {
        const uint64_t limit = Bits.size();
        const uint64_t q = static_cast<uint64_t>(std::sqrt(static_cast<double>(limit)));

        for (uint64_t factor = 3; factor <= q; factor += 2)
        {
            if (!Bits.get(factor))
                continue;           // Already known composite, its multiples are covered

            // Smaller multiples were handled by smaller primes; stepping by 2 * factor
            // skips the even multiples, which have no bit at all.
            for (uint64_t num = factor * factor; num <= limit; num += factor * 2)
                Bits.set(num);
        }
    }

    // countPrimes
    //
    // Can be called after runSieve to determine how many primes were found in total

    size_t countPrimes() const
    {
        size_t count = (Bits.size() >= 2);      // Count 2 as prime if within range
        for (uint64_t num = 3; num <= Bits.size(); num += 2)
            if (Bits.get(num))
                count++;
        return count;
    }

    // isPrime
    //
    // Can be called after runSieve to determine whether a given number is prime.
    // `n` must not exceed the limit the sieve was created with.

    bool isPrime(uint64_t n) const
    {
        if (n == 2)
            return true;
        if (n < 2 || n % 2 == 0)
            return false;
        return Bits.get(n);
    }

    // validateResults
    //
    // Checks to see if the number of primes found matches what we should expect. This data isn't used in the
    // sieve processing at all, only to sanity check that the results are right when done. Returns false for
    // any limit that is not one of the known powers of ten.

    bool validateResults() const
    {
        // Built once and shared; the table never changes.
        static const std::map<uint64_t, size_t> resultsDictionary =
        {
            {             10LLU, 4         },   // Historical data for validating our results - the number of primes
            {            100LLU, 25        },   // to be found under some limit, such as 168 primes under 1000
            {          1'000LLU, 168       },
            {         10'000LLU, 1229      },
            {        100'000LLU, 9592      },
            {      1'000'000LLU, 78498     },
            {     10'000'000LLU, 664579    },
            {    100'000'000LLU, 5761455   },
            {  1'000'000'000LLU, 50847534  },
            { 10'000'000'000LLU, 455052511 },
        };

        const auto expected = resultsDictionary.find(Bits.size());
        return expected != resultsDictionary.end() && expected->second == countPrimes();
    }

    // printResults
    //
    // Displays stats about what was found as well as (optionally) the primes themselves

    void printResults(bool showResults, double duration, size_t passes, size_t threads) const
    {
        if (showResults)
            std::cout << "2, ";

        size_t count = (Bits.size() >= 2);      // Count 2 as prime if in range
        for (uint64_t num = 3; num <= Bits.size(); num += 2)
        {
            if (Bits.get(num))
            {
                if (showResults)
                    std::cout << num << ", ";
                count++;
            }
        }

        if (showResults)
            std::cout << "\n";

        std::cout << "Passes: "   << passes << ", "
                  << "Threads: "  << threads << ", "
                  << "Time: "     << duration << ", "
                  << "Average: "  << duration/passes << ", "
                  << "Limit: "    << Bits.size() << ", "
                  << "Counts: "   << count << "/" << countPrimes() << ", "
                  << "Valid: "    << (validateResults() ? "Pass" : "FAIL!")
                  << "\n";

        // Following 2 lines added by rbergen to conform to drag race output format
        std::cout << "\n";
        std::cout << "davepl_par;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n";
    }

};

// custom_atoll
//
// Like atoll(), but accepts K, M, G, and T as magnitude suffixes.
+ +long long custom_atoll(const std::string& value_str) { + static const std::unordered_map suffixes = { + {'K', 1000LL}, + {'M', 1000000LL}, + {'G', 1000000000LL}, + {'T', 1000000000000LL} + }; + + std::string input_str = value_str; + for (char& c : input_str) { + c = std::toupper(c); + } + + char last_char = input_str.back(); + if (suffixes.find(last_char) != suffixes.end()) { + long long multiplier = suffixes.at(last_char); + std::string numeric_part = input_str.substr(0, input_str.size() - 1); + std::istringstream iss(numeric_part); + double numeric_value; + if (!(iss >> numeric_value)) { + throw std::invalid_argument("Invalid numeric part: " + numeric_part); + } + return static_cast(numeric_value * multiplier); + } + + std::istringstream iss(input_str); + long long result; + if (!(iss >> result)) { + throw std::invalid_argument("Invalid input format"); + } + return result; +} + +int main(int argc, char **argv) +{ + vector args(argv + 1, argv + argc); // From first to last argument in the argv array + uint64_t ullLimitRequested = 0; + auto cThreadsRequested = 0; + auto cSecondsRequested = 0; + auto bPrintPrimes = false; + auto bOneshot = false; + auto bQuiet = false; + + // Process command-line args + + for (auto i = args.begin(); i != args.end(); ++i) + { + if (*i == "-h" || *i == "--help") { + cout << "Syntax: " << argv[0] << " [-t,--threads threads] [-s,--seconds seconds] [-l,--limit limit] [-1,--oneshot] [-q,--quiet] [-h] " << endl; + return 0; + } + else if (*i == "-t" || *i == "--threads") + { + i++; + cThreadsRequested = (i == args.end()) ? 0 : max(1, atoi(i->c_str())); + } + else if (*i == "-s" || *i == "--seconds") + { + i++; + cSecondsRequested = (i == args.end()) ? 0 : max(1, atoi(i->c_str())); + } + else if (*i == "-l" || *i == "--limit") + { + i++; + ullLimitRequested = (i == args.end()) ? 
0LL : max((long long)1, custom_atoll(i->c_str())); + } + else if (*i == "-1" || *i == "--oneshot") + { + bOneshot = true; + cThreadsRequested = 1; + } + else if (*i == "-p" || *i == "--print") + { + bPrintPrimes = true; + } + else if (*i == "-q" || *i == "--quiet") + { + bQuiet = true; + } + else + { + fprintf(stderr, "Unknown argument: %s", i->c_str()); + return 0; + } + } + + if (!bQuiet) + { + cout << "Primes Benchmark (c) 2021 Dave's Garage - http://github.com/davepl/primes" << endl; + cout << "-------------------------------------------------------------------------" << endl; + } + + if (bOneshot) + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + + if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) + { + cout << "Oneshot option cannot be mixed with second count or thread count." << endl; + return 0; + } + + auto cPasses = 0; + auto cSeconds = (cSecondsRequested ? cSecondsRequested : 5); + auto cThreads = (cThreadsRequested ? cThreadsRequested : thread::hardware_concurrency()); + auto llUpperLimit = (ullLimitRequested ? ullLimitRequested : DEFAULT_UPPER_LIMIT); + + if (!bQuiet) + { + printf("Computing primes to %llu on %d thread%s for %d second%s.\n", + (unsigned long long)llUpperLimit, + cThreads, + cThreads == 1 ? "" : "s", + cSeconds, + cSeconds == 1 ? 
"" : "s" + ); + } + double duration; + + if (bOneshot) + { + auto tStart = steady_clock::now(); + prime_sieve(llUpperLimit).runSieve(); + auto tEnd = steady_clock::now() - tStart; + duration = duration_cast(tEnd).count()/1000000.0; + } + else + { + auto tStart = steady_clock::now(); + std::vector threads(cThreads); + std::vector l_passes(cThreads); + for (unsigned int i = 0; i < cThreads; i++) + threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) + { + l_passes[i] = 0; + while (duration_cast(steady_clock::now() - tStart).count() < 5) { + prime_sieve(llUpperLimit).runSieve(); + ++l_passes[i]; + } + }, llUpperLimit); + for (auto i = 0; i < cThreads; i++) { + threads[i].join(); + cPasses += l_passes[i]; + } + auto tEnd = steady_clock::now() - tStart; + duration = duration_cast(tEnd).count()/1000000.0; + } + + + if (bOneshot) + { + cPasses = 1.0 / duration * 5; + duration = 5.0; + } + + prime_sieve checkSieve(llUpperLimit); + checkSieve.runSieve(); + auto result = checkSieve.validateResults() ? 
checkSieve.countPrimes() : 0; + + if (!bQuiet) + checkSieve.printResults(bPrintPrimes, duration , cPasses, cThreads); + else + cout << cPasses << ", " << duration / cPasses << endl; + + // On success return the count of primes found; on failure, return 0 + + return (int) result; +} diff --git a/PrimeCPP/solution_2a/run.cmd b/PrimeCPP/solution_2a/run.cmd new file mode 100644 index 000000000..3390a0df2 --- /dev/null +++ b/PrimeCPP/solution_2a/run.cmd @@ -0,0 +1,2 @@ +g++ -Ofast PrimeCPP_PAR.cpp -std=c++17 -lstdc++ -oPrimes_par_gcc.exe +.\Primes_par_gcc.exe diff --git a/PrimeCPP/solution_2a/run.sh b/PrimeCPP/solution_2a/run.sh new file mode 100755 index 000000000..dc59d3612 --- /dev/null +++ b/PrimeCPP/solution_2a/run.sh @@ -0,0 +1,6 @@ +# g++ -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes.exe +# gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe +# clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe + +clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par.exe +./primes_par.exe -t 1