diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ce9e44..591f5c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,11 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) set(CMAKE_VENDOR_DIR "${CMAKE_SOURCE_DIR}/cmake/") +if (${IS_AARCH64}) + message(STATUS "SIMDE IS BEING USED") + include("${CMAKE_VENDOR_DIR}/simde.cmake") +endif () + # Compression library dependencies include("${CMAKE_VENDOR_DIR}/fastpfor.cmake") include("${CMAKE_VENDOR_DIR}/fsst.cmake") @@ -61,7 +66,6 @@ include("${CMAKE_VENDOR_DIR}/googletest.cmake") include("${CMAKE_VENDOR_DIR}/aws-sdk.cmake") include("${CMAKE_VENDOR_DIR}/benchmark.cmake") include("${CMAKE_VENDOR_DIR}/gdouble.cmake") -include("${CMAKE_VENDOR_DIR}/turbo.cmake") include("${CMAKE_VENDOR_DIR}/lz4.cmake") # --------------------------------------------------------------------------- diff --git a/README.md b/README.md index a02ae67..1df068b 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,8 @@ Then, depending on your usecase, build only the library or any of the tools: For a list of all valid targets, run `make help`. +The library was built and tested on Linux (x86, ARM) and macOS (ARM). + ## Contributors Adnan Alhomssi diff --git a/btrblocks/common/SIMD.hpp b/btrblocks/common/SIMD.hpp index ecde85a..ad7b26b 100644 --- a/btrblocks/common/SIMD.hpp +++ b/btrblocks/common/SIMD.hpp @@ -14,7 +14,11 @@ #else // USE_SIMD // ------------------------------------------------------------------------------ +#if (defined(__x86_64__) || defined(__i386__)) #include +#elif defined(__aarch64__) +#include +#endif #define BTR_IFSIMD(x...) 
x #define BTR_IFELSESIMD(a, b) a diff --git a/btrblocks/extern/FastPFOR.cpp b/btrblocks/extern/FastPFOR.cpp index 9129b4d..2235631 100644 --- a/btrblocks/extern/FastPFOR.cpp +++ b/btrblocks/extern/FastPFOR.cpp @@ -8,9 +8,10 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wuninitialized" #include -#include +#include #include -#include +#include +#include #pragma GCC diagnostic pop // ------------------------------------------------------------------------------------- using namespace btrblocks; diff --git a/btrblocks/local.cmake b/btrblocks/local.cmake index 518d819..6108500 100755 --- a/btrblocks/local.cmake +++ b/btrblocks/local.cmake @@ -72,6 +72,11 @@ if (${WITH_LOGGING}) target_link_libraries(btrblocks spdlog) endif() +# we have to import simde for simd functionality on arm +if ((NOT "${IS_AARCH64}" STREQUAL "") AND NOT ${NO_SIMD}) + target_link_libraries(btrblocks simde) +endif () + # TODO including everything as public headers, as this is a research library # later on we might want to extract a minimal public interface. 
set(BTR_PUBLIC_INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}) diff --git a/btrblocks/scheme/string/DynamicDictionary.cpp b/btrblocks/scheme/string/DynamicDictionary.cpp index 25d2909..5023e55 100644 --- a/btrblocks/scheme/string/DynamicDictionary.cpp +++ b/btrblocks/scheme/string/DynamicDictionary.cpp @@ -87,7 +87,7 @@ u32 DynamicDictionary::compress(const btrblocks::StringArrayViewer src, // TODO: use memcpy instead of export/import (Note: I still use // FSST_MAXHEADER ~2KiB ) fsst_encoder_t* encoder = - fsst_create(fsst_n, input_string_lengths.get(), input_string_buffers.get(), 0); + fsst_create(fsst_n, (unsigned long*) input_string_lengths.get(), input_string_buffers.get(), 0); die_if(fsst_export(encoder, write_ptr) > 0); auto fsst_table_used_space = FSST_MAXHEADER; // ------------------------------------------------------------------------------------- @@ -97,8 +97,8 @@ u32 DynamicDictionary::compress(const btrblocks::StringArrayViewer src, // ------------------------------------------------------------------------------------- // Compress const u64 output_buffer_size = 7 + 4 * stats.total_unique_length; // fake - if (fsst_compress(encoder, fsst_n, input_string_lengths.get(), input_string_buffers.get(), - output_buffer_size, write_ptr, output_string_lengths.get(), + if (fsst_compress(encoder, fsst_n, (unsigned long*) input_string_lengths.get(), input_string_buffers.get(), + output_buffer_size, write_ptr, (unsigned long*) output_string_lengths.get(), output_string_buffers.get()) != fsst_n) { throw Generic_Exception("FSST Compression failed !"); } diff --git a/btrblocks/scheme/string/Fsst.cpp b/btrblocks/scheme/string/Fsst.cpp index ef6abcf..6eba603 100644 --- a/btrblocks/scheme/string/Fsst.cpp +++ b/btrblocks/scheme/string/Fsst.cpp @@ -42,7 +42,7 @@ u32 Fsst::compress(const btrblocks::StringArrayViewer src, // Prepare decoder and write header fsst_encoder_t* encoder = - fsst_create(stats.tuple_count, input_string_lengths.get(), input_string_buffers.get(), 0); + 
fsst_create(stats.tuple_count, (unsigned long*) input_string_lengths.get(), input_string_buffers.get(), 0); die_if(fsst_export(encoder, write_ptr) > 0); auto fsst_table_used_space = FSST_MAXHEADER; write_ptr += fsst_table_used_space; @@ -51,10 +51,10 @@ u32 Fsst::compress(const btrblocks::StringArrayViewer src, // Compress strings // TODO whyever this is fake(?), fix it. const u64 output_buffer_size = 7 + 4 * stats.total_length; // fake - if (fsst_compress(encoder, stats.tuple_count, input_string_lengths.get(), - input_string_buffers.get(), output_buffer_size, write_ptr, - output_string_lengths.get(), - output_string_buffers.get()) != stats.tuple_count) { + if (fsst_compress(encoder, stats.tuple_count, (unsigned long*) input_string_lengths.get(), + input_string_buffers.get(), output_buffer_size, write_ptr, + (unsigned long*) output_string_lengths.get(), + output_string_buffers.get()) != stats.tuple_count) { throw Generic_Exception("FSST Compression failed !"); } u64 fsst_strings_used_space = diff --git a/btrblocks/storage/MMapVector.cpp b/btrblocks/storage/MMapVector.cpp index 24abe75..ee001d8 100644 --- a/btrblocks/storage/MMapVector.cpp +++ b/btrblocks/storage/MMapVector.cpp @@ -11,7 +11,21 @@ void btrblocks::writeBinary(const char* pathname, std::vector& v) { for (const auto& s : v) { fileSize += s.size() + 1; } +#if defined(__linux__) die_if(posix_fallocate(fd, 0, fileSize) == 0); +#elif defined(__APPLE__) + fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, 0, static_cast(fileSize)}; + // Try to get a contiguous chunk of disk space + int ret = fcntl(fd, F_PREALLOCATE, &store); + if(-1 == ret){ + // OK, perhaps we are too fragmented, allocate non-contiguous + store.fst_flags = F_ALLOCATEALL; + ret = fcntl(fd, F_PREALLOCATE, &store); + if (-1 == ret) + die_if(false); + } + die_if(0 == ftruncate(fd, fileSize)); +#endif auto data = reinterpret_cast(mmap(nullptr, fileSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); data->count = v.size(); diff --git 
a/btrblocks/storage/MMapVector.hpp b/btrblocks/storage/MMapVector.hpp index bde3237..433e547 100644 --- a/btrblocks/storage/MMapVector.hpp +++ b/btrblocks/storage/MMapVector.hpp @@ -76,7 +76,21 @@ void writeBinary(const char* pathname, std::vector& v) { int fd = open(pathname, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); die_if(fd != -1); uint64_t length = v.size() * sizeof(T); +#if defined(__linux__) die_if(posix_fallocate(fd, 0, length) == 0); +#elif defined(__APPLE__) + fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, 0, static_cast(length)}; + // Try to get a contiguous chunk of disk space + int ret = fcntl(fd, F_PREALLOCATE, &store); + if(-1 == ret){ + // OK, perhaps we are too fragmented, allocate non-contiguous + store.fst_flags = F_ALLOCATEALL; + ret = fcntl(fd, F_PREALLOCATE, &store); + if (-1 == ret) + die_if(false); + } + die_if(0 == ftruncate(fd, length)); +#endif T* data = reinterpret_cast(mmap(nullptr, length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); die_if(data != MAP_FAILED); memcpy(data, v.data(), length); diff --git a/cmake/environment.cmake b/cmake/environment.cmake index 717740f..dac6238 100644 --- a/cmake/environment.cmake +++ b/cmake/environment.cmake @@ -9,3 +9,7 @@ endif() if (CYGWIN) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++") endif (CYGWIN) + +if (CMAKE_SYSTEM_PROCESSOR MATCHES "arm" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + set(IS_AARCH64 ON) +endif () diff --git a/cmake/fastpfor.cmake b/cmake/fastpfor.cmake index 9200e43..3cd133b 100755 --- a/cmake/fastpfor.cmake +++ b/cmake/fastpfor.cmake @@ -9,7 +9,7 @@ ExternalProject_Add( fastpfor_src PREFIX "vendor/lemire/fastpfor" GIT_REPOSITORY "https://github.com/lemire/FastPFor.git" - GIT_TAG 773283d4a11fa2440a1b3b28fd77f775e86d7898 + GIT_TAG bc375056b803af9b3ca5a6c98e6b38aeca80d825 TIMEOUT 10 UPDATE_COMMAND "" # to prevent rebuilding everytime INSTALL_COMMAND "" diff --git a/cmake/simde.cmake b/cmake/simde.cmake new file mode 100644 
index 0000000..6107b9c --- /dev/null +++ b/cmake/simde.cmake @@ -0,0 +1,13 @@ +include(FetchContent) +FetchContent_Declare( + simde + GIT_REPOSITORY https://github.com/simd-everywhere/simde.git + GIT_TAG master +) +FetchContent_MakeAvailable(simde) + +add_library(simde INTERFACE IMPORTED GLOBAL) +target_include_directories(simde INTERFACE "${simde_SOURCE_DIR}") + +# Enables native aliases. Not ideal but makes it easier to convert old code. +target_compile_definitions(simde INTERFACE SIMDE_ENABLE_NATIVE_ALIASES) \ No newline at end of file diff --git a/test/DatasetGenerator.cpp b/test/DatasetGenerator.cpp index 61ce610..f1305ed 100755 --- a/test/DatasetGenerator.cpp +++ b/test/DatasetGenerator.cpp @@ -275,7 +275,7 @@ int main(int argc, char **argv) integers[i] = (rand() % 100000); if ( rand() % 10 > 2 ) { size_t repeat = 20; - repeat = std::min(repeat, FLAGS_tuple_count - i - 1); + repeat = std::min(static_cast(repeat), FLAGS_tuple_count - i - 1); for ( size_t r_i = 1; r_i <= repeat; r_i++ ) { integers[i + r_i] = integers[i]; }