Skip to content

Commit

Permalink
Merge pull request seqan#229 from smehringer/timers
Browse files Browse the repository at this point in the history
[FEATURE] Add --timing-output and Output layout timings.
  • Loading branch information
smehringer authored Nov 8, 2023
2 parents 74355cf + d2d2b39 commit 0534c5b
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 2 deletions.
3 changes: 3 additions & 0 deletions include/chopper/configuration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ struct configuration

//!\brief Internal parameter that triggers some verbose debug output.
bool debug{false};

//!\brief If specified, layout timings are written to the specified file.
std::filesystem::path output_timings{};
//!\}

/*!\name Configuration of size estimates (chopper::count)
Expand Down
2 changes: 1 addition & 1 deletion lib/hibf
Submodule hibf updated 41 files
+1 −0 .github/workflows/ci_coverage.yml
+0 −3 .gitmodules
+4 −4 include/hibf/build/build_data.hpp
+1 −1 include/hibf/build/insert_into_ibf.hpp
+3 −2 include/hibf/build/update_parent_kmers.hpp
+156 −11 include/hibf/hierarchical_interleaved_bloom_filter.hpp
+36 −189 include/hibf/interleaved_bloom_filter.hpp
+10 −0 include/hibf/layout/compute_layout.hpp
+7 −0 include/hibf/layout/data_store.hpp
+2 −0 include/hibf/layout/layout.hpp
+968 −0 include/hibf/misc/bit_vector.hpp
+3 −2 include/hibf/misc/print.hpp
+236 −60 include/hibf/misc/timer.hpp
+7 −0 include/hibf/platform.hpp
+2 −1 src/build/compute_kmers.cpp
+2 −1 src/build/construct_ibf.cpp
+5 −4 src/build/insert_into_ibf.cpp
+24 −2 src/hierarchical_interleaved_bloom_filter.cpp
+70 −92 src/interleaved_bloom_filter.cpp
+1 −0 src/layout/compute_fpr_correction.cpp
+16 −1 src/layout/compute_layout.cpp
+10 −0 src/layout/hierarchical_binning.cpp
+3 −2 src/misc/print.cpp
+0 −1 submodules/sdsl-lite
+1 −1 test/documentation/hibf_doxygen_cfg.in
+1 −1 test/header/CMakeLists.txt
+69 −0 test/include/hibf/test/cereal.hpp
+784 −0 test/include/hibf/test/iterator_test_template.hpp
+1 −1 test/iwyu/CMakeLists.txt
+1 −1 test/performance/ibf/CMakeLists.txt
+0 −151 test/performance/ibf/binning_bitvector_benchmark.cpp
+140 −0 test/performance/ibf/bit_vector_benchmark.cpp
+4 −0 test/performance/ibf/interleaved_bloom_filter_benchmark.cpp
+2 −0 test/unit/hibf/CMakeLists.txt
+802 −0 test/unit/hibf/bit_vector_test.cpp
+102 −79 test/unit/hibf/hierarchical_interleaved_bloom_filter_test.cpp
+139 −186 test/unit/hibf/interleaved_bloom_filter_test.cpp
+1 −0 test/unit/hibf/layout/CMakeLists.txt
+39 −0 test/unit/hibf/layout/compute_layout_test.cpp
+5 −4 test/unit/hibf/print_test.cpp
+125 −0 test/unit/hibf/timer_test.cpp
29 changes: 28 additions & 1 deletion src/layout/execute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ int execute(chopper::configuration & config, std::vector<std::string> const & fi
seqan::hibf::layout::layout hibf_layout;
std::vector<seqan::hibf::sketch::hyperloglog> sketches;

seqan::hibf::concurrent_timer compute_sketches_timer{};
seqan::hibf::concurrent_timer union_estimation_timer{};
seqan::hibf::concurrent_timer rearrangement_timer{};
seqan::hibf::concurrent_timer dp_algorithm_timer{};

if (config.determine_best_tmax)
{
std::tie(hibf_layout, sketches) = determine_best_number_of_technical_bins(config);
Expand All @@ -69,8 +74,16 @@ int execute(chopper::configuration & config, std::vector<std::string> const & fi
{
std::vector<size_t> kmer_counts;

compute_sketches_timer.start();
seqan::hibf::sketch::compute_sketches(config.hibf_config, kmer_counts, sketches);
hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config, kmer_counts, sketches);
compute_sketches_timer.stop();
dp_algorithm_timer.start();
hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config,
kmer_counts,
sketches,
union_estimation_timer,
rearrangement_timer);
dp_algorithm_timer.stop();

if (config.output_verbose_statistics)
{
Expand Down Expand Up @@ -98,6 +111,20 @@ int execute(chopper::configuration & config, std::vector<std::string> const & fi
config.write_to(fout);
hibf_layout.write_to(fout);

// Optionally dump the collected layout timings as a two-line TSV file:
// one header row naming the four columns and one data row with the values in seconds.
if (!config.output_timings.empty())
{
    std::ofstream output_stream{config.output_timings};
    // Fixed notation with two decimals keeps the columns uniform and easy to parse.
    output_stream << std::fixed << std::setprecision(2);
    output_stream << "sketching_in_seconds\t"
                  << "layouting_in_seconds\t"
                  << "union_estimation_in_seconds\t"
                  << "rearrangement_in_seconds\n";
    output_stream << compute_sketches_timer.in_seconds() << '\t';
    output_stream << dp_algorithm_timer.in_seconds() << '\t';
    output_stream << union_estimation_timer.in_seconds() << '\t';
    // The last column terminates the row with a newline. A trailing tab would add an empty
    // fifth field that has no header and would leave the file without a final line break.
    output_stream << rearrangement_timer.in_seconds() << '\n';
}

return 0;
}

Expand Down
6 changes: 6 additions & 0 deletions src/set_up_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ void set_up_parser(sharg::parser & parser, configuration & config)
"accuracy.",
.default_message = "k-mer size",
});
// --timing-output: optional path of a TSV file that receives the layout timings.
// The help text mentions timings only, matching the documentation of
// configuration::output_timings; no memory usage is reported through this option.
parser.add_option(config.output_timings,
                  sharg::config{.short_id = '\0',
                                .long_id = "timing-output",
                                .description = "Write layout timings to the specified file (TSV format). ",
                                .default_message = "",
                                .validator = sharg::output_file_validator{}});

parser.add_option(
config.hibf_config.tmax,
Expand Down
3 changes: 3 additions & 0 deletions test/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ target_use_datasources (cli_chopper_pipeline_test FILES seq1.fa)
target_use_datasources (cli_chopper_pipeline_test FILES small.fa)
target_use_datasources (cli_chopper_pipeline_test FILES small2.fa)
target_use_datasources (cli_chopper_pipeline_test FILES small.split)

# CLI test for --timing-output. The test reads small.fa, so the datasource has to be
# attached to its own target rather than to cli_chopper_pipeline_test (which already has it).
add_cli_test (cli_timing_output_test.cpp)
target_use_datasources (cli_timing_output_test FILES small.fa)
45 changes: 45 additions & 0 deletions test/cli/cli_timing_output_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// ---------------------------------------------------------------------------------------------------
// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
// ---------------------------------------------------------------------------------------------------

#include <gtest/gtest.h>

#include <filesystem>
#include <fstream>
#include <string> // strings

#include <seqan3/test/tmp_directory.hpp>

#include "cli_test.hpp"

// Runs chopper with --timing-output and verifies that the run succeeds silently
// and that the requested timing file exists afterwards.
TEST_F(cli_test, timing_output)
{
    std::string const sequence_file = data("small.fa");
    seqan3::test::tmp_directory tmp_dir{};

    // All files produced or consumed by this test live in the temporary directory.
    std::filesystem::path const timing_file{tmp_dir.path() / "output.timings"};
    std::filesystem::path const layout_file{tmp_dir.path() / "output.layout"};
    std::filesystem::path const input_file{tmp_dir.path() / "data.tsv"};

    // The input list names the same sequence file three times, one entry per line.
    {
        std::ofstream input_stream{input_file};
        for (int entry = 0; entry < 3; ++entry)
            input_stream << sequence_file << '\n';
    }

    cli_test_result result = execute_app("chopper",
                                         "--input",
                                         input_file.c_str(),
                                         "--output",
                                         layout_file.c_str(),
                                         "--timing-output",
                                         timing_file.c_str());

    EXPECT_EQ(result.exit_code, 0);
    EXPECT_EQ(result.out, std::string{});
    EXPECT_EQ(result.err, std::string{});

    // The file should have been written. Its content is deliberately not checked,
    // since it is not relevant what exactly the timings look like.
    EXPECT_TRUE(std::filesystem::exists(timing_file));
}

0 comments on commit 0534c5b

Please sign in to comment.