From 0b6187f61430bb4eec9fa58a7a5b7f72ca6a0816 Mon Sep 17 00:00:00 2001
From: Maxwell1447
Date: Mon, 14 Oct 2024 16:09:38 +0200
Subject: [PATCH] fixed problem of best match buffer

---
Reviewer notes (free text between the "---" separator and the diffstat;
it is not part of the commit message and not part of the diff).

* src/fuzzy_match.cc, last hunk: instead of pushing each match straight
  into `result` and hitting `break` once `cpt > contrast_buffer`, every
  match is now pushed into `result_best`. That queue is ordered by
  CompareMatchInverse, so top() is the lowest-scoring entry (on equal
  scores, the one with the largest secondary_sort). Whenever the queue
  grows past a positive `contrast_buffer`, that top entry is popped, so
  only the best `contrast_buffer` matches survive. The survivors are
  drained into `result` once the two enclosing blocks close (presumably
  the end of the match loop; confirm against the full file).

* Behaviour change to confirm: when `contrast_buffer <= 0` the size cap
  is skipped and `result_best` keeps every match, whereas the removed
  code reached `break` as soon as `cpt > contrast_buffer`.

* src/CMakeLists.txt: `${ICU_LIBRARIES}` is commented out of
  target_link_libraries. This is unrelated to the subject line, and the
  fuzzy_match.cc context shown in this very patch calls
  icu::Normalizer2::getNFCInstance. Confirm ICU is still linked some
  other way, or drop this hunk.

* src/filter.cc: silences the std::cerr trace of `sidx` in
  Filter::add_sentence.

* TODO(review): the three superseded statements are left commented out
  rather than deleted (the commented `break` also lost its semicolon).
  CompareMatchInverse::operator() could be const-qualified, and the
  C-style `(int)` cast could be a static_cast.

* NOTE(review): this copy was recovered from a whitespace-collapsed
  extraction. Line breaks, indentation and blank context lines are
  reconstructed to match the hunk header counts. The author e-mail and
  the template argument lists (e.g. `std::vector& sentence`,
  `std::priority_queue, CompareMatch>`) appear to have been stripped.
  Regenerate the patch from the original commit before applying it.

 src/CMakeLists.txt |  2 +-
 src/filter.cc      |  2 +-
 src/fuzzy_match.cc | 25 ++++++++++++++++++++++---
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b7a96fa..b80565a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,7 +55,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC
 
 target_link_libraries(${PROJECT_NAME}
   ${OPENNMT_TOKENIZER_LIB}
-  ${ICU_LIBRARIES}
+  # ${ICU_LIBRARIES}
   ${Boost_LIBRARIES}
   Threads::Threads
   )
diff --git a/src/filter.cc b/src/filter.cc
index 94f89c2..e2a5e6d 100644
--- a/src/filter.cc
+++ b/src/filter.cc
@@ -10,7 +10,7 @@ namespace fuzzy
   Filter::add_sentence(const std::vector& sentence)
   {
     size_t sidx = _sentence_pos.size();
-    std::cerr << sidx << std::endl;
+    // std::cerr << sidx << std::endl;
     _sentence_pos.push_back(_sentence_buffer.size());
 
     /* first token in sentence buffer is the sentence size */
diff --git a/src/fuzzy_match.cc b/src/fuzzy_match.cc
index 7398291..b3fc964 100644
--- a/src/fuzzy_match.cc
+++ b/src/fuzzy_match.cc
@@ -41,6 +41,16 @@ namespace fuzzy
     }
   };
 
+  class CompareMatchInverse
+  {
+  public:
+    bool operator()(const FuzzyMatch::Match &x, const FuzzyMatch::Match &y)
+    {
+      return x.score > y.score ||
+        (x.score == y.score && x.secondary_sort < y.secondary_sort);
+    }
+  };
+
   static std::string normalize(const std::string& text_utf8) {
     UErrorCode error_code = U_ZERO_ERROR;
     const auto* normalizer = icu::Normalizer2::getNFCInstance(error_code);
@@ -522,6 +532,7 @@ namespace fuzzy
 
     /* result map - normalized error => sentence */
     std::priority_queue, CompareMatch> result;
+    std::priority_queue, CompareMatchInverse> result_best;
 
     const Filter& filter = _filterIndex->get_Filter();
     // FilterMatches* filter_matches = nullptr;
@@ -822,12 +833,20 @@ namespace fuzzy
           m.secondary_sort = (filter_type == IndexType::SUFFIX) ? s_id : cpt;
           m.penalty = 0;
           m.cover = s_cover;
-          result.push(m);
+          // result.push(m);
+          result_best.push(m);
+          if (contrast_buffer > 0 && (int)result_best.size() > contrast_buffer)
+            result_best.pop();
           cpt++;
-          if (cpt > contrast_buffer)
-            break;
+          // if (cpt > contrast_buffer)
+          //   break
         }
       }
+    while (result_best.size() > 0)
+    {
+      result.push(result_best.top());
+      result_best.pop();
+    }
     // COUT filter
     // std::cerr << num_filtered << std::endl;
     // std::cerr << filter_matches->get_best_matches().size() << std::endl;