Skip to content

Commit

Permalink
Fixed a problem with the best-match buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
Maxwell1447 committed Oct 14, 2024
1 parent 30f56d2 commit 0b6187f
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC

target_link_libraries(${PROJECT_NAME}
${OPENNMT_TOKENIZER_LIB}
${ICU_LIBRARIES}
# ${ICU_LIBRARIES}
${Boost_LIBRARIES}
Threads::Threads
)
Expand Down
2 changes: 1 addition & 1 deletion src/filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace fuzzy
Filter::add_sentence(const std::vector<unsigned>& sentence)
{
size_t sidx = _sentence_pos.size();
std::cerr << sidx << std::endl;
// std::cerr << sidx << std::endl;
_sentence_pos.push_back(_sentence_buffer.size());

/* first token in sentence buffer is the sentence size */
Expand Down
25 changes: 22 additions & 3 deletions src/fuzzy_match.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ namespace fuzzy
}
};

/* Strict ordering that ranks matches from best to worst.
 *
 * x is ordered before y when x has the strictly higher score, or when both
 * scores are equal and x has the smaller secondary_sort value.
 *
 * When used as the comparator of a std::priority_queue, top() is therefore
 * the lowest-ranked element (lowest score; on ties, largest secondary_sort),
 * so pop() discards the worst match currently held in the queue.
 */
class CompareMatchInverse
{
public:
  /* const-qualified so the comparator can also be invoked through a const
   * object or const reference; it holds no state and never mutates. */
  bool operator()(const FuzzyMatch::Match &x, const FuzzyMatch::Match &y) const
  {
    if (x.score > y.score)
      return true;
    /* equal scores: fall back to the secondary key, smaller value first */
    return x.score == y.score && x.secondary_sort < y.secondary_sort;
  }
};

static std::string normalize(const std::string& text_utf8) {
UErrorCode error_code = U_ZERO_ERROR;
const auto* normalizer = icu::Normalizer2::getNFCInstance(error_code);
Expand Down Expand Up @@ -522,6 +532,7 @@ namespace fuzzy

/* result map - normalized error => sentence */
std::priority_queue<Match, std::vector<Match>, CompareMatch> result;
std::priority_queue<Match, std::vector<Match>, CompareMatchInverse> result_best;

const Filter& filter = _filterIndex->get_Filter();
// FilterMatches* filter_matches = nullptr;
Expand Down Expand Up @@ -822,12 +833,20 @@ namespace fuzzy
m.secondary_sort = (filter_type == IndexType::SUFFIX) ? s_id : cpt;
m.penalty = 0;
m.cover = s_cover;
result.push(m);
// result.push(m);
result_best.push(m);
if (contrast_buffer > 0 && (int)result_best.size() > contrast_buffer)
result_best.pop();
cpt++;
if (cpt > contrast_buffer)
break;
// if (cpt > contrast_buffer)
// break
}
}
while (result_best.size() > 0)
{
result.push(result_best.top());
result_best.pop();
}
// COUT filter
// std::cerr << num_filtered << std::endl;
// std::cerr << filter_matches->get_best_matches().size() << std::endl;
Expand Down

0 comments on commit 0b6187f

Please sign in to comment.