Skip to content

Commit

Permalink
cerr everywhere
Browse files (browse the repository at this point in the history)
  • Loading branch information
Maxwell1447 committed Jan 20, 2024
1 parent 7a2c13f commit e22bee1
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 7 deletions.
3 changes: 3 additions & 0 deletions cli/src/FuzzyMatch-cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,15 @@ std::pair<int, int> process_stream(const Function& function,
if (num_threads <= 1) // Fast path for sequential processing.
{
while (std::getline(in, line)) {
std::cerr << "#" << std::flush;
std::string res = function(line);
std::cerr << "+" << std::flush;
if (!res.empty())
count_nonempty++;
out << res << std::endl;
// if (count_nonempty % 100 == 0)
// std::cerr << "\rPROGRESS: " << count_nonempty << " " << std::flush;
std::cerr << "+" << std::endl << std::flush;
}
// std::cerr << std::endl;
return std::make_pair(count_nonempty, count_total);
Expand Down
3 changes: 2 additions & 1 deletion include/fuzzy/fuzzy_match.hh
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ namespace fuzzy
int length
) : length(length), s(seq) {}
Match() {}
~Match() {}
float score;
float secondary_sort;
float penalty;
int max_subseq;
unsigned s_id;
std::string id;
std::vector<float> cover;
float* cover;
int length;
const unsigned* s;
};
Expand Down
4 changes: 4 additions & 0 deletions src/bm25_matches.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@ namespace fuzzy

std::priority_queue<std::pair<float, unsigned>, std::vector<std::pair<float, unsigned>>, ComparePairs> k_best;

std::cerr << "1" << std::flush;
Eigen::SparseVector<float> pattern_sparse_vec(bm25.get_vocab_size());
for (const unsigned& wid : pattern_wids)
pattern_sparse_vec.coeffRef(wid) += 1.0;

std::cerr << "2" << std::flush;
Eigen::SparseVector<float> scores = bm25.compute_product(pattern_sparse_vec);

std::cerr << "3" << std::flush;
for (Eigen::SparseVector<float>::InnerIterator it(scores); it; ++it) {
int s_id = it.index();
float bm25_score = it.value();
Expand All @@ -41,6 +44,7 @@ namespace fuzzy
}
}

std::cerr << "4" << std::flush;
_best_matches.reserve(k_best.size());
while (!k_best.empty())
{
Expand Down
31 changes: 25 additions & 6 deletions src/fuzzy_match.cc
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,8 @@ namespace fuzzy
if (!p_length)
return false;

std::cerr << "[" << std::flush;

if ((std::size_t)(min_subseq_length) > pattern.size())
min_subseq_length = pattern.size();

Expand All @@ -527,6 +529,8 @@ namespace fuzzy
// FilterMatches* filter_matches = nullptr;
// std::unique_ptr<FilterMatches> filter_matches;
std::shared_ptr<FilterMatches> filter_matches;

std::cerr << "$" << std::flush;
if (filter_type == IndexType::SUFFIX) {
const SuffixArray& suffix_array = static_cast<const SuffixArray&>(filter);
// filter_matches = new NGramMatches(fuzzy, p_length, min_subseq_length, suffix_array);
Expand Down Expand Up @@ -611,6 +615,7 @@ namespace fuzzy
filter_matches = std::make_shared<BM25Matches>(fuzzy, p_length, min_subseq_length, bm25, bm25_buffer, bm25_cutoff);
// filter_matches = new BM25Matches(fuzzy, p_length, min_subseq_length, bm25, bm25_buffer, bm25_cutoff);
BM25Matches& bm25Matches = static_cast<BM25Matches&>(*filter_matches);
std::cerr << "!" << std::flush;
bm25Matches.register_pattern(pattern_wids, edit_costs);
}
#endif
Expand All @@ -622,7 +627,7 @@ namespace fuzzy
no_matches.load_all();
}
/* Consolidation of the results */

std::cerr << "~" << std::flush;
/* now explore for the best segments */

PatternCoverage pattern_coverage(pattern_wids);
Expand All @@ -647,6 +652,8 @@ namespace fuzzy
std::vector<float> norm_weight;
std::vector<float> sorted_pattern_terms_idf;

std::cerr << "|" << std::flush;

/* Salient aspects enumeration */
switch(submod_fun)
{
Expand Down Expand Up @@ -706,6 +713,7 @@ namespace fuzzy

for (const auto& pair : best_matches)
{
// std::cerr << "-" << std::flush;
// num_filtered++;
const auto s_id = pair.first;
const auto score_filter = pair.second;
Expand Down Expand Up @@ -856,7 +864,10 @@ namespace fuzzy
m.id = _filterIndex->id(s_id);
m.secondary_sort = (filter_type == IndexType::SUFFIX) ? s_id : cpt;
m.penalty = 0;
m.cover = s_cover;
// m.cover = s_cover;
// m.cover = std::vector<float>(s_cover);
// m.cover = std::vector<float>(s_cover.size());
// std::copy(s_cover.begin(), s_cover.end(), m.cover.begin());
result.push(m);
// std::cerr << m.s_id << ": ";
// for (const auto& c : m.cover)
Expand All @@ -868,6 +879,7 @@ namespace fuzzy
}
}
// COUT filter
std::cerr << "]" << std::flush;
// std::cerr << num_filtered << std::endl;
// std::cerr << filter_matches->get_best_matches().size() << std::endl;

Expand Down Expand Up @@ -896,7 +908,9 @@ namespace fuzzy
// std::cerr << "rescore " << match.s_id << " : (";
for (unsigned i = 0; i < cover_weights.size(); i++)
{
rescore += cover_weights[i] * match.cover[i];
///////////////////////////////// TODO: uncomment
// rescore += cover_weights[i] * match.cover[i];
rescore += cover_weights[i];
// if (match.cover[i] != 0)
// std::cerr << cover_weights[i] << "*" << match.cover[i] << "+";
}
Expand All @@ -907,9 +921,10 @@ namespace fuzzy
matches.push_back(*it_max);
// std::cerr << "choose No " << it_max->s_id << std::endl;
// update cover_weights
for (unsigned i = 0; i < cover_weights.size(); i++)
if (it_max->cover[i] > 0)
cover_weights[i] *= shrinking_factor;
///////////////////////////////// TODO: uncomment
// for (unsigned i = 0; i < cover_weights.size(); i++)
// if (it_max->cover[i] > 0)
// cover_weights[i] *= shrinking_factor;
candidates.erase(it_max);
if (shrinking_factor < 1e-20f)
{
Expand Down Expand Up @@ -987,6 +1002,7 @@ namespace fuzzy
result.pop();
}
}
std::cerr << "|" << std::flush;

// std::cerr << "final matches " << " : ";
// for (unsigned i = 0; i < matches.size(); i++)
Expand All @@ -999,6 +1015,9 @@ namespace fuzzy
// std::cerr << matches[i].id;
// }
// std::cerr << std::endl;

//// Attempts to free memory which is corrupted
//// Probably from vector
return matches.size() > 0;
}
}
Expand Down

0 comments on commit e22bee1

Please sign in to comment.