Skip to content

Commit

Permalink
Merge pull request #37 from omarmhaimdat/0.0.1-alpha.20
Browse files: browse the repository at this point in the history
Run Aho-Corasick to also find overlapping substrings
  • Loading branch information
omarmhaimdat authored Feb 24, 2024
2 parents 6181949 + ec97664 commit 6375271
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quickner"
version = "0.0.1-alpha.19"
version = "0.0.1-alpha.20"
edition = "2021"
authors = ["Omar MHAIMDAT"]
license = "Mozilla Public License 2.0"
Expand Down
2 changes: 1 addition & 1 deletion quickner-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quickner-core"
version = "0.0.1-alpha.19"
version = "0.0.1-alpha.20"
edition = "2021"
authors = ["Omar MHAIMDAT"]
license-file = "LICENSE"
Expand Down
4 changes: 2 additions & 2 deletions quickner-core/src/quickner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ impl Quickner {
return None;
}
let mut annotations = Vec::new();
for mat in aho_corasick.find_iter(&text) {
for mat in aho_corasick.find_overlapping_iter(&text) {
let start = mat.start();
// convert byte index to char index (assuming utf8)
let start = text[..start].chars().count();
Expand All @@ -141,7 +141,7 @@ impl Quickner {
annotations.push((start, end, label));
continue;
}
// if text == "python was created by guido van rossum" {
// if text == "monty python and the holy grail: the ultimate quiz http://bit.ly/pd3ms i got 42/50. can't believe i missed the name of lancelot's page " {
// println!("Start: {}, End: {}, text_len: {}, End + 1: {}", start, end, text.len(), text.chars().nth(end + 1).unwrap_or('N'));
// }
// println!("Start: {}, End: {}, text_len: {}", start, end, char_len);
Expand Down
3 changes: 2 additions & 1 deletion tests/performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def main():
quick.process()
end = time.perf_counter()
quick.to_jsonl("data/output.jsonl")
# docs = quick.find_documents_by_entity("twitter")
print(quick)
docs = quick.find_documents_by_entity("Twitter")
print(f"Time elapsed: {end - start} seconds")


Expand Down

0 comments on commit 6375271

Please sign in to comment.