From f2048d0a5ef6df2d2e3d9a7348f6544c52bed43f Mon Sep 17 00:00:00 2001 From: oiwn Date: Thu, 16 Jan 2025 01:05:23 +0700 Subject: [PATCH 1/5] update deps --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3ac2dfc..9f7b73b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dom-content-extraction" -version = "0.3.5" +version = "0.3.6" description = "Rust implementation of Content extraction via text density paper" license = "MPL-2.0" @@ -30,8 +30,8 @@ strip = true panic = "abort" [dependencies] -ego-tree = "0.9" -scraper = "0.21" +ego-tree = "0.10" +scraper = "0.22" thiserror = "2" [dev-dependencies] From b181d75d44d6c4f4ce0568e96c993fcd00315562 Mon Sep 17 00:00:00 2001 From: oiwn Date: Thu, 16 Jan 2025 01:15:59 +0700 Subject: [PATCH 2/5] fix typos --- .typos.toml | 2 ++ benches/simple.rs | 4 ++-- src/lib.rs | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 .typos.toml diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 0000000..ed6bc2b --- /dev/null +++ b/.typos.toml @@ -0,0 +1,2 @@ +[files] +extend-exclude = ["*.html"] diff --git a/benches/simple.rs b/benches/simple.rs index 483cd5a..4975bc4 100644 --- a/benches/simple.rs +++ b/benches/simple.rs @@ -31,7 +31,7 @@ fn read_file_content_from_zip(zip_path: &str, file_name: &str) -> Option result } -fn benchmark_test_1_html_dom_content_extaction(c: &mut Criterion) { +fn benchmark_test_1_html_dom_content_extraction(c: &mut Criterion) { let content = read_file("html/test_1.html").unwrap(); c.bench_function("test_1_dom_content_extraction", |b| { b.iter(|| { @@ -122,7 +122,7 @@ fn benchmark_node_text_extraction(c: &mut Criterion) { criterion_group!( benches, - benchmark_test_1_html_dom_content_extaction, + benchmark_test_1_html_dom_content_extraction, benchmark_real_file_dom_content_extraction, benchmark_real_file_density_tree_calculation, benchmark_real_file_density_tree_calculation_and_sort, diff --git a/src/lib.rs b/src/lib.rs index ea86b71..b21ab45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -483,7 +483,7 @@ impl<'a> DensityTree { } impl std::fmt::Debug for DensityTree { - /// Format tree with identation + /// Format tree with indentation fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn pretty_print( f: &mut std::fmt::Formatter<'_>, From c28f11b24a029e722446a0d48f92f9ac23a2f6ea Mon Sep 17 00:00:00 2001 From: oiwn Date: Thu, 16 Jan 2025 01:30:14 +0700 Subject: [PATCH 3/5] fix cargo-deny issues --- .deny.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .deny.toml diff --git a/.deny.toml b/.deny.toml new file mode 100644 index 0000000..7c72fd5 --- /dev/null +++ b/.deny.toml @@ -0,0 +1,6 @@ +[licenses] +allow = [ + "Apache-2.0", + "MIT", + "Apache-2.0 OR MIT" +] From 658ca40f96ff38ad37513bfb9c703dd6e8552060 Mon Sep 17 00:00:00 2001 From: oiwn Date: Thu, 16 Jan 2025 01:49:57 +0700 Subject: [PATCH 4/5] fixes deny --- .deny.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.deny.toml b/.deny.toml index 7c72fd5..86759db 100644 --- a/.deny.toml +++ b/.deny.toml @@ -2,5 +2,4 @@ allow = [ "Apache-2.0", "MIT", - "Apache-2.0 OR MIT" ] From ee9e54e665ca71d2f0156dcd89e8f0f737cb6a82 Mon Sep 17 00:00:00 2001 From: oiwn Date: Thu, 16 Jan 2025 01:53:01 +0700 Subject: [PATCH 5/5] fix deny --- .deny.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.deny.toml b/.deny.toml index 86759db..3151078 100644 --- a/.deny.toml +++ b/.deny.toml @@ -2,4 +2,7 @@ allow = [ "Apache-2.0", "MIT", + "Unicode-3.0", + "MPL-2.0", + "ISC" ]