diff --git a/.deny.toml b/.deny.toml new file mode 100644 index 0000000..3151078 --- /dev/null +++ b/.deny.toml @@ -0,0 +1,8 @@ +[licenses] +allow = [ + "Apache-2.0", + "MIT", + "Unicode-3.0", + "MPL-2.0", + "ISC" +] diff --git a/.typos.toml b/.typos.toml new file mode 100644 index 0000000..ed6bc2b --- /dev/null +++ b/.typos.toml @@ -0,0 +1,2 @@ +[files] +extend-exclude = ["*.html"] diff --git a/Cargo.toml b/Cargo.toml index 3ac2dfc..9f7b73b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dom-content-extraction" -version = "0.3.5" +version = "0.3.6" description = "Rust implementation of Content extraction via text density paper" license = "MPL-2.0" @@ -30,8 +30,8 @@ strip = true panic = "abort" [dependencies] -ego-tree = "0.9" -scraper = "0.21" +ego-tree = "0.10" +scraper = "0.22" thiserror = "2" [dev-dependencies] diff --git a/benches/simple.rs b/benches/simple.rs index 483cd5a..4975bc4 100644 --- a/benches/simple.rs +++ b/benches/simple.rs @@ -31,7 +31,7 @@ fn read_file_content_from_zip(zip_path: &str, file_name: &str) -> Option result } -fn benchmark_test_1_html_dom_content_extaction(c: &mut Criterion) { +fn benchmark_test_1_html_dom_content_extraction(c: &mut Criterion) { let content = read_file("html/test_1.html").unwrap(); c.bench_function("test_1_dom_content_extraction", |b| { b.iter(|| { @@ -122,7 +122,7 @@ fn benchmark_node_text_extraction(c: &mut Criterion) { criterion_group!( benches, - benchmark_test_1_html_dom_content_extaction, + benchmark_test_1_html_dom_content_extraction, benchmark_real_file_dom_content_extraction, benchmark_real_file_density_tree_calculation, benchmark_real_file_density_tree_calculation_and_sort, diff --git a/src/lib.rs b/src/lib.rs index ea86b71..b21ab45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -483,7 +483,7 @@ impl<'a> DensityTree { } impl std::fmt::Debug for DensityTree { - /// Format tree with identation + /// Format tree with indentation fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn pretty_print( f: &mut std::fmt::Formatter<'_>,