Skip to content

Commit

Permalink
remove blank ids, remove fasthash to fix building on mac
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Oct 31, 2024
1 parent 56522b1 commit a19c985
Show file tree
Hide file tree
Showing 8 changed files with 25 additions and 155 deletions.
1 change: 0 additions & 1 deletion 02_assign_ids/grebi_assign_ids/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ edition = "2021"
serde_json = { version = "1.0.108", features=["preserve_order"] }
grebi_shared = { path = "../../grebi_shared" }
csv = "1.3.0"
fasthash = "0.4.0"
lmdb-zero = "0.4.4"
bloomfilter = "1.0.13"
jemallocator = "0.5.4"
Expand Down
21 changes: 18 additions & 3 deletions 02_assign_ids/grebi_assign_ids/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ use clap::Parser;

use grebi_shared::find_strings;
use grebi_shared::load_groups_txt::load_id_to_group_mapping;
use grebi_shared::check_id;


#[derive(clap::Parser, Debug)]
#[command(author, version, about, long_about = None)]
Expand Down Expand Up @@ -200,7 +198,10 @@ fn get_ids<'a, 'b>(json:&mut JsonParser<'a>, ids:&'b mut BTreeSet<&'a [u8]>) {
json.end_array();
} else if json.peek().kind == JsonTokenType::StartString {
let id = json.string();
ids.insert(id.clone());
if check_id(&id) {
ids.insert(id.clone());
}

} else if json.peek().kind == JsonTokenType::StartObject {
// maybe a reification
json.begin_object();
Expand All @@ -218,3 +219,17 @@ fn get_ids<'a, 'b>(json:&mut JsonParser<'a>, ids:&'b mut BTreeSet<&'a [u8]>) {
}
}


// Duplicated in grebi_extract_identifiers
/// Decides whether `id` should be kept as an identifier.
///
/// Returns `true` when `id` is at least 16 bytes long, or when it contains at
/// least one byte that is not an ASCII digit. Returns `false` for shorter
/// values made up solely of ASCII digits, which includes the empty slice.
fn check_id(id: &[u8]) -> bool {
    // A long ID is accepted even when purely numeric: it is probably a UUID.
    let long_enough = id.len() >= 16;
    long_enough || id.iter().any(|byte| !byte.is_ascii_digit())
}
1 change: 0 additions & 1 deletion 02_assign_ids/grebi_extract_identifiers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ clap = { version = "4.4.11", features = ["derive"] }
serde_json = { version = "1.0.108", features=["preserve_order"] }
grebi_shared = { path = "../../grebi_shared" }
csv = "1.3.0"
fasthash = "0.4.0"
lmdb-zero = "0.4.4"
jemallocator = "0.5.4"

1 change: 1 addition & 0 deletions 02_assign_ids/grebi_extract_identifiers/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ fn write_ids(k:&[u8], json:&mut JsonParser, writer:&mut BufWriter<io::StdoutLock
}


// Duplicated in grebi_assign_ids
fn check_id(k:&[u8], id:&[u8]) -> bool {
if id.len() >= 16 {
// long numeric ID is prob a UUID and fine
Expand Down
1 change: 0 additions & 1 deletion 02_assign_ids/grebi_identifiers2groups/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ edition = "2021"
serde_json = { version = "1.0.108", features=["preserve_order"] }
grebi_shared = { path = "../../grebi_shared" }
csv = "1.3.0"
fasthash = "0.4.0"
lmdb-zero = "0.4.4"
bloomfilter = "1.0.13"
jemallocator = "0.5.4"
Expand Down
1 change: 0 additions & 1 deletion 02_assign_ids/grebi_superclasses2types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ edition = "2021"
serde_json = { version = "1.0.108", features=["preserve_order"] }
grebi_shared = { path = "../../grebi_shared" }
csv = "1.3.0"
fasthash = "0.4.0"
lmdb-zero = "0.4.4"
bloomfilter = "1.0.13"
jemallocator = "0.5.4"
Expand Down
139 changes: 6 additions & 133 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 0 additions & 15 deletions grebi_shared/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,6 @@ pub mod slice_materialised_edge;
pub mod load_metadata_mapping_table;
pub mod load_groups_txt;

/// Decides whether `id` is acceptable as a value of the identifier property `k`.
///
/// Returns `true` when `id` is at least 16 bytes long (a long numeric ID is
/// probably a UUID), or when it contains at least one byte that is not an
/// ASCII digit. Otherwise a warning naming `id` and `k` is written to stderr
/// and `false` is returned. The empty slice takes the warning path as well.
pub fn check_id(k: &[u8], id: &[u8]) -> bool {
    let acceptable = id.len() >= 16 || id.iter().any(|byte| !byte.is_ascii_digit());
    if !acceptable {
        // Reached for short all-digit IDs and also for blank IDs.
        eprintln!("Found unprefixed numeric ID {} for identifier property {}. Unqualified numbers like this as identifiers are ambiguous and may cause incorrect equivalences.", String::from_utf8_lossy(id), String::from_utf8_lossy(k));
    }
    acceptable
}

// get the id without parsing json
pub fn get_id<'a>(json:&'a [u8])->&'a [u8] {

Expand Down

0 comments on commit a19c985

Please sign in to comment.