Skip to content

Commit

Permalink
fix reified identifiers not being extracted
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Sep 25, 2024
1 parent 419b1eb commit b095e6e
Showing 1 changed file with 43 additions and 31 deletions.
74 changes: 43 additions & 31 deletions 02_assign_ids/grebi_extract_identifiers/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,37 +67,7 @@ fn main() {
continue;
}

if json.peek().kind == JsonTokenType::StartArray {
json.begin_array();
while json.peek().kind != JsonTokenType::EndArray {
if json.peek().kind == JsonTokenType::StartString {
let id = json.string();
if check_id(&k, &id) {
if wrote_any {
writer.write_all(b"\t").unwrap();
} else {
wrote_any = true;
}
writer.write_all(&id).unwrap();
}
} else {
json.value(); // skip
}
}
json.end_array();
} else if json.peek().kind == JsonTokenType::StartString {
let id = json.string();
if check_id(&k, &id) {
if wrote_any {
writer.write_all(b"\t").unwrap();
} else {
wrote_any = true;
}
writer.write_all(&id).unwrap();
}
} else {
json.value(); // skip
}
write_ids(&k, &mut json, &mut writer, &mut wrote_any);
}
if !wrote_any {
panic!("no identifiers found in object {}", String::from_utf8_lossy(&line));
Expand All @@ -116,6 +86,48 @@ fn main() {

}

/// Consumes exactly one JSON value from `json` and writes every identifier
/// found in it to `writer`, separated by tab characters.
///
/// The value is handled according to its first token:
/// * array  - each element is processed recursively;
/// * string - written out if `check_id` accepts it for property `k`;
/// * object - treated as a possible reification: only the value stored under
///   the `grebi:value` key is searched for identifiers, every other member is
///   skipped;
/// * anything else - skipped.
///
/// `k` is the name of the identifier property the value belongs to; it is
/// forwarded unchanged to `check_id` at every nesting level.
///
/// `wrote_any` records whether an identifier has already been written, so a
/// tab is emitted before every identifier except the first. It is set to
/// `true` as soon as one identifier is written.
///
/// # Panics
///
/// Panics if writing to `writer` fails.
fn write_ids(k:&[u8], json:&mut JsonParser, writer:&mut BufWriter<io::StdoutLock>, wrote_any:&mut bool) {

    if json.peek().kind == JsonTokenType::StartArray {
        json.begin_array();
        while json.peek().kind != JsonTokenType::EndArray {
            write_ids(k, json, writer, wrote_any);
        }
        json.end_array();
        return;
    }

    if json.peek().kind == JsonTokenType::StartString {
        let id = json.string();
        if check_id(k, &id) {
            if *wrote_any {
                writer.write_all(b"\t").unwrap();
            } else {
                *wrote_any = true;
            }
            writer.write_all(&id).unwrap();
        }
        return;
    }

    if json.peek().kind == JsonTokenType::StartObject {
        // maybe a reification
        json.begin_object();
        while json.peek().kind != JsonTokenType::EndObject {
            // Use a distinct name so the property key `k` is not shadowed:
            // check_id must see the real identifier property, not "grebi:value".
            let member = json.name();
            if member.eq(b"grebi:value") {
                write_ids(k, json, writer, wrote_any);
            } else {
                // Every member's value has to be consumed, otherwise the next
                // json.name() would be called while positioned on a value.
                json.value(); // skip
            }
            // No early break: the remaining members must be read before
            // end_object() so the parser is left just past this object.
        }
        json.end_object();
        return;
    }

    json.value(); // skip
}


fn check_id(k:&[u8], id:&[u8]) -> bool {
if id.len() >= 16 {
// long numeric ID is prob a UUID and fine
Expand Down

0 comments on commit b095e6e

Please sign in to comment.