Skip to content

Commit

Permalink
Fixed possible bug in label choosing.
Browse files Browse the repository at this point in the history
  • Loading branch information
gaurav committed Nov 7, 2024
1 parent 31bc57f commit 8f64ec3
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions node_normalizer/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,7 @@ async def create_node(canonical_id, equivalent_ids, types, info_contents, includ
# As per https://github.com/TranslatorSRI/Babel/issues/158, we select the first label from any
# identifier _except_ where one of the types is in preferred_name_boost_prefixes, in which case
# we prefer the prefixes listed there.
- labels = list(filter(lambda x: len(x) > 0, [eid['l'] for eid in eids if 'l' in eid]))
-
+ #
# Note that types[canonical_id] goes from most specific to least specific, so we
# need to reverse it in order to apply preferred_name_boost_prefixes for the most
# specific type.
Expand Down Expand Up @@ -747,11 +746,11 @@ async def create_node(canonical_id, equivalent_ids, types, info_contents, includ
# least one label shorter than this limit.
labels_shorter_than_limit = [l for l in filtered_possible_labels if l and len(l) <= config['demote_labels_longer_than']]
if labels_shorter_than_limit:
- labels = labels_shorter_than_limit
+ filtered_possible_labels = labels_shorter_than_limit

# Note that the id will be from the equivalent ids, not the canonical_id. This is to handle conflation
- if len(labels) > 0:
- node = {"id": {"identifier": eids[0]['i'], "label": labels[0]}}
+ if len(filtered_possible_labels) > 0:
+ node = {"id": {"identifier": eids[0]['i'], "label": filtered_possible_labels[0]}}
else:
# Sometimes, nothing has a label :(
node = {"id": {"identifier": eids[0]['i']}}
Expand Down

0 comments on commit 8f64ec3

Please sign in to comment.