Skip to content

Commit

Permalink
Fix error when missing keys in dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Riccorl committed Jul 11, 2024
1 parent 1c7aaf3 commit 41f84e8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
10 changes: 8 additions & 2 deletions goldenretriever/data/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,11 +494,17 @@ def load_fn(
 if max_positives != -1:
     positives = positives[:max_positives]

-negatives = list(set([n["text"] for n in sample["negative_ctxs"]]))
+if "negative_ctxs" in sample:
+    negatives = list(set([n["text"] for n in sample["negative_ctxs"]]))
+else:
+    negatives = []
 if max_negatives != -1:
     negatives = negatives[:max_negatives]

-hard_negatives = list(set([h["text"] for h in sample["hard_negative_ctxs"]]))
+if "hard_negative_ctxs" in sample:
+    hard_negatives = list(set([h["text"] for h in sample["hard_negative_ctxs"]]))
+else:
+    hard_negatives = []
 if max_hard_negatives != -1:
     hard_negatives = hard_negatives[:max_hard_negatives]

Expand Down
2 changes: 1 addition & 1 deletion goldenretriever/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
 _MINOR = "9"
 # On main and in a nightly release the patch should be one ahead of the last
 # released build.
-_PATCH = "1"
+_PATCH = "2"
 # This is mainly for nightly builds which have the suffix ".dev$DATE". See
 # https://semver.org/#is-v123-a-semantic-version for the semantics.
 _SUFFIX = os.environ.get("GOLDENRETRIEVER_VERSION_SUFFIX", "")
Expand Down

0 comments on commit 41f84e8

Please sign in to comment.