Skip to content

Commit

Permalink
Add BRIGHT Long results
Browse files: browse the repository at this point in the history
  • Loading branch information
Muennighoff committed Sep 1, 2024
1 parent 9a79f7e commit 9355679
Show file tree
Hide file tree
Showing 17 changed files with 20,634 additions and 15,446 deletions.
29,520 changes: 15,337 additions & 14,183 deletions paths.json

Large diffs are not rendered by default.

130 changes: 73 additions & 57 deletions results.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -68,7 +68,6 @@
"LaBSE",
"OpenSearch-text-hybrid",
"SFR-Embedding-Mistral",
"all-MiniLM-L12-v2",
"all-MiniLM-L6-v2",
"all-MiniLM-L6-v2-instruct",
"all-mpnet-base-v2",
Expand Down Expand Up @@ -194,6 +193,12 @@
"sentence-t5-large",
"sentence-t5-xl",
"sentence-t5-xxl",
"sentence-transformers__LaBSE",
"sentence-transformers__all-MiniLM-L12-v2",
"sentence-transformers__all-MiniLM-L6-v2",
"sentence-transformers__all-mpnet-base-v2",
"sentence-transformers__paraphrase-multilingual-MiniLM-L12-v2",
"sentence-transformers__paraphrase-multilingual-mpnet-base-v2",
"sgpt-bloom-1b7-nli",
"sgpt-bloom-7b1-msmarco",
"silver-retriever-base-v1",
Expand Down Expand Up @@ -291,6 +296,7 @@ def _info(self):
"eval_language": datasets.Value("string"),
"metric": datasets.Value("string"),
"score": datasets.Value("float"),
"split": datasets.Value("string"),
}
),
supervised_keys=None,
Expand All @@ -300,8 +306,8 @@ def _info(self):
def _split_generators(self, dl_manager):
path_file = dl_manager.download_and_extract(URL)
# Local debugging:
#with open("/Users/muennighoff/Desktop/results/paths.json") as f:
with open(path_file) as f:
with open("/Users/muennighoff/Desktop/leaderboard/results/paths.json") as f:
#with open(path_file) as f:
files = json.load(f)
downloaded_files = dl_manager.download_and_extract(files[self.config.name])
return [
Expand Down Expand Up @@ -333,8 +339,12 @@ def _generate_examples(self, filepath):
split = "dev"
elif (ds_name in TESTFULL_SPLIT) and ("test.full" in res_dict):
split = "test.full"
elif (ds_name in STANDARD_SPLIT) and ("standard" in res_dict):
split = "standard"
elif (ds_name in STANDARD_SPLIT):
split = []
if "standard" in res_dict:
split += ["standard"]
if "long" in res_dict:
split += ["long"]
elif (ds_name in DEVTEST_SPLIT) and ("devtest" in res_dict):
split = "devtest"
elif (ds_name in TEST_AVG_SPLIT):
Expand Down Expand Up @@ -363,65 +373,71 @@ def _generate_examples(self, filepath):
elif "test" not in res_dict:
print(f"Skipping {ds_name} as split {split} not present.")
continue
res_dict = res_dict.get(split)

### New MTEB format ###
if isinstance(res_dict, list):
for res in res_dict:
lang = res.pop("languages", [""])
subset = res.pop("hf_subset", "")
if len(lang) == 1:
lang = lang[0].replace("eng-Latn", "")
else:
lang = "_".join(lang)
if not lang:
lang = subset
for metric, score in res.items():
if metric in SKIP_KEYS: continue
if isinstance(score, dict):
# Legacy format with e.g. {cosine: {spearman: ...}}
# Now it is {cosine_spearman: ...}
for k, v in score.items():
if not isinstance(v, float):
print(f'WARNING: Expected float, got {v} for {ds_name} {lang} {metric} {k}')
splits = [split] if not isinstance(split, list) else split
full_res_dict = res_dict
for split in splits:
res_dict = full_res_dict.get(split)

### New MTEB format ###
if isinstance(res_dict, list):
for res in res_dict:
lang = res.pop("languages", [""])
subset = res.pop("hf_subset", "")
if len(lang) == 1:
lang = lang[0].replace("eng-Latn", "")
else:
lang = "_".join(lang)
if not lang:
lang = subset
for metric, score in res.items():
if metric in SKIP_KEYS: continue
if isinstance(score, dict):
# Legacy format with e.g. {cosine: {spearman: ...}}
# Now it is {cosine_spearman: ...}
for k, v in score.items():
if not isinstance(v, float):
print(f'WARNING: Expected float, got {v} for {ds_name} {lang} {metric} {k}')
continue
if metric in SKIP_KEYS: continue
out.append({
"mteb_dataset_name": ds_name,
"eval_language": lang,
"metric": metric + "_" + k,
"score": v * 100,
})
else:
if not isinstance(score, float):
print(f'WARNING: Expected float, got {score} for {ds_name} {lang} {metric}')
continue
if metric in SKIP_KEYS: continue
out.append({
"mteb_dataset_name": ds_name,
"eval_language": lang,
"metric": metric + "_" + k,
"score": v * 100,
"metric": metric,
"score": score * 100,
"split": split,
})
else:
if not isinstance(score, float):
print(f'WARNING: Expected float, got {score} for {ds_name} {lang} {metric}')
continue
out.append({
"mteb_dataset_name": ds_name,
"eval_language": lang,
"metric": metric,
"score": score * 100,
})

### Old MTEB format ###
else:
is_multilingual = any(x in res_dict for x in EVAL_LANGS)
langs = res_dict.keys() if is_multilingual else ["en"]
for lang in langs:
if lang in SKIP_KEYS: continue
test_result_lang = res_dict.get(lang) if is_multilingual else res_dict
for metric, score in test_result_lang.items():
if not isinstance(score, dict):
score = {metric: score}
for sub_metric, sub_score in score.items():
if any(x in sub_metric for x in SKIP_KEYS): continue
if isinstance(sub_score, dict): continue
out.append({
"mteb_dataset_name": ds_name,
"eval_language": lang if is_multilingual else "",
"metric": f"{metric}_{sub_metric}" if metric != sub_metric else metric,
"score": sub_score * 100,
})
### Old MTEB format ###
else:
is_multilingual = any(x in res_dict for x in EVAL_LANGS)
langs = res_dict.keys() if is_multilingual else ["en"]
for lang in langs:
if lang in SKIP_KEYS: continue
test_result_lang = res_dict.get(lang) if is_multilingual else res_dict
for metric, score in test_result_lang.items():
if not isinstance(score, dict):
score = {metric: score}
for sub_metric, sub_score in score.items():
if any(x in sub_metric for x in SKIP_KEYS): continue
if isinstance(sub_score, dict): continue
out.append({
"mteb_dataset_name": ds_name,
"eval_language": lang if is_multilingual else "",
"metric": f"{metric}_{sub_metric}" if metric != sub_metric else metric,
"score": sub_score * 100,
"split": split,
})
for idx, row in enumerate(sorted(out, key=lambda x: x["mteb_dataset_name"])):
yield idx, row

Expand Down
Loading

0 comments on commit 9355679

Please sign in to comment.