fetch more external results #97

Merged: 1 commit, Jan 24, 2025

The diff below extends load_external.py to extract similarity_fn_name from config_sentence_transformers.json and a training_datasets mapping from README metadata, removes the org-allowlist name-mismatch filter in parse_readme, updates the external model_meta.json files to the new schema, and adds newly fetched task result files.
load_external.py (40 changes: 25 additions & 15 deletions)
@@ -68,14 +68,17 @@ def get_model_parameters_memory(model_info: ModelInfo) -> tuple[int | None, float
         return None, None


-def get_dim_seq_size(model: ModelInfo) -> tuple[str | None, str | None, int, float]:
+def get_dim_seq_size(model: ModelInfo) -> tuple[str | None, str | None, int, float, str | None]:
     siblings = model.siblings or []
     filenames = [sib.rfilename for sib in siblings]
     dim, seq = None, None
+    similarity_fn_name = None
     for filename in filenames:
         if re.match(r"\d+_Pooling/config.json", filename):
             st_config_path = hf_hub_download(model.id, filename=filename)
-            dim = json.load(open(st_config_path)).get("word_embedding_dimension", None)
+            with open(st_config_path) as f:
+                pooling_config = json.load(f)
+            dim = pooling_config.get("word_embedding_dimension", None)
             break
     for filename in filenames:
         if re.match(r"\d+_Dense/config.json", filename):
@@ -87,17 +90,21 @@ def get_dim_seq_size(model: ModelInfo) -> tuple[str | None, str | None, int, flo
     if not dim:
         dim = config.get("hidden_dim", config.get("hidden_size", config.get("d_model", None)))
     seq = config.get("n_positions", config.get("max_position_embeddings", config.get("n_ctx", config.get("seq_length", None))))

+    if "config_sentence_transformers.json" in filenames:
+        st_config_path = hf_hub_download(model.id, filename="config_sentence_transformers.json")
+        with open(st_config_path) as f:
+            st_config = json.load(f)
+        similarity_fn_name = st_config.get("similarity_fn_name", None)
     parameters, memory = get_model_parameters_memory(model)
-    return dim, seq, parameters, memory
+    return dim, seq, parameters, memory, similarity_fn_name


 def create_model_meta(model_info: ModelInfo) -> ModelMeta | None:
     readme_path = hf_hub_download(model_info.id, filename="README.md", etag_timeout=30)
     meta = metadata_load(readme_path)
-    dim, seq, parameters, memory = None, None, None, None
+    dim, seq, parameters, memory, similarity_fn_name = None, None, None, None, None
     try:
-        dim, seq, parameters, memory = get_dim_seq_size(model_info)
+        dim, seq, parameters, memory, similarity_fn_name = get_dim_seq_size(model_info)
     except Exception as e:
         logger.error(f"Error getting model parameters for {model_info.id}, {e}")
@@ -110,7 +117,12 @@ def create_model_meta(model_info: ModelInfo) -> ModelMeta | None:
     for i in range(len(languages)):
         if languages[i] is False:
             languages[i] = "no"
-
+    datasets = meta.get("datasets", None)
+    if datasets is not None:
+        datasets = {
+            d: []
+            for d in datasets
+        }
     model_meta = ModelMeta(
         name=model_info.id,
         revision=model_info.sha,
@@ -122,6 +134,11 @@ def create_model_meta(model_info: ModelInfo) -> ModelMeta | None:
         max_tokens=seq,
         n_parameters=parameters,
         languages=languages,
+        public_training_code=None,
+        public_training_data=None,
+        similarity_fn_name=similarity_fn_name,
+        use_instructions=None,
+        training_datasets=datasets,
     )
     return model_meta

@@ -139,14 +156,7 @@ def parse_readme(model_info: ModelInfo) -> dict[str, dict[str, Any]] | None:
         return
     model_index = meta["model-index"][0]
     model_name_from_readme = model_index.get("name", None)
-    orgs = ["Alibaba-NLP", "HIT-TMG", "McGill-NLP", "Snowflake", "facebook", "jinaai", "nomic-ai"]
-    is_org = any([model_id.startswith(org) for org in orgs])
-    # There a lot of reuploads with tunes, quantization, etc. We only want the original model
-    # to prevent this most of the time we can check if the model name from the readme is the same as the model id
-    # but some orgs have a different naming in their readme
-    if model_name_from_readme and not model_info.id.endswith(model_name_from_readme) and not is_org:
-        logger.warning(f"Model name mismatch: {model_info.id} vs {model_name_from_readme}")
-        return
-
     results = model_index.get("results", [])
     model_results = {}
     for result in results:
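For review purposes, here is a minimal standalone sketch of the similarity_fn_name lookup that get_dim_seq_size now performs. It is not part of the PR, and the model id is illustrative only.

import json

from huggingface_hub import HfApi, hf_hub_download

# Sketch of the lookup added above, runnable outside the loader.
# The model id below is an arbitrary example, not taken from this PR.
model_info = HfApi().model_info("sentence-transformers/all-MiniLM-L6-v2")
filenames = [sib.rfilename for sib in (model_info.siblings or [])]

similarity_fn_name = None
if "config_sentence_transformers.json" in filenames:
    st_config_path = hf_hub_download(model_info.id, filename="config_sentence_transformers.json")
    with open(st_config_path) as f:
        similarity_fn_name = json.load(f).get("similarity_fn_name", None)

print(similarity_fn_name)  # e.g. "cosine", or None when the config does not set it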
@@ -5,18 +5,19 @@
   "languages": [],
   "loader": null,
   "n_parameters": 135193344,
-  "memory_usage": null,
-  "max_tokens": 512,
+  "max_tokens": 512.0,
   "embed_dim": 768,
   "license": null,
   "open_weights": true,
-  "public_training_data": null,
   "public_training_code": null,
+  "public_training_data": null,
   "framework": [
     "Sentence Transformers"
   ],
   "reference": null,
   "similarity_fn_name": null,
   "use_instructions": null,
-  "zero_shot_benchmarks": null
+  "training_datasets": {},
+  "adapted_from": null,
+  "superseded_by": null
 }
@@ -5,18 +5,19 @@
   "languages": [],
   "loader": null,
   "n_parameters": 135193344,
-  "memory_usage": null,
-  "max_tokens": 512,
+  "max_tokens": 512.0,
   "embed_dim": 768,
   "license": null,
   "open_weights": true,
-  "public_training_data": null,
   "public_training_code": null,
+  "public_training_data": null,
   "framework": [
     "Sentence Transformers"
   ],
   "reference": null,
   "similarity_fn_name": null,
   "use_instructions": null,
-  "zero_shot_benchmarks": null
+  "training_datasets": {},
+  "adapted_from": null,
+  "superseded_by": null
 }
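Both model_meta.json diffs above apply the same schema change: memory_usage and zero_shot_benchmarks are dropped, max_tokens becomes a float, and training_datasets, adapted_from, and superseded_by are added. A throwaway migration sketch for any remaining old-schema files follows; the script itself is not part of the PR, and only the field names are taken from the diff.

import json
import sys

# Upgrade an old-schema model_meta.json in place to match the diffs above.
path = sys.argv[1]
with open(path) as f:
    meta = json.load(f)

meta.pop("memory_usage", None)          # field removed by the PR
meta.pop("zero_shot_benchmarks", None)  # field removed by the PR
if meta.get("max_tokens") is not None:
    meta["max_tokens"] = float(meta["max_tokens"])  # 512 -> 512.0
meta.setdefault("training_datasets", {})
meta.setdefault("adapted_from", None)
meta.setdefault("superseded_by", None)

with open(path, "w") as f:
    json.dump(meta, f, indent=2)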
@@ -0,0 +1,18 @@
{
  "dataset_revision": "392ba3f5bcc8c51f578786c1fc3dae648662cb9b",
  "task_name": "AlloProfClusteringP2P",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "v_measure": 0.6234594305243399,
        "main_score": 0.6234594305243399
      }
    ]
  }
}
@@ -0,0 +1,18 @@
{
  "dataset_revision": "392ba3f5bcc8c51f578786c1fc3dae648662cb9b",
  "task_name": "AlloProfClusteringS2S",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "v_measure": 0.2572945498452115,
        "main_score": 0.2572945498452115
      }
    ]
  }
}
@@ -0,0 +1,19 @@
{
  "dataset_revision": "65393d0d7a08a10b4e348135e824f385d420b0fd",
  "task_name": "AlloprofReranking",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "map": 0.26596323297349184,
        "mrr": 0.26091629657044163,
        "main_score": 0.26596323297349184
      }
    ]
  }
}
@@ -0,0 +1,52 @@
{
  "dataset_revision": "fcf295ea64c750f41fadbaa37b9b861558e1bfbd",
  "task_name": "AlloprofRetrieval",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "map_at_1": 0.00345,
        "map_at_10": 0.00934,
        "map_at_100": 0.01191,
        "map_at_1000": 0.013419999999999998,
        "map_at_20": 0.0102,
        "map_at_3": 0.006689999999999999,
        "map_at_5": 0.00753,
        "mrr_at_1": 0.00345,
        "mrr_at_10": 0.00934,
        "mrr_at_100": 0.01191,
        "mrr_at_1000": 0.013419999999999998,
        "mrr_at_20": 0.0102,
        "mrr_at_3": 0.006689999999999999,
        "mrr_at_5": 0.00753,
        "ndcg_at_1": 0.00345,
        "ndcg_at_10": 0.013839999999999998,
        "ndcg_at_100": 0.03151,
        "ndcg_at_1000": 0.09014,
        "ndcg_at_20": 0.01692,
        "ndcg_at_3": 0.00785,
        "ndcg_at_5": 0.00941,
        "precision_at_1": 0.00345,
        "precision_at_10": 0.00289,
        "precision_at_100": 0.00124,
        "precision_at_1000": 0.00063,
        "precision_at_20": 0.00205,
        "precision_at_3": 0.00374,
        "precision_at_5": 0.00302,
        "recall_at_1": 0.00345,
        "recall_at_10": 0.02893,
        "recall_at_100": 0.12435,
        "recall_at_1000": 0.62867,
        "recall_at_20": 0.04102,
        "recall_at_3": 0.01123,
        "recall_at_5": 0.015110000000000002,
        "main_score": 0.013839999999999998
      }
    ]
  }
}
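To skim the newly added result files like the one above, a small helper can print each task's main_score per split and subset. This is a reviewer-convenience sketch, not part of the PR; the results directory path is a placeholder.

import json
from pathlib import Path

# Walk a directory of result JSONs and print task, split, subset, and main_score.
# "results" is a placeholder path, not a directory defined by this PR.
results_dir = Path("results")
for path in sorted(results_dir.glob("**/*.json")):
    with open(path) as f:
        data = json.load(f)
    if "scores" not in data:  # skip model_meta.json and other non-result files
        continue
    for split, entries in data["scores"].items():
        for entry in entries:
            print(f'{data["task_name"]}\t{split}\t{entry["hf_subset"]}\t{entry["main_score"]:.4f}')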
@@ -0,0 +1,19 @@
{
  "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
  "task_name": "AmazonReviewsClassification",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "None"
        ],
        "accuracy": 0.32661999999999997,
        "f1": 0.32443152253731844,
        "main_score": 0.32661999999999997
      }
    ]
  }
}
@@ -0,0 +1,52 @@
{
  "dataset_revision": "5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59",
  "task_name": "BSARDRetrieval",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "map_at_1": 0.0,
        "map_at_10": 0.0,
        "map_at_100": 0.00062,
        "map_at_1000": 0.00077,
        "map_at_20": 0.0,
        "map_at_3": 0.0,
        "map_at_5": 0.0,
        "mrr_at_1": 0.0,
        "mrr_at_10": 0.0,
        "mrr_at_100": 0.00062,
        "mrr_at_1000": 0.00077,
        "mrr_at_20": 0.0,
        "mrr_at_3": 0.0,
        "mrr_at_5": 0.0,
        "ndcg_at_1": 0.0,
        "ndcg_at_10": 0.0,
        "ndcg_at_100": 0.00484,
        "ndcg_at_1000": 0.01054,
        "ndcg_at_20": 0.0,
        "ndcg_at_3": 0.0,
        "ndcg_at_5": 0.0,
        "precision_at_1": 0.0,
        "precision_at_10": 0.0,
        "precision_at_100": 0.00027,
        "precision_at_1000": 8e-05,
        "precision_at_20": 0.0,
        "precision_at_3": 0.0,
        "precision_at_5": 0.0,
        "recall_at_1": 0.0,
        "recall_at_10": 0.0,
        "recall_at_100": 0.02703,
        "recall_at_1000": 0.07658,
        "recall_at_20": 0.0,
        "recall_at_3": 0.0,
        "recall_at_5": 0.0,
        "main_score": 0.02703
      }
    ]
  }
}
@@ -0,0 +1,18 @@
{
  "dataset_revision": "e06ebbbb123f8144bef1a5d18796f3dec9ae2915",
  "task_name": "HALClusteringS2S",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "fra-Latn"
        ],
        "v_measure": 0.1377084465510841,
        "main_score": 0.1377084465510841
      }
    ]
  }
}
@@ -0,0 +1,18 @@
{
  "dataset_revision": "b5d54f8f3b61ae17845046286940f03c6bc79bc7",
  "task_name": "MLSUMClusteringP2P",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "None"
        ],
        "v_measure": 0.4543375637260015,
        "main_score": 0.4543375637260015
      }
    ]
  }
}
@@ -0,0 +1,18 @@
{
  "dataset_revision": "b5d54f8f3b61ae17845046286940f03c6bc79bc7",
  "task_name": "MLSUMClusteringS2S",
  "evaluation_time": null,
  "mteb_version": null,
  "scores": {
    "test": [
      {
        "hf_subset": "fra-Latn",
        "languages": [
          "None"
        ],
        "v_measure": 0.45205646487969753,
        "main_score": 0.45205646487969753
      }
    ]
  }
}