AFQMC |
['cmn'] |
STS |
s2s |
|
None |
None |
AILACasedocs |
['eng'] |
Retrieval |
p2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 26948.344086021505, 'average_query_length': 3038.42, 'num_documents': 186, 'num_queries': 50, 'average_relevant_docs_per_query': 3.9}} |
AILAStatutes |
['eng'] |
Retrieval |
p2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 1973.6341463414635, 'average_query_length': 3038.42, 'num_documents': 82, 'num_queries': 50, 'average_relevant_docs_per_query': 4.34}} |
AJGT (Alomari et al., 2017) |
['ara'] |
Classification |
s2s |
[Social, Written] |
{'train': 1800} |
{'train': 46.81} |
ARCChallenge (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 1172} |
{'test': {'average_document_length': 30.94235294117647, 'average_query_length': 131.56569965870307, 'num_documents': 9350, 'num_queries': 1172, 'average_relevant_docs_per_query': 1.0}} |
ATEC |
['cmn'] |
STS |
s2s |
|
None |
None |
AfriSentiClassification |
['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] |
Classification |
s2s |
[Social, Written] |
{'test': 2048} |
{'test': 74.77} |
AfriSentiLangClassification |
['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] |
Classification |
s2s |
[Social, Written] |
{'test': 5754} |
{'test': 77.84} |
AllegroReviews |
['pol'] |
Classification |
s2s |
|
{'test': 1006} |
{'test': 477.2} |
AlloProfClusteringP2P.v2 (Lefebvre-Brossard et al., 2023) |
['fra'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
{'test': 2556} |
{'test': 3539.5} |
AlloProfClusteringS2S.v2 (Lefebvre-Brossard et al., 2023) |
['fra'] |
Clustering |
s2s |
[Encyclopaedic, Written] |
{'test': 2556} |
{'test': 32.8} |
AlloprofReranking (Lefebvre-Brossard et al., 2023) |
['fra'] |
Reranking |
s2p |
[Web, Academic, Written] |
{'test': 2316, 'train': 9264} |
None |
AlloprofRetrieval (Lefebvre-Brossard et al., 2023) |
['fra'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'train': 2048} |
{'test': {'average_document_length': 3505.705399061033, 'average_query_length': 170.71286701208982, 'num_documents': 2556, 'num_queries': 2316, 'average_relevant_docs_per_query': 1.0}} |
AlphaNLI (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 1532} |
{'test': {'average_document_length': 43.42647308646886, 'average_query_length': 103.05483028720627, 'num_documents': 241347, 'num_queries': 1532, 'average_relevant_docs_per_query': 1.0}} |
AmazonCounterfactualClassification |
['deu', 'eng', 'jpn'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 335, 'test': 670} |
{'validation': 109.2, 'test': 106.1} |
AmazonPolarityClassification (Julian McAuley, 2013) |
['eng'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 400000} |
{'test': 431.4} |
AmazonReviewsClassification (Phillip Keung, 2020) |
['cmn', 'deu', 'eng', 'fra', 'jpn', 'spa'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 30000, 'test': 30000} |
{'validation': 159.2, 'test': 160.4} |
AngryTweetsClassification (Pauli et al., 2021) |
['dan'] |
Classification |
s2s |
[Social, Written] |
{'test': 1050} |
{'test': 156.1} |
AppsRetrieval (Dan Hendrycks, 2021) |
['eng', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'average_document_length': 575.0086708499715, 'average_query_length': 1669.8284196547145, 'num_documents': 8765, 'num_queries': 3765, 'average_relevant_docs_per_query': 1.0}} |
ArEntail (Obeidat et al., 2024) |
['ara'] |
PairClassification |
s2s |
[News, Written] |
{'test': 1000} |
{'test': 65.77} |
ArXivHierarchicalClusteringP2P |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'average_text_length': 1008.439453125, 'average_labels_per_text': 1.46337890625, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 
20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}} |
ArXivHierarchicalClusteringS2S |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': 1009.98} |
ArguAna (Boteva et al., 2016) |
['eng'] |
Retrieval |
s2p |
[Medical, Written] |
None |
{'test': {'average_document_length': 1029.2327645838136, 'average_query_length': 1192.7204836415362, 'num_documents': 8674, 'num_queries': 1406, 'average_relevant_docs_per_query': 1.0}} |
ArguAna-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1060.702674659903, 'average_query_length': 1224.8022759601706, 'num_documents': 8674, 'num_queries': 1406, 'average_relevant_docs_per_query': 1.0}} |
ArmenianParaphrasePC (Arthur Malajyan, 2020) |
['hye'] |
PairClassification |
s2s |
[News, Written] |
{'train': 4023, 'test': 1470} |
{'train': 243.81, 'test': 241.37} |
ArxivClassification (He et al., 2019) |
['eng'] |
Classification |
s2s |
[Academic, Written] |
{'test': 2048} |
{} |
AskUbuntuDupQuestions |
['eng'] |
Reranking |
s2s |
|
{'test': 2255} |
{'test': {'num_samples': 375, 'num_positive': 375, 'num_negative': 375, 'avg_query_len': 50.205333333333336, 'avg_positive_len': 6.013333333333334, 'avg_negative_len': 13.986666666666666}} |
Assin2RTE (Real et al., 2020) |
['por'] |
PairClassification |
s2s |
[Written] |
{'test': 2448} |
{'test': 53.55} |
Assin2STS (Real et al., 2020) |
['por'] |
STS |
s2s |
[Written] |
{'test': 2448} |
{'test': 53.55} |
BIOSSES (Soğancıoğlu et al., 2017) |
['eng'] |
STS |
s2s |
|
None |
None |
BQ (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
BSARDRetrieval (Louis et al., 2022) |
['fra'] |
Retrieval |
s2p |
[Legal, Spoken] |
{'test': 222} |
{'test': {'average_document_length': 880.2900631820793, 'average_query_length': 144.77027027027026, 'num_documents': 22633, 'num_queries': 222, 'average_relevant_docs_per_query': 1.0}} |
BUCC.v2 |
['cmn', 'deu', 'eng', 'fra', 'rus'] |
BitextMining |
s2s |
[Written] |
{'test': 641684} |
{'test': 101.3} |
Banking77Classification |
['eng'] |
Classification |
s2s |
[Written] |
{'test': 3080} |
{'test': 54.2} |
BelebeleRetrieval (Lucas Bandarkar, 2023) |
['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] |
Retrieval |
s2p |
[Web, News, Written] |
{'test': 103500} |
{'test': {'average_document_length': 487.3975028339728, 'average_query_length': 74.49551684802204, 'num_documents': 183488, 'num_queries': 338378, 'average_relevant_docs_per_query': 1.0, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'average_document_length': 416.4733606557377, 'average_query_length': 55.84, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'acm_Arab-eng_Latn': {'average_document_length': 416.4733606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-acm_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 55.84, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-afr_Latn': {'average_document_length': 503.6659836065574, 'average_query_length': 78.04555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'afr_Latn-eng_Latn': {'average_document_length': 503.6659836065574, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-afr_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.04555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-als_Latn': {'average_document_length': 534.016393442623, 'average_query_length': 76.13555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'als_Latn-eng_Latn': {'average_document_length': 534.016393442623, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-als_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 76.13555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-amh_Ethi': {'average_document_length': 
319.8688524590164, 'average_query_length': 49.16111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'amh_Ethi-eng_Latn': {'average_document_length': 319.8688524590164, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-amh_Ethi': {'average_document_length': 475.51024590163934, 'average_query_length': 49.16111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-apc_Arab': {'average_document_length': 393.0553278688525, 'average_query_length': 55.85777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'apc_Arab-eng_Latn': {'average_document_length': 393.0553278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-apc_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 55.85777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Arab': {'average_document_length': 421.96311475409834, 'average_query_length': 58.55, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-eng_Latn': {'average_document_length': 421.96311475409834, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 58.55, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Latn': {'average_document_length': 555.6188524590164, 'average_query_length': 67.02444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-eng_Latn': {'average_document_length': 555.6188524590164, 'average_query_length': 77.34777777777778, 'num_documents': 
488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arb_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 67.02444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-ars_Arab': {'average_document_length': 422.5553278688525, 'average_query_length': 56.43222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ars_Arab-eng_Latn': {'average_document_length': 422.5553278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ars_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 56.43222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-ary_Arab': {'average_document_length': 411.1475409836066, 'average_query_length': 66.01893095768374, 'num_documents': 488, 'num_queries': 898, 'average_relevant_docs_per_query': 1.0}, 'ary_Arab-eng_Latn': {'average_document_length': 411.1475409836066, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ary_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 66.01893095768374, 'num_documents': 488, 'num_queries': 898, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-arz_Arab': {'average_document_length': 412.05122950819674, 'average_query_length': 57.14111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arz_Arab-eng_Latn': {'average_document_length': 412.05122950819674, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-arz_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 57.14111111111111, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'asm_Beng-asm_Beng': {'average_document_length': 458.5983606557377, 'average_query_length': 68.26, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'asm_Beng-eng_Latn': {'average_document_length': 458.5983606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-asm_Beng': {'average_document_length': 475.51024590163934, 'average_query_length': 68.26, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-azj_Latn': {'average_document_length': 519.6127049180328, 'average_query_length': 73.51222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'azj_Latn-eng_Latn': {'average_document_length': 519.6127049180328, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-azj_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 73.51222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-bam_Latn': {'average_document_length': 457.3114754098361, 'average_query_length': 72.34222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bam_Latn-eng_Latn': {'average_document_length': 457.3114754098361, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bam_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 72.34222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Beng': {'average_document_length': 467.7745901639344, 'average_query_length': 69.48444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-eng_Latn': 
{'average_document_length': 467.7745901639344, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Beng': {'average_document_length': 475.51024590163934, 'average_query_length': 69.48444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Latn': {'average_document_length': 522.8934426229508, 'average_query_length': 74.78777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-eng_Latn': {'average_document_length': 522.8934426229508, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ben_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 74.78777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-bod_Tibt': {'average_document_length': 533.3872950819672, 'average_query_length': 86.90222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bod_Tibt-eng_Latn': {'average_document_length': 533.3872950819672, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bod_Tibt': {'average_document_length': 475.51024590163934, 'average_query_length': 86.90222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-bul_Cyrl': {'average_document_length': 496.97131147540983, 'average_query_length': 72.89, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'bul_Cyrl-eng_Latn': {'average_document_length': 496.97131147540983, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-bul_Cyrl': {'average_document_length': 475.51024590163934, 
'average_query_length': 72.89, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-cat_Latn': {'average_document_length': 525.4467213114754, 'average_query_length': 75.40666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'cat_Latn-eng_Latn': {'average_document_length': 525.4467213114754, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-cat_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.40666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-ceb_Latn': {'average_document_length': 570.8483606557377, 'average_query_length': 81.19666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ceb_Latn-eng_Latn': {'average_document_length': 570.8483606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ceb_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 81.19666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-ces_Latn': {'average_document_length': 461.0061475409836, 'average_query_length': 67.73333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ces_Latn-eng_Latn': {'average_document_length': 461.0061475409836, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ces_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 67.73333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-ckb_Arab': {'average_document_length': 462.98770491803276, 'average_query_length': 71.04555555555555, 'num_documents': 488, 
'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ckb_Arab-eng_Latn': {'average_document_length': 462.98770491803276, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ckb_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 71.04555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-dan_Latn': {'average_document_length': 489.4856557377049, 'average_query_length': 72.96888888888888, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'dan_Latn-eng_Latn': {'average_document_length': 489.4856557377049, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-dan_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 72.96888888888888, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-deu_Latn': {'average_document_length': 555.1659836065573, 'average_query_length': 75.32444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'deu_Latn-eng_Latn': {'average_document_length': 555.1659836065573, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-deu_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.32444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-ell_Grek': {'average_document_length': 568.3872950819672, 'average_query_length': 86.92666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ell_Grek-eng_Latn': {'average_document_length': 568.3872950819672, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 
1.0}, 'eng_Latn-ell_Grek': {'average_document_length': 475.51024590163934, 'average_query_length': 86.92666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eng_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-est_Latn': {'average_document_length': 467.1475409836066, 'average_query_length': 67.55888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'est_Latn-eng_Latn': {'average_document_length': 467.1475409836066, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-est_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 67.55888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eus_Latn': {'average_document_length': 506.19262295081967, 'average_query_length': 74.44777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eus_Latn-eng_Latn': {'average_document_length': 506.19262295081967, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-eus_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 74.44777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-fin_Latn': {'average_document_length': 507.5, 'average_query_length': 72.50888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fin_Latn-eng_Latn': {'average_document_length': 507.5, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fin_Latn': {'average_document_length': 475.51024590163934, 
'average_query_length': 72.50888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-fra_Latn': {'average_document_length': 564.8401639344262, 'average_query_length': 90.54222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fra_Latn-eng_Latn': {'average_document_length': 564.8401639344262, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fra_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 90.54222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-fuv_Latn': {'average_document_length': 443.4733606557377, 'average_query_length': 58.42111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'fuv_Latn-eng_Latn': {'average_document_length': 443.4733606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-fuv_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 58.42111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-gaz_Latn': {'average_document_length': 563.5389344262295, 'average_query_length': 85.93222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'gaz_Latn-eng_Latn': {'average_document_length': 563.5389344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-gaz_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 85.93222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-grn_Latn': {'average_document_length': 480.3299180327869, 'average_query_length': 75.10666666666667, 
'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'grn_Latn-eng_Latn': {'average_document_length': 480.3299180327869, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-grn_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.10666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-guj_Gujr': {'average_document_length': 458.1885245901639, 'average_query_length': 62.25666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'guj_Gujr-eng_Latn': {'average_document_length': 458.1885245901639, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-guj_Gujr': {'average_document_length': 475.51024590163934, 'average_query_length': 62.25666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-hat_Latn': {'average_document_length': 438.6700819672131, 'average_query_length': 70.64666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hat_Latn-eng_Latn': {'average_document_length': 438.6700819672131, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hat_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 70.64666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-hau_Latn': {'average_document_length': 507.24590163934425, 'average_query_length': 85.8488888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hau_Latn-eng_Latn': {'average_document_length': 507.24590163934425, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hau_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 85.8488888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-heb_Hebr': {'average_document_length': 371.36270491803276, 'average_query_length': 55.135555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'heb_Hebr-eng_Latn': {'average_document_length': 371.36270491803276, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-heb_Hebr': {'average_document_length': 475.51024590163934, 'average_query_length': 55.135555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Deva': {'average_document_length': 473.55737704918033, 'average_query_length': 72.61777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-eng_Latn': {'average_document_length': 473.55737704918033, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Deva': {'average_document_length': 475.51024590163934, 'average_query_length': 72.61777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Latn': {'average_document_length': 541.7315573770492, 'average_query_length': 74.81222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-eng_Latn': {'average_document_length': 541.7315573770492, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hin_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 74.81222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 
'hrv_Latn-hrv_Latn': {'average_document_length': 469.202868852459, 'average_query_length': 68.83555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hrv_Latn-eng_Latn': {'average_document_length': 469.202868852459, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hrv_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.83555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-hun_Latn': {'average_document_length': 501.1946721311475, 'average_query_length': 74.40555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hun_Latn-eng_Latn': {'average_document_length': 501.1946721311475, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hun_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 74.40555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-hye_Armn': {'average_document_length': 527.5102459016393, 'average_query_length': 75.42555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hye_Armn-eng_Latn': {'average_document_length': 527.5102459016393, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-hye_Armn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.42555555555556, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ibo_Latn-ibo_Latn': {'average_document_length': 482.3483606557377, 'average_query_length': 72.51501668520578, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'ibo_Latn-eng_Latn': 
{'average_document_length': 482.3483606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ibo_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 72.51501668520578, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'ilo_Latn-ilo_Latn': {'average_document_length': 574.6987704918033, 'average_query_length': 85.7611111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ilo_Latn-eng_Latn': {'average_document_length': 574.6987704918033, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ilo_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 85.7611111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-ind_Latn': {'average_document_length': 516.0573770491803, 'average_query_length': 82.10555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ind_Latn-eng_Latn': {'average_document_length': 516.0573770491803, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ind_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 82.10555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-isl_Latn': {'average_document_length': 470.73975409836066, 'average_query_length': 77.27333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'isl_Latn-eng_Latn': {'average_document_length': 470.73975409836066, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-isl_Latn': {'average_document_length': 
475.51024590163934, 'average_query_length': 77.27333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-ita_Latn': {'average_document_length': 560.9344262295082, 'average_query_length': 83.49777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ita_Latn-eng_Latn': {'average_document_length': 560.9344262295082, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ita_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 83.49777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-jav_Latn': {'average_document_length': 494.1803278688525, 'average_query_length': 78.60666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'jav_Latn-eng_Latn': {'average_document_length': 494.1803278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jav_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.60666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-jpn_Jpan': {'average_document_length': 207.74795081967213, 'average_query_length': 35.79, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'jpn_Jpan-eng_Latn': {'average_document_length': 207.74795081967213, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-jpn_Jpan': {'average_document_length': 475.51024590163934, 'average_query_length': 35.79, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-kac_Latn': {'average_document_length': 605.2889344262295, 'average_query_length': 98.64182424916574, 
'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0}, 'kac_Latn-eng_Latn': {'average_document_length': 605.2889344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kac_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 98.64182424916574, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-kan_Knda': {'average_document_length': 498.9077868852459, 'average_query_length': 72.13666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kan_Knda-eng_Latn': {'average_document_length': 498.9077868852459, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kan_Knda': {'average_document_length': 475.51024590163934, 'average_query_length': 72.13666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-kat_Geor': {'average_document_length': 521.7766393442623, 'average_query_length': 74.81444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kat_Geor-eng_Latn': {'average_document_length': 521.7766393442623, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kat_Geor': {'average_document_length': 475.51024590163934, 'average_query_length': 74.81444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-kaz_Cyrl': {'average_document_length': 488.2110655737705, 'average_query_length': 70.75666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kaz_Cyrl-eng_Latn': {'average_document_length': 488.2110655737705, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kaz_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 70.75666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-kea_Latn': {'average_document_length': 471.5594262295082, 'average_query_length': 75.94111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kea_Latn-eng_Latn': {'average_document_length': 471.5594262295082, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kea_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.94111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-khk_Cyrl': {'average_document_length': 496.655737704918, 'average_query_length': 73.33444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'khk_Cyrl-eng_Latn': {'average_document_length': 496.655737704918, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khk_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 73.33444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-khm_Khmr': {'average_document_length': 562.4139344262295, 'average_query_length': 75.74888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'khm_Khmr-eng_Latn': {'average_document_length': 562.4139344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-khm_Khmr': {'average_document_length': 475.51024590163934, 'average_query_length': 75.74888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 
'kin_Latn-kin_Latn': {'average_document_length': 529.2520491803278, 'average_query_length': 79.89655172413794, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'kin_Latn-eng_Latn': {'average_document_length': 529.2520491803278, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kin_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 79.89655172413794, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'kir_Cyrl-kir_Cyrl': {'average_document_length': 487.80737704918033, 'average_query_length': 74.42333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kir_Cyrl-eng_Latn': {'average_document_length': 487.80737704918033, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kir_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 74.42333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-kor_Hang': {'average_document_length': 241.32991803278688, 'average_query_length': 35.257777777777775, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'kor_Hang-eng_Latn': {'average_document_length': 241.32991803278688, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-kor_Hang': {'average_document_length': 475.51024590163934, 'average_query_length': 35.257777777777775, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-lao_Laoo': {'average_document_length': 471.6495901639344, 'average_query_length': 63.31333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lao_Laoo-eng_Latn': 
{'average_document_length': 471.6495901639344, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lao_Laoo': {'average_document_length': 475.51024590163934, 'average_query_length': 63.31333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lin_Latn-lin_Latn': {'average_document_length': 512.9016393442623, 'average_query_length': 81.56681514476615, 'num_documents': 488, 'num_queries': 898, 'average_relevant_docs_per_query': 1.0022271714922049}, 'lin_Latn-eng_Latn': {'average_document_length': 512.9016393442623, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lin_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 81.56681514476615, 'num_documents': 488, 'num_queries': 898, 'average_relevant_docs_per_query': 1.0022271714922049}, 'lit_Latn-lit_Latn': {'average_document_length': 474.0553278688525, 'average_query_length': 68.69888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lit_Latn-eng_Latn': {'average_document_length': 474.0553278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lit_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.69888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lug_Latn-lug_Latn': {'average_document_length': 485.73975409836066, 'average_query_length': 78.52057842046719, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'lug_Latn-eng_Latn': {'average_document_length': 485.73975409836066, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lug_Latn': 
{'average_document_length': 475.51024590163934, 'average_query_length': 78.52057842046719, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'luo_Latn-luo_Latn': {'average_document_length': 497.53688524590166, 'average_query_length': 73.14333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'luo_Latn-eng_Latn': {'average_document_length': 497.53688524590166, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-luo_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 73.14333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-lvs_Latn': {'average_document_length': 487.21311475409834, 'average_query_length': 69.97888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'lvs_Latn-eng_Latn': {'average_document_length': 487.21311475409834, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-lvs_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 69.97888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-mal_Mlym': {'average_document_length': 539.2827868852459, 'average_query_length': 80.69222222222223, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mal_Mlym-eng_Latn': {'average_document_length': 539.2827868852459, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mal_Mlym': {'average_document_length': 475.51024590163934, 'average_query_length': 80.69222222222223, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mar_Deva-mar_Deva': {'average_document_length': 
478.67418032786884, 'average_query_length': 68.62625139043382, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'mar_Deva-eng_Latn': {'average_document_length': 478.67418032786884, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mar_Deva': {'average_document_length': 475.51024590163934, 'average_query_length': 68.62625139043382, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'mkd_Cyrl-mkd_Cyrl': {'average_document_length': 495.77868852459017, 'average_query_length': 74.01333333333334, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mkd_Cyrl-eng_Latn': {'average_document_length': 495.77868852459017, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mkd_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 74.01333333333334, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-mlt_Latn': {'average_document_length': 525.8995901639345, 'average_query_length': 75.00444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mlt_Latn-eng_Latn': {'average_document_length': 525.8995901639345, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mlt_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.00444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-mri_Latn': {'average_document_length': 526.0860655737705, 'average_query_length': 81.71444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mri_Latn-eng_Latn': {'average_document_length': 526.0860655737705, 
'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mri_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 81.71444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-mya_Mymr': {'average_document_length': 590.389344262295, 'average_query_length': 89.28333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'mya_Mymr-eng_Latn': {'average_document_length': 590.389344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-mya_Mymr': {'average_document_length': 475.51024590163934, 'average_query_length': 89.28333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-nld_Latn': {'average_document_length': 529.1434426229508, 'average_query_length': 75.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nld_Latn-eng_Latn': {'average_document_length': 529.1434426229508, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nld_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 75.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-nob_Latn': {'average_document_length': 479.13729508196724, 'average_query_length': 71.04555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nob_Latn-eng_Latn': {'average_document_length': 479.13729508196724, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nob_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 71.04555555555555, 
'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Deva': {'average_document_length': 456.9590163934426, 'average_query_length': 66.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-eng_Latn': {'average_document_length': 456.9590163934426, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Deva': {'average_document_length': 475.51024590163934, 'average_query_length': 66.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Latn': {'average_document_length': 515.9815573770492, 'average_query_length': 71.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-eng_Latn': {'average_document_length': 515.9815573770492, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-npi_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 71.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-nso_Latn': {'average_document_length': 548.0225409836065, 'average_query_length': 86.77444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nso_Latn-eng_Latn': {'average_document_length': 548.0225409836065, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nso_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 86.77444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'nya_Latn-nya_Latn': {'average_document_length': 532.3934426229508, 'average_query_length': 90.78777777777778, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'nya_Latn-eng_Latn': {'average_document_length': 532.3934426229508, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-nya_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 90.78777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-ory_Orya': {'average_document_length': 487.78483606557376, 'average_query_length': 72.95777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ory_Orya-eng_Latn': {'average_document_length': 487.78483606557376, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ory_Orya': {'average_document_length': 475.51024590163934, 'average_query_length': 72.95777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-pan_Guru': {'average_document_length': 480.2438524590164, 'average_query_length': 73.29777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pan_Guru-eng_Latn': {'average_document_length': 480.2438524590164, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pan_Guru': {'average_document_length': 475.51024590163934, 'average_query_length': 73.29777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-pbt_Arab': {'average_document_length': 453.3299180327869, 'average_query_length': 67.67111111111112, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pbt_Arab-eng_Latn': {'average_document_length': 453.3299180327869, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 
'eng_Latn-pbt_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 67.67111111111112, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-pes_Arab': {'average_document_length': 448.84631147540983, 'average_query_length': 64.75111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pes_Arab-eng_Latn': {'average_document_length': 448.84631147540983, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pes_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 64.75111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-plt_Latn': {'average_document_length': 581.2745901639345, 'average_query_length': 94.99555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'plt_Latn-eng_Latn': {'average_document_length': 581.2745901639345, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-plt_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 94.99555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-pol_Latn': {'average_document_length': 504.0409836065574, 'average_query_length': 74.09777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'pol_Latn-eng_Latn': {'average_document_length': 504.0409836065574, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-pol_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 74.09777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-por_Latn': {'average_document_length': 
517.2827868852459, 'average_query_length': 78.11666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'por_Latn-eng_Latn': {'average_document_length': 517.2827868852459, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-por_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.11666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-ron_Latn': {'average_document_length': 534.8668032786885, 'average_query_length': 78.74222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ron_Latn-eng_Latn': {'average_document_length': 534.8668032786885, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ron_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.74222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-rus_Cyrl': {'average_document_length': 520.1700819672132, 'average_query_length': 83.16333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'rus_Cyrl-eng_Latn': {'average_document_length': 520.1700819672132, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-rus_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 83.16333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-shn_Mymr': {'average_document_length': 676.172131147541, 'average_query_length': 75.90222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'shn_Mymr-eng_Latn': {'average_document_length': 676.172131147541, 'average_query_length': 
77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-shn_Mymr': {'average_document_length': 475.51024590163934, 'average_query_length': 75.90222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Latn': {'average_document_length': 590.7889344262295, 'average_query_length': 94.46666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-eng_Latn': {'average_document_length': 590.7889344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 94.46666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Sinh': {'average_document_length': 478.66803278688525, 'average_query_length': 69.91777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-eng_Latn': {'average_document_length': 478.66803278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sin_Sinh': {'average_document_length': 475.51024590163934, 'average_query_length': 69.91777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-slk_Latn': {'average_document_length': 476.7766393442623, 'average_query_length': 68.5411111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'slk_Latn-eng_Latn': {'average_document_length': 476.7766393442623, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slk_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.5411111111111, 'num_documents': 488, 'num_queries': 
900, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-slv_Latn': {'average_document_length': 474.84631147540983, 'average_query_length': 68.79888888888888, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'slv_Latn-eng_Latn': {'average_document_length': 474.84631147540983, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-slv_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.79888888888888, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-sna_Latn': {'average_document_length': 532.5860655737705, 'average_query_length': 81.30700778642937, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0}, 'sna_Latn-eng_Latn': {'average_document_length': 532.5860655737705, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sna_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 81.30700778642937, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-snd_Arab': {'average_document_length': 431.48770491803276, 'average_query_length': 63.42333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'snd_Arab-eng_Latn': {'average_document_length': 431.48770491803276, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-snd_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 63.42333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'som_Latn-som_Latn': {'average_document_length': 542.0737704918033, 'average_query_length': 90.95777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 
'som_Latn-eng_Latn': {'average_document_length': 542.0737704918033, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-som_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 90.95777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-sot_Latn': {'average_document_length': 573.3258196721312, 'average_query_length': 83.13111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sot_Latn-eng_Latn': {'average_document_length': 573.3258196721312, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sot_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 83.13111111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-spa_Latn': {'average_document_length': 564.3319672131148, 'average_query_length': 82.16, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'spa_Latn-eng_Latn': {'average_document_length': 564.3319672131148, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-spa_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 82.16, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'srp_Cyrl-srp_Cyrl': {'average_document_length': 471.84631147540983, 'average_query_length': 67.49833147942158, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'srp_Cyrl-eng_Latn': {'average_document_length': 471.84631147540983, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-srp_Cyrl': {'average_document_length': 
475.51024590163934, 'average_query_length': 67.49833147942158, 'num_documents': 488, 'num_queries': 899, 'average_relevant_docs_per_query': 1.0011123470522802}, 'ssw_Latn-ssw_Latn': {'average_document_length': 535.0901639344262, 'average_query_length': 81.09777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ssw_Latn-eng_Latn': {'average_document_length': 535.0901639344262, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ssw_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 81.09777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-sun_Latn': {'average_document_length': 495.3032786885246, 'average_query_length': 78.16, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sun_Latn-eng_Latn': {'average_document_length': 495.3032786885246, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-sun_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.16, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-swe_Latn': {'average_document_length': 480.6803278688525, 'average_query_length': 68.67666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'swe_Latn-eng_Latn': {'average_document_length': 480.6803278688525, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swe_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.67666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-swh_Latn': {'average_document_length': 499.0983606557377, 'average_query_length': 80.56, 
'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'swh_Latn-eng_Latn': {'average_document_length': 499.0983606557377, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-swh_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 80.56, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-tam_Taml': {'average_document_length': 555.5286885245902, 'average_query_length': 81.12777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tam_Taml-eng_Latn': {'average_document_length': 555.5286885245902, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tam_Taml': {'average_document_length': 475.51024590163934, 'average_query_length': 81.12777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-tel_Telu': {'average_document_length': 481.5245901639344, 'average_query_length': 72.18777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tel_Telu-eng_Latn': {'average_document_length': 481.5245901639344, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tel_Telu': {'average_document_length': 475.51024590163934, 'average_query_length': 72.18777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-tgk_Cyrl': {'average_document_length': 528.516393442623, 'average_query_length': 74.28111111111112, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tgk_Cyrl-eng_Latn': {'average_document_length': 528.516393442623, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgk_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 74.28111111111112, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-tgl_Latn': {'average_document_length': 597.6270491803278, 'average_query_length': 82.34555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tgl_Latn-eng_Latn': {'average_document_length': 597.6270491803278, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tgl_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 82.34555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-tha_Thai': {'average_document_length': 456.1659836065574, 'average_query_length': 59.46666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tha_Thai-eng_Latn': {'average_document_length': 456.1659836065574, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tha_Thai': {'average_document_length': 475.51024590163934, 'average_query_length': 59.46666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-tir_Ethi': {'average_document_length': 327.6967213114754, 'average_query_length': 51.99888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tir_Ethi-eng_Latn': {'average_document_length': 327.6967213114754, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tir_Ethi': {'average_document_length': 475.51024590163934, 'average_query_length': 51.99888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 
'tsn_Latn-tsn_Latn': {'average_document_length': 591.7131147540983, 'average_query_length': 87.12777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tsn_Latn-eng_Latn': {'average_document_length': 591.7131147540983, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tsn_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 87.12777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-tso_Latn': {'average_document_length': 569.6475409836065, 'average_query_length': 91.69444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tso_Latn-eng_Latn': {'average_document_length': 569.6475409836065, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tso_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 91.69444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-tur_Latn': {'average_document_length': 489.0409836065574, 'average_query_length': 71.56222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'tur_Latn-eng_Latn': {'average_document_length': 489.0409836065574, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-tur_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 71.56222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-ukr_Cyrl': {'average_document_length': 488.11475409836066, 'average_query_length': 72.08222222222223, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ukr_Cyrl-eng_Latn': {'average_document_length': 
488.11475409836066, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-ukr_Cyrl': {'average_document_length': 475.51024590163934, 'average_query_length': 72.08222222222223, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Arab': {'average_document_length': 470.452868852459, 'average_query_length': 70.52666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-eng_Latn': {'average_document_length': 470.452868852459, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Arab': {'average_document_length': 475.51024590163934, 'average_query_length': 70.52666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Latn': {'average_document_length': 590.5348360655738, 'average_query_length': 90.07, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-eng_Latn': {'average_document_length': 590.5348360655738, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-urd_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 90.07, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-uzn_Latn': {'average_document_length': 539.2418032786885, 'average_query_length': 77.61333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'uzn_Latn-eng_Latn': {'average_document_length': 539.2418032786885, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-uzn_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 77.61333333333333, 'num_documents': 
488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-vie_Latn': {'average_document_length': 499.8360655737705, 'average_query_length': 73.05333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'vie_Latn-eng_Latn': {'average_document_length': 499.8360655737705, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-vie_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 73.05333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-war_Latn': {'average_document_length': 592.8688524590164, 'average_query_length': 86.07555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'war_Latn-eng_Latn': {'average_document_length': 592.8688524590164, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-war_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 86.07555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-wol_Latn': {'average_document_length': 456.9795081967213, 'average_query_length': 70.60555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'wol_Latn-eng_Latn': {'average_document_length': 456.9795081967213, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-wol_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 70.60555555555555, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'xho_Latn-xho_Latn': {'average_document_length': 505.0655737704918, 'average_query_length': 78.50333333333333, 'num_documents': 488, 'num_queries': 900, 
'average_relevant_docs_per_query': 1.0}, 'xho_Latn-eng_Latn': {'average_document_length': 505.0655737704918, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-xho_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.50333333333333, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-yor_Latn': {'average_document_length': 459.5204918032787, 'average_query_length': 68.64, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'yor_Latn-eng_Latn': {'average_document_length': 459.5204918032787, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-yor_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 68.64, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-zho_Hans': {'average_document_length': 159.76024590163934, 'average_query_length': 21.747777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zho_Hans-eng_Latn': {'average_document_length': 159.76024590163934, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hans': {'average_document_length': 475.51024590163934, 'average_query_length': 21.747777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-zho_Hant': {'average_document_length': 149.77254098360655, 'average_query_length': 21.07888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zho_Hant-eng_Latn': {'average_document_length': 149.77254098360655, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zho_Hant': 
{'average_document_length': 475.51024590163934, 'average_query_length': 21.07888888888889, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-zsm_Latn': {'average_document_length': 528.9139344262295, 'average_query_length': 78.92444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zsm_Latn-eng_Latn': {'average_document_length': 528.9139344262295, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zsm_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 78.92444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-zul_Latn': {'average_document_length': 532.9713114754098, 'average_query_length': 76.0411111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'zul_Latn-eng_Latn': {'average_document_length': 532.9713114754098, 'average_query_length': 77.34777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'eng_Latn-zul_Latn': {'average_document_length': 475.51024590163934, 'average_query_length': 76.0411111111111, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Arab-arb_Latn': {'average_document_length': 421.96311475409834, 'average_query_length': 67.02444444444444, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'arb_Latn-arb_Arab': {'average_document_length': 555.6188524590164, 'average_query_length': 58.55, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Beng-ben_Latn': {'average_document_length': 467.7745901639344, 'average_query_length': 74.78777777777778, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'ben_Latn-ben_Beng': {'average_document_length': 522.8934426229508, 
'average_query_length': 69.48444444444445, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Deva-hin_Latn': {'average_document_length': 473.55737704918033, 'average_query_length': 74.81222222222222, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'hin_Latn-hin_Deva': {'average_document_length': 541.7315573770492, 'average_query_length': 72.61777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Deva-npi_Latn': {'average_document_length': 456.9590163934426, 'average_query_length': 71.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'npi_Latn-npi_Deva': {'average_document_length': 515.9815573770492, 'average_query_length': 66.89666666666666, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Sinh-sin_Latn': {'average_document_length': 478.66803278688525, 'average_query_length': 94.46666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'sin_Latn-sin_Sinh': {'average_document_length': 590.7889344262295, 'average_query_length': 69.91777777777777, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Arab-urd_Latn': {'average_document_length': 470.452868852459, 'average_query_length': 90.07, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}, 'urd_Latn-urd_Arab': {'average_document_length': 590.5348360655738, 'average_query_length': 70.52666666666667, 'num_documents': 488, 'num_queries': 900, 'average_relevant_docs_per_query': 1.0}}}} |
BengaliDocumentClassification |
['ben'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 1658.1} |
BengaliHateSpeechClassification (Karim et al., 2020) |
['ben'] |
Classification |
s2s |
[News, Written] |
{'train': 3418} |
{'train': 103.42} |
BengaliSentimentAnalysis (Sazzed et al., 2020) |
['ben'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 11807} |
{'train': 69.66} |
BibleNLPBitextMining (Akerman et al., 2023) |
['aai', 'aak', 'aau', 'aaz', 'abt', 'abx', 'aby', 'acf', 'acr', 'acu', 'adz', 'aer', 'aey', 'agd', 'agg', 'agm', 'agn', 'agr', 'agt', 'agu', 'aia', 'aii', 'aka', 'ake', 'alp', 'alq', 'als', 'aly', 'ame', 'amf', 'amk', 'amm', 'amn', 'amo', 'amp', 'amr', 'amu', 'amx', 'anh', 'anv', 'aoi', 'aoj', 'aom', 'aon', 'apb', 'ape', 'apn', 'apr', 'apu', 'apw', 'apz', 'arb', 'are', 'arl', 'arn', 'arp', 'asm', 'aso', 'ata', 'atb', 'atd', 'atg', 'att', 'auc', 'aui', 'auy', 'avt', 'awb', 'awk', 'awx', 'azb', 'azg', 'azz', 'bao', 'bba', 'bbb', 'bbr', 'bch', 'bco', 'bdd', 'bea', 'bef', 'bel', 'ben', 'beo', 'beu', 'bgs', 'bgt', 'bhg', 'bhl', 'big', 'bjk', 'bjp', 'bjr', 'bjv', 'bjz', 'bkd', 'bki', 'bkq', 'bkx', 'blw', 'blz', 'bmh', 'bmk', 'bmr', 'bmu', 'bnp', 'boa', 'boj', 'bon', 'box', 'bpr', 'bps', 'bqc', 'bqp', 'bre', 'bsj', 'bsn', 'bsp', 'bss', 'buk', 'bus', 'bvd', 'bvr', 'bxh', 'byr', 'byx', 'bzd', 'bzh', 'bzj', 'caa', 'cab', 'cac', 'caf', 'cak', 'cao', 'cap', 'car', 'cav', 'cax', 'cbc', 'cbi', 'cbk', 'cbr', 'cbs', 'cbt', 'cbu', 'cbv', 'cco', 'ceb', 'cek', 'ces', 'cgc', 'cha', 'chd', 'chf', 'chk', 'chq', 'chz', 'cjo', 'cjv', 'ckb', 'cle', 'clu', 'cme', 'cmn', 'cni', 'cnl', 'cnt', 'cof', 'con', 'cop', 'cot', 'cpa', 'cpb', 'cpc', 'cpu', 'cpy', 'crn', 'crx', 'cso', 'csy', 'cta', 'cth', 'ctp', 'ctu', 'cub', 'cuc', 'cui', 'cuk', 'cut', 'cux', 'cwe', 'cya', 'daa', 'dad', 'dah', 'dan', 'ded', 'deu', 'dgc', 'dgr', 'dgz', 'dhg', 'dif', 'dik', 'dji', 'djk', 'djr', 'dob', 'dop', 'dov', 'dwr', 'dww', 'dwy', 'ebk', 'eko', 'emi', 'emp', 'eng', 'enq', 'epo', 'eri', 'ese', 'esk', 'etr', 'ewe', 'faa', 'fai', 'far', 'ffm', 'for', 'fra', 'fue', 'fuf', 'fuh', 'gah', 'gai', 'gam', 'gaw', 'gdn', 'gdr', 'geb', 'gfk', 'ghs', 'glk', 'gmv', 'gng', 'gnn', 'gnw', 'gof', 'grc', 'gub', 'guh', 'gui', 'guj', 'gul', 'gum', 'gun', 'guo', 'gup', 'gux', 'gvc', 'gvf', 'gvn', 'gvs', 'gwi', 'gym', 'gyr', 'hat', 'hau', 'haw', 'hbo', 'hch', 'heb', 'heg', 'hin', 'hix', 'hla', 'hlt', 'hmo', 'hns', 'hop', 'hot', 'hrv', 
'hto', 'hub', 'hui', 'hun', 'hus', 'huu', 'huv', 'hvn', 'ian', 'ign', 'ikk', 'ikw', 'ilo', 'imo', 'inb', 'ind', 'ino', 'iou', 'ipi', 'isn', 'ita', 'iws', 'ixl', 'jac', 'jae', 'jao', 'jic', 'jid', 'jiv', 'jni', 'jpn', 'jvn', 'kan', 'kaq', 'kbc', 'kbh', 'kbm', 'kbq', 'kdc', 'kde', 'kdl', 'kek', 'ken', 'kew', 'kgf', 'kgk', 'kgp', 'khs', 'khz', 'kik', 'kiw', 'kiz', 'kje', 'kjs', 'kkc', 'kkl', 'klt', 'klv', 'kmg', 'kmh', 'kmk', 'kmo', 'kms', 'kmu', 'kne', 'knf', 'knj', 'knv', 'kos', 'kpf', 'kpg', 'kpj', 'kpr', 'kpw', 'kpx', 'kqa', 'kqc', 'kqf', 'kql', 'kqw', 'ksd', 'ksj', 'ksr', 'ktm', 'kto', 'kud', 'kue', 'kup', 'kvg', 'kvn', 'kwd', 'kwf', 'kwi', 'kwj', 'kyc', 'kyf', 'kyg', 'kyq', 'kyz', 'kze', 'lac', 'lat', 'lbb', 'lbk', 'lcm', 'leu', 'lex', 'lgl', 'lid', 'lif', 'lin', 'lit', 'llg', 'lug', 'luo', 'lww', 'maa', 'maj', 'mal', 'mam', 'maq', 'mar', 'mau', 'mav', 'maz', 'mbb', 'mbc', 'mbh', 'mbj', 'mbl', 'mbs', 'mbt', 'mca', 'mcb', 'mcd', 'mcf', 'mco', 'mcp', 'mcq', 'mcr', 'mdy', 'med', 'mee', 'mek', 'meq', 'met', 'meu', 'mgc', 'mgh', 'mgw', 'mhl', 'mib', 'mic', 'mie', 'mig', 'mih', 'mil', 'mio', 'mir', 'mit', 'miz', 'mjc', 'mkj', 'mkl', 'mkn', 'mks', 'mle', 'mlh', 'mlp', 'mmo', 'mmx', 'mna', 'mop', 'mox', 'mph', 'mpj', 'mpm', 'mpp', 'mps', 'mpt', 'mpx', 'mqb', 'mqj', 'msb', 'msc', 'msk', 'msm', 'msy', 'mti', 'mto', 'mux', 'muy', 'mva', 'mvn', 'mwc', 'mwe', 'mwf', 'mwp', 'mxb', 'mxp', 'mxq', 'mxt', 'mya', 'myk', 'myu', 'myw', 'myy', 'mzz', 'nab', 'naf', 'nak', 'nas', 'nbq', 'nca', 'nch', 'ncj', 'ncl', 'ncu', 'ndg', 'ndj', 'nfa', 'ngp', 'ngu', 'nhe', 'nhg', 'nhi', 'nho', 'nhr', 'nhu', 'nhw', 'nhy', 'nif', 'nii', 'nin', 'nko', 'nld', 'nlg', 'nna', 'nnq', 'noa', 'nop', 'not', 'nou', 'npi', 'npl', 'nsn', 'nss', 'ntj', 'ntp', 'ntu', 'nuy', 'nvm', 'nwi', 'nya', 'nys', 'nyu', 'obo', 'okv', 'omw', 'ong', 'ons', 'ood', 'opm', 'ory', 'ote', 'otm', 'otn', 'otq', 'ots', 'pab', 'pad', 'pah', 'pan', 'pao', 'pes', 'pib', 'pio', 'pir', 'piu', 'pjt', 'pls', 'plu', 'pma', 'poe', 'poh', 
'poi', 'pol', 'pon', 'por', 'poy', 'ppo', 'prf', 'pri', 'ptp', 'ptu', 'pwg', 'qub', 'quc', 'quf', 'quh', 'qul', 'qup', 'qvc', 'qve', 'qvh', 'qvm', 'qvn', 'qvs', 'qvw', 'qvz', 'qwh', 'qxh', 'qxn', 'qxo', 'rai', 'reg', 'rgu', 'rkb', 'rmc', 'rmy', 'ron', 'roo', 'rop', 'row', 'rro', 'ruf', 'rug', 'rus', 'rwo', 'sab', 'san', 'sbe', 'sbk', 'sbs', 'seh', 'sey', 'sgb', 'sgz', 'shj', 'shp', 'sim', 'sja', 'sll', 'smk', 'snc', 'snn', 'snp', 'snx', 'sny', 'som', 'soq', 'soy', 'spa', 'spl', 'spm', 'spp', 'sps', 'spy', 'sri', 'srm', 'srn', 'srp', 'srq', 'ssd', 'ssg', 'ssx', 'stp', 'sua', 'sue', 'sus', 'suz', 'swe', 'swh', 'swp', 'sxb', 'tac', 'taj', 'tam', 'tav', 'taw', 'tbc', 'tbf', 'tbg', 'tbo', 'tbz', 'tca', 'tcs', 'tcz', 'tdt', 'tee', 'tel', 'ter', 'tet', 'tew', 'tfr', 'tgk', 'tgl', 'tgo', 'tgp', 'tha', 'tif', 'tim', 'tiw', 'tiy', 'tke', 'tku', 'tlf', 'tmd', 'tna', 'tnc', 'tnk', 'tnn', 'tnp', 'toc', 'tod', 'tof', 'toj', 'ton', 'too', 'top', 'tos', 'tpa', 'tpi', 'tpt', 'tpz', 'trc', 'tsw', 'ttc', 'tte', 'tuc', 'tue', 'tuf', 'tuo', 'tur', 'tvk', 'twi', 'txq', 'txu', 'tzj', 'tzo', 'ubr', 'ubu', 'udu', 'uig', 'ukr', 'uli', 'ulk', 'upv', 'ura', 'urb', 'urd', 'uri', 'urt', 'urw', 'usa', 'usp', 'uvh', 'uvl', 'vid', 'vie', 'viv', 'vmy', 'waj', 'wal', 'wap', 'wat', 'wbi', 'wbp', 'wed', 'wer', 'wim', 'wiu', 'wiv', 'wmt', 'wmw', 'wnc', 'wnu', 'wol', 'wos', 'wrk', 'wro', 'wrs', 'wsk', 'wuv', 'xav', 'xbi', 'xed', 'xla', 'xnn', 'xon', 'xsi', 'xtd', 'xtm', 'yaa', 'yad', 'yal', 'yap', 'yaq', 'yby', 'ycn', 'yka', 'yle', 'yml', 'yon', 'yor', 'yrb', 'yre', 'yss', 'yuj', 'yut', 'yuw', 'yva', 'zaa', 'zab', 'zac', 'zad', 'zai', 'zaj', 'zam', 'zao', 'zap', 'zar', 'zas', 'zat', 'zav', 'zaw', 'zca', 'zga', 'zia', 'ziw', 'zlm', 'zos', 'zpc', 'zpl', 'zpm', 'zpo', 'zpq', 'zpu', 'zpv', 'zpz', 'zsr', 'ztq', 'zty', 'zyp'] |
BitextMining |
s2s |
[Religious, Written] |
{'train': 256} |
{'train': 120} |
BigPatentClustering.v2 (Eva Sharma and Chen Li and Lu Wang, 2019) |
['eng'] |
Clustering |
p2p |
[Legal, Written] |
{'test': 2048} |
{'test': 30995.5} |
BiorxivClusteringP2P.v2 |
['eng'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2151} |
{'test': 1664.0} |
BiorxivClusteringS2S.v2 |
['eng'] |
Clustering |
s2s |
[Academic, Written] |
{'test': 2151} |
{'test': 101.7} |
BlurbsClusteringP2P.v2 (Steffen Remus, 2019) |
['deu'] |
Clustering |
p2p |
[Fiction, Written] |
{'test': 2048} |
{'test': 664.09} |
BlurbsClusteringS2S.v2 (Steffen Remus, 2019) |
['deu'] |
Clustering |
s2s |
[Fiction, Written] |
{'test': 2048} |
{'test': 23.02} |
BornholmBitextMining |
['dan'] |
BitextMining |
s2s |
[Web, Social, Fiction, Written] |
{'test': 500} |
{'test': {'average_sentence1_length': 49.834, 'average_sentence2_length': 38.888, 'num_samples': 500}} |
BrazilianToxicTweetsClassification (Joao Augusto Leite and Diego F. Silva and Kalina Bontcheva and Carolina Scarton, 2020) |
['por'] |
MultilabelClassification |
s2s |
[Constructed, Written] |
{'test': 2048} |
{'test': 85.05} |
BrightRetrieval (Su et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Non-fiction] |
{'standard': 1334914, 'long': 7048} |
{'standard': 800.3994729248476, 'long': 46527.35839954597} |
BulgarianStoreReviewSentimentClassfication (Georgieva-Trifonova et al., 2018) |
['bul'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 182} |
{'test': 316.7} |
CBD |
['pol'] |
Classification |
s2s |
[Written, Social] |
{'test': 1000} |
{'test': 93.2} |
CDSC-E |
['pol'] |
PairClassification |
s2s |
[Written] |
None |
None |
CDSC-R |
['pol'] |
STS |
s2s |
[Web, Written] |
{'test': 1000} |
{'test': 75.24} |
CEDRClassification (Sboev et al., 2021) |
['rus'] |
MultilabelClassification |
s2s |
[Web, Social, Blog, Written] |
{'test': 1882} |
{'test': {'average_text_length': 91.20563230605738, 'average_label_per_text': 0.620616365568544, 'num_samples': 1882, 'unique_labels': 6, 'labels': {'null': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}} |
CLSClusteringP2P.v2 (Yudong Li, 2022) |
['cmn'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{} |
CLSClusteringS2S.v2 (Yudong Li, 2022) |
['cmn'] |
Clustering |
s2s |
[Academic, Written] |
{'test': 2048} |
{} |
CMedQAv1-reranking (Zhang et al., 2017) |
['cmn'] |
Reranking |
s2s |
[Medical, Written] |
{'test': 2000} |
{'test': 165} |
CMedQAv2-reranking (S. Zhang, 2018) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
COIRCodeSearchNetRetrieval (Husain et al., 2019) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'python': {'average_document_length': 466.546, 'average_query_length': 862.842, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'average_document_length': 186.018, 'average_query_length': 1415.632, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'go': {'average_document_length': 125.213, 'average_query_length': 563.729, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'average_document_length': 313.818, 'average_query_length': 577.634, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'java': {'average_document_length': 420.287, 'average_query_length': 690.36, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'php': {'average_document_length': 162.119, 'average_query_length': 712.129, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}}} |
CPUSpeedTask |
['eng'] |
Speed |
s2s |
[Fiction, Written] |
{'test': 1} |
{'test': 3591} |
CQADupstackAndroidRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 593.701974084703, 'average_query_length': 51.76680972818312, 'num_documents': 22998, 'num_queries': 699, 'average_relevant_docs_per_query': 2.4263233190271816}} |
CQADupstackEnglishRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 482.4710971880361, 'average_query_length': 48.32993630573248, 'num_documents': 40221, 'num_queries': 1570, 'average_relevant_docs_per_query': 2.3980891719745223}} |
CQADupstackGamingRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 488.74152888457206, 'average_query_length': 48.772413793103446, 'num_documents': 45301, 'num_queries': 1595, 'average_relevant_docs_per_query': 1.418808777429467}} |
CQADupstackGisRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1012.167813587693, 'average_query_length': 52.2, 'num_documents': 37637, 'num_queries': 885, 'average_relevant_docs_per_query': 1.2587570621468926}} |
CQADupstackMathematicaRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1153.4967375037413, 'average_query_length': 48.90547263681592, 'num_documents': 16705, 'num_queries': 804, 'average_relevant_docs_per_query': 1.6890547263681592}} |
CQADupstackPhysicsRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 818.6476145735463, 'average_query_length': 53.36477382098171, 'num_documents': 38316, 'num_queries': 1039, 'average_relevant_docs_per_query': 1.8604427333974976}} |
CQADupstackProgrammersRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
[Programming, Written, Non-fiction] |
None |
{'test': {'average_document_length': 1055.7033814022875, 'average_query_length': 55.1837899543379, 'num_documents': 32176, 'num_queries': 876, 'average_relevant_docs_per_query': 1.9121004566210045}} |
CQADupstackStatsRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1055.1668598736662, 'average_query_length': 56.31748466257669, 'num_documents': 42269, 'num_queries': 652, 'average_relevant_docs_per_query': 1.4003067484662577}} |
CQADupstackTexRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1297.09043177285, 'average_query_length': 46.935306262904334, 'num_documents': 68184, 'num_queries': 2906, 'average_relevant_docs_per_query': 1.7735719201651754}} |
CQADupstackUnixRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1004.8120383267908, 'average_query_length': 50.32369402985075, 'num_documents': 47382, 'num_queries': 1072, 'average_relevant_docs_per_query': 1.5792910447761195}} |
CQADupstackWebmastersRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 707.3635736857225, 'average_query_length': 51.93478260869565, 'num_documents': 17405, 'num_queries': 506, 'average_relevant_docs_per_query': 2.7569169960474307}} |
CQADupstackWordpressRetrieval (Hoogeveen et al., 2015) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1122.7690155333814, 'average_query_length': 48.7264325323475, 'num_documents': 48605, 'num_queries': 541, 'average_relevant_docs_per_query': 1.3752310536044363}} |
CSFDCZMovieReviewSentimentClassification (Michal Štefánik, 2023) |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 386.5} |
CSFDSKMovieReviewSentimentClassification (Michal Štefánik, 2023) |
['slk'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 366.2} |
CTKFactsNLI (Ullrich et al., 2023) |
['ces'] |
PairClassification |
s2s |
[News, Written] |
{'test': 375, 'validation': 305} |
{'test': 225.62, 'validation': 219.32} |
CUADAffiliateLicenseLicenseeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 198} |
{'test': 484.11} |
CUADAffiliateLicenseLicensorLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 88} |
{'test': 633.4} |
CUADAntiAssignmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1172} |
{'test': 340.81} |
CUADAuditRightsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1216} |
{'test': 337.14} |
CUADCapOnLiabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1246} |
{'test': 375.74} |
CUADChangeOfControlLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 416} |
{'test': 391.96} |
CUADCompetitiveRestrictionExceptionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 220} |
{'test': 433.04} |
CUADCovenantNotToSueLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 308} |
{'test': 402.97} |
CUADEffectiveDateLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 236} |
{'test': 277.62} |
CUADExclusivityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 762} |
{'test': 369.17} |
CUADExpirationDateLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 876} |
{'test': 309.27} |
CUADGoverningLawLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 876} |
{'test': 289.87} |
CUADIPOwnershipAssignmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 576} |
{'test': 414.0} |
CUADInsuranceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1030} |
{'test': 365.54} |
CUADIrrevocableOrPerpetualLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 280} |
{'test': 473.4} |
CUADJointIPOwnershipLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 192} |
{'test': 374.17} |
CUADLicenseGrantLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1396} |
{'test': 409.89} |
CUADLiquidatedDamagesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 220} |
{'test': 351.76} |
CUADMinimumCommitmentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 772} |
{'test': 364.16} |
CUADMostFavoredNationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 64} |
{'test': 418.75} |
CUADNoSolicitOfCustomersLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 84} |
{'test': 392.89} |
CUADNoSolicitOfEmployeesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 142} |
{'test': 417.94} |
CUADNonCompeteLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 442} |
{'test': 383.2} |
CUADNonDisparagementLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 100} |
{'test': 403.08} |
CUADNonTransferableLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 542} |
{'test': 399.16} |
CUADNoticePeriodToTerminateRenewalLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 222} |
{'test': 354.85} |
CUADPostTerminationServicesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 808} |
{'test': 422.53} |
CUADPriceRestrictionsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 46} |
{'test': 324.71} |
CUADRenewalTermLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 386} |
{'test': 340.87} |
CUADRevenueProfitSharingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 774} |
{'test': 371.55} |
CUADRofrRofoRofnLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 690} |
{'test': 395.46} |
CUADSourceCodeEscrowLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 118} |
{'test': 399.18} |
CUADTerminationForConvenienceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 430} |
{'test': 326.3} |
CUADThirdPartyBeneficiaryLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 68} |
{'test': 261.04} |
CUADUncappedLiabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 294} |
{'test': 441.04} |
CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 48} |
{'test': 368.08} |
CUADVolumeRestrictionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 322} |
{'test': 306.27} |
CUADWarrantyDurationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 320} |
{'test': 352.27} |
CanadaTaxCourtOutcomesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 244} |
{'test': 622.6} |
CataloniaTweetClassification |
['cat', 'spa'] |
Classification |
s2s |
[Social, Government, Written] |
{'validation': 2000, 'test': 2000} |
{'validation': 202.61, 'test': 200.49} |
ClimateFEVER (Thomas Diggelmann, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 538.241873443325, 'average_query_length': 123.39934853420195, 'num_documents': 5416593, 'num_queries': 1535, 'average_relevant_docs_per_query': 3.0495114006514656}} |
CmedqaRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 307.7710222897771, 'average_query_length': 48.470367591897976, 'num_documents': 100001, 'num_queries': 3999, 'average_relevant_docs_per_query': 1.86271567891973}} |
Cmnli |
['cmn'] |
PairClassification |
s2s |
|
None |
None |
CodeEditSearchRetrieval (Niklas Muennighoff, 2023) |
['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] |
Retrieval |
p2p |
[Programming, Written] |
{'train': 13000} |
{'train': {'python': {'average_document_length': 597.592, 'average_query_length': 69.519, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'average_document_length': 582.554, 'average_query_length': 56.88, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'typescript': {'average_document_length': 580.877, 'average_query_length': 60.092, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'go': {'average_document_length': 548.498, 'average_query_length': 70.797, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'average_document_length': 518.895, 'average_query_length': 66.9, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'java': {'average_document_length': 620.332, 'average_query_length': 62.984, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'php': {'average_document_length': 545.452, 'average_query_length': 61.927, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'c': {'average_document_length': 475.868, 'average_query_length': 97.588, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'c++': {'average_document_length': 544.446, 'average_query_length': 114.48, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'rust': {'average_document_length': 609.548, 'average_query_length': 67.503, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'swift': {'average_document_length': 574.62, 'average_query_length': 57.279, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'scala': {'average_document_length': 495.485, 'average_query_length': 64.833, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'shell': {'average_document_length': 
486.519, 'average_query_length': 72.059, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}}} |
CodeFeedbackMT (Tianyu Zheng, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'average_document_length': 1467.879728243677, 'average_query_length': 4425.522256533855, 'num_documents': 66383, 'num_queries': 13277, 'average_relevant_docs_per_query': 1.0}} |
CodeFeedbackST (Xiangyang Li, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'average_document_length': 1521.3317148588733, 'average_query_length': 724.2441704465598, 'num_documents': 156526, 'num_queries': 31306, 'average_relevant_docs_per_query': 1.0}} |
CodeSearchNetCCRetrieval (Xiangyang Li, 2024) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'python': {'average_document_length': 388.31577184555965, 'average_query_length': 551.7934039415471, 'num_documents': 280652, 'num_queries': 14918, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'average_document_length': 276.0730050152605, 'average_query_length': 443.70707991491946, 'num_documents': 65201, 'num_queries': 3291, 'average_relevant_docs_per_query': 1.0}, 'go': {'average_document_length': 185.0307932251621, 'average_query_length': 233.76803742920464, 'num_documents': 182735, 'num_queries': 8122, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'average_document_length': 214.86204146730464, 'average_query_length': 266.8731165741475, 'num_documents': 27588, 'num_queries': 1261, 'average_relevant_docs_per_query': 1.0}, 'java': {'average_document_length': 281.96280259139183, 'average_query_length': 342.5341853035144, 'num_documents': 181061, 'num_queries': 10955, 'average_relevant_docs_per_query': 1.0}, 'php': {'average_document_length': 268.9752569556027, 'average_query_length': 336.62194947909234, 'num_documents': 268237, 'num_queries': 14014, 'average_relevant_docs_per_query': 1.0}}} |
CodeSearchNetRetrieval (Husain et al., 2019) |
['go', 'java', 'javascript', 'php', 'python', 'ruby'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'python': {'average_document_length': 862.842, 'average_query_length': 466.546, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'javascript': {'average_document_length': 1415.632, 'average_query_length': 186.018, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'go': {'average_document_length': 563.729, 'average_query_length': 125.213, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'ruby': {'average_document_length': 577.634, 'average_query_length': 313.818, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'java': {'average_document_length': 420.287, 'average_query_length': 690.36, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}, 'php': {'average_document_length': 712.129, 'average_query_length': 162.119, 'num_documents': 1000, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}}} |
CodeTransOceanContest (Weixiang Yan, 2023) |
['c++', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
|
{'test': {'average_document_length': 1528.9156746031747, 'average_query_length': 1012.1131221719457, 'num_documents': 1008, 'num_queries': 221, 'average_relevant_docs_per_query': 1.0}} |
CodeTransOceanDL (Weixiang Yan, 2023) |
['python'] |
Retrieval |
p2p |
[Programming, Written] |
|
{'test': {'average_document_length': 1479.0735294117646, 'average_query_length': 1867.6222222222223, 'num_documents': 816, 'num_queries': 180, 'average_relevant_docs_per_query': 1.0}} |
ContractNLIConfidentialityOfAgreementLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 82} |
{'test': 473.17} |
ContractNLIExplicitIdentificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 109} |
{'test': 506.12} |
ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 139} |
{'test': 525.75} |
ContractNLILimitedUseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 208} |
{'test': 407.51} |
ContractNLINoLicensingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 162} |
{'test': 419.42} |
ContractNLINoticeOnCompelledDisclosureLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 142} |
{'test': 503.45} |
ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 178} |
{'test': 427.4} |
ContractNLIPermissibleCopyLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 87} |
{'test': 386.84} |
ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 136} |
{'test': 396.4} |
ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 111} |
{'test': 529.09} |
ContractNLIReturnOfConfidentialInformationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 66} |
{'test': 478.29} |
ContractNLISharingWithEmployeesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 170} |
{'test': 548.63} |
ContractNLISharingWithThirdPartiesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 180} |
{'test': 517.29} |
ContractNLISurvivalOfObligationsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 157} |
{'test': 417.64} |
Core17InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
{'eng': 39838} |
{'test': {'num_docs': 19899, 'num_queries': 20, 'average_document_length': 2233.0329664807277, 'average_query_length': 109.75, 'average_instruction_length': 295.55, 'average_changed_instruction_length': 355.2, 'average_relevant_docs_per_query': 32.7, 'average_top_ranked_per_query': 1000.0}} |
CorporateLobbyingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 490} |
{'test': 6039.85} |
CosQA (Junjie Huang, 2021) |
['eng', 'python'] |
Retrieval |
p2p |
[Programming, Written] |
|
{'test': {'average_document_length': 276.132741215298, 'average_query_length': 36.814, 'num_documents': 20604, 'num_queries': 500, 'average_relevant_docs_per_query': 1.0}} |
CovidRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 332.4152658473415, 'average_query_length': 25.9304531085353, 'num_documents': 100001, 'num_queries': 949, 'average_relevant_docs_per_query': 1.0105374077976819}} |
CrossLingualSemanticDiscriminationWMT19 |
['deu', 'fra'] |
Retrieval |
s2s |
[News, Written] |
{'test': 2946} |
{'test': {'deu-fra': {'average_document_length': 147.49857433808555, 'average_query_length': 152.95587236931433, 'num_documents': 7365, 'num_queries': 1473, 'average_relevant_docs_per_query': 1.0}, 'fra-deu': {'average_document_length': 154.21968771215208, 'average_query_length': 145.877800407332, 'num_documents': 7365, 'num_queries': 1473, 'average_relevant_docs_per_query': 1.0}}} |
CrossLingualSemanticDiscriminationWMT21 |
['deu', 'fra'] |
Retrieval |
s2s |
[News, Written] |
{'test': 1786} |
{'test': {'deu-fra': {'average_document_length': 177.26270996640537, 'average_query_length': 171.73012318029114, 'num_documents': 4465, 'num_queries': 893, 'average_relevant_docs_per_query': 1.0}, 'fra-deu': {'average_document_length': 174.45061590145576, 'average_query_length': 176.99216125419932, 'num_documents': 4465, 'num_queries': 893, 'average_relevant_docs_per_query': 1.0}}} |
CyrillicTurkicLangClassification (Goldhahn et al., 2012) |
['bak', 'chv', 'kaz', 'kir', 'krc', 'rus', 'sah', 'tat', 'tyv'] |
Classification |
s2s |
[Web, Written] |
{'test': 2048} |
{'test': 92.22} |
CzechProductReviewSentimentClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 153.26} |
CzechSoMeSentimentClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 1000} |
{'test': 59.89} |
CzechSubjectivityClassification |
['ces'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 500, 'test': 2000} |
{'validation': 108.2, 'test': 108.3} |
DBPedia (Hasibi et al., 2017) |
['eng'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
{'test': {'average_document_length': 1122.7690155333814, 'average_query_length': 48.7264325323475, 'num_documents': 48605, 'num_queries': 541, 'average_relevant_docs_per_query': 1.3752310536044363}} |
DBPedia-PL (Hasibi et al., 2017) |
['pol'] |
Retrieval |
s2p |
[Written, Encyclopaedic] |
None |
{'test': {'average_document_length': 311.7007956561823, 'average_query_length': 35.45, 'num_documents': 4635922, 'num_queries': 400, 'average_relevant_docs_per_query': 38.215}} |
DBpediaClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Encyclopaedic, Written] |
{'test': 70000} |
{'test': 281.4} |
DKHateClassification |
['dan'] |
Classification |
s2s |
[Social, Written] |
{'test': 329} |
{'test': 104.0} |
DalajClassification |
['swe'] |
Classification |
s2s |
[Non-fiction, Written] |
{'test': 444} |
{'test': 243.8} |
DanFeverRetrieval |
['dan'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Spoken] |
{'train': 8897} |
{'train': {'average_document_length': 312.1117274167987, 'average_query_length': 50.26957476855484, 'num_documents': 2524, 'num_queries': 6373, 'average_relevant_docs_per_query': 0.48721167425074535}} |
DanishPoliticalCommentsClassification (Mads Guldborg Kjeldgaard Kongsbak, 2019) |
['dan'] |
Classification |
s2s |
[Social, Written] |
{'train': 9010} |
{'train': 69.9} |
DefinitionClassificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1337} |
{'test': 253.72} |
DiaBlaBitextMining (González et al., 2019) |
['eng', 'fra'] |
BitextMining |
s2s |
[Social, Written] |
{} |
{} |
Diversity1LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 103.21} |
Diversity2LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 0} |
Diversity3LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 135.46} |
Diversity4LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 144.52} |
Diversity5LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 174.77} |
Diversity6LegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 300} |
{'test': 301.01} |
DuRetrieval (Yifu Qiu, 2022) |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 331.3219967800322, 'average_query_length': 9.289, 'num_documents': 100001, 'num_queries': 2000, 'average_relevant_docs_per_query': 4.9195}} |
DutchBookReviewSentimentClassification (Benjamin et al., 2019) |
['nld'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2224} |
{'test': 1443.0} |
EcomRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 32.98041664189015, 'average_query_length': 6.798, 'num_documents': 100902, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}} |
EightTagsClustering.v2 |
['pol'] |
Clustering |
s2s |
[Social, Written] |
{'test': 2048} |
{'test': 78.73} |
EmotionClassification |
['eng'] |
Classification |
s2s |
[Social, Written] |
{'validation': 2000, 'test': 2000} |
{'validation': 95.3, 'test': 95.6} |
EstQA |
['est'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 603} |
{'test': {'average_document_length': 785.595041322314, 'average_query_length': 55.32006633499171, 'num_documents': 121, 'num_queries': 603, 'average_relevant_docs_per_query': 1.0}} |
EstonianValenceClassification |
['est'] |
Classification |
s2s |
[News, Written] |
{'train': 3270, 'test': 818} |
{'train': 226.70642201834863, 'test': 231.5085574572127} |
FEVER |
['eng'] |
Retrieval |
s2p |
|
None |
{'train': {'average_document_length': 538.2340070317589, 'average_query_length': 47.56034058828886, 'num_documents': 5416568, 'num_queries': 109810, 'average_relevant_docs_per_query': 1.2757034878426372}, 'dev': {'average_document_length': 538.2340070317589, 'average_query_length': 47.326282628262824, 'num_documents': 5416568, 'num_queries': 6666, 'average_relevant_docs_per_query': 1.211971197119712}, 'test': {'average_document_length': 538.2340070317589, 'average_query_length': 49.60546054605461, 'num_documents': 5416568, 'num_queries': 6666, 'average_relevant_docs_per_query': 1.1906690669066906}} |
FQuADRetrieval |
['fra'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 400, 'validation': 100} |
{'test': {'average_document_length': 896.3308550185874, 'average_query_length': 58.52, 'num_documents': 269, 'num_queries': 400, 'average_relevant_docs_per_query': 1.0}, 'validation': {'average_document_length': 895.1340206185567, 'average_query_length': 54.13, 'num_documents': 97, 'num_queries': 100, 'average_relevant_docs_per_query': 1.0}} |
FaithDial (Dziri et al., 2022) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 2042} |
{'test': {'average_document_length': 140.61062447018932, 'average_query_length': 4.926542605288932, 'num_documents': 3539, 'num_queries': 2042, 'average_relevant_docs_per_query': 1.0}} |
FalseFriendsGermanEnglish |
['deu'] |
PairClassification |
s2s |
[Written] |
{'test': 1524} |
{'test': 40.3} |
FaroeseSTS |
['fao'] |
STS |
s2s |
[News, Web, Written] |
{'train': 729} |
{'train': 43.6} |
FarsTail (Amirkhani et al., 2023) |
['fas'] |
PairClassification |
s2s |
[Academic, Written] |
{'test': 1029} |
{'test': 125.84} |
FeedbackQARetrieval |
['eng'] |
Retrieval |
s2p |
[Web, Government, Medical, Written] |
{'test': 1992} |
{'test': {'average_document_length': 1174.7986463620982, 'average_query_length': 72.33182730923694, 'num_documents': 2364, 'num_queries': 1992, 'average_relevant_docs_per_query': 1.0}} |
FiQA-PL (Nandan Thakur, 2021) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 795.2371699226205, 'average_query_length': 70.00771604938272, 'num_documents': 57638, 'num_queries': 648, 'average_relevant_docs_per_query': 2.632716049382716}} |
FiQA2018 (Nandan Thakur, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
{'train': {'average_document_length': 767.2108157812554, 'average_query_length': 61.49763636363636, 'num_documents': 57638, 'num_queries': 5500, 'average_relevant_docs_per_query': 2.5756363636363635}, 'dev': {'average_document_length': 767.2108157812554, 'average_query_length': 62.756, 'num_documents': 57638, 'num_queries': 500, 'average_relevant_docs_per_query': 2.476}, 'test': {'average_document_length': 767.2108157812554, 'average_query_length': 62.7037037037037, 'num_documents': 57638, 'num_queries': 648, 'average_relevant_docs_per_query': 2.632716049382716}} |
FilipinoHateSpeechClassification (Neil Vicente Cabasag et al., 2019) |
['fil'] |
Classification |
s2s |
[Social, Written] |
{'validation': 2048, 'test': 2048} |
{'validation': 88.1, 'test': 87.4} |
FilipinoShopeeReviewsClassification |
['fil'] |
Classification |
s2s |
[Social, Written] |
{'validation': 2250, 'test': 2250} |
{'validation': 143.8, 'test': 145.1} |
FinParaSTS |
['fin'] |
STS |
s2s |
[News, Subtitles, Written] |
{'test': 1000, 'validation': 1000} |
{'test': 59.0, 'validation': 58.8} |
FinToxicityClassification |
['fin'] |
Classification |
s2s |
[News, Written] |
{'train': 2048, 'test': 2048} |
{'train': 432.63, 'test': 401.03} |
FinancialPhrasebankClassification (P. Malo, 2014) |
['eng'] |
Classification |
s2s |
[News, Written] |
{'train': 4840} |
{'train': 121.96} |
FloresBitextMining (Goyal et al., 2022) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
BitextMining |
s2s |
[Non-fiction, Encyclopaedic, Written] |
{'dev': 997, 'devtest': 1012} |
{} |
FrenchBookReviews |
['fra'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 2048} |
{'train': 311.5} |
FrenkEnClassification (Nikola Ljubešić, 2019) |
['eng'] |
Classification |
s2s |
[Social, Written] |
{'test': 2300} |
{'test': 188.75} |
FrenkHrClassification (Nikola Ljubešić, 2019) |
['hrv'] |
Classification |
s2s |
[Social, Written] |
{'test': 2120} |
{'test': 89.86} |
FrenkSlClassification (Nikola Ljubešić, 2019) |
['slv'] |
Classification |
s2s |
[Social, Written] |
{'test': 2177} |
{'test': 136.61} |
FunctionOfDecisionSectionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 367} |
{'test': 551.07} |
GPUSpeedTask |
['eng'] |
Speed |
s2s |
[Fiction, Written] |
{'test': 1} |
{'test': 3591} |
GeoreviewClassification |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
{'test': 2048} |
{'test': 409.0} |
GeoreviewClusteringP2P |
['rus'] |
Clustering |
p2p |
[Reviews, Written] |
{'test': 2000} |
{'test': 384.5} |
GeorgianFAQRetrieval |
['kat'] |
Retrieval |
s2p |
[Web, Written] |
{'test': 2566} |
{'test': {'average_document_length': 511.24668745128605, 'average_query_length': 61.69551656920078, 'num_documents': 2566, 'num_queries': 2565, 'average_relevant_docs_per_query': 1.0003898635477584}} |
GerDaLIR |
['deu'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 15483.237726805888, 'average_query_length': 1027.3495690356156, 'num_documents': 131445, 'num_queries': 12298, 'average_relevant_docs_per_query': 1.1704342169458448}} |
GerDaLIRSmall |
['deu'] |
Retrieval |
p2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 19706.823653325308, 'average_query_length': 1031.0680889324833, 'num_documents': 9969, 'num_queries': 12234, 'average_relevant_docs_per_query': 1.1705084191597188}} |
GermanDPR (Timo Möller, 2021) |
['deu'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1288.3410987482614, 'average_query_length': 64.38439024390244, 'num_documents': 2876, 'num_queries': 1025, 'average_relevant_docs_per_query': 1.0}} |
GermanGovServiceRetrieval |
['deu'] |
Retrieval |
s2p |
[Government, Written] |
{'test': 357} |
{'test': {'average_document_length': 1246.4571428571428, 'average_query_length': 68.17977528089888, 'num_documents': 105, 'num_queries': 356, 'average_relevant_docs_per_query': 1.0}} |
GermanPoliticiansTwitterSentimentClassification |
['deu'] |
Classification |
s2s |
[Social, Government, Written] |
{'test': 357} |
{'test': 302.48} |
GermanQuAD-Retrieval (Timo Möller, 2021) |
['deu'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1941.090717299578, 'average_query_length': 56.74773139745916, 'num_documents': 474, 'num_queries': 2204, 'average_relevant_docs_per_query': 1.0}} |
GermanSTSBenchmark (Philip May, 2021) |
['deu'] |
STS |
s2s |
|
None |
None |
GreekCivicsQA |
['ell'] |
Retrieval |
s2p |
[Academic, Written] |
{'default': 407} |
{'default': {'average_document_length': 1074.894348894349, 'average_query_length': 77.06142506142506, 'num_documents': 407, 'num_queries': 407, 'average_relevant_docs_per_query': 1.0}} |
GreekLegalCodeClassification |
['ell'] |
Classification |
s2s |
[Legal, Written] |
{'validation': 2048, 'test': 2048} |
{'validation': 4046.8, 'test': 4200.8} |
GujaratiNewsClassification |
['guj'] |
Classification |
s2s |
[News, Written] |
{'train': 5269, 'test': 1318} |
{'train': 61.95, 'test': 61.91} |
HALClusteringS2S.v2 (Mathieu Ciancone, 2024) |
['fra'] |
Clustering |
s2s |
[Academic, Written] |
{'test': 2048} |
{'test': 86.6} |
HagridRetrieval (Ehsan Kamalloo, 2023) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'train': 1922} |
{'dev': {'average_document_length': 228.36693548387098, 'average_query_length': 40.064516129032256, 'num_documents': 496, 'num_queries': 496, 'average_relevant_docs_per_query': 1.0}} |
HateSpeechPortugueseClassification |
['por'] |
Classification |
s2s |
[Social, Written] |
{'train': 2048} |
{'train': 101.02} |
HeadlineClassification |
['rus'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 61.6} |
HebrewSentimentAnalysis |
['heb'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 113.57} |
HellaSwag (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 10042} |
{'test': {'average_document_length': 137.36519014671472, 'average_query_length': 224.53654650468033, 'num_documents': 199162, 'num_queries': 10042, 'average_relevant_docs_per_query': 1.0}} |
HinDialectClassification (Bafna et al., 2022) |
['anp', 'awa', 'ben', 'bgc', 'bhb', 'bhd', 'bho', 'bjj', 'bns', 'bra', 'gbm', 'guj', 'hne', 'kfg', 'kfy', 'mag', 'mar', 'mup', 'noe', 'pan', 'raj'] |
Classification |
s2s |
[Social, Spoken, Written] |
{'test': 1152} |
{'test': 583.82} |
HindiDiscourseClassification |
['hin'] |
Classification |
s2s |
[Fiction, Social, Written] |
{'train': 2048} |
{'train': 79.23828125} |
HotelReviewSentimentClassification (Elnagar et al., 2018) |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 2048} |
{'train': 137.2} |
HotpotQA |
['eng'] |
Retrieval |
s2p |
[Web, Written] |
None |
{'train': {'average_document_length': 287.9079517072212, 'average_query_length': 105.54965882352941, 'num_documents': 5233329, 'num_queries': 85000, 'average_relevant_docs_per_query': 2.0}, 'dev': {'average_document_length': 287.9079517072212, 'average_query_length': 105.35634294106848, 'num_documents': 5233329, 'num_queries': 5447, 'average_relevant_docs_per_query': 2.0}, 'test': {'average_document_length': 287.9079517072212, 'average_query_length': 92.17096556380824, 'num_documents': 5233329, 'num_queries': 7405, 'average_relevant_docs_per_query': 2.0}} |
HotpotQA-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
{'test': {'average_document_length': 292.26835882093405, 'average_query_length': 94.64064821066847, 'num_documents': 5233329, 'num_queries': 7405, 'average_relevant_docs_per_query': 2.0}} |
HunSum2AbstractiveRetrieval (Botond Barta, 2024) |
['hun'] |
Retrieval |
s2p |
[News, Written] |
{'test': 1998} |
{'test': {'average_document_length': 2511.0315315315315, 'average_query_length': 201.2112112112112, 'num_documents': 1998, 'num_queries': 1998, 'average_relevant_docs_per_query': 1.0}} |
IFlyTek |
['cmn'] |
Classification |
s2s |
|
None |
None |
IN22ConvBitextMining (Jay Gala, 2023) |
['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Social, Spoken, Fiction, Spoken] |
|
|
IN22GenBitextMining (Jay Gala, 2023) |
['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Web, Legal, Government, News, Religious, Non-fiction, Written] |
{'test': 1024} |
{'test': 156.7} |
IWSLT2017BitextMining |
['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] |
BitextMining |
s2s |
[Non-fiction, Fiction, Written] |
{'validation': 21928} |
{'validation': 95.4} |
ImdbClassification |
['eng'] |
Classification |
p2p |
[Reviews, Written] |
{'test': 25000} |
{'test': 1293.8} |
InappropriatenessClassification |
['rus'] |
Classification |
s2s |
[Web, Social, Written] |
{'test': 2048} |
{'test': 97.7} |
IndicCrosslingualSTS (Ramesh et al., 2022) |
['asm', 'ben', 'eng', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
STS |
s2s |
[News, Non-fiction, Web, Government, Written, Spoken] |
{'test': 10020} |
{'test': 76.22} |
IndicGenBenchFloresBitextMining (Harman Singh, 2024) |
['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] |
BitextMining |
s2s |
[Web, News, Written] |
{'validation': 997, 'test': 1012} |
{'validation': 126.25, 'test': 130.84} |
IndicLangClassification |
['asm', 'ben', 'brx', 'doi', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] |
Classification |
s2s |
[Web, Non-fiction, Written] |
{'test': 30418} |
{'test': 106.5} |
IndicNLPNewsClassification (Anoop Kunchukuttan, 2020) |
['guj', 'kan', 'mal', 'mar', 'ori', 'pan', 'tam', 'tel'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 1169.053974484789} |
IndicQARetrieval (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel'] |
Retrieval |
s2p |
[Web, Written] |
{'test': 18586} |
{'test': {'as': {'average_document_length': 1401.28, 'average_query_length': 56.60504201680672, 'num_documents': 250, 'num_queries': 1785, 'average_relevant_docs_per_query': 1.0016806722689076}, 'bn': {'average_document_length': 2196.012, 'average_query_length': 57.069239500567534, 'num_documents': 250, 'num_queries': 1762, 'average_relevant_docs_per_query': 1.0005675368898979}, 'gu': {'average_document_length': 960.4959677419355, 'average_query_length': 60.3712158808933, 'num_documents': 248, 'num_queries': 2015, 'average_relevant_docs_per_query': 1.0009925558312656}, 'hi': {'average_document_length': 2550.770114942529, 'average_query_length': 52.84909326424871, 'num_documents': 261, 'num_queries': 1544, 'average_relevant_docs_per_query': 1.0019430051813472}, 'kn': {'average_document_length': 882.7354085603113, 'average_query_length': 50.58734344100198, 'num_documents': 257, 'num_queries': 1517, 'average_relevant_docs_per_query': 1.0}, 'ml': {'average_document_length': 2522.6437246963565, 'average_query_length': 75.93635790800252, 'num_documents': 247, 'num_queries': 1587, 'average_relevant_docs_per_query': 1.0}, 'mr': {'average_document_length': 1711.74, 'average_query_length': 58.785, 'num_documents': 250, 'num_queries': 1600, 'average_relevant_docs_per_query': 1.0}, 'or': {'average_document_length': 801.9206349206349, 'average_query_length': 55.072792362768496, 'num_documents': 252, 'num_queries': 1676, 'average_relevant_docs_per_query': 1.0011933174224343}, 'pa': {'average_document_length': 1423.5062240663901, 'average_query_length': 58.394925178919976, 'num_documents': 241, 'num_queries': 1537, 'average_relevant_docs_per_query': 1.0013012361743656}, 'ta': {'average_document_length': 2288.2608695652175, 'average_query_length': 54.06211869107044, 'num_documents': 253, 'num_queries': 1803, 'average_relevant_docs_per_query': 1.0005546311702718}, 'te': {'average_document_length': 2936.176, 'average_query_length': 67.00634371395617, 'num_documents': 250, 
'num_queries': 1734, 'average_relevant_docs_per_query': 1.0}}} |
IndicReviewsClusteringP2P (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
Clustering |
p2p |
[Reviews, Written] |
{'test': 1000} |
{'test': 137.6} |
IndicSentimentClassification (Sumanth Doddapaneni, 2022) |
['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 1000} |
{'test': 137.6} |
IndonesianIdClickbaitClassification |
['ind'] |
Classification |
s2s |
[News, Written] |
{'train': 2048} |
{'train': 64.28} |
IndonesianMongabayConservationClassification |
['ind'] |
Classification |
s2s |
[Web, Written] |
{'validation': 984, 'test': 970} |
{'validation': 1675.8, 'test': 1675.5} |
InsurancePolicyInterpretationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 133} |
{'test': 521.88} |
InternationalCitizenshipQuestionsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 206.18} |
IsiZuluNewsClassification (Madodonga et al., 2023) |
['zul'] |
Classification |
s2s |
[News, Written] |
{'train': 752} |
{'train': 43.1} |
ItaCaseholdClassification (Licari et al., 2023) |
['ita'] |
Classification |
s2s |
[Legal, Government, Written] |
{'test': 221} |
{'test': 4207.9} |
Itacola |
['ita'] |
Classification |
s2s |
[Non-fiction, Spoken, Written] |
{'train': 7801, 'test': 975} |
{'train': 35.95, 'test': 36.67} |
JCrewBlockerLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 54} |
{'test': 1092.22} |
JDReview (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
JSICK (Yanaka et al., 2022) |
['jpn'] |
STS |
s2s |
[Web, Written] |
{'test': 1986} |
{'test': 21.47} |
JSTS |
['jpn'] |
STS |
s2s |
[Web, Written] |
{'validation': 1457} |
{'validation': 46.34} |
JaGovFaqsRetrieval |
['jpn'] |
Retrieval |
s2s |
[Web, Written] |
{'test': 2048} |
{'test': {'average_document_length': 210.02601561814512, 'average_query_length': 59.48193359375, 'num_documents': 22794, 'num_queries': 2048, 'average_relevant_docs_per_query': 1.0}} |
JaQuADRetrieval (ByungHoon So, 2022) |
['jpn'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
{'validation': 2048} |
{'validation': {'average_document_length': 155.80922362309224, 'average_query_length': 30.826171875, 'num_documents': 3014, 'num_queries': 2048, 'average_relevant_docs_per_query': 2.0}} |
JavaneseIMDBClassification (Wongso et al., 2021) |
['jav'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 25000} |
{'test': 481.83} |
KLUE-NLI (Sungjoon Park, 2021) |
['kor'] |
PairClassification |
s2s |
[News, Encyclopaedic, Written] |
{'validation': 2000} |
{'validation': 35.01} |
KLUE-STS (Sungjoon Park, 2021) |
['kor'] |
STS |
s2s |
[Reviews, News, Written, Spoken] |
{'validation': 519} |
{'validation': 33.178227360308284} |
KLUE-TC (Sungjoon Park, 2021) |
['kor'] |
Classification |
s2s |
[News, Written] |
{'validation': 2048} |
{'validation': 27.079609091907326} |
KannadaNewsClassification (Anoop Kunchukuttan, 2020) |
['kan'] |
Classification |
s2s |
[News, Written] |
{'train': 6460} |
{'train': 65.88} |
KinopoiskClassification (Blinov et al., 2013) |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
{'test': 1500} |
{'test': 1897.3} |
Ko-StrategyQA (Geva et al., 2021) |
['kor'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 319.25953950924225, 'average_query_length': 22.75337837837838, 'num_documents': 9251, 'num_queries': 592, 'average_relevant_docs_per_query': 1.9341216216216217}} |
KorFin (Son et al., 2023) |
['kor'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 75.28} |
KorHateClassification (Jihyung Moon, 2020) |
['kor'] |
Classification |
s2s |
[Social, Written] |
{'train': 2048, 'test': 471} |
{'train': 38.57, 'test': 38.86} |
KorHateSpeechMLClassification |
['kor'] |
MultilabelClassification |
s2s |
[Social, Written] |
{'train': 8192, 'test': 2048} |
{'train': 33.67, 'test': 34.67} |
KorSTS (Ham et al., 2020) |
['kor'] |
STS |
s2s |
[News, Web] |
{'test': 1379} |
{'test': 29.279433139534884} |
KorSarcasmClassification (Kim et al., 2019) |
['kor'] |
Classification |
s2s |
[Social, Written] |
{'train': 2048, 'test': 301} |
{'train': 48.45, 'test': 46.77} |
KurdishSentimentClassification (Badawi et al., 2024) |
['kur'] |
Classification |
s2s |
[Web, Written] |
{'train': 6000, 'test': 1987} |
{'train': 59.38, 'test': 56.11} |
LCQMC (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
LEMBNarrativeQARetrieval |
['eng'] |
Retrieval |
s2p |
[Fiction, Non-fiction, Written] |
{'test': 10804} |
{'test': {'average_document_length': 326753.5323943662, 'average_query_length': 47.89453536223562, 'num_documents': 355, 'num_queries': 10449, 'average_relevant_docs_per_query': 1.0}} |
LEMBNeedleRetrieval (Zhu et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Academic, Blog, Written] |
{'test_256': 150, 'test_512': 150, 'test_1024': 150, 'test_2048': 150, 'test_4096': 150, 'test_8192': 150, 'test_16384': 150, 'test_32768': 150} |
{'test_256': {'average_document_length': 1013.22, 'average_query_length': 60.48, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_512': {'average_document_length': 2009.96, 'average_query_length': 57.3, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_1024': {'average_document_length': 4069.9, 'average_query_length': 58.28, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_2048': {'average_document_length': 8453.82, 'average_query_length': 59.92, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_4096': {'average_document_length': 17395.8, 'average_query_length': 55.86, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_8192': {'average_document_length': 35203.82, 'average_query_length': 59.6, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_16384': {'average_document_length': 72054.8, 'average_query_length': 59.12, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_32768': {'average_document_length': 141769.8, 'average_query_length': 58.34, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}} |
LEMBPasskeyRetrieval (Zhu et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Fiction, Written] |
{'test_256': 150, 'test_512': 150, 'test_1024': 150, 'test_2048': 150, 'test_4096': 150, 'test_8192': 150, 'test_16384': 150, 'test_32768': 150} |
{'test_256': {'average_document_length': 876.24, 'average_query_length': 38.1, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_512': {'average_document_length': 1785.2, 'average_query_length': 37.76, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_1024': {'average_document_length': 3607.18, 'average_query_length': 37.68, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_2048': {'average_document_length': 7242.2, 'average_query_length': 37.8, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_4096': {'average_document_length': 14518.16, 'average_query_length': 37.64, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_8192': {'average_document_length': 29071.16, 'average_query_length': 37.54, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_16384': {'average_document_length': 58175.16, 'average_query_length': 38.12, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}, 'test_32768': {'average_document_length': 116380.16, 'average_query_length': 37.74, 'num_documents': 100, 'num_queries': 50, 'average_relevant_docs_per_query': 1.0}} |
LEMBQMSumRetrieval |
['eng'] |
Retrieval |
s2p |
[Spoken, Written] |
{'test': 1724} |
{'test': {'average_document_length': 53335.817258883246, 'average_query_length': 433.50294695481335, 'num_documents': 197, 'num_queries': 1527, 'average_relevant_docs_per_query': 1.0}} |
LEMBSummScreenFDRetrieval |
['eng'] |
Retrieval |
s2p |
[Spoken, Written] |
{'validation': 672} |
{'validation': {'average_document_length': 30854.32738095238, 'average_query_length': 591.4910714285714, 'num_documents': 336, 'num_queries': 336, 'average_relevant_docs_per_query': 1.0}} |
LEMBWikimQARetrieval (Ho et al., 2020) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 500} |
{'test': {'average_document_length': 37445.60333333333, 'average_query_length': 67.57, 'num_documents': 300, 'num_queries': 300, 'average_relevant_docs_per_query': 1.0}} |
LanguageClassification (Conneau et al., 2018) |
['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Reviews, Web, Non-fiction, Fiction, Government, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'average_text_length': 109.546875, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}, 'train': {'num_samples': 70000, 'average_text_length': 110.86141428571429, 'unique_labels': 20, 'labels': {'12': {'count': 3500}, '1': {'count': 3500}, '19': {'count': 3500}, '15': {'count': 3500}, '13': {'count': 3500}, '11': {'count': 3500}, '17': {'count': 3500}, '14': {'count': 3500}, '16': {'count': 3500}, '5': {'count': 3500}, '0': {'count': 3500}, '8': {'count': 3500}, '7': {'count': 3500}, '2': {'count': 3500}, '3': {'count': 3500}, '10': {'count': 3500}, '6': {'count': 3500}, '18': {'count': 3500}, '4': {'count': 3500}, '9': {'count': 3500}}}} |
LccSentimentClassification |
['dan'] |
Classification |
s2s |
[News, Web, Written] |
{'test': 150} |
{'test': 118.7} |
LeCaRDv2 (Haitao Li, 2023) |
['zho'] |
Retrieval |
p2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 7232.823978919631, 'average_query_length': 4259.440251572327, 'num_documents': 3795, 'num_queries': 159, 'average_relevant_docs_per_query': 24.50314465408805}} |
LearnedHandsBenefitsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 66} |
{'test': 1308.44} |
LearnedHandsBusinessLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 174} |
{'test': 1144.51} |
LearnedHandsConsumerLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 614} |
{'test': 1277.45} |
LearnedHandsCourtsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 192} |
{'test': 1171.02} |
LearnedHandsCrimeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 688} |
{'test': 1212.9} |
LearnedHandsDivorceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 150} |
{'test': 1242.43} |
LearnedHandsDomesticViolenceLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 174} |
{'test': 1360.83} |
LearnedHandsEducationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 56} |
{'test': 1397.44} |
LearnedHandsEmploymentLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 710} |
{'test': 1262.74} |
LearnedHandsEstatesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 178} |
{'test': 1200.7} |
LearnedHandsFamilyLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 1338.27} |
LearnedHandsHealthLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 226} |
{'test': 1472.59} |
LearnedHandsHousingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 1322.54} |
LearnedHandsImmigrationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 134} |
{'test': 1216.31} |
LearnedHandsTortsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 432} |
{'test': 1406.97} |
LearnedHandsTrafficLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 556} |
{'test': 1182.91} |
LegalBenchConsumerContractsQA (Koreeda et al., 2021) |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 2745.8246753246754, 'average_query_length': 92.4090909090909, 'num_documents': 154, 'num_queries': 396, 'average_relevant_docs_per_query': 1.0}} |
LegalBenchCorporateLobbying (Neel Guha, 2023) |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 1157.2225705329154, 'average_query_length': 177.87941176470588, 'num_documents': 319, 'num_queries': 340, 'average_relevant_docs_per_query': 1.0}} |
LegalBenchPC (Neel Guha, 2023) |
['eng'] |
PairClassification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 287.18} |
LegalQuAD (Hoppe et al., 2021) |
['deu'] |
Retrieval |
s2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 19481.955, 'average_query_length': 71.965, 'num_documents': 200, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}} |
LegalReasoningCausalityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 55} |
{'test': 1563.76} |
LegalSummarization |
['eng'] |
Retrieval |
s2p |
[Legal, Written] |
None |
{'test': {'average_document_length': 606.1643835616438, 'average_query_length': 103.19014084507042, 'num_documents': 438, 'num_queries': 284, 'average_relevant_docs_per_query': 1.545774647887324}} |
LinceMTBitextMining (Aguilar et al., 2020) |
['eng', 'hin'] |
BitextMining |
s2s |
[Social, Written] |
{'train': 8060} |
{'train': 58.67} |
LitSearchRetrieval (Ajith et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Academic, Non-fiction, Written] |
{'test': 597} |
{'test': {'average_document_length': 841.2769, 'average_query_length': 141.2, 'num_documents': 64183, 'num_queries': 597, 'average_relevant_docs_per_query': 1.070351}} |
LivedoorNewsClustering.v2 |
['jpn'] |
Clustering |
s2s |
[News, Written] |
{'test': 1106} |
{'test': 1082.61} |
MAUDLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 1802.93} |
MIRACLReranking (Zhang et al., 2023) |
['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] |
Reranking |
s2s |
[Encyclopaedic, Written] |
{'dev': 44608} |
{'dev': 506.3} |
MIRACLRetrieval (Zhang et al., 2023) |
['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
{'dev': {'ar': {'average_document_length': 318.6539598547405, 'average_query_length': 29.480662983425415, 'num_documents': 2061414, 'num_queries': 2896, 'average_relevant_docs_per_query': 1.953729281767956}, 'bn': {'average_document_length': 383.2428136511194, 'average_query_length': 46.98053527980535, 'num_documents': 297265, 'num_queries': 411, 'average_relevant_docs_per_query': 2.099756690997567}, 'de': {'average_document_length': 414.28004442393404, 'average_query_length': 46.0, 'num_documents': 15866222, 'num_queries': 305, 'average_relevant_docs_per_query': 2.6590163934426227}, 'en': {'average_document_length': 401.0042914921588, 'average_query_length': 40.247809762202756, 'num_documents': 32893221, 'num_queries': 799, 'average_relevant_docs_per_query': 2.911138923654568}, 'es': {'average_document_length': 403.71153493754986, 'average_query_length': 47.373456790123456, 'num_documents': 10373953, 'num_queries': 648, 'average_relevant_docs_per_query': 4.609567901234568}, 'fa': {'average_document_length': 262.6478385010321, 'average_query_length': 41.1503164556962, 'num_documents': 2207172, 'num_queries': 632, 'average_relevant_docs_per_query': 2.079113924050633}, 'fi': {'average_document_length': 359.87767671935734, 'average_query_length': 38.63493312352478, 'num_documents': 1883509, 'num_queries': 1271, 'average_relevant_docs_per_query': 1.925255704169945}, 'fr': {'average_document_length': 343.6283550271699, 'average_query_length': 43.883381924198254, 'num_documents': 14636953, 'num_queries': 343, 'average_relevant_docs_per_query': 2.131195335276968}, 'hi': {'average_document_length': 370.96196845914386, 'average_query_length': 53.34, 'num_documents': 506264, 'num_queries': 350, 'average_relevant_docs_per_query': 2.1485714285714286}, 'id': {'average_document_length': 350.2785651811673, 'average_query_length': 37.958333333333336, 'num_documents': 1446315, 'num_queries': 960, 'average_relevant_docs_per_query': 3.216666666666667}, 'ja': 
{'average_document_length': 145.8538220556965, 'average_query_length': 17.71395348837209, 'num_documents': 6953614, 'num_queries': 860, 'average_relevant_docs_per_query': 2.0813953488372094}, 'ko': {'average_document_length': 173.97649170809927, 'average_query_length': 21.624413145539908, 'num_documents': 1486752, 'num_queries': 213, 'average_relevant_docs_per_query': 2.568075117370892}, 'ru': {'average_document_length': 332.2475377512674, 'average_query_length': 44.13258785942492, 'num_documents': 9543918, 'num_queries': 1252, 'average_relevant_docs_per_query': 2.8434504792332267}, 'sw': {'average_document_length': 228.71348655286377, 'average_query_length': 38.97095435684647, 'num_documents': 131924, 'num_queries': 482, 'average_relevant_docs_per_query': 1.887966804979253}, 'te': {'average_document_length': 396.2108674545774, 'average_query_length': 38.11231884057971, 'num_documents': 518079, 'num_queries': 828, 'average_relevant_docs_per_query': 1.0314009661835748}, 'th': {'average_document_length': 356.8283496198581, 'average_query_length': 42.87585266030014, 'num_documents': 542166, 'num_queries': 733, 'average_relevant_docs_per_query': 1.8321964529331514}, 'yo': {'average_document_length': 159.35250698366738, 'average_query_length': 37.6890756302521, 'num_documents': 49043, 'num_queries': 119, 'average_relevant_docs_per_query': 1.2100840336134453}, 'zh': {'average_document_length': 119.9458931721347, 'average_query_length': 10.867684478371501, 'num_documents': 4934368, 'num_queries': 393, 'average_relevant_docs_per_query': 2.5292620865139948}}} |
MLQARetrieval |
['ara', 'deu', 'eng', 'hin', 'spa', 'vie', 'zho'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 158083, 'validation': 15747} |
{'validation': {'ara-ara': {'average_document_length': 693.8883826879271, 'average_query_length': 42.321083172147, 'num_documents': 439, 'num_queries': 517, 'average_relevant_docs_per_query': 1.0}, 'ara-deu': {'average_document_length': 759.3882352941176, 'average_query_length': 55.14492753623188, 'num_documents': 170, 'num_queries': 207, 'average_relevant_docs_per_query': 1.0}, 'ara-eng': {'average_document_length': 693.8883826879271, 'average_query_length': 50.029013539651835, 'num_documents': 439, 'num_queries': 517, 'average_relevant_docs_per_query': 1.0}, 'ara-spa': {'average_document_length': 654.3071428571428, 'average_query_length': 53.68944099378882, 'num_documents': 140, 'num_queries': 161, 'average_relevant_docs_per_query': 1.0}, 'ara-hin': {'average_document_length': 626.5935483870968, 'average_query_length': 51.956989247311824, 'num_documents': 155, 'num_queries': 186, 'average_relevant_docs_per_query': 1.0}, 'ara-vie': {'average_document_length': 804.6216216216217, 'average_query_length': 49.57055214723926, 'num_documents': 148, 'num_queries': 163, 'average_relevant_docs_per_query': 1.0}, 'ara-zho': {'average_document_length': 787.3161290322581, 'average_query_length': 15.617021276595745, 'num_documents': 155, 'num_queries': 188, 'average_relevant_docs_per_query': 1.0}, 'deu-ara': {'average_document_length': 702.1675977653631, 'average_query_length': 43.06280193236715, 'num_documents': 179, 'num_queries': 207, 'average_relevant_docs_per_query': 1.0}, 'deu-deu': {'average_document_length': 721.405701754386, 'average_query_length': 52.572265625, 'num_documents': 456, 'num_queries': 512, 'average_relevant_docs_per_query': 1.0}, 'deu-eng': {'average_document_length': 721.405701754386, 'average_query_length': 48.33984375, 'num_documents': 456, 'num_queries': 512, 'average_relevant_docs_per_query': 1.0}, 'deu-spa': {'average_document_length': 677.2762430939226, 'average_query_length': 50.60204081632653, 'num_documents': 181, 'num_queries': 196, 
'average_relevant_docs_per_query': 1.0}, 'deu-hin': {'average_document_length': 685.917808219178, 'average_query_length': 47.01840490797546, 'num_documents': 146, 'num_queries': 163, 'average_relevant_docs_per_query': 1.0}, 'deu-vie': {'average_document_length': 921.6196319018405, 'average_query_length': 46.81868131868132, 'num_documents': 163, 'num_queries': 182, 'average_relevant_docs_per_query': 1.0}, 'deu-zho': {'average_document_length': 736.6347305389221, 'average_query_length': 14.936842105263159, 'num_documents': 167, 'num_queries': 190, 'average_relevant_docs_per_query': 1.0}, 'eng-ara': {'average_document_length': 979.3447488584475, 'average_query_length': 42.321083172147, 'num_documents': 438, 'num_queries': 517, 'average_relevant_docs_per_query': 1.0}, 'eng-deu': {'average_document_length': 947.3109619686801, 'average_query_length': 52.572265625, 'num_documents': 447, 'num_queries': 512, 'average_relevant_docs_per_query': 1.0}, 'eng-eng': {'average_document_length': 940.2842535787321, 'average_query_length': 49.01480836236934, 'num_documents': 978, 'num_queries': 1148, 'average_relevant_docs_per_query': 1.0}, 'eng-spa': {'average_document_length': 904.3166287015945, 'average_query_length': 52.146, 'num_documents': 439, 'num_queries': 500, 'average_relevant_docs_per_query': 1.0}, 'eng-hin': {'average_document_length': 926.9621749408983, 'average_query_length': 49.3905325443787, 'num_documents': 423, 'num_queries': 507, 'average_relevant_docs_per_query': 1.0}, 'eng-vie': {'average_document_length': 1011.8296460176991, 'average_query_length': 48.082191780821915, 'num_documents': 452, 'num_queries': 511, 'average_relevant_docs_per_query': 1.0}, 'eng-zho': {'average_document_length': 1001.5046511627907, 'average_query_length': 15.39484126984127, 'num_documents': 430, 'num_queries': 504, 'average_relevant_docs_per_query': 1.0}, 'spa-ara': {'average_document_length': 674.3586206896551, 'average_query_length': 41.36024844720497, 'num_documents': 145, 
'num_queries': 161, 'average_relevant_docs_per_query': 1.0}, 'spa-deu': {'average_document_length': 544.0489130434783, 'average_query_length': 51.86734693877551, 'num_documents': 184, 'num_queries': 196, 'average_relevant_docs_per_query': 1.0}, 'spa-eng': {'average_document_length': 641.8215859030837, 'average_query_length': 49.156, 'num_documents': 454, 'num_queries': 500, 'average_relevant_docs_per_query': 1.0}, 'spa-spa': {'average_document_length': 641.8215859030837, 'average_query_length': 52.146, 'num_documents': 454, 'num_queries': 500, 'average_relevant_docs_per_query': 1.0}, 'spa-hin': {'average_document_length': 703.3212121212122, 'average_query_length': 48.080213903743314, 'num_documents': 165, 'num_queries': 187, 'average_relevant_docs_per_query': 1.0}, 'spa-vie': {'average_document_length': 737.8579545454545, 'average_query_length': 48.82539682539682, 'num_documents': 176, 'num_queries': 189, 'average_relevant_docs_per_query': 1.0}, 'spa-zho': {'average_document_length': 605.52, 'average_query_length': 15.590062111801242, 'num_documents': 150, 'num_queries': 161, 'average_relevant_docs_per_query': 1.0}, 'hin-ara': {'average_document_length': 670.0394736842105, 'average_query_length': 43.623655913978496, 'num_documents': 152, 'num_queries': 186, 'average_relevant_docs_per_query': 1.0}, 'hin-deu': {'average_document_length': 596.9718309859155, 'average_query_length': 51.41717791411043, 'num_documents': 142, 'num_queries': 163, 'average_relevant_docs_per_query': 1.0}, 'hin-eng': {'average_document_length': 691.5482352941176, 'average_query_length': 49.75936883629191, 'num_documents': 425, 'num_queries': 507, 'average_relevant_docs_per_query': 1.0}, 'hin-spa': {'average_document_length': 718.4904458598726, 'average_query_length': 52.75935828877005, 'num_documents': 157, 'num_queries': 187, 'average_relevant_docs_per_query': 1.0}, 'hin-hin': {'average_document_length': 691.5482352941176, 'average_query_length': 49.3905325443787, 'num_documents': 425, 
'num_queries': 507, 'average_relevant_docs_per_query': 1.0}, 'hin-vie': {'average_document_length': 778.484076433121, 'average_query_length': 48.35028248587571, 'num_documents': 157, 'num_queries': 177, 'average_relevant_docs_per_query': 1.0}, 'hin-zho': {'average_document_length': 685.0679012345679, 'average_query_length': 15.97883597883598, 'num_documents': 162, 'num_queries': 189, 'average_relevant_docs_per_query': 1.0}, 'vie-ara': {'average_document_length': 886.6052631578947, 'average_query_length': 41.214723926380366, 'num_documents': 152, 'num_queries': 163, 'average_relevant_docs_per_query': 1.0}, 'vie-deu': {'average_document_length': 981.4534161490683, 'average_query_length': 51.27472527472528, 'num_documents': 161, 'num_queries': 182, 'average_relevant_docs_per_query': 1.0}, 'vie-eng': {'average_document_length': 892.7250554323725, 'average_query_length': 48.09001956947162, 'num_documents': 451, 'num_queries': 511, 'average_relevant_docs_per_query': 1.0}, 'vie-spa': {'average_document_length': 936.6746987951807, 'average_query_length': 51.851851851851855, 'num_documents': 166, 'num_queries': 189, 'average_relevant_docs_per_query': 1.0}, 'vie-hin': {'average_document_length': 869.0509554140127, 'average_query_length': 46.44632768361582, 'num_documents': 157, 'num_queries': 177, 'average_relevant_docs_per_query': 1.0}, 'vie-vie': {'average_document_length': 892.7250554323725, 'average_query_length': 48.082191780821915, 'num_documents': 451, 'num_queries': 511, 'average_relevant_docs_per_query': 1.0}, 'vie-zho': {'average_document_length': 960.7349397590361, 'average_query_length': 15.048913043478262, 'num_documents': 166, 'num_queries': 184, 'average_relevant_docs_per_query': 1.0}, 'zho-ara': {'average_document_length': 238.75155279503105, 'average_query_length': 44.34574468085106, 'num_documents': 161, 'num_queries': 188, 'average_relevant_docs_per_query': 1.0}, 'zho-deu': {'average_document_length': 257.109756097561, 'average_query_length': 
53.84736842105263, 'num_documents': 164, 'num_queries': 190, 'average_relevant_docs_per_query': 1.0}, 'zho-eng': {'average_document_length': 246.65237020316027, 'average_query_length': 50.15079365079365, 'num_documents': 443, 'num_queries': 504, 'average_relevant_docs_per_query': 1.0}, 'zho-spa': {'average_document_length': 249.6081081081081, 'average_query_length': 52.857142857142854, 'num_documents': 148, 'num_queries': 161, 'average_relevant_docs_per_query': 1.0}, 'zho-hin': {'average_document_length': 238.5521472392638, 'average_query_length': 52.05291005291005, 'num_documents': 163, 'num_queries': 189, 'average_relevant_docs_per_query': 1.0}, 'zho-vie': {'average_document_length': 268.32142857142856, 'average_query_length': 49.33695652173913, 'num_documents': 168, 'num_queries': 184, 'average_relevant_docs_per_query': 1.0}, 'zho-zho': {'average_document_length': 246.65237020316027, 'average_query_length': 15.39484126984127, 'num_documents': 443, 'num_queries': 504, 'average_relevant_docs_per_query': 1.0}}, 'test': {'ara-ara': {'average_document_length': 698.5714593198451, 'average_query_length': 41.26176636039752, 'num_documents': 4646, 'num_queries': 5333, 'average_relevant_docs_per_query': 1.000375023438965}, 'ara-deu': {'average_document_length': 592.5728542914171, 'average_query_length': 51.27730582524272, 'num_documents': 1503, 'num_queries': 1648, 'average_relevant_docs_per_query': 1.0006067961165048}, 'ara-eng': {'average_document_length': 698.5714593198451, 'average_query_length': 48.556451612903224, 'num_documents': 4646, 'num_queries': 5332, 'average_relevant_docs_per_query': 1.000562640660165}, 'ara-spa': {'average_document_length': 713.4833239118146, 'average_query_length': 51.406471183013146, 'num_documents': 1769, 'num_queries': 1978, 'average_relevant_docs_per_query': 1.0}, 'ara-hin': {'average_document_length': 702.1388888888889, 'average_query_length': 48.71818678317859, 'num_documents': 1512, 'num_queries': 1831, 
'average_relevant_docs_per_query': 1.0}, 'ara-vie': {'average_document_length': 745.4528096017458, 'average_query_length': 48.815828041035665, 'num_documents': 1833, 'num_queries': 2047, 'average_relevant_docs_per_query': 1.0}, 'ara-zho': {'average_document_length': 774.4593639575971, 'average_query_length': 14.985355648535565, 'num_documents': 1698, 'num_queries': 1912, 'average_relevant_docs_per_query': 1.0}, 'deu-ara': {'average_document_length': 719.6800267201069, 'average_query_length': 39.54578532443905, 'num_documents': 1497, 'num_queries': 1649, 'average_relevant_docs_per_query': 1.0}, 'deu-deu': {'average_document_length': 725.5304712558599, 'average_query_length': 51.610680257035234, 'num_documents': 4053, 'num_queries': 4513, 'average_relevant_docs_per_query': 1.0008863283846665}, 'deu-eng': {'average_document_length': 725.5304712558599, 'average_query_length': 47.07777531575449, 'num_documents': 4053, 'num_queries': 4513, 'average_relevant_docs_per_query': 1.0008863283846665}, 'deu-spa': {'average_document_length': 740.5414052697616, 'average_query_length': 50.098591549295776, 'num_documents': 1594, 'num_queries': 1775, 'average_relevant_docs_per_query': 1.0005633802816902}, 'deu-hin': {'average_document_length': 674.3714063714064, 'average_query_length': 45.146153846153844, 'num_documents': 1287, 'num_queries': 1430, 'average_relevant_docs_per_query': 1.0}, 'deu-vie': {'average_document_length': 760.1198945981555, 'average_query_length': 46.64358208955224, 'num_documents': 1518, 'num_queries': 1675, 'average_relevant_docs_per_query': 1.0}, 'deu-zho': {'average_document_length': 771.3367697594501, 'average_query_length': 14.942592592592593, 'num_documents': 1455, 'num_queries': 1620, 'average_relevant_docs_per_query': 1.0006172839506173}, 'eng-ara': {'average_document_length': 1008.3584455058619, 'average_query_length': 41.26176636039752, 'num_documents': 4606, 'num_queries': 5333, 'average_relevant_docs_per_query': 1.000375023438965}, 'eng-deu': 
{'average_document_length': 910.3226686507936, 'average_query_length': 51.610680257035234, 'num_documents': 4032, 'num_queries': 4513, 'average_relevant_docs_per_query': 1.0008863283846665}, 'eng-eng': {'average_document_length': 983.0993344090359, 'average_query_length': 47.960714902434816, 'num_documents': 9916, 'num_queries': 11582, 'average_relevant_docs_per_query': 1.000690726990157}, 'eng-spa': {'average_document_length': 967.4622376109068, 'average_query_length': 50.923252713768804, 'num_documents': 4621, 'num_queries': 5251, 'average_relevant_docs_per_query': 1.000380879832413}, 'eng-hin': {'average_document_length': 986.0465631929046, 'average_query_length': 47.328315703824245, 'num_documents': 4059, 'num_queries': 4916, 'average_relevant_docs_per_query': 1.000406834825061}, 'eng-vie': {'average_document_length': 1048.6062197940744, 'average_query_length': 48.094085532302095, 'num_documents': 4759, 'num_queries': 5495, 'average_relevant_docs_per_query': 1.0}, 'eng-zho': {'average_document_length': 1063.8536257833482, 'average_query_length': 15.019080996884735, 'num_documents': 4468, 'num_queries': 5136, 'average_relevant_docs_per_query': 1.0001947040498442}, 'spa-ara': {'average_document_length': 645.5182320441988, 'average_query_length': 40.78412537917088, 'num_documents': 1810, 'num_queries': 1978, 'average_relevant_docs_per_query': 1.0}, 'spa-deu': {'average_document_length': 586.6057810578105, 'average_query_length': 51.870913190529876, 'num_documents': 1626, 'num_queries': 1774, 'average_relevant_docs_per_query': 1.0011273957158964}, 'spa-eng': {'average_document_length': 630.6735979836169, 'average_query_length': 47.827907862173994, 'num_documents': 4761, 'num_queries': 5253, 'average_relevant_docs_per_query': 1.0}, 'spa-spa': {'average_document_length': 630.6735979836169, 'average_query_length': 50.923252713768804, 'num_documents': 4761, 'num_queries': 5251, 'average_relevant_docs_per_query': 1.000380879832413}, 'spa-hin': 
{'average_document_length': 613.3478260869565, 'average_query_length': 46.36680208937899, 'num_documents': 1518, 'num_queries': 1723, 'average_relevant_docs_per_query': 1.0}, 'spa-vie': {'average_document_length': 659.6179295624333, 'average_query_length': 48.1595639246779, 'num_documents': 1874, 'num_queries': 2018, 'average_relevant_docs_per_query': 1.0}, 'spa-zho': {'average_document_length': 668.6646171045277, 'average_query_length': 15.115562403697997, 'num_documents': 1789, 'num_queries': 1947, 'average_relevant_docs_per_query': 1.0}, 'hin-ara': {'average_document_length': 765.0352862849534, 'average_query_length': 42.04642271982523, 'num_documents': 1502, 'num_queries': 1831, 'average_relevant_docs_per_query': 1.0}, 'hin-deu': {'average_document_length': 719.676862745098, 'average_query_length': 51.002799160251925, 'num_documents': 1275, 'num_queries': 1429, 'average_relevant_docs_per_query': 1.000699790062981}, 'hin-eng': {'average_document_length': 760.9956086850451, 'average_query_length': 47.91232709519935, 'num_documents': 4099, 'num_queries': 4916, 'average_relevant_docs_per_query': 1.000406834825061}, 'hin-spa': {'average_document_length': 753.5010281014394, 'average_query_length': 50.46689895470383, 'num_documents': 1459, 'num_queries': 1722, 'average_relevant_docs_per_query': 1.0005807200929153}, 'hin-hin': {'average_document_length': 760.9956086850451, 'average_query_length': 47.328315703824245, 'num_documents': 4099, 'num_queries': 4916, 'average_relevant_docs_per_query': 1.000406834825061}, 'hin-vie': {'average_document_length': 789.9253822629969, 'average_query_length': 48.21160760143811, 'num_documents': 1635, 'num_queries': 1947, 'average_relevant_docs_per_query': 1.0}, 'hin-zho': {'average_document_length': 834.2057448229793, 'average_query_length': 15.101301641199774, 'num_documents': 1497, 'num_queries': 1767, 'average_relevant_docs_per_query': 1.0}, 'vie-ara': {'average_document_length': 992.2129527991218, 'average_query_length': 
41.82462139716659, 'num_documents': 1822, 'num_queries': 2047, 'average_relevant_docs_per_query': 1.0}, 'vie-deu': {'average_document_length': 861.0610079575597, 'average_query_length': 51.58721624850657, 'num_documents': 1508, 'num_queries': 1674, 'average_relevant_docs_per_query': 1.0005973715651135}, 'vie-eng': {'average_document_length': 913.8633993743483, 'average_query_length': 48.11086837793555, 'num_documents': 4795, 'num_queries': 5493, 'average_relevant_docs_per_query': 1.0003640997633352}, 'vie-spa': {'average_document_length': 940.0322580645161, 'average_query_length': 51.13386217154189, 'num_documents': 1829, 'num_queries': 2017, 'average_relevant_docs_per_query': 1.0004957858205255}, 'vie-hin': {'average_document_length': 838.1713414634146, 'average_query_length': 47.484334874165384, 'num_documents': 1640, 'num_queries': 1947, 'average_relevant_docs_per_query': 1.0}, 'vie-vie': {'average_document_length': 913.8633993743483, 'average_query_length': 48.094085532302095, 'num_documents': 4795, 'num_queries': 5495, 'average_relevant_docs_per_query': 1.0}, 'vie-zho': {'average_document_length': 999.064534883721, 'average_query_length': 15.045805455481215, 'num_documents': 1720, 'num_queries': 1943, 'average_relevant_docs_per_query': 1.0}, 'zho-ara': {'average_document_length': 253.71303841676368, 'average_query_length': 42.04866562009419, 'num_documents': 1718, 'num_queries': 1911, 'average_relevant_docs_per_query': 1.000523286237572}, 'zho-deu': {'average_document_length': 241.84631147540983, 'average_query_length': 52.25107958050586, 'num_documents': 1464, 'num_queries': 1621, 'average_relevant_docs_per_query': 1.0}, 'zho-eng': {'average_document_length': 247.55609326880776, 'average_query_length': 48.64167478091529, 'num_documents': 4546, 'num_queries': 5135, 'average_relevant_docs_per_query': 1.0003894839337877}, 'zho-spa': {'average_document_length': 254.44552196235026, 'average_query_length': 51.90446841294299, 'num_documents': 1753, 'num_queries': 
1947, 'average_relevant_docs_per_query': 1.0}, 'zho-hin': {'average_document_length': 229.60590163934427, 'average_query_length': 49.06625141562854, 'num_documents': 1525, 'num_queries': 1766, 'average_relevant_docs_per_query': 1.0005662514156286}, 'zho-vie': {'average_document_length': 266.1140401146132, 'average_query_length': 49.27328872876994, 'num_documents': 1745, 'num_queries': 1943, 'average_relevant_docs_per_query': 1.0}, 'zho-zho': {'average_document_length': 247.55609326880776, 'average_query_length': 15.019080996884735, 'num_documents': 4546, 'num_queries': 5136, 'average_relevant_docs_per_query': 1.0001947040498442}}} |
MLQuestions |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Academic, Written] |
{'dev': 1500, 'test': 1500} |
{'dev': {'average_document_length': 258.8772727272727, 'average_query_length': 45.05533333333333, 'num_documents': 11000, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'test': {'average_document_length': 258.8772727272727, 'average_query_length': 45.75333333333333, 'num_documents': 11000, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}} |
MLSUMClusteringP2P.v2 (Scialom et al., 2020) |
['deu', 'fra', 'rus', 'spa'] |
Clustering |
p2p |
[News, Written] |
{'validation': 2048, 'test': 2048} |
{'validation': 4613, 'test': 4810} |
MLSUMClusteringS2S.v2 (Scialom et al., 2020) |
['deu', 'fra', 'rus', 'spa'] |
Clustering |
s2s |
[News, Written] |
{'validation': 750, 'test': 756} |
{'validation': 4613, 'test': 4810} |
MMarcoReranking (Luiz Henrique Bonifacio, 2021) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
MMarcoRetrieval (Shitao Xiao, 2024) |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 114.41787048392986, 'average_query_length': 10.51131805157593, 'num_documents': 106813, 'num_queries': 6980, 'average_relevant_docs_per_query': 1.0654727793696275}} |
MSMARCO (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
{'train': {'average_document_length': 335.79716603691344, 'average_query_length': 33.21898281898998, 'num_documents': 8841823, 'num_queries': 502939, 'average_relevant_docs_per_query': 1.0592755781516248}, 'dev': {'average_document_length': 335.79716603691344, 'average_query_length': 33.2621776504298, 'num_documents': 8841823, 'num_queries': 6980, 'average_relevant_docs_per_query': 1.0654727793696275}, 'test': {'average_document_length': 335.79716603691344, 'average_query_length': 32.74418604651163, 'num_documents': 8841823, 'num_queries': 43, 'average_relevant_docs_per_query': 95.3953488372093}} |
MSMARCO-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Web, Written] |
None |
{'test': {'average_document_length': 349.3574939240471, 'average_query_length': 33.02325581395349, 'num_documents': 8841823, 'num_queries': 43, 'average_relevant_docs_per_query': 95.3953488372093}} |
MSMARCOv2 (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
None |
MTOPDomainClassification |
['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] |
Classification |
s2s |
[Spoken, Spoken] |
{'validation': 2235, 'test': 4386} |
{'validation': {'num_samples': 10837, 'average_text_length': 39.85374181046415, 'unique_labels': 11, 'labels': {'1': {'count': 1688}, '10': {'count': 754}, '7': {'count': 849}, '3': {'count': 681}, '6': {'count': 985}, '2': {'count': 647}, '9': {'count': 872}, '0': {'count': 833}, '5': {'count': 1182}, '4': {'count': 982}, '8': {'count': 1364}}, 'hf_subset_descriptive_stats': {}, 'en': {'num_samples': 2235, 'average_text_length': 36.53825503355705, 'unique_labels': 11, 'labels': {'1': {'count': 329}, '10': {'count': 185}, '7': {'count': 183}, '3': {'count': 134}, '6': {'count': 186}, '2': {'count': 123}, '9': {'count': 196}, '0': {'count': 176}, '5': {'count': 228}, '4': {'count': 207}, '8': {'count': 288}}}, 'de': {'num_samples': 1815, 'average_text_length': 42.824793388429754, 'unique_labels': 11, 'labels': {'0': {'count': 99}, '1': {'count': 303}, '2': {'count': 104}, '3': {'count': 122}, '6': {'count': 165}, '4': {'count': 157}, '7': {'count': 141}, '5': {'count': 203}, '8': {'count': 220}, '10': {'count': 133}, '9': {'count': 168}}}, 'es': {'num_samples': 1527, 'average_text_length': 44.34839554682384, 'unique_labels': 11, 'labels': {'1': {'count': 197}, '6': {'count': 166}, '4': {'count': 138}, '10': {'count': 103}, '3': {'count': 104}, '5': {'count': 190}, '2': {'count': 115}, '8': {'count': 212}, '7': {'count': 82}, '9': {'count': 76}, '0': {'count': 144}}}, 'fr': {'num_samples': 1577, 'average_text_length': 43.12492073557387, 'unique_labels': 11, 'labels': {'0': {'count': 125}, '1': {'count': 278}, '2': {'count': 92}, '3': {'count': 89}, '4': {'count': 137}, '7': {'count': 145}, '6': {'count': 138}, '5': {'count': 168}, '8': {'count': 203}, '9': {'count': 124}, '10': {'count': 78}}}, 'hi': {'num_samples': 2012, 'average_text_length': 39.139662027833005, 'unique_labels': 11, 'labels': {'0': {'count': 161}, '1': {'count': 304}, '3': {'count': 126}, '4': {'count': 193}, '2': {'count': 109}, '10': {'count': 154}, '5': {'count': 208}, '6': {'count': 167}, '7': 
{'count': 172}, '8': {'count': 235}, '9': {'count': 183}}}, 'th': {'num_samples': 1671, 'average_text_length': 34.726511071214844, 'unique_labels': 11, 'labels': {'0': {'count': 128}, '1': {'count': 277}, '2': {'count': 104}, '3': {'count': 106}, '4': {'count': 150}, '5': {'count': 185}, '6': {'count': 163}, '7': {'count': 126}, '8': {'count': 206}, '9': {'count': 125}, '10': {'count': 101}}}}, 'test': {'num_samples': 19680, 'average_text_length': 39.71443089430894, 'unique_labels': 11, 'labels': {'2': {'count': 977}, '5': {'count': 2372}, '6': {'count': 2014}, '8': {'count': 2572}, '9': {'count': 1317}, '1': {'count': 3065}, '10': {'count': 1330}, '3': {'count': 1351}, '0': {'count': 1459}, '7': {'count': 1535}, '4': {'count': 1688}}, 'hf_subset_descriptive_stats': {}, 'en': {'num_samples': 4386, 'average_text_length': 36.79343365253078, 'unique_labels': 11, 'labels': {'2': {'count': 197}, '5': {'count': 487}, '6': {'count': 418}, '8': {'count': 613}, '9': {'count': 346}, '1': {'count': 613}, '10': {'count': 358}, '3': {'count': 290}, '0': {'count': 341}, '7': {'count': 354}, '4': {'count': 369}}}, 'de': {'num_samples': 3549, 'average_text_length': 42.67258382642998, 'unique_labels': 11, 'labels': {'0': {'count': 193}, '10': {'count': 264}, '1': {'count': 553}, '2': {'count': 163}, '3': {'count': 256}, '5': {'count': 439}, '4': {'count': 306}, '6': {'count': 353}, '7': {'count': 279}, '8': {'count': 452}, '9': {'count': 291}}}, 'es': {'num_samples': 2998, 'average_text_length': 43.552034689793196, 'unique_labels': 11, 'labels': {'1': {'count': 401}, '6': {'count': 352}, '4': {'count': 246}, '10': {'count': 206}, '3': {'count': 231}, '5': {'count': 404}, '2': {'count': 177}, '8': {'count': 435}, '7': {'count': 156}, '9': {'count': 126}, '0': {'count': 264}}}, 'fr': {'num_samples': 3193, 'average_text_length': 43.854995302223614, 'unique_labels': 11, 'labels': {'0': {'count': 253}, '1': {'count': 551}, '2': {'count': 159}, '3': {'count': 190}, '4': {'count': 280}, 
'6': {'count': 330}, '5': {'count': 356}, '7': {'count': 272}, '8': {'count': 462}, '10': {'count': 159}, '9': {'count': 181}}}, 'hi': {'num_samples': 2789, 'average_text_length': 37.395123700250984, 'unique_labels': 11, 'labels': {'0': {'count': 208}, '1': {'count': 470}, '5': {'count': 335}, '3': {'count': 195}, '4': {'count': 242}, '2': {'count': 132}, '6': {'count': 267}, '7': {'count': 262}, '8': {'count': 265}, '10': {'count': 186}, '9': {'count': 227}}}, 'th': {'num_samples': 2765, 'average_text_length': 33.94792043399638, 'unique_labels': 11, 'labels': {'0': {'count': 200}, '1': {'count': 477}, '2': {'count': 149}, '3': {'count': 189}, '4': {'count': 245}, '6': {'count': 294}, '5': {'count': 351}, '7': {'count': 212}, '8': {'count': 345}, '9': {'count': 146}, '10': {'count': 157}}}}, 'train': {'num_samples': 73928, 'average_text_length': 39.73095444215994, 'unique_labels': 11, 'labels': {'0': {'count': 5262}, '5': {'count': 8334}, '6': {'count': 6961}, '9': {'count': 5313}, '1': {'count': 11107}, '8': {'count': 9698}, '10': {'count': 5084}, '2': {'count': 4770}, '4': {'count': 6644}, '3': {'count': 5191}, '7': {'count': 5564}}, 'hf_subset_descriptive_stats': {}, 'en': {'num_samples': 15667, 'average_text_length': 36.57222186761984, 'unique_labels': 11, 'labels': {'0': {'count': 1165}, '5': {'count': 1657}, '6': {'count': 1402}, '9': {'count': 1303}, '1': {'count': 2187}, '8': {'count': 2157}, '10': {'count': 1219}, '2': {'count': 929}, '4': {'count': 1353}, '3': {'count': 1064}, '7': {'count': 1231}}}, 'de': {'num_samples': 13424, 'average_text_length': 43.226013110846246, 'unique_labels': 11, 'labels': {'0': {'count': 761}, '10': {'count': 996}, '4': {'count': 1185}, '1': {'count': 2016}, '7': {'count': 1029}, '5': {'count': 1484}, '2': {'count': 814}, '3': {'count': 980}, '6': {'count': 1265}, '8': {'count': 1767}, '9': {'count': 1127}}}, 'es': {'num_samples': 10934, 'average_text_length': 43.60691421254801, 'unique_labels': 11, 'labels': {'1': {'count': 
1459}, '6': {'count': 1188}, '4': {'count': 928}, '10': {'count': 743}, '3': {'count': 830}, '5': {'count': 1396}, '2': {'count': 823}, '8': {'count': 1555}, '7': {'count': 525}, '9': {'count': 560}, '0': {'count': 927}}}, 'fr': {'num_samples': 11814, 'average_text_length': 43.594802776367025, 'unique_labels': 11, 'labels': {'0': {'count': 861}, '10': {'count': 668}, '1': {'count': 1968}, '7': {'count': 975}, '5': {'count': 1261}, '2': {'count': 799}, '3': {'count': 734}, '4': {'count': 1082}, '6': {'count': 1113}, '8': {'count': 1656}, '9': {'count': 697}}}, 'hi': {'num_samples': 11330, 'average_text_length': 37.592144748455425, 'unique_labels': 11, 'labels': {'0': {'count': 794}, '1': {'count': 1741}, '7': {'count': 974}, '2': {'count': 670}, '3': {'count': 831}, '5': {'count': 1272}, '6': {'count': 940}, '4': {'count': 1073}, '10': {'count': 786}, '8': {'count': 1281}, '9': {'count': 968}}}, 'th': {'num_samples': 10759, 'average_text_length': 34.04043126684636, 'unique_labels': 11, 'labels': {'0': {'count': 754}, '10': {'count': 672}, '1': {'count': 1736}, '7': {'count': 830}, '2': {'count': 735}, '3': {'count': 752}, '5': {'count': 1264}, '6': {'count': 1053}, '4': {'count': 1023}, '8': {'count': 1282}, '9': {'count': 658}}}}} |
MTOPIntentClassification |
['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] |
Classification |
s2s |
[Spoken, Spoken] |
{'validation': 2235, 'test': 4386} |
{'validation': 36.5, 'test': 36.8} |
MacedonianTweetSentimentClassification |
['mkd'] |
Classification |
s2s |
[Social, Written] |
{'test': 1139} |
{'test': 67.6} |
MalayalamNewsClassification (Anoop Kunchukuttan, 2020) |
['mal'] |
Classification |
s2s |
[News, Written] |
{'train': 5036, 'test': 1260} |
{'train': 79.48, 'test': 80.44} |
MalteseNewsClassification |
['mlt'] |
MultilabelClassification |
s2s |
[Constructed, Written] |
{'train': 10784, 'test': 2297} |
{'train': 1595.63, 'test': 1752.1} |
MarathiNewsClassification (Anoop Kunchukuttan, 2020) |
['mar'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 52.37} |
MasakhaNEWSClassification (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Classification |
s2s |
[News, Written] |
{'test': 422} |
{'test': 5116.6} |
MasakhaNEWSClusteringP2P (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Clustering |
p2p |
[News, Written, Non-fiction] |
None |
None |
MasakhaNEWSClusteringS2S (David Ifeoluwa Adelani, 2023) |
['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] |
Clustering |
s2s |
|
None |
None |
MassiveIntentClassification (Jack FitzGerald, 2022) |
['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Spoken] |
{'validation': 2033, 'test': 2974} |
{'validation': 34.8, 'test': 34.6} |
MassiveScenarioClassification (Jack FitzGerald, 2022) |
['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] |
Classification |
s2s |
[Spoken] |
{'validation': 2033, 'test': 2974} |
{'validation': 34.8, 'test': 34.6} |
MedicalQARetrieval (Asma et al., 2019) |
['eng'] |
Retrieval |
s2s |
[Medical, Written] |
{'test': 2048} |
{'test': {'average_document_length': 1153.482421875, 'average_query_length': 52.4794921875, 'num_documents': 2048, 'num_queries': 2048, 'average_relevant_docs_per_query': 1.0}} |
MedicalRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 122.04231725066585, 'average_query_length': 17.938, 'num_documents': 100999, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}} |
MedrxivClusteringP2P.v2 |
['eng'] |
Clustering |
p2p |
[Academic, Medical, Written] |
{'test': 1500} |
{'test': 1984.7} |
MedrxivClusteringS2S.v2 |
['eng'] |
Clustering |
s2s |
[Academic, Medical, Written] |
{'test': 1500} |
{'test': 114.9} |
MewsC16JaClustering |
['jpn'] |
Clustering |
s2s |
[News, Written] |
{'test': 992} |
{'test': 95} |
MindSmallReranking |
['eng'] |
Reranking |
s2s |
[News, Written] |
{'test': 107968} |
{'test': 70.9} |
MintakaRetrieval |
['ara', 'deu', 'fra', 'hin', 'ita', 'jpn', 'por', 'spa'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
None |
{'test': {'ar': {'average_document_length': 12.736418511066399, 'average_query_length': 55.275533363595095, 'num_documents': 1491, 'num_queries': 2203, 'average_relevant_docs_per_query': 1.0}, 'de': {'average_document_length': 14.40060422960725, 'average_query_length': 65.41322662173546, 'num_documents': 1655, 'num_queries': 2374, 'average_relevant_docs_per_query': 1.0}, 'es': {'average_document_length': 14.291789722386296, 'average_query_length': 64.88325082508251, 'num_documents': 1693, 'num_queries': 2424, 'average_relevant_docs_per_query': 1.0}, 'fr': {'average_document_length': 14.407234539089849, 'average_query_length': 68.88452088452088, 'num_documents': 1714, 'num_queries': 2442, 'average_relevant_docs_per_query': 1.0}, 'hi': {'average_document_length': 12.71038961038961, 'average_query_length': 58.404637247569184, 'num_documents': 770, 'num_queries': 1337, 'average_relevant_docs_per_query': 1.0}, 'it': {'average_document_length': 14.365985576923077, 'average_query_length': 64.39707724425887, 'num_documents': 1664, 'num_queries': 2395, 'average_relevant_docs_per_query': 1.0004175365344468}, 'ja': {'average_document_length': 9.167713567839195, 'average_query_length': 29.961937716262977, 'num_documents': 1592, 'num_queries': 2312, 'average_relevant_docs_per_query': 1.0}, 'pt': {'average_document_length': 14.244471744471744, 'average_query_length': 60.42225998300765, 'num_documents': 1628, 'num_queries': 2354, 'average_relevant_docs_per_query': 1.0004248088360237}}} |
Moroco (Andrei M. Butnaru, 2019) |
['ron'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 1710.94} |
MovieReviewSentimentClassification (Théophile Blard, 2020) |
['fra'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 1024, 'test': 1024} |
{'validation': 550.3, 'test': 558.1} |
MultiEURLEXMultilabelClassification (Chalkidis et al., 2021) |
['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] |
MultilabelClassification |
p2p |
[Legal, Government, Written] |
{'test': 5000} |
{'test': {'average_text_length': 12014.408930434782, 'average_label_per_text': 3.5938, 'num_samples': 115000, 'unique_labels': 21, 'labels': {'18': {'count': 50784}, '15': {'count': 30981}, '5': {'count': 24978}, '6': {'count': 45080}, '3': {'count': 63687}, '17': {'count': 37743}, '1': {'count': 15019}, '20': {'count': 14030}, '0': {'count': 17802}, '2': {'count': 22402}, '19': {'count': 10212}, '9': {'count': 3772}, '4': {'count': 9062}, '10': {'count': 7705}, '11': {'count': 12213}, '7': {'count': 14306}, '12': {'count': 11799}, '8': {'count': 13800}, '13': {'count': 2346}, '14': {'count': 4255}, '16': {'count': 1311}}, 'hf_subset_descriptive_stats': {'en': {'average_text_length': 11720.2926, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'de': {'average_text_length': 12865.4162, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fr': {'average_text_length': 13081.1098, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': 
{'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'it': {'average_text_length': 12763.4786, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'es': {'average_text_length': 13080.29, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pl': {'average_text_length': 12282.5926, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, 
'4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'ro': {'average_text_length': 12836.9322, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'nl': {'average_text_length': 12857.9742, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'el': {'average_text_length': 12998.143, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hu': {'average_text_length': 12424.641, 
'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'pt': {'average_text_length': 12482.4616, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'cs': {'average_text_length': 10783.4676, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sv': {'average_text_length': 11612.4774, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': 
{'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'bg': {'average_text_length': 12235.4268, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'da': {'average_text_length': 11773.958, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'fi': {'average_text_length': 12087.6862, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': 
{'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sk': {'average_text_length': 11130.814, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lt': {'average_text_length': 11245.3566, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'hr': {'average_text_length': 11022.142, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'sl': {'average_text_length': 10620.0594, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, 
'15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'et': {'average_text_length': 10898.4312, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'lv': {'average_text_length': 10938.5102, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}, 'mt': {'average_text_length': 12589.7442, 'average_label_per_text': 3.5938, 'num_samples': 5000, 'unique_labels': 21, 'labels': {'18': {'count': 2208}, '15': {'count': 1347}, '5': {'count': 1086}, '6': {'count': 1960}, '3': {'count': 2769}, '17': {'count': 1641}, '1': {'count': 653}, '20': {'count': 610}, '0': {'count': 774}, '2': {'count': 974}, '19': {'count': 444}, '9': {'count': 
164}, '4': {'count': 394}, '10': {'count': 335}, '11': {'count': 531}, '7': {'count': 622}, '12': {'count': 513}, '8': {'count': 600}, '13': {'count': 102}, '14': {'count': 185}, '16': {'count': 57}}}}}} |
MultiHateClassification |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'nld', 'pol', 'por', 'spa'] |
Classification |
s2s |
[Constructed, Written] |
{'test': 10000} |
{'test': 45.9} |
MultiLongDocRetrieval (Jianlv Chen, 2024) |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'por', 'rus', 'spa', 'tha'] |
Retrieval |
s2p |
[Encyclopaedic, Written, Web, Non-fiction, Fiction] |
None |
{'dev': {'ar': {'average_document_length': 29234.48153016958, 'average_query_length': 69.27, 'num_documents': 7607, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'de': {'average_document_length': 33771.2111, 'average_query_length': 153.63, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'en': {'average_document_length': 13332.76764, 'average_query_length': 81.22, 'num_documents': 200000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'es': {'average_document_length': 36567.1736990891, 'average_query_length': 123.11, 'num_documents': 9551, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'fr': {'average_document_length': 36009.4934, 'average_query_length': 142.165, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'hi': {'average_document_length': 18688.50788229112, 'average_query_length': 77.995, 'num_documents': 3806, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'it': {'average_document_length': 36633.9969, 'average_query_length': 99.615, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ja': {'average_document_length': 14480.7508, 'average_query_length': 61.625, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ko': {'average_document_length': 13813.441224093263, 'average_query_length': 58.845, 'num_documents': 6176, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'pt': {'average_document_length': 32127.576952351956, 'average_query_length': 122.275, 'num_documents': 6569, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ru': {'average_document_length': 35934.8756, 'average_query_length': 87.875, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'th': {'average_document_length': 25993.2696, 'average_query_length': 107.81, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 
'zh': {'average_document_length': 6039.059725, 'average_query_length': 26.79, 'num_documents': 200000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}}, 'test': {'ar': {'average_document_length': 29234.48153016958, 'average_query_length': 75.77, 'num_documents': 7607, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'de': {'average_document_length': 33771.2111, 'average_query_length': 123.65, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'en': {'average_document_length': 13332.76764, 'average_query_length': 81.33, 'num_documents': 200000, 'num_queries': 800, 'average_relevant_docs_per_query': 1.0}, 'es': {'average_document_length': 36567.1736990891, 'average_query_length': 131.985, 'num_documents': 9551, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'fr': {'average_document_length': 36009.4934, 'average_query_length': 149.795, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'hi': {'average_document_length': 18688.50788229112, 'average_query_length': 103.76, 'num_documents': 3806, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'it': {'average_document_length': 36633.9969, 'average_query_length': 114.595, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ja': {'average_document_length': 14480.7508, 'average_query_length': 55.73, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ko': {'average_document_length': 13813.441224093263, 'average_query_length': 58.72, 'num_documents': 6176, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'pt': {'average_document_length': 32127.576952351956, 'average_query_length': 113.455, 'num_documents': 6569, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'ru': {'average_document_length': 35934.8756, 'average_query_length': 94.87, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 
'th': {'average_document_length': 25993.2696, 'average_query_length': 97.99, 'num_documents': 10000, 'num_queries': 200, 'average_relevant_docs_per_query': 1.0}, 'zh': {'average_document_length': 6039.059725, 'average_query_length': 24.70875, 'num_documents': 200000, 'num_queries': 800, 'average_relevant_docs_per_query': 1.0}}} |
MultilingualSentiment |
['cmn'] |
Classification |
s2s |
|
None |
None |
MultilingualSentimentClassification |
['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 7000} |
{'test': 56} |
MyanmarNews (A. H. Khine, 2017) |
['mya'] |
Classification |
p2p |
[News, Written] |
{'train': 2048} |
{'train': 174.2} |
NFCorpus (Boteva et al., 2016) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1589.783925130746, 'average_query_length': 21.764705882352942, 'num_documents': 3633, 'num_queries': 323, 'average_relevant_docs_per_query': 38.18575851393189}} |
NFCorpus-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1652.1926782273604, 'average_query_length': 24.390092879256965, 'num_documents': 3633, 'num_queries': 323, 'average_relevant_docs_per_query': 38.18575851393189}} |
NLPJournalAbsIntroRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
{'test': 404} |
{'test': {'average_document_length': 2052.8611111111113, 'average_query_length': 439.2772277227723, 'num_documents': 504, 'num_queries': 404, 'average_relevant_docs_per_query': 1.0}} |
NLPJournalTitleAbsRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
{'test': 404} |
{'test': {'average_document_length': 441.6746031746032, 'average_query_length': 27.60891089108911, 'num_documents': 504, 'num_queries': 404, 'average_relevant_docs_per_query': 1.0}} |
NLPJournalTitleIntroRetrieval |
['jpn'] |
Retrieval |
s2s |
[Academic, Written] |
{'test': 404} |
{'test': {'average_document_length': 2052.8611111111113, 'average_query_length': 27.60891089108911, 'num_documents': 504, 'num_queries': 404, 'average_relevant_docs_per_query': 1.0}} |
NQ (Tom Kwiatkowski, 2019) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 492.2287851281462, 'average_query_length': 48.17902665121669, 'num_documents': 2681468, 'num_queries': 3452, 'average_relevant_docs_per_query': 1.2169756662804172}} |
NQ-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 502.14302128535564, 'average_query_length': 48.31662804171495, 'num_documents': 2681468, 'num_queries': 3452, 'average_relevant_docs_per_query': 1.2169756662804172}} |
NTREXBitextMining |
['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] |
BitextMining |
s2s |
[News, Written] |
{'test': 3826252} |
{'test': 120} |
NYSJudicialEthicsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 292} |
{'test': 159.45} |
NaijaSenti |
['hau', 'ibo', 'pcm', 'yor'] |
Classification |
s2s |
[Social, Written] |
{'test': 4800} |
{'test': 72.81} |
NarrativeQARetrieval (Tomáš Kočiský, 2017) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 326753.5323943662, 'average_query_length': 47.730889457232166, 'num_documents': 355, 'num_queries': 10557, 'average_relevant_docs_per_query': 1.0}} |
NepaliNewsClassification |
['nep'] |
Classification |
s2s |
[News, Written] |
{'train': 5975, 'test': 1495} |
{'train': 196.61, 'test': 196.017} |
NeuCLIR2022Retrieval (Lawrie et al., 2023) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
{'fas': 2232130, 'zho': 3179323, 'rus': 4627657} |
{'test': {'fas': {'average_document_length': 2032.093148525817, 'average_query_length': 85.4298245614035, 'num_documents': 2232016, 'num_queries': 114, 'average_relevant_docs_per_query': 12.912280701754385}, 'rus': {'average_document_length': 1757.9129983233004, 'average_query_length': 85.58771929824562, 'num_documents': 4627543, 'num_queries': 114, 'average_relevant_docs_per_query': 16.57017543859649}, 'zho': {'average_document_length': 743.1426659901881, 'average_query_length': 24.17543859649123, 'num_documents': 3179209, 'num_queries': 114, 'average_relevant_docs_per_query': 18.710526315789473}}} |
NeuCLIR2023Retrieval (Dawn Lawrie, 2024) |
['fas', 'rus', 'zho'] |
Retrieval |
s2p |
[News, Written] |
{'fas': 2232092, 'zho': 3179285, 'rus': 4627619} |
{'test': {'fas': {'average_document_length': 2032.093148525817, 'average_query_length': 65.48684210526316, 'num_documents': 2232016, 'num_queries': 76, 'average_relevant_docs_per_query': 66.28947368421052}, 'rus': {'average_document_length': 1757.9129983233004, 'average_query_length': 74.4342105263158, 'num_documents': 4627543, 'num_queries': 76, 'average_relevant_docs_per_query': 62.223684210526315}, 'zho': {'average_document_length': 743.1426659901881, 'average_query_length': 22.210526315789473, 'num_documents': 3179209, 'num_queries': 76, 'average_relevant_docs_per_query': 53.68421052631579}}} |
News21InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
{'eng': 61906} |
{'eng': 2983.724665391969} |
NewsClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[News, Written] |
{'test': 7600} |
{'test': 235.29} |
NoRecClassification |
['nob'] |
Classification |
s2s |
[Written, Reviews] |
{'test': 2050} |
{'test': 82} |
NollySentiBitextMining (Shode et al., 2023) |
['eng', 'hau', 'ibo', 'pcm', 'yor'] |
BitextMining |
s2s |
[Social, Reviews, Written] |
{'train': 1640} |
{'train': 135.91} |
NorQuadRetrieval |
['nob'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
{'test': 2602} |
{'test': {'average_document_length': 214.5114503816794, 'average_query_length': 47.896484375, 'num_documents': 1048, 'num_queries': 1024, 'average_relevant_docs_per_query': 2.0}} |
NordicLangClassification |
['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] |
Classification |
s2s |
[Encyclopaedic] |
{'test': 3000} |
{'test': 78.2} |
NorwegianCourtsBitextMining (Tiedemann et al., 2020) |
['nno', 'nob'] |
BitextMining |
s2s |
[Legal, Written] |
{'test': 2050} |
{'test': 1884.0} |
NorwegianParliamentClassification |
['nob'] |
Classification |
s2s |
[Government, Spoken] |
{'test': 1200, 'validation': 1200} |
{'test': 1884.0, 'validation': 1911.0} |
NusaParagraphEmotionClassification |
['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
Classification |
s2s |
[Non-fiction, Fiction, Written] |
{'train': 15516, 'validation': 2948, 'test': 6250} |
{'train': 740.24, 'validation': 740.66, 'test': 740.71} |
NusaParagraphTopicClassification |
['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
Classification |
s2s |
[Non-fiction, Fiction, Written] |
{'train': 15516, 'validation': 2948, 'test': 6250} |
{'train': 740.24, 'validation': 740.66, 'test': 740.71} |
NusaTranslationBitextMining (Cahyawijaya et al., 2023) |
['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] |
BitextMining |
s2s |
[Social, Written] |
{'train': 50200} |
{'train': {'average_sentence1_length': 145.4552390438247, 'average_sentence2_length': 148.56607569721115, 'num_samples': 50200, 'hf_subset_descriptive_stats': {'ind-abs': {'average_sentence1_length': 148.366, 'average_sentence2_length': 147.314, 'num_samples': 1000}, 'ind-btk': {'average_sentence1_length': 145.36666666666667, 'average_sentence2_length': 146.74045454545455, 'num_samples': 6600}, 'ind-bew': {'average_sentence1_length': 145.4280303030303, 'average_sentence2_length': 148.40530303030303, 'num_samples': 6600}, 'ind-bhp': {'average_sentence1_length': 133.528, 'average_sentence2_length': 128.138, 'num_samples': 1000}, 'ind-jav': {'average_sentence1_length': 145.42772727272728, 'average_sentence2_length': 145.8089393939394, 'num_samples': 6600}, 'ind-mad': {'average_sentence1_length': 145.35545454545453, 'average_sentence2_length': 153.6228787878788, 'num_samples': 6600}, 'ind-mak': {'average_sentence1_length': 145.42772727272728, 'average_sentence2_length': 150.6128787878788, 'num_samples': 6600}, 'ind-min': {'average_sentence1_length': 145.42772727272728, 'average_sentence2_length': 148.0621212121212, 'num_samples': 6600}, 'ind-mui': {'average_sentence1_length': 150.454, 'average_sentence2_length': 150.994, 'num_samples': 1000}, 'ind-rej': {'average_sentence1_length': 151.622, 'average_sentence2_length': 139.583, 'num_samples': 1000}, 'ind-sun': {'average_sentence1_length': 145.42772727272728, 'average_sentence2_length': 150.9880303030303, 'num_samples': 6600}}}} |
NusaX-senti (Winata et al., 2022) |
['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] |
Classification |
s2s |
[Reviews, Web, Social, Constructed, Written] |
{'test': 4800} |
{'test': 52.4} |
NusaXBitextMining (Winata et al., 2023) |
['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] |
BitextMining |
s2s |
[Reviews, Written] |
{'train': 5500} |
{'train': 157.15} |
OPP115DataRetentionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 88} |
{'test': 195.2} |
OPP115DataSecurityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1334} |
{'test': 246.69} |
OPP115DoNotTrackLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 110} |
{'test': 223.16} |
OPP115FirstPartyCollectionUseLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2086} |
{'test': 204.25} |
OPP115InternationalAndSpecificAudiencesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 980} |
{'test': 327.71} |
OPP115PolicyChangeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 431} |
{'test': 200.99} |
OPP115ThirdPartySharingCollectionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1590} |
{'test': 223.64} |
OPP115UserAccessEditAndDeletionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 462} |
{'test': 218.59} |
OPP115UserChoiceControlLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 1546} |
{'test': 210.62} |
Ocnli (Hai Hu, 2020) |
['cmn'] |
PairClassification |
s2s |
|
None |
None |
OdiaNewsClassification (Anoop Kunchukuttan, 2020) |
['ory'] |
Classification |
s2s |
[News, Written] |
{'test': 2048} |
{'test': 49.24} |
OnlineShopping (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
OnlineStoreReviewSentimentClassification |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 2048} |
{'train': 137.2} |
OpusparcusPC (Mathias Creutz, 2018) |
['deu', 'eng', 'fin', 'fra', 'rus', 'swe'] |
PairClassification |
s2s |
[Spoken, Spoken] |
{'validation': 10168, 'test': 10210} |
{'validation': 24.4, 'test': 23.8} |
OralArgumentQuestionPurposeLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 312} |
{'test': 269.71} |
OverrulingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 167.2} |
PAC (Łukasz Augustyniak, 2022) |
['pol'] |
Classification |
p2p |
[Legal, Written] |
{'test': 3453} |
{'test': 185.3} |
PAWSX (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
PIQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 1838} |
{'test': {'average_document_length': 99.89012998705756, 'average_query_length': 36.08052230685528, 'num_documents': 35542, 'num_queries': 1838, 'average_relevant_docs_per_query': 1.0}} |
PROALegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 95} |
{'test': 251.73} |
PSC |
['pol'] |
PairClassification |
s2s |
[News, Written] |
None |
None |
PatentClassification |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 5000} |
{'test': 18620.44} |
PawsXPairClassification (Yinfei Yang, 2019) |
['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] |
PairClassification |
s2s |
[Web, Encyclopaedic, Written] |
{'validation': 14000, 'test': 14000} |
{'test': {'num_samples': 14000, 'avg_sentence1_len': 91.17892857142857, 'avg_sentence2_len': 91.10121428571429, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'avg_sentence1_len': 119.7815, 'avg_sentence2_len': 119.2355, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'avg_sentence1_len': 113.7575, 'avg_sentence2_len': 113.4235, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'avg_sentence1_len': 117.815, 'avg_sentence2_len': 117.798, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'avg_sentence1_len': 120.028, 'avg_sentence2_len': 119.9885, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 'avg_sentence1_len': 58.678, 'avg_sentence2_len': 58.875, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'avg_sentence1_len': 64.9605, 'avg_sentence2_len': 65.114, 'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'avg_sentence1_len': 43.232, 'avg_sentence2_len': 43.274, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'avg_sentence1_len': 90.12585714285714, 'avg_sentence2_len': 90.2045, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'avg_sentence1_len': 116.82, 'avg_sentence2_len': 117.0015, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'avg_sentence1_len': 113.1075, 'avg_sentence2_len': 112.858, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'avg_sentence1_len': 116.3285, 'avg_sentence2_len': 
116.7275, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'avg_sentence1_len': 119.5045, 'avg_sentence2_len': 119.7505, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'avg_sentence1_len': 57.5105, 'avg_sentence2_len': 57.317, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'avg_sentence1_len': 65.162, 'avg_sentence2_len': 65.5155, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'avg_sentence1_len': 42.448, 'avg_sentence2_len': 42.2615, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': {'count': 1147}}}}}} |
PersianFoodSentimentClassification (Mehrdad Farahani et al., 2020) |
['fas'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 2048, 'test': 2048} |
{'validation': 90.37, 'test': 90.58} |
PersonalJurisdictionLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 50} |
{'test': 381.14} |
PhincBitextMining (Srivastava et al., 2020) |
['eng', 'hin'] |
BitextMining |
s2s |
[Social, Written] |
{'train': 13738} |
{'train': 75.32} |
PlscClusteringP2P.v2 |
['pol'] |
Clustering |
s2s |
[Academic, Written] |
{'test': 2048} |
{'test': 1023.21} |
PlscClusteringS2S.v2 |
['pol'] |
Clustering |
s2s |
[Academic, Written] |
{'test': 2048} |
{'test': 84.34} |
PoemSentimentClassification (Emily Sheng, 2020) |
['eng'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 105, 'test': 104} |
{'validation': 45.3, 'test': 42.4} |
PolEmo2.0-IN |
['pol'] |
Classification |
s2s |
[Written, Social] |
None |
None |
PolEmo2.0-OUT |
['pol'] |
Classification |
s2s |
[Written, Social] |
{'test': 722} |
{'test': 756.2} |
PpcPC (Sławomir Dadas, 2022) |
['pol'] |
PairClassification |
s2s |
[Fiction, Non-fiction, Web, Written, Spoken, Social, News] |
None |
None |
PublicHealthQA |
['ara', 'eng', 'fra', 'kor', 'rus', 'spa', 'vie', 'zho'] |
Retrieval |
s2p |
[Medical, Government, Web, Written] |
{'test': 888} |
{'test': {'arabic': {'average_document_length': 836.8850574712644, 'average_query_length': 79.84883720930233, 'num_documents': 87, 'num_queries': 87, 'average_relevant_docs_per_query': 1.0}, 'chinese': {'average_document_length': 239.58282208588957, 'average_query_length': 24.828220858895705, 'num_documents': 163, 'num_queries': 163, 'average_relevant_docs_per_query': 1.0}, 'english': {'average_document_length': 799.3430232558139, 'average_query_length': 71.78488372093024, 'num_documents': 172, 'num_queries': 172, 'average_relevant_docs_per_query': 1.0}, 'french': {'average_document_length': 1021.6823529411764, 'average_query_length': 101.88235294117646, 'num_documents': 85, 'num_queries': 85, 'average_relevant_docs_per_query': 1.0}, 'korean': {'average_document_length': 339.0, 'average_query_length': 36.90909090909091, 'num_documents': 77, 'num_queries': 77, 'average_relevant_docs_per_query': 1.0}, 'russian': {'average_document_length': 985.1076923076923, 'average_query_length': 85.2, 'num_documents': 65, 'num_queries': 65, 'average_relevant_docs_per_query': 1.0}, 'spanish': {'average_document_length': 941.1666666666666, 'average_query_length': 84.67901234567901, 'num_documents': 162, 'num_queries': 162, 'average_relevant_docs_per_query': 1.0}, 'vietnamese': {'average_document_length': 704.5454545454545, 'average_query_length': 71.83116883116882, 'num_documents': 77, 'num_queries': 77, 'average_relevant_docs_per_query': 1.0}}} |
PunjabiNewsClassification (Anoop Kunchukuttan, 2020) |
['pan'] |
Classification |
s2s |
[News, Written] |
{'train': 627, 'test': 157} |
{'train': 4222.22, 'test': 4115.14} |
QBQTC |
['cmn'] |
STS |
s2s |
|
None |
None |
Quail (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 2720} |
{'test': {'average_document_length': 27.50788422240522, 'average_query_length': 1957.3632352941177, 'num_documents': 32787, 'num_queries': 2720, 'average_relevant_docs_per_query': 1.0}} |
Quora-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2s |
|
None |
{'validation': {'average_document_length': 65.82473022253414, 'average_query_length': 54.6006, 'num_documents': 522931, 'num_queries': 5000, 'average_relevant_docs_per_query': 1.5252}, 'test': {'average_document_length': 65.82473022253414, 'average_query_length': 54.5354, 'num_documents': 522931, 'num_queries': 10000, 'average_relevant_docs_per_query': 1.5675}} |
QuoraRetrieval (DataCanary et al., 2017) |
['eng'] |
Retrieval |
s2s |
|
None |
{'dev': {'average_document_length': 62.158154708747425, 'average_query_length': 51.5342, 'num_documents': 522931, 'num_queries': 5000, 'average_relevant_docs_per_query': 1.5252}, 'test': {'average_document_length': 62.158154708747425, 'average_query_length': 51.5396, 'num_documents': 522931, 'num_queries': 10000, 'average_relevant_docs_per_query': 1.5675}} |
RARbCode (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Programming, Written] |
{'test': 1484} |
{'test': {'average_document_length': 793.6813076734267, 'average_query_length': 375.7506738544474, 'num_documents': 301482, 'num_queries': 1484, 'average_relevant_docs_per_query': 1.0}} |
RARbMath (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 6319} |
{'test': {'average_document_length': 504.0197829347469, 'average_query_length': 210.30732710871973, 'num_documents': 389376, 'num_queries': 6319, 'average_relevant_docs_per_query': 1.0}} |
RTE3 |
['deu', 'eng', 'fra', 'ita'] |
PairClassification |
s2s |
[News, Web, Encyclopaedic, Written] |
{'test': 1923} |
{'test': 124.79} |
RUParaPhraserSTS (Pivovarova et al., 2017) |
['rus'] |
STS |
s2s |
[News, Written] |
{'test': 1924} |
{'test': 61.25} |
RedditClustering.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
s2s |
[Web, Social, Written] |
{'test': 32768} |
{'test': 64.7} |
RedditClusteringP2P.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
p2p |
[Web, Social, Written] |
{'test': 18375} |
{'test': 727.7} |
RestaurantReviewSentimentClassification (ElSahar et al., 2015) |
['ara'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 2048} |
{'train': 231.4} |
RiaNewsRetrieval (Gavrilov et al., 2019) |
['rus'] |
Retrieval |
s2p |
[News, Written] |
{'test': 10000} |
{'test': {'average_document_length': 1165.6429557148213, 'average_query_length': 62.4029, 'num_documents': 704344, 'num_queries': 10000, 'average_relevant_docs_per_query': 1.0}} |
Robust04InstructionRetrieval (Orion Weller, 2024) |
['eng'] |
InstructionRetrieval |
s2p |
[News, Written] |
{'eng': 95088} |
{'eng': 2471.0398058252426} |
RomaTalesBitextMining |
['hun', 'rom'] |
BitextMining |
s2s |
[Fiction, Written] |
{'test': 215} |
{'test': 316.8046511627907} |
RomaniBibleClustering |
['rom'] |
Clustering |
p2p |
[Religious, Written] |
{'test': 2048} |
{'test': 132.2} |
RomanianReviewsSentiment (Anca Maria Tache, 2021) |
['ron'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 588.6} |
RomanianSentimentClassification (Dumitrescu et al., 2020) |
['ron'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 67.6} |
RonSTS (Dumitrescu et al., 2021) |
['ron'] |
STS |
s2s |
[News, Social, Web, Written] |
{'test': 1379} |
{'test': 60.5} |
RuBQReranking (Ivan Rybin, 2021) |
['rus'] |
Reranking |
s2p |
[Encyclopaedic, Written] |
{'test': 1551} |
{'test': 499.9} |
RuBQRetrieval (Ivan Rybin, 2021) |
['rus'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'test': 2845} |
{'test': {'average_document_length': 448.94659134903037, 'average_query_length': 45.29609929078014, 'num_documents': 56826, 'num_queries': 1692, 'average_relevant_docs_per_query': 1.6814420803782506}} |
RuReviewsClassification (Sergey Smetanin, 2019) |
['rus'] |
Classification |
p2p |
[Reviews, Written] |
{'test': 2048} |
{'test': 133.2} |
RuSTSBenchmarkSTS (Philip May, 2021) |
['rus'] |
STS |
s2s |
[News, Social, Web, Written] |
{'test': 1264} |
{'test': 54.2} |
RuSciBenchGRNTIClassification |
['rus'] |
Classification |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': 890.1} |
RuSciBenchGRNTIClusteringP2P |
['rus'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': {'num_samples': 2048, 'average_text_length': 889.81396484375, 'average_labels_per_text': 1.0, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}} |
RuSciBenchOECDClassification |
['rus'] |
Classification |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': 838.9} |
RuSciBenchOECDClusteringP2P |
['rus'] |
Clustering |
p2p |
[Academic, Written] |
{'test': 2048} |
{'test': 838.9} |
SCDBPAccountabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3520} |
SCDBPAuditsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3507} |
SCDBPCertificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 378} |
{'test': 3507} |
SCDBPTrainingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3506} |
SCDBPVerificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3498} |
SCDDAccountabilityLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 378} |
{'test': 3522} |
SCDDAuditsLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3506} |
SCDDCertificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 378} |
{'test': 3518} |
SCDDTrainingLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3499} |
SCDDVerificationLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 379} |
{'test': 3503} |
SCIDOCS (Arman Cohan, 2020) |
['eng'] |
Retrieval |
s2p |
[Academic, Written, Non-fiction] |
None |
{'test': {'average_document_length': 1203.3659819932182, 'average_query_length': 71.632, 'num_documents': 25657, 'num_queries': 1000, 'average_relevant_docs_per_query': 4.928}} |
SCIDOCS-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1270.0791986592353, 'average_query_length': 80.671, 'num_documents': 25657, 'num_queries': 1000, 'average_relevant_docs_per_query': 4.928}} |
SIB200Classification (Adelani et al., 2023) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
Classification |
s2s |
[News, Written] |
{'train': 701, 'validation': 99, 'test': 204} |
{'train': 111.24, 'validation': 97.11, 'test': 135.53} |
SIB200ClusteringS2S (Adelani et al., 2023) |
['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] |
Clustering |
s2s |
[News, Written] |
{'test': 1004} |
{'test': 114.78} |
SICK-BR-PC |
['por'] |
PairClassification |
s2s |
[Web, Written] |
{'test': 1000} |
{'test': 54.89} |
SICK-BR-STS |
['por'] |
STS |
s2s |
[Web, Written] |
{'test': 1000} |
{'test': 54.89} |
SICK-E-PL |
['pol'] |
PairClassification |
s2s |
|
None |
None |
SICK-R |
['eng'] |
STS |
s2s |
|
None |
None |
SICK-R-PL |
['pol'] |
STS |
s2s |
[Web, Written] |
{'test': 9812} |
{'test': 42.8} |
SICKFr |
['fra'] |
STS |
s2s |
|
None |
None |
SIQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 0} |
{'test': {'average_document_length': 22.967085695044617, 'average_query_length': 127.75383828045035, 'num_documents': 71276, 'num_queries': 1954, 'average_relevant_docs_per_query': 1.0}} |
SNLHierarchicalClusteringP2P (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[Encyclopaedic, Non-fiction, Written] |
{'test': 1300} |
{'test': 1986.9453846153847} |
SNLHierarchicalClusteringS2S (Navjord et al., 2023) |
['nob'] |
Clustering |
s2s |
[Encyclopaedic, Non-fiction, Written] |
{'test': 1300} |
{'test': 242.22384615384615} |
SNLRetrieval (Navjord et al., 2023) |
['nob'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Written] |
{'test': 2048} |
{'test': {'average_document_length': 1986.9453846153847, 'average_query_length': 14.906153846153845, 'num_documents': 1300, 'num_queries': 1300, 'average_relevant_docs_per_query': 1.0}} |
SRNCorpusBitextMining (Zwennicker et al., 2022) |
['nld', 'srn'] |
BitextMining |
s2s |
[Social, Web, Written] |
{'test': 256} |
{'test': 55} |
STS12 (Agirre et al., 2012) |
['eng'] |
STS |
s2s |
[Encyclopaedic, News, Written] |
{'test': 6216} |
{'test': {'num_samples': 3108, 'average_sentence1_len': 63.78893178893179, 'average_sentence2_len': 65.5926640926641, 'avg_score': 3.5060643500643507}} |
STS13 (Eneko Agirre, 2013) |
['eng'] |
STS |
s2s |
[Web, News, Non-fiction, Written] |
{'test': 3000} |
{'test': 54.0} |
STS14 |
['eng'] |
STS |
s2s |
[Blog, Web, Spoken] |
{'test': 7500} |
{'test': 54.3} |
STS15 |
['eng'] |
STS |
s2s |
[Blog, News, Web, Written, Spoken] |
{'test': 6000} |
{'test': 57.7} |
STS16 |
['eng'] |
STS |
s2s |
[Blog, Web, Spoken] |
{'test': 2372} |
{'test': 65.3} |
STS17 |
['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] |
STS |
s2s |
[News, Web, Written] |
{'test': 500} |
{'test': {'num_samples': 5346, 'average_sentence1_len': 38.14665170220726, 'average_sentence2_len': 36.72502805836139, 'avg_score': 2.3554804214989464, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'average_sentence1_len': 31.991918482080113, 'average_sentence2_len': 32.44483485593816, 'avg_score': 2.469359920356055}, 'ar-ar': {'num_samples': 250, 'average_sentence1_len': 32.208, 'average_sentence2_len': 32.78, 'avg_score': 2.216800000000001}, 'en-ar': {'num_samples': 250, 'average_sentence1_len': 42.36, 'average_sentence2_len': 32.696, 'avg_score': 2.1423999999999994}, 'en-de': {'num_samples': 250, 'average_sentence1_len': 43.952, 'average_sentence2_len': 44.756, 'avg_score': 2.2776000000000014}, 'en-en': {'num_samples': 250, 'average_sentence1_len': 43.952, 'average_sentence2_len': 42.724, 'avg_score': 2.2776000000000014}, 'en-tr': {'num_samples': 250, 'average_sentence1_len': 41.916, 'average_sentence2_len': 41.6, 'avg_score': 2.1335999999999986}, 'es-en': {'num_samples': 250, 'average_sentence1_len': 50.84, 'average_sentence2_len': 42.024, 'avg_score': 2.1464000000000003}, 'es-es': {'num_samples': 250, 'average_sentence1_len': 49.836, 'average_sentence2_len': 51.224, 'avg_score': 2.2312000000000007}, 'fr-en': {'num_samples': 250, 'average_sentence1_len': 49.624, 'average_sentence2_len': 42.724, 'avg_score': 2.2776000000000014}, 'it-en': {'num_samples': 250, 'average_sentence1_len': 50.028, 'average_sentence2_len': 42.724, 'avg_score': 2.2776000000000014}, 'nl-en': {'num_samples': 250, 'average_sentence1_len': 46.816, 'average_sentence2_len': 42.724, 'avg_score': 2.2776000000000014}}}} |
STS22.v2 |
['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'pol', 'rus', 'spa', 'tur'] |
STS |
p2p |
[News, Written] |
{'test': 3958} |
{'test': 1993.6} |
STSB (Shitao Xiao, 2024) |
['cmn'] |
STS |
s2s |
|
None |
None |
STSBenchmark (Philip May, 2021) |
['eng'] |
STS |
s2s |
|
None |
None |
STSBenchmarkMultilingualSTS (Philip May, 2021) |
['cmn', 'deu', 'eng', 'fra', 'ita', 'nld', 'pol', 'por', 'rus', 'spa'] |
STS |
s2s |
[News, Social, Web, Spoken, Written] |
{'dev': 30000, 'test': 27580} |
{'dev': 66.5, 'test': 56.1} |
STSES (Agirre et al., 2015) |
['spa'] |
STS |
s2s |
[Written] |
None |
None |
SadeemQuestionRetrieval |
['ara'] |
Retrieval |
s2p |
[Written, Written] |
{'test': 22979} |
{'test': 500.0} |
SanskritShlokasClassification |
['san'] |
Classification |
s2s |
[Religious, Written] |
{'train': 383, 'validation': 96} |
{'train': 98.415, 'validation': 96.635} |
ScalaClassification |
['dan', 'nno', 'nob', 'swe'] |
Classification |
s2s |
[Fiction, News, Non-fiction, Blog, Spoken, Web, Written] |
{'test': 4096} |
{'test': 102.72} |
SciDocsRR |
['eng'] |
Reranking |
s2s |
[Academic, Non-fiction, Written] |
{'test': 19599} |
{'test': 69.0} |
SciFact (Arman Cohan, 2020) |
['eng'] |
Retrieval |
s2p |
|
None |
{'train': {'average_document_length': 1498.4152035500674, 'average_query_length': 88.58838071693448, 'num_documents': 5183, 'num_queries': 809, 'average_relevant_docs_per_query': 1.1359703337453646}, 'test': {'average_document_length': 1498.4152035500674, 'average_query_length': 90.34666666666666, 'num_documents': 5183, 'num_queries': 300, 'average_relevant_docs_per_query': 1.13}} |
SciFact-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1553.5178468068686, 'average_query_length': 95.44, 'num_documents': 5183, 'num_queries': 300, 'average_relevant_docs_per_query': 1.13}} |
SemRel24STS (Nedjma Ousidhoum, 2024) |
['afr', 'amh', 'arb', 'arq', 'ary', 'eng', 'hau', 'hin', 'ind', 'kin', 'mar', 'tel'] |
STS |
s2s |
[Spoken, Written] |
{'dev': 2089, 'test': 7498} |
{'dev': 163.1, 'test': 145.9} |
SensitiveTopicsClassification |
['rus'] |
MultilabelClassification |
s2s |
[Web, Social, Written] |
{'test': 2048} |
{'test': 95.3} |
SentimentAnalysisHindi (Shantipriya Parida, 2023) |
['hin'] |
Classification |
s2s |
[Reviews, Written] |
{'train': 2497} |
{'train': 81.29} |
SinhalaNewsClassification (Nisansa de Silva, 2015) |
['sin'] |
Classification |
s2s |
[News, Written] |
{'train': 3327} |
{'train': 148.04} |
SinhalaNewsSourceClassification (Dhananjaya et al., 2022) |
['sin'] |
Classification |
s2s |
[News, Written] |
{'train': 24094} |
{'train': 56.08} |
SiswatiNewsClassification (Madodonga et al., 2023) |
['ssw'] |
Classification |
s2s |
[News, Written] |
{'train': 80} |
{'train': 354.2} |
SlovakMovieReviewSentimentClassification (Štefánik et al., 2023) |
['svk'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 366.17} |
SlovakSumRetrieval |
['slk'] |
Retrieval |
s2s |
[News, Social, Web, Written] |
{'test': 600} |
{'test': {'average_document_length': 2156.445, 'average_query_length': 143.59833333333333, 'num_documents': 600, 'num_queries': 600, 'average_relevant_docs_per_query': 1.0}} |
SouthAfricanLangClassification (ExploreAI Academy et al., 2022) |
['afr', 'eng', 'nbl', 'nso', 'sot', 'ssw', 'tsn', 'tso', 'ven', 'xho', 'zul'] |
Classification |
s2s |
[Web, Non-fiction, Written] |
{'test': 2048} |
{'test': 247.49} |
SpanishNewsClassification |
['spa'] |
Classification |
s2s |
[News, Written] |
{'train': 2048} |
{'train': 4218.2} |
SpanishNewsClusteringP2P |
['spa'] |
Clustering |
p2p |
|
None |
None |
SpanishPassageRetrievalS2P |
['spa'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 2635.217893792966, 'average_query_length': 67.55688622754491, 'num_documents': 10037, 'num_queries': 167, 'average_relevant_docs_per_query': 6.053892215568863}} |
SpanishPassageRetrievalS2S |
['spa'] |
Retrieval |
s2s |
|
None |
{'test': {'average_document_length': 434.5924528301887, 'average_query_length': 67.55688622754491, 'num_documents': 265, 'num_queries': 167, 'average_relevant_docs_per_query': 7.718562874251497}} |
SpanishSentimentClassification |
['spa'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 147, 'test': 296} |
{'validation': 85.02, 'test': 87.91} |
SpartQA (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 0} |
{'test': {'average_document_length': 50.40829145728643, 'average_query_length': 656.2328881469115, 'num_documents': 1592, 'num_queries': 3594, 'average_relevant_docs_per_query': 1.8786867000556482}} |
SprintDuplicateQuestions |
['eng'] |
PairClassification |
s2s |
[Programming, Written] |
{'validation': 101000, 'test': 101000} |
{'validation': 65.2, 'test': 67.9} |
StackExchangeClustering.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
s2s |
[Web, Written] |
{'test': 32768} |
{'test': 57.0} |
StackExchangeClusteringP2P.v2 (Gregor Geigle, 2021) |
['eng'] |
Clustering |
p2p |
[Web, Written] |
{'test': 2996} |
{'test': 1090.7} |
StackOverflowDupQuestions (Xueqing Liu, 2018) |
['eng'] |
Reranking |
s2s |
|
{'test': 3467} |
{'test': 49.8} |
StackOverflowQA (Xiangyang Li, 2024) |
['eng'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'average_document_length': 1202.4815613867845, 'average_query_length': 1302.6263791374122, 'num_documents': 19931, 'num_queries': 1994, 'average_relevant_docs_per_query': 1.0}} |
StatcanDialogueDatasetRetrieval |
['eng', 'fra'] |
Retrieval |
s2p |
[Government, Web, Written] |
{'dev': 1000, 'test': 1011, 'corpus': 5907} |
{'dev': {'english': {'average_document_length': 6535.865413915693, 'average_query_length': 6.869244935543278, 'num_documents': 5907, 'num_queries': 543, 'average_relevant_docs_per_query': 1.4714548802946592}, 'french': {'average_document_length': 7078.072794988996, 'average_query_length': 6.860655737704918, 'num_documents': 5907, 'num_queries': 122, 'average_relevant_docs_per_query': 1.6475409836065573}}, 'test': {'english': {'average_document_length': 6535.865413915693, 'average_query_length': 7.650994575045208, 'num_documents': 5907, 'num_queries': 553, 'average_relevant_docs_per_query': 1.573236889692586}, 'french': {'average_document_length': 7078.072794988996, 'average_query_length': 5.907407407407407, 'num_documents': 5907, 'num_queries': 108, 'average_relevant_docs_per_query': 1.3055555555555556}}} |
SummEvalFrSummarization.v2 (Fabbri et al., 2020) |
['fra'] |
Summarization |
p2p |
[News, Written] |
{'test': 2800} |
{'test': 407.1} |
SummEvalSummarization.v2 (Fabbri et al., 2020) |
['eng'] |
Summarization |
p2p |
[News, Written] |
{'test': 2800} |
{'test': 359.8} |
SwahiliNewsClassification |
['swa'] |
Classification |
s2s |
[News, Written] |
{'train': 2048} |
{'train': 2438.2308135942326} |
SweFaqRetrieval (Berdičevskis et al., 2023) |
['swe'] |
Retrieval |
s2s |
[Government, Non-fiction, Written] |
{'test': 1024} |
{'test': {'average_document_length': 319.8473581213307, 'average_query_length': 70.51461988304094, 'num_documents': 511, 'num_queries': 513, 'average_relevant_docs_per_query': 1.0}} |
SweRecClassification |
['swe'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 1024} |
{'test': 318.8} |
SwedishSentimentClassification |
['swe'] |
Classification |
s2s |
[Reviews, Written] |
{'validation': 1024, 'test': 1024} |
{'validation': 499.3, 'test': 498.1} |
SwednClusteringP2P (Monsen et al., 2021) |
['swe'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
{'all': 2048} |
{'all': 1619.71} |
SwednClusteringS2S (Monsen et al., 2021) |
['swe'] |
Clustering |
s2s |
[News, Non-fiction, Written] |
{'all': 2048} |
{'all': 1619.71} |
SwednRetrieval (Monsen et al., 2021) |
['swe'] |
Retrieval |
p2p |
[News, Non-fiction, Written] |
{'test': 2048} |
{'test': {'average_document_length': 2896.519550342131, 'average_query_length': 45.876953125, 'num_documents': 2046, 'num_queries': 1024, 'average_relevant_docs_per_query': 2.0}} |
SwissJudgementClassification (Joel Niklaus, 2022) |
['deu', 'fra', 'ita'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 3411.72} |
SyntecReranking (Mathieu Ciancone, 2024) |
['fra'] |
Reranking |
s2p |
[Legal, Written] |
None |
None |
SyntecRetrieval (Mathieu Ciancone, 2024) |
['fra'] |
Retrieval |
s2p |
[Legal, Written] |
{'test': 90} |
{'test': {'average_document_length': 1224.2666666666667, 'average_query_length': 72.82, 'num_documents': 90, 'num_queries': 100, 'average_relevant_docs_per_query': 1.0}} |
SyntheticText2SQL (Meyer et al., 2024) |
['eng', 'sql'] |
Retrieval |
p2p |
[Programming, Written] |
{'test': 1000} |
{'test': {'average_document_length': 127.07126054548375, 'average_query_length': 82.90582806357888, 'num_documents': 105851, 'num_queries': 5851, 'average_relevant_docs_per_query': 1.0}} |
T2Reranking (Xiaohui Xie, 2023) |
['cmn'] |
Reranking |
s2s |
|
None |
None |
T2Retrieval (Xiaohui Xie, 2023) |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 874.1184182791619, 'average_query_length': 10.938847974750132, 'num_documents': 118605, 'num_queries': 22812, 'average_relevant_docs_per_query': 5.213571804313519}} |
TERRa (Shavrina et al., 2020) |
['rus'] |
PairClassification |
s2s |
[News, Web, Written] |
{'dev': 307} |
{'dev': 138.2} |
TNews |
['cmn'] |
Classification |
s2s |
|
None |
None |
TRECCOVID (Kirk Roberts, 2021) |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1116.7434221277986, 'average_query_length': 69.24, 'num_documents': 171332, 'num_queries': 50, 'average_relevant_docs_per_query': 493.5}} |
TRECCOVID-PL (Konrad Wojtasik, 2024) |
['pol'] |
Retrieval |
s2p |
[Academic, Non-fiction, Written] |
None |
{'test': {'average_document_length': 1159.8020276422385, 'average_query_length': 69.42, 'num_documents': 171332, 'num_queries': 50, 'average_relevant_docs_per_query': 493.5}} |
TV2Nordretrieval |
['dan'] |
Retrieval |
p2p |
[News, Non-fiction, Written] |
{'test': 4096} |
{'test': {'average_document_length': 1440.66552734375, 'average_query_length': 126.552734375, 'num_documents': 2048, 'num_queries': 2048, 'average_relevant_docs_per_query': 1.0}} |
TamilNewsClassification (Anoop Kunchukuttan, 2020) |
['tam'] |
Classification |
s2s |
[News, Written] |
{'train': 14521, 'test': 3631} |
{'train': 56.5, 'test': 56.52} |
Tatoeba (Tatoeba community, 2021) |
['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] |
BitextMining |
s2s |
[Written] |
{'test': 2000} |
{'test': 39.4} |
TbilisiCityHallBitextMining |
['eng', 'kat'] |
BitextMining |
s2s |
[News, Written] |
{'test': 1820} |
{'test': 78} |
TelemarketingSalesRuleLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 47} |
{'test': 348.29} |
TeluguAndhraJyotiNewsClassification |
['tel'] |
Classification |
s2s |
[News, Written] |
{'test': 4329} |
{'test': 1428.28} |
TempReasonL1 (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 4000} |
{'test': {'average_document_length': 8.989843250159948, 'average_query_length': 50.22375, 'num_documents': 12504, 'num_queries': 4000, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL2Context (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 0} |
{'test': {'average_document_length': 19.823525685690758, 'average_query_length': 11919.25792106726, 'num_documents': 15787, 'num_queries': 5397, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL2Fact (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 5397} |
{'test': {'average_document_length': 19.823525685690758, 'average_query_length': 830.7268853066519, 'num_documents': 15787, 'num_queries': 5397, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL2Pure (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 5397} |
{'test': {'average_document_length': 19.823525685690758, 'average_query_length': 55.94089308875301, 'num_documents': 15787, 'num_queries': 5397, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL3Context (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 4426} |
{'test': {'average_document_length': 19.80534984678243, 'average_query_length': 13424.633077270673, 'num_documents': 15664, 'num_queries': 4426, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL3Fact (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 4426} |
{'test': {'average_document_length': 19.80534984678243, 'average_query_length': 896.0754631721645, 'num_documents': 15664, 'num_queries': 4426, 'average_relevant_docs_per_query': 1.0}} |
TempReasonL3Pure (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 4426} |
{'test': {'average_document_length': 19.80534984678243, 'average_query_length': 74.44012652507908, 'num_documents': 15664, 'num_queries': 4426, 'average_relevant_docs_per_query': 1.0}} |
TenKGnadClassification |
['deu'] |
Classification |
p2p |
[News, Written] |
{'test': 1028} |
{'test': 2627.31} |
TenKGnadClusteringP2P.v2 |
['deu'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
{'test': 10275} |
{'test': 2641.03} |
TenKGnadClusteringS2S.v2 |
['deu'] |
Clustering |
s2s |
[News, Non-fiction, Written] |
{'test': 10267} |
{'test': 50.96} |
TextualismToolDictionariesLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 107} |
{'test': 943.23} |
TextualismToolPlainLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 165} |
{'test': 997.97} |
ThuNewsClusteringP2P.v2 (Sun et al., 2016) |
['cmn'] |
Clustering |
p2p |
[News, Written] |
{'test': 2048} |
{} |
ThuNewsClusteringS2S.v2 (Sun et al., 2016) |
['cmn'] |
Clustering |
s2s |
[News, Written] |
{'test': 2048} |
{} |
TopiOCQA (Vaibhav Adlakha, 2022) |
['eng'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'dev': 2514} |
{'validation': {'average_document_length': 478.8968086416064, 'average_query_length': 12.579952267303103, 'num_documents': 25700592, 'num_queries': 2514, 'average_relevant_docs_per_query': 1.0}} |
Touche2020 |
['eng'] |
Retrieval |
s2p |
|
None |
{'test': {'average_document_length': 1719.3347658445412, 'average_query_length': 43.42857142857143, 'num_documents': 382545, 'num_queries': 49, 'average_relevant_docs_per_query': 19.020408163265305}} |
ToxicChatClassification (Zi Lin, 2023) |
['eng'] |
Classification |
s2s |
[Constructed, Written] |
{'test': 1427} |
{'test': 189.4} |
ToxicConversationsClassification (cjadams et al., 2019) |
['eng'] |
Classification |
s2s |
[Social, Written] |
{'test': 50000} |
{'test': 296.6} |
TswanaNewsClassification (Vukosi Marivate, 2023) |
['tsn'] |
Classification |
s2s |
[News, Written] |
{'validation': 487, 'test': 487} |
{'validation': 2417.72, 'test': 2369.52} |
TurHistQuadRetrieval (Soygazi et al., 2021) |
['tur'] |
Retrieval |
p2p |
[Encyclopaedic, Non-fiction, Academic, Written] |
{'test': 1330} |
{'test': {'average_document_length': 172.12118713932398, 'average_query_length': 62.5302734375, 'num_documents': 1213, 'num_queries': 1024, 'average_relevant_docs_per_query': 2.0}} |
TurkicClassification |
['bak', 'kaz', 'kir'] |
Classification |
s2s |
[News, Written] |
{'train': 193056} |
{'train': 1103.13} |
TurkishMovieSentimentClassification (Erkin Demirtas, 2013) |
['tur'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2644} |
{'test': 141.5} |
TurkishProductSentimentClassification (Erkin Demirtas, 2013) |
['tur'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 800} |
{'test': 246.85} |
TweetEmotionClassification (Al-Khatib et al., 2018) |
['ara'] |
Classification |
s2s |
[Social, Written] |
{'train': 2048} |
{'train': 78.8} |
TweetSarcasmClassification |
['ara'] |
Classification |
s2s |
[Social, Written] |
{'test': 2110} |
{'test': 102.1} |
TweetSentimentClassification |
['ara', 'deu', 'eng', 'fra', 'hin', 'ita', 'por', 'spa'] |
Classification |
s2s |
[Social, Written] |
{'test': 2048} |
{'test': 83.51} |
TweetSentimentExtractionClassification (Maggie et al., 2020) |
['eng'] |
Classification |
s2s |
[Social, Written] |
{'test': 3534} |
{'test': 67.8} |
TweetTopicSingleClassification |
['eng'] |
Classification |
s2s |
[Social, News, Written] |
{'test_2021': 1693} |
{'test_2021': 167.66} |
TwentyNewsgroupsClustering.v2 (Ken Lang, 1995) |
['eng'] |
Clustering |
s2s |
[News, Written] |
{'test': 2381} |
{'test': 32.0} |
TwitterHjerneRetrieval (Holm et al., 2024) |
['dan'] |
Retrieval |
p2p |
[Social, Written] |
{'train': 340} |
{'train': {'average_document_length': 128.85114503816794, 'average_query_length': 166.3846153846154, 'num_documents': 262, 'num_queries': 78, 'average_relevant_docs_per_query': 3.358974358974359}} |
TwitterSemEval2015 |
['eng'] |
PairClassification |
s2s |
|
{'test': 16777} |
{'test': 38.3} |
TwitterURLCorpus |
['eng'] |
PairClassification |
s2s |
|
{'test': 51534} |
{'test': {'num_samples': 51534, 'avg_sentence1_len': 79.48919160166103, 'avg_sentence2_len': 88.5540419916948, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}} |
UCCVCommonLawLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 94} |
{'test': 114.127} |
UkrFormalityClassification |
['ukr'] |
Classification |
s2s |
[News, Written] |
{'train': 2048, 'test': 2048} |
{'train': 52.1, 'test': 53.07} |
UnfairTOSLegalBenchClassification (Neel Guha, 2023) |
['eng'] |
Classification |
s2s |
[Legal, Written] |
{'test': 2048} |
{'test': 184.69} |
UrduRomanSentimentClassification (Sharf, Zareen, 2018) |
['urd'] |
Classification |
s2s |
[Social, Written] |
{'train': 2048} |
{'train': 68.248} |
VGHierarchicalClusteringP2P (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
{'test': 2048} |
{'test': 2670.3243084794544} |
VGHierarchicalClusteringS2S (Navjord et al., 2023) |
['nob'] |
Clustering |
p2p |
[News, Non-fiction, Written] |
{'test': 2048} |
{'test': 139.31247668283325} |
VideoRetrieval |
['cmn'] |
Retrieval |
s2p |
|
None |
{'dev': {'average_document_length': 31.048855642524522, 'average_query_length': 7.365, 'num_documents': 100930, 'num_queries': 1000, 'average_relevant_docs_per_query': 1.0}} |
VieMedEVBitextMining (Nhu Vo, 2024) |
['eng', 'vie'] |
BitextMining |
s2s |
[Medical, Written] |
{'test': 2048} |
{'test': 139.23} |
VieQuADRetrieval |
['vie'] |
Retrieval |
s2p |
[Encyclopaedic, Non-fiction, Written] |
{'validation': 2048} |
{'validation': {'average_document_length': 222.61244979919678, 'average_query_length': 65.51513671875, 'num_documents': 2490, 'num_queries': 2048, 'average_relevant_docs_per_query': 2.0}} |
VieStudentFeedbackClassification (Nguyen et al., 2018) |
['vie'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 2048} |
{'test': 14.22} |
VoyageMMarcoReranking (Benjamin Clavié, 2023) |
['jpn'] |
Reranking |
s2s |
[Academic, Non-fiction, Written] |
{'test': 2048} |
{'test': 162} |
WRIMEClassification |
['jpn'] |
Classification |
s2s |
[Social, Written] |
{'test': 2048} |
{'test': 47.78} |
Waimai (Xiao et al., 2023) |
['cmn'] |
Classification |
s2s |
|
None |
None |
WebLINXCandidatesReranking (Xing Han Lù, 2024) |
['eng'] |
Reranking |
p2p |
[Academic, Web, Written] |
{'validation': 1301, 'test_iid': 1438, 'test_cat': 3560, 'test_web': 3144, 'test_vis': 5298, 'test_geo': 4916} |
{'validation': 1647.52, 'test_iid': 1722.63, 'test_cat': 2149.66, 'test_web': 1831.46, 'test_vis': 1737.26, 'test_geo': 1742.66} |
WikiCitiesClustering |
['eng'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
None |
None |
WikiClusteringP2P.v2 |
['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] |
Clustering |
p2p |
[Encyclopaedic, Written] |
{'test': 2048} |
{'test': {'num_samples': 28672, 'average_text_length': 629.7426409040179, 'average_labels_per_text': 1.0, 'unique_labels': 39, 'labels': {'16': {'count': 541}, '3': {'count': 1607}, '12': {'count': 846}, '0': {'count': 2410}, '15': {'count': 878}, '11': {'count': 864}, '6': {'count': 787}, '9': {'count': 654}, '14': {'count': 966}, '8': {'count': 1389}, '2': {'count': 2428}, '10': {'count': 839}, '1': {'count': 1370}, '4': {'count': 2942}, '7': {'count': 2514}, '5': {'count': 1490}, '13': {'count': 918}, '19': {'count': 315}, '17': {'count': 711}, '20': {'count': 345}, '18': {'count': 800}, '24': {'count': 467}, '25': {'count': 928}, '21': {'count': 62}, '26': {'count': 270}, '22': {'count': 186}, '23': {'count': 36}, '27': {'count': 465}, '28': {'count': 62}, '36': {'count': 139}, '32': {'count': 57}, '38': {'count': 43}, '30': {'count': 52}, '34': {'count': 80}, '33': {'count': 75}, '35': {'count': 62}, '31': {'count': 63}, '37': {'count': 8}, '29': {'count': 3}}, 'hf_subset_descriptive_stats': {'bs': {'num_samples': 2048, 'average_text_length': 1046.25732421875, 'average_labels_per_text': 1.0, 'unique_labels': 17, 'labels': {'16': {'count': 268}, '3': {'count': 89}, '12': {'count': 597}, '0': {'count': 202}, '15': {'count': 113}, '11': {'count': 11}, '6': {'count': 142}, '9': {'count': 181}, '14': {'count': 179}, '8': {'count': 33}, '2': {'count': 172}, '10': {'count': 12}, '1': {'count': 7}, '4': {'count': 25}, '7': {'count': 6}, '5': {'count': 9}, '13': {'count': 2}}}, 'ca': {'num_samples': 2048, 'average_text_length': 600.73291015625, 'average_labels_per_text': 1.0, 'unique_labels': 8, 'labels': {'6': {'count': 257}, '1': {'count': 737}, '2': {'count': 284}, '4': {'count': 394}, '0': {'count': 162}, '7': {'count': 151}, '5': {'count': 55}, '3': {'count': 8}}}, 'cs': {'num_samples': 2048, 'average_text_length': 659.2294921875, 'average_labels_per_text': 1.0, 'unique_labels': 21, 'labels': {'19': {'count': 35}, '5': {'count': 624}, '17': {'count': 126}, '10': 
{'count': 155}, '1': {'count': 231}, '7': {'count': 215}, '11': {'count': 128}, '0': {'count': 57}, '13': {'count': 75}, '2': {'count': 83}, '3': {'count': 38}, '9': {'count': 8}, '6': {'count': 14}, '12': {'count': 9}, '16': {'count': 16}, '20': {'count': 73}, '18': {'count': 38}, '4': {'count': 60}, '15': {'count': 14}, '14': {'count': 38}, '8': {'count': 11}}}, 'da': {'num_samples': 2048, 'average_text_length': 767.58935546875, 'average_labels_per_text': 1.0, 'unique_labels': 20, 'labels': {'14': {'count': 212}, '4': {'count': 74}, '15': {'count': 16}, '8': {'count': 165}, '13': {'count': 115}, '0': {'count': 79}, '1': {'count': 34}, '9': {'count': 114}, '7': {'count': 364}, '10': {'count': 32}, '17': {'count': 66}, '18': {'count': 32}, '12': {'count': 129}, '11': {'count': 159}, '2': {'count': 66}, '3': {'count': 185}, '19': {'count': 103}, '16': {'count': 33}, '5': {'count': 56}, '6': {'count': 14}}}, 'eu': {'num_samples': 2048, 'average_text_length': 405.16015625, 'average_labels_per_text': 1.0, 'unique_labels': 5, 'labels': {'4': {'count': 383}, '0': {'count': 995}, '3': {'count': 282}, '2': {'count': 344}, '1': {'count': 44}}}, 'gv': {'num_samples': 2048, 'average_text_length': 368.01123046875, 'average_labels_per_text': 1.0, 'unique_labels': 28, 'labels': {'6': {'count': 32}, '1': {'count': 83}, '24': {'count': 13}, '17': {'count': 152}, '2': {'count': 534}, '25': {'count': 76}, '5': {'count': 198}, '15': {'count': 100}, '21': {'count': 22}, '26': {'count': 188}, '13': {'count': 230}, '20': {'count': 11}, '3': {'count': 107}, '19': {'count': 88}, '16': {'count': 55}, '22': {'count': 29}, '14': {'count': 12}, '8': {'count': 61}, '0': {'count': 5}, '10': {'count': 4}, '4': {'count': 9}, '23': {'count': 6}, '7': {'count': 3}, '9': {'count': 20}, '18': {'count': 4}, '12': {'count': 3}, '27': {'count': 1}, '11': {'count': 2}}}, 'ilo': {'num_samples': 2048, 'average_text_length': 617.90771484375, 'average_labels_per_text': 1.0, 'unique_labels': 29, 'labels': 
{'3': {'count': 562}, '0': {'count': 373}, '18': {'count': 521}, '8': {'count': 129}, '13': {'count': 123}, '11': {'count': 54}, '25': {'count': 8}, '27': {'count': 5}, '17': {'count': 13}, '15': {'count': 4}, '4': {'count': 28}, '7': {'count': 83}, '10': {'count': 15}, '1': {'count': 11}, '24': {'count': 15}, '14': {'count': 8}, '16': {'count': 4}, '19': {'count': 9}, '23': {'count': 10}, '26': {'count': 4}, '28': {'count': 8}, '12': {'count': 29}, '21': {'count': 12}, '6': {'count': 5}, '20': {'count': 6}, '5': {'count': 4}, '22': {'count': 2}, '9': {'count': 2}, '2': {'count': 1}}}, 'ku': {'num_samples': 2048, 'average_text_length': 421.17333984375, 'average_labels_per_text': 1.0, 'unique_labels': 39, 'labels': {'14': {'count': 14}, '36': {'count': 139}, '20': {'count': 108}, '22': {'count': 27}, '15': {'count': 102}, '32': {'count': 55}, '8': {'count': 431}, '17': {'count': 210}, '38': {'count': 43}, '30': {'count': 51}, '4': {'count': 60}, '2': {'count': 111}, '6': {'count': 95}, '34': {'count': 70}, '27': {'count': 15}, '5': {'count': 174}, '26': {'count': 37}, '0': {'count': 11}, '25': {'count': 50}, '16': {'count': 2}, '12': {'count': 16}, '24': {'count': 2}, '11': {'count': 17}, '21': {'count': 9}, '13': {'count': 20}, '1': {'count': 7}, '33': {'count': 33}, '35': {'count': 28}, '10': {'count': 11}, '31': {'count': 51}, '18': {'count': 4}, '3': {'count': 4}, '28': {'count': 8}, '37': {'count': 8}, '23': {'count': 2}, '19': {'count': 7}, '7': {'count': 6}, '9': {'count': 8}, '29': {'count': 2}}}, 'lv': {'num_samples': 2048, 'average_text_length': 770.67138671875, 'average_labels_per_text': 1.0, 'unique_labels': 16, 'labels': {'15': {'count': 288}, '2': {'count': 110}, '6': {'count': 74}, '12': {'count': 50}, '0': {'count': 171}, '14': {'count': 188}, '10': {'count': 351}, '5': {'count': 142}, '4': {'count': 300}, '13': {'count': 60}, '11': {'count': 48}, '1': {'count': 165}, '8': {'count': 53}, '7': {'count': 5}, '3': {'count': 9}, '9': {'count': 34}}}, 
'min': {'num_samples': 2048, 'average_text_length': 631.74072265625, 'average_labels_per_text': 1.0, 'unique_labels': 15, 'labels': {'7': {'count': 1595}, '9': {'count': 9}, '4': {'count': 48}, '3': {'count': 83}, '2': {'count': 160}, '0': {'count': 19}, '5': {'count': 74}, '6': {'count': 12}, '10': {'count': 12}, '13': {'count': 10}, '8': {'count': 5}, '11': {'count': 13}, '12': {'count': 2}, '1': {'count': 5}, '14': {'count': 1}}}, 'mt': {'num_samples': 2048, 'average_text_length': 821.22265625, 'average_labels_per_text': 1.0, 'unique_labels': 27, 'labels': {'12': {'count': 8}, '10': {'count': 147}, '14': {'count': 180}, '17': {'count': 117}, '25': {'count': 654}, '19': {'count': 35}, '0': {'count': 77}, '3': {'count': 12}, '16': {'count': 44}, '15': {'count': 108}, '24': {'count': 267}, '6': {'count': 43}, '26': {'count': 32}, '4': {'count': 79}, '22': {'count': 67}, '9': {'count': 16}, '8': {'count': 16}, '2': {'count': 55}, '5': {'count': 6}, '11': {'count': 30}, '18': {'count': 12}, '21': {'count': 12}, '20': {'count': 15}, '23': {'count': 7}, '13': {'count': 6}, '7': {'count': 1}, '1': {'count': 2}}}, 'sco': {'num_samples': 2048, 'average_text_length': 1065.21044921875, 'average_labels_per_text': 1.0, 'unique_labels': 23, 'labels': {'18': {'count': 178}, '6': {'count': 92}, '9': {'count': 28}, '15': {'count': 106}, '8': {'count': 432}, '2': {'count': 95}, '11': {'count': 104}, '1': {'count': 42}, '13': {'count': 248}, '16': {'count': 118}, '20': {'count': 130}, '3': {'count': 171}, '22': {'count': 57}, '7': {'count': 83}, '10': {'count': 74}, '5': {'count': 6}, '4': {'count': 17}, '17': {'count': 24}, '14': {'count': 14}, '0': {'count': 7}, '19': {'count': 18}, '21': {'count': 3}, '12': {'count': 1}}}, 'sq': {'num_samples': 2048, 'average_text_length': 425.486328125, 'average_labels_per_text': 1.0, 'unique_labels': 36, 'labels': {'27': {'count': 444}, '9': {'count': 234}, '14': {'count': 120}, '0': {'count': 128}, '15': {'count': 27}, '11': {'count': 298}, 
'24': {'count': 170}, '28': {'count': 46}, '19': {'count': 20}, '25': {'count': 140}, '3': {'count': 47}, '2': {'count': 87}, '35': {'count': 34}, '8': {'count': 53}, '31': {'count': 12}, '17': {'count': 3}, '23': {'count': 11}, '20': {'count': 2}, '33': {'count': 42}, '10': {'count': 26}, '34': {'count': 10}, '7': {'count': 2}, '13': {'count': 29}, '4': {'count': 4}, '6': {'count': 7}, '26': {'count': 9}, '5': {'count': 16}, '30': {'count': 1}, '21': {'count': 4}, '22': {'count': 4}, '18': {'count': 11}, '32': {'count': 2}, '12': {'count': 2}, '16': {'count': 1}, '1': {'count': 1}, '29': {'count': 1}}}, 'wa': {'num_samples': 2048, 'average_text_length': 216.00390625, 'average_labels_per_text': 1.0, 'unique_labels': 6, 'labels': {'5': {'count': 126}, '4': {'count': 1461}, '0': {'count': 124}, '2': {'count': 326}, '3': {'count': 10}, '1': {'count': 1}}}}}} |
WikipediaRerankingMultilingual |
['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] |
Reranking |
s2p |
[Encyclopaedic, Written] |
{'en': 1500, 'de': 1500, 'it': 1500, 'pt': 1500, 'nl': 1500, 'cs': 1500, 'ro': 1500, 'bg': 1500, 'sr': 1500, 'fi': 1500, 'da': 1500, 'fa': 1500, 'hi': 1500, 'bn': 1500, 'no': 1500, 'sv': 1500} |
{'test': {'num_samples': 24000, 'num_positive': 24000, 'num_negative': 24000, 'avg_query_len': 59.091208333333334, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 60.82666666666667, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'bn': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 47.266666666666666, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'cs': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 56.272, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'da': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 56.75066666666667, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'de': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 70.004, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'en': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 68.372, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'fa': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 48.66733333333333, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'fi': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 55.343333333333334, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'hi': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 50.77733333333333, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'it': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 70.05466666666666, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'nl': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 65.34466666666667, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'pt': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 
'avg_query_len': 65.11933333333333, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'ro': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 61.973333333333336, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'sr': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 55.669333333333334, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'no': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 55.288, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}, 'sv': {'num_samples': 1500, 'num_positive': 1500, 'num_negative': 1500, 'avg_query_len': 57.73, 'avg_positive_len': 1.0, 'avg_negative_len': 8.0}}}} |
WikipediaRetrievalMultilingual |
['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] |
Retrieval |
s2p |
[Encyclopaedic, Written] |
{'en': 1500, 'de': 1500, 'it': 1500, 'pt': 1500, 'nl': 1500, 'cs': 1500, 'ro': 1500, 'bg': 1500, 'sr': 1500, 'fi': 1500, 'da': 1500, 'fa': 1500, 'hi': 1500, 'bn': 1500, 'no': 1500, 'sv': 1500} |
{'test': {'bg': {'average_document_length': 374.376, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'bn': {'average_document_length': 394.05044444444445, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'cs': {'average_document_length': 369.9831111111111, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'da': {'average_document_length': 345.2597037037037, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'de': {'average_document_length': 398.4137777777778, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'en': {'average_document_length': 452.9871111111111, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'fa': {'average_document_length': 345.1568888888889, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'fi': {'average_document_length': 379.71237037037037, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'hi': {'average_document_length': 410.72540740740743, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'it': {'average_document_length': 393.73437037037036, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'nl': {'average_document_length': 375.6695555555556, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'pt': {'average_document_length': 398.27237037037037, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 
'average_relevant_docs_per_query': 1.0}, 'ro': {'average_document_length': 348.3817037037037, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'sr': {'average_document_length': 384.3131851851852, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'no': {'average_document_length': 366.93733333333336, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}, 'sv': {'average_document_length': 369.340962962963, 'average_query_length': 1.0, 'num_documents': 13500, 'num_queries': 1500, 'average_relevant_docs_per_query': 1.0}}} |
WinoGrande (Xiao et al., 2024) |
['eng'] |
Retrieval |
s2s |
[Encyclopaedic, Written] |
{'test': 0} |
{'test': {'average_document_length': 7.68243375858685, 'average_query_length': 111.78216258879242, 'num_documents': 5095, 'num_queries': 1267, 'average_relevant_docs_per_query': 1.0}} |
WisesightSentimentClassification |
['tha'] |
Classification |
s2s |
[Social, News, Written] |
{'train': 2048} |
{'train': 103.42} |
XMarket (Bonab et al., 2021) |
['deu', 'eng', 'spa'] |
Retrieval |
s2p |
|
None |
{'test': {'de': {'average_document_length': 187.4061197288943, 'average_query_length': 15.717612088184294, 'num_documents': 70526, 'num_queries': 4037, 'average_relevant_docs_per_query': 54.3522417636859}, 'en': {'average_document_length': 452.792089662076, 'average_query_length': 15.881635344543357, 'num_documents': 218777, 'num_queries': 9099, 'average_relevant_docs_per_query': 85.43719090009891}, 'es': {'average_document_length': 279.67909262759923, 'average_query_length': 19.97062937062937, 'num_documents': 39675, 'num_queries': 3575, 'average_relevant_docs_per_query': 36.01006993006993}}} |
XNLI (Conneau et al., 2018) |
['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] |
PairClassification |
s2s |
[Non-fiction, Fiction, Government, Written] |
{'validation': 2163, 'test': 2460} |
{'test': {'num_samples': 19110, 'avg_sentence1_len': 103.23793825222397, 'avg_sentence2_len': 48.88895866038723, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'avg_sentence1_len': 89.57362637362637, 'avg_sentence2_len': 41.99487179487179, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'avg_sentence1_len': 110.01611721611722, 'avg_sentence2_len': 51.62930402930403, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'avg_sentence1_len': 119.92600732600732, 'avg_sentence2_len': 56.794871794871796, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'avg_sentence1_len': 119.05421245421246, 'avg_sentence2_len': 56.93260073260073, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'avg_sentence1_len': 105.67032967032966, 'avg_sentence2_len': 49.8043956043956, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'avg_sentence1_len': 115.43296703296703, 'avg_sentence2_len': 54.68205128205128, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'avg_sentence1_len': 121.0967032967033, 'avg_sentence2_len': 58.58021978021978, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'avg_sentence1_len': 104.63443223443224, 'avg_sentence2_len': 50.17289377289377, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'avg_sentence1_len': 110.76923076923077, 'avg_sentence2_len': 52.452014652014654, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'avg_sentence1_len': 104.43956043956044, 'avg_sentence2_len': 49.48205128205128, 
'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'avg_sentence1_len': 96.6923076923077, 'avg_sentence2_len': 44.544322344322346, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'avg_sentence1_len': 103.67765567765568, 'avg_sentence2_len': 49.18534798534799, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'avg_sentence1_len': 111.31208791208792, 'avg_sentence2_len': 52.46007326007326, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'avg_sentence1_len': 33.03589743589744, 'avg_sentence2_len': 15.73040293040293, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'avg_sentence1_len': 103.20790162218734, 'avg_sentence2_len': 49.01909994767138, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'avg_sentence1_len': 88.31868131868131, 'avg_sentence2_len': 41.61172161172161, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'avg_sentence1_len': 109.196336996337, 'avg_sentence2_len': 51.967032967032964, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'avg_sentence1_len': 119.81172161172161, 'avg_sentence2_len': 57.36923076923077, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'avg_sentence1_len': 119.87545787545787, 'avg_sentence2_len': 56.88278388278388, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'avg_sentence1_len': 105.71648351648352, 'avg_sentence2_len': 49.87619047619047, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 
'avg_sentence1_len': 115.17289377289377, 'avg_sentence2_len': 55.120879120879124, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'avg_sentence1_len': 121.75897435897436, 'avg_sentence2_len': 59.08864468864469, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'avg_sentence1_len': 105.06446886446886, 'avg_sentence2_len': 50.44395604395604, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'avg_sentence1_len': 109.74725274725274, 'avg_sentence2_len': 52.26886446886447, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'avg_sentence1_len': 104.32234432234432, 'avg_sentence2_len': 49.87692307692308, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'avg_sentence1_len': 97.28498168498169, 'avg_sentence2_len': 43.843223443223444, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'avg_sentence1_len': 102.96630036630036, 'avg_sentence2_len': 49.63809523809524, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'avg_sentence1_len': 112.26373626373626, 'avg_sentence2_len': 52.432967032967035, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'avg_sentence1_len': 33.41098901098901, 'avg_sentence2_len': 15.846886446886447, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}} |
XNLIV2 (Upadhyay et al., 2023) |
['asm', 'ben', 'bho', 'ell', 'guj', 'kan', 'mar', 'ory', 'pan', 'rus', 'san', 'tam', 'tur'] |
PairClassification |
s2s |
[Non-fiction, Fiction, Government, Written] |
{'test': 5010} |
{'test': 80.06} |
XPQARetrieval (Shen et al., 2023) |
['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'pol', 'por', 'spa', 'tam'] |
Retrieval |
s2p |
[Reviews, Written] |
{'test': 19801} |
{'test': {'ara-ara': {'average_document_length': 61.88361204013378, 'average_query_length': 29.688, 'num_documents': 1495, 'num_queries': 750, 'average_relevant_docs_per_query': 2.004}, 'eng-ara': {'average_document_length': 125.26940639269407, 'average_query_length': 29.688, 'num_documents': 1533, 'num_queries': 750, 'average_relevant_docs_per_query': 2.058666666666667}, 'ara-eng': {'average_document_length': 61.88361204013378, 'average_query_length': 39.5188679245283, 'num_documents': 1495, 'num_queries': 742, 'average_relevant_docs_per_query': 2.024258760107817}, 'deu-deu': {'average_document_length': 69.54807692307692, 'average_query_length': 55.51827676240209, 'num_documents': 1248, 'num_queries': 766, 'average_relevant_docs_per_query': 1.6318537859007833}, 'eng-deu': {'average_document_length': 115.77118078719145, 'average_query_length': 55.51827676240209, 'num_documents': 1499, 'num_queries': 766, 'average_relevant_docs_per_query': 1.9634464751958225}, 'deu-eng': {'average_document_length': 69.54807692307692, 'average_query_length': 51.88903394255875, 'num_documents': 1248, 'num_queries': 766, 'average_relevant_docs_per_query': 1.6318537859007833}, 'spa-spa': {'average_document_length': 68.27511591962906, 'average_query_length': 46.711223203026485, 'num_documents': 1941, 'num_queries': 793, 'average_relevant_docs_per_query': 2.4489281210592684}, 'eng-spa': {'average_document_length': 123.43698347107438, 'average_query_length': 46.711223203026485, 'num_documents': 1936, 'num_queries': 793, 'average_relevant_docs_per_query': 2.472887767969735}, 'spa-eng': {'average_document_length': 68.27511591962906, 'average_query_length': 47.21059268600252, 'num_documents': 1941, 'num_queries': 793, 'average_relevant_docs_per_query': 2.4489281210592684}, 'fra-fra': {'average_document_length': 76.99354005167959, 'average_query_length': 56.0520694259012, 'num_documents': 1548, 'num_queries': 749, 'average_relevant_docs_per_query': 2.069425901201602}, 'eng-fra': 
{'average_document_length': 137.31242532855435, 'average_query_length': 56.0520694259012, 'num_documents': 1674, 'num_queries': 749, 'average_relevant_docs_per_query': 2.248331108144192}, 'fra-eng': {'average_document_length': 76.99354005167959, 'average_query_length': 49.58744993324433, 'num_documents': 1548, 'num_queries': 749, 'average_relevant_docs_per_query': 2.069425901201602}, 'hin-hin': {'average_document_length': 47.20783373301359, 'average_query_length': 33.47783783783784, 'num_documents': 1251, 'num_queries': 925, 'average_relevant_docs_per_query': 1.3902702702702703}, 'eng-hin': {'average_document_length': 106.67662682602922, 'average_query_length': 33.47783783783784, 'num_documents': 1506, 'num_queries': 925, 'average_relevant_docs_per_query': 1.8054054054054054}, 'hin-eng': {'average_document_length': 47.20783373301359, 'average_query_length': 34.98574561403509, 'num_documents': 1251, 'num_queries': 912, 'average_relevant_docs_per_query': 1.4100877192982457}, 'ita-ita': {'average_document_length': 59.778301886792455, 'average_query_length': 49.14932126696833, 'num_documents': 1272, 'num_queries': 663, 'average_relevant_docs_per_query': 1.9245852187028658}, 'eng-ita': {'average_document_length': 123.07302075326672, 'average_query_length': 49.14932126696833, 'num_documents': 1301, 'num_queries': 663, 'average_relevant_docs_per_query': 1.9849170437405732}, 'ita-eng': {'average_document_length': 59.778301886792455, 'average_query_length': 49.040723981900456, 'num_documents': 1272, 'num_queries': 663, 'average_relevant_docs_per_query': 1.9245852187028658}, 'jpn-jpn': {'average_document_length': 41.030605871330415, 'average_query_length': 23.296969696969697, 'num_documents': 1601, 'num_queries': 825, 'average_relevant_docs_per_query': 1.9406060606060607}, 'eng-jpn': {'average_document_length': 126.2647564469914, 'average_query_length': 23.296969696969697, 'num_documents': 1745, 'num_queries': 825, 'average_relevant_docs_per_query': 2.1187878787878787}, 
'jpn-eng': {'average_document_length': 41.030605871330415, 'average_query_length': 51.416058394160586, 'num_documents': 1601, 'num_queries': 822, 'average_relevant_docs_per_query': 1.9476885644768855}, 'kor-kor': {'average_document_length': 31.22722159730034, 'average_query_length': 21.81804281345566, 'num_documents': 889, 'num_queries': 654, 'average_relevant_docs_per_query': 1.5642201834862386}, 'eng-kor': {'average_document_length': 112.41231822070145, 'average_query_length': 21.81804281345566, 'num_documents': 1169, 'num_queries': 654, 'average_relevant_docs_per_query': 1.952599388379205}, 'kor-eng': {'average_document_length': 31.22722159730034, 'average_query_length': 43.9527687296417, 'num_documents': 889, 'num_queries': 614, 'average_relevant_docs_per_query': 1.6661237785016287}, 'pol-pol': {'average_document_length': 50.66814439518683, 'average_query_length': 53.72101910828025, 'num_documents': 1579, 'num_queries': 785, 'average_relevant_docs_per_query': 2.080254777070064}, 'eng-pol': {'average_document_length': 112.96919566457501, 'average_query_length': 53.72101910828025, 'num_documents': 1753, 'num_queries': 785, 'average_relevant_docs_per_query': 2.385987261146497}, 'pol-eng': {'average_document_length': 50.66814439518683, 'average_query_length': 54.1994851994852, 'num_documents': 1579, 'num_queries': 777, 'average_relevant_docs_per_query': 2.101673101673102}, 'por-por': {'average_document_length': 75.9845869297164, 'average_query_length': 42.58875, 'num_documents': 1622, 'num_queries': 800, 'average_relevant_docs_per_query': 2.14}, 'eng-por': {'average_document_length': 111.42525930445393, 'average_query_length': 42.58875, 'num_documents': 1639, 'num_queries': 800, 'average_relevant_docs_per_query': 2.21875}, 'por-eng': {'average_document_length': 75.9845869297164, 'average_query_length': 46.57967377666248, 'num_documents': 1622, 'num_queries': 797, 'average_relevant_docs_per_query': 2.148055207026349}, 'tam-tam': {'average_document_length': 
64.89019607843137, 'average_query_length': 33.267263427109974, 'num_documents': 1275, 'num_queries': 782, 'average_relevant_docs_per_query': 1.6994884910485935}, 'eng-tam': {'average_document_length': 96.96361185983828, 'average_query_length': 33.267263427109974, 'num_documents': 1484, 'num_queries': 782, 'average_relevant_docs_per_query': 2.0255754475703327}, 'tam-eng': {'average_document_length': 64.89019607843137, 'average_query_length': 34.777633289986994, 'num_documents': 1275, 'num_queries': 769, 'average_relevant_docs_per_query': 1.728218465539662}, 'cmn-cmn': {'average_document_length': 20.958944281524925, 'average_query_length': 12.21116504854369, 'num_documents': 1705, 'num_queries': 824, 'average_relevant_docs_per_query': 2.0716019417475726}, 'eng-cmn': {'average_document_length': 108.31593874078276, 'average_query_length': 12.21116504854369, 'num_documents': 1763, 'num_queries': 824, 'average_relevant_docs_per_query': 2.2633495145631066}, 'cmn-eng': {'average_document_length': 20.958944281524925, 'average_query_length': 41.24390243902439, 'num_documents': 1705, 'num_queries': 820, 'average_relevant_docs_per_query': 2.0817073170731706}}} |
XQuADRetrieval (Mikel Artetxe, 2019) |
['arb', 'deu', 'ell', 'eng', 'hin', 'ron', 'rus', 'spa', 'tha', 'tur', 'vie', 'zho'] |
Retrieval |
s2p |
[Web, Written] |
{'test': 1190} |
{'validation': {'ar': {'average_document_length': 683.4666666666667, 'average_query_length': 53.327993254637434, 'num_documents': 240, 'num_queries': 1186, 'average_relevant_docs_per_query': 1.0}, 'de': {'average_document_length': 894.0666666666667, 'average_query_length': 69.04318374259103, 'num_documents': 240, 'num_queries': 1181, 'average_relevant_docs_per_query': 1.0}, 'el': {'average_document_length': 894.3791666666667, 'average_query_length': 68.61317567567568, 'num_documents': 240, 'num_queries': 1184, 'average_relevant_docs_per_query': 1.0}, 'en': {'average_document_length': 784.8333333333334, 'average_query_length': 61.25063291139241, 'num_documents': 240, 'num_queries': 1185, 'average_relevant_docs_per_query': 1.0}, 'es': {'average_document_length': 883.8041666666667, 'average_query_length': 68.23817567567568, 'num_documents': 240, 'num_queries': 1184, 'average_relevant_docs_per_query': 1.0}, 'hi': {'average_document_length': 764.9416666666667, 'average_query_length': 59.684699915469146, 'num_documents': 240, 'num_queries': 1183, 'average_relevant_docs_per_query': 1.0}, 'ro': {'average_document_length': 878.4458333333333, 'average_query_length': 67.17229729729729, 'num_documents': 240, 'num_queries': 1184, 'average_relevant_docs_per_query': 1.0}, 'ru': {'average_document_length': 850.1875, 'average_query_length': 64.94261603375527, 'num_documents': 240, 'num_queries': 1185, 'average_relevant_docs_per_query': 1.0}, 'th': {'average_document_length': 736.7583333333333, 'average_query_length': 55.103389830508476, 'num_documents': 240, 'num_queries': 1180, 'average_relevant_docs_per_query': 1.0}, 'tr': {'average_document_length': 788.3, 'average_query_length': 60.876689189189186, 'num_documents': 240, 'num_queries': 1184, 'average_relevant_docs_per_query': 1.0}, 'vi': {'average_document_length': 803.9083333333333, 'average_query_length': 61.62859560067682, 'num_documents': 240, 'num_queries': 1182, 'average_relevant_docs_per_query': 1.0}, 'zh': 
{'average_document_length': 252.4, 'average_query_length': 18.460626587637595, 'num_documents': 240, 'num_queries': 1181, 'average_relevant_docs_per_query': 1.0}}} |
XStance |
['deu', 'fra', 'ita'] |
PairClassification |
s2s |
[Social, Written] |
{'test': 2048} |
{'test': 152.41} |
YahooAnswersTopicsClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Web, Written] |
{'test': 60000} |
{'test': 346.35} |
YelpReviewFullClassification (Zhang et al., 2015) |
['eng'] |
Classification |
s2s |
[Reviews, Written] |
{'test': 50000} |
{} |
YueOpenriceReviewClassification (Xiang et al., 2019) |
['yue'] |
Classification |
s2s |
[Reviews, Spoken] |
{'test': 6161} |
{'test': 173.0} |
indonli |
['ind'] |
PairClassification |
s2s |
[Encyclopaedic, Web, News, Written] |
{'test_expert': 2040} |
{'test_expert': 145.88} |