Skip to content

Commit

Permalink
Add initial prototype for scored-docs access into ir_datasets #1
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed May 17, 2024
1 parent dabd37a commit fa1e98c
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions data/ir_datasets_scored_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env python3
from ir_datasets.util import RequestsDownload, Cache, home_path
from ir_datasets.formats import TrecScoredDocs


def scored_docs(rank_distill_llm_run='__rankzephyr-colbert-10000-sampled-100__msmarco-passage-train-judged.run'):
    """Return a ``TrecScoredDocs`` object for one run file of Zenodo record 11147862.

    Args:
        rank_distill_llm_run: File name of the run inside the Zenodo record.
            It is used both in the download URL and as the local file name.

    Returns:
        A ``TrecScoredDocs`` wrapping a ``Cache`` whose source is the Zenodo
        download and whose target is
        ``home_path() / 'rank-disti-llm' / rank_distill_llm_run``.
    """
    # Directory below the ir_datasets home under which the run file is stored.
    local_dir = home_path() / 'rank-disti-llm'

    run_url = f'https://zenodo.org/records/11147862/files/{rank_distill_llm_run}?download=1'
    remote_run = RequestsDownload(run_url)

    # NOTE(review): presumably Cache downloads once and reuses the local copy
    # afterwards -- confirm against the ir_datasets implementation.
    return TrecScoredDocs(Cache(remote_run, local_dir / rank_distill_llm_run))

if __name__ == '__main__':
    # Script entry point: print every entry of the default run, one per line.
    for scored_doc in scored_docs().scoreddocs_iter():
        print(scored_doc)

0 comments on commit fa1e98c

Please sign in to comment.