Skip to content

Commit

Permalink
[add] new datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
Immortalise committed Jan 10, 2024
1 parent dcd3485 commit 7688860
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
<!-- News and Updates -->

## News and Updates
- [05/01/2024] Add support for BigBench Hard, DROP datasets.
- [05/01/2024] Add support for BigBench Hard, DROP, ARC datasets.
- [16/12/2023] Add support for Gemini, Mistral, Mixtral, Baichuan, Yi models.
- [15/12/2023] Add detailed instructions for users to add new modules (models, datasets, etc.) [examples/add_new_modules.md](examples/add_new_modules.md).
- [05/12/2023] Published promptbench 0.0.1.
Expand Down
6 changes: 5 additions & 1 deletion promptbench/dataload/dataload.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"bool_logic", "valid_parentheses",
"gsm8k", "csqa", "bigbench_date", "bigbench_object_tracking",
"last_letter_concat", "numersense", "qasc",
"bbh", "drop",
"bbh", "drop", "arc-easy", "arc-challenge",
]

class DatasetLoader:
Expand Down Expand Up @@ -69,6 +69,10 @@ def load_dataset(dataset_name, task=None, supported_languages=None):
return BBH()
elif dataset_name == 'drop':
return DROP()
elif dataset_name == 'arc-easy':
return ARC('ARC-Easy')
elif dataset_name == 'arc-challenge':
return ARC('ARC-Challenge')
else:
# If the dataset name doesn't match any known datasets, raise an error
raise NotImplementedError(f"Dataset '{dataset_name}' is not supported.")
Expand Down
24 changes: 24 additions & 0 deletions promptbench/dataload/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,5 +625,29 @@ def __init__(self):
data = load_dataset("drop")["validation"]
self.data = []

for d in data:
self.data.append(d)

class ARC(Dataset):
    """
    Dataset wrapper for the AI2 Reasoning Challenge (ARC) benchmark.

    Records are read from the "test" split of the Hugging Face ``ai2_arc``
    dataset. The ``name`` argument is forwarded to ``load_dataset`` as the
    configuration to load (e.g. 'ARC-Easy' or 'ARC-Challenge').

    Reference:
        https://huggingface.co/datasets/ai2_arc
        AI2 Reasoning Challenge (ARC) (https://arxiv.org/abs/1803.05457)

    Example record:
        {
            'id': 'Mercury_7175875',
            'question': 'An astronomer observes that a planet rotates faster after a meteorite impact. Which is the most likely effect of this increase in rotation?',
            'choices': {'text': ['Planetary density will decrease.', 'Planetary years will become longer.', 'Planetary days will become shorter.', 'Planetary gravity will become stronger.'], 'label': ['A', 'B', 'C', 'D']},
            'answerKey': 'C'
        }
    """

    def __init__(self, name):
        # Only the test split is used; every record is kept as-is.
        test_split = load_dataset("ai2_arc", name)["test"]
        self.data = []
        self.data.extend(test_split)

0 comments on commit 7688860

Please sign in to comment.