Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lint fixes for African MMLU and Winogrande #3256

Merged
merged 1 commit into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 19 additions & 35 deletions src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,40 @@

Available subjects: "clinical_knowledge", "college_medicine", "virology"
Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts" (see lang_map below for the mapping from language code to language name; for an ISO code reference, see https://huggingface.co/languages)
"""
""" # noqa: E501

from helm.benchmark.adaptation.adapter_spec import (
ADAPT_GENERATION,
ADAPT_MULTIPLE_CHOICE_JOINT,
AdapterSpec,
)
from helm.benchmark.adaptation.common_adapter_specs import (
get_generation_adapter_spec,
get_machine_translation_adapter_spec,
get_multiple_choice_adapter_spec,
)
from helm.benchmark.metrics.common_metric_specs import (
get_basic_generation_metric_specs,
get_basic_metric_specs,
get_exact_match_metric_specs,
get_f1_metric_specs,
get_generative_harms_metric_specs,
get_generic_metric_specs,
get_open_ended_generation_metric_specs,
)
from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
from helm.benchmark.run_spec import RunSpec, run_spec_function
from helm.benchmark.runner import get_benchmark_output_path
from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
from helm.benchmark.scenarios.scenario import ScenarioSpec


@run_spec_function("mmlu_clinical_afr")
def get_mmlu_clinical_afr_spec(subject: str, lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT) -> RunSpec:
scenario_spec = ScenarioSpec(
class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario", args={"subject": subject, "lang": lang}
class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario",
args={"subject": subject, "lang": lang},
)

lang_map = {
'af': 'Afrikaans',
'zu': 'Zulu',
'xh': 'Xhosa',
'am': 'Amharic',
'bm': 'Bambara',
'ig': 'Igbo',
'nso': 'Sepedi',
'sn': 'Shona',
'st': 'Sesotho',
'tn': 'Setswana',
'ts': 'Tsonga',
"af": "Afrikaans",
"zu": "Zulu",
"xh": "Xhosa",
"am": "Amharic",
"bm": "Bambara",
"ig": "Igbo",
"nso": "Sepedi",
"sn": "Shona",
"st": "Sesotho",
"tn": "Setswana",
"ts": "Tsonga",
}

adapter_spec = get_multiple_choice_adapter_spec(
method=method,
instructions=f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')} "
f"in {lang_map[lang]}.",
f"in {lang_map[lang]}.",
input_noun="Question",
output_noun="Answer",
)
Expand Down
53 changes: 18 additions & 35 deletions src/helm/benchmark/run_specs/winogrande_afr_run_specs.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,13 @@
"""Run spec functions for Winogrande human-translated into 11 African languages

Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts" (see lang_map below for the mapping from language code to language name; for an ISO code reference, see https://huggingface.co/languages)
"""
""" # noqa: E501

from helm.benchmark.adaptation.adapter_spec import (
ADAPT_GENERATION,
ADAPT_MULTIPLE_CHOICE_JOINT,
AdapterSpec,
)
from helm.benchmark.adaptation.common_adapter_specs import (
get_generation_adapter_spec,
get_machine_translation_adapter_spec,
get_multiple_choice_adapter_spec,
)
from helm.benchmark.metrics.common_metric_specs import (
get_basic_generation_metric_specs,
get_basic_metric_specs,
get_exact_match_metric_specs,
get_f1_metric_specs,
get_generative_harms_metric_specs,
get_generic_metric_specs,
get_open_ended_generation_metric_specs,
)
from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
from helm.benchmark.run_spec import RunSpec, run_spec_function
from helm.benchmark.runner import get_benchmark_output_path
from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
from helm.benchmark.scenarios.scenario import ScenarioSpec


@run_spec_function("winogrande_afr")
Expand All @@ -34,23 +17,23 @@ def get_winogrande_afr_spec(lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT
)

lang_map = {
'af': 'Afrikaans',
'zu': 'Zulu',
'xh': 'Xhosa',
'am': 'Amharic',
'bm': 'Bambara',
'ig': 'Igbo',
'nso': 'Sepedi',
'sn': 'Shona',
'st': 'Sesotho',
'tn': 'Setswana',
'ts': 'Tsonga',
"af": "Afrikaans",
"zu": "Zulu",
"xh": "Xhosa",
"am": "Amharic",
"bm": "Bambara",
"ig": "Igbo",
"nso": "Sepedi",
"sn": "Shona",
"st": "Sesotho",
"tn": "Setswana",
"ts": "Tsonga",
}

adapter_spec = get_multiple_choice_adapter_spec(
method=method,
instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), requiring common sense reasoning "
f"in {lang_map[lang]}.",
instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), "
f"requiring common sense reasoning in {lang_map[lang]}.",
input_noun="Question",
output_noun="Answer",
)
Expand Down
6 changes: 3 additions & 3 deletions src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ def __init__(self, subject: str = "clinical_knowledge", lang: str = "af"):

def download_mmlu_clinical_afr(self, path: str):
ensure_file_downloaded(
source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip", # noqa: E501
target_path=path,
unpack=True,
unpack_type='unzip'
unpack_type="unzip",
)

def process_csv(self, csv_path: str, split: str) -> List[Instance]:
Expand All @@ -53,7 +53,7 @@ def answer_to_reference(answer: str) -> Reference:

def get_instances(self, output_path: str) -> List[Instance]:
# Download the raw data
desired_dir = 'mmlu_cm_ck_vir'
desired_dir = "mmlu_cm_ck_vir"
data_path: str = os.path.join(output_path, desired_dir)
self.download_mmlu_clinical_afr(data_path)

Expand Down
10 changes: 5 additions & 5 deletions src/helm/benchmark/scenarios/winogrande_afr_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ def __init__(self, lang: str = "af"):

def download_winogrande_afr(self, path: str):
ensure_file_downloaded(
source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip", # noqa: E501
target_path=path,
unpack=True,
unpack_type='unzip'
unpack_type="unzip",
)

def process_csv(self, csv_path: str, split: str, pseudo_split: str) -> List[Instance]:
# Match naming in Winogrande
if pseudo_split == 'val':
pseudo_split = 'train_s'
if pseudo_split == "val":
pseudo_split = "train_s"
instances: List[Instance] = []
hlog(f"Reading {csv_path}")
with open(csv_path) as f:
Expand All @@ -57,7 +57,7 @@ def answer_to_reference(answer: str) -> Reference:

def get_instances(self, output_path: str) -> List[Instance]:
# Download the raw data
desired_dir = 'winogrande_s'
desired_dir = "winogrande_s"
data_path: str = os.path.join(output_path, desired_dir)
self.download_winogrande_afr(data_path)

Expand Down
Loading