stanford-crfm · yifanmai · Jan 8, 2025 · Jan 8, 2025
diff --git a/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py b/src/helm/benchmark/run_specs/mmlu_clinical_afr_run_specs.py
@@ -2,56 +2,40 @@
 
 Available subjects: "clinical_knowledge", "college_medicine", "virology"
 Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts" (see lang_map below for language code mapping to language name, or here for ISO code reference: https://huggingface.co/languages)
-"""
+"""  # noqa: E501
 
-from helm.benchmark.adaptation.adapter_spec import (
-    ADAPT_GENERATION,
-    ADAPT_MULTIPLE_CHOICE_JOINT,
-    AdapterSpec,
-)
-from helm.benchmark.adaptation.common_adapter_specs import (
-    get_generation_adapter_spec,
-    get_machine_translation_adapter_spec,
-    get_multiple_choice_adapter_spec,
-)
-from helm.benchmark.metrics.common_metric_specs import (
-    get_basic_generation_metric_specs,
-    get_basic_metric_specs,
-    get_exact_match_metric_specs,
-    get_f1_metric_specs,
-    get_generative_harms_metric_specs,
-    get_generic_metric_specs,
-    get_open_ended_generation_metric_specs,
-)
+from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
+from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
+from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
 from helm.benchmark.run_spec import RunSpec, run_spec_function
-from helm.benchmark.runner import get_benchmark_output_path
-from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
+from helm.benchmark.scenarios.scenario import ScenarioSpec
 
 
 @run_spec_function("mmlu_clinical_afr")
 def get_mmlu_clinical_afr_spec(subject: str, lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT) -> RunSpec:
     scenario_spec = ScenarioSpec(
-        class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario", args={"subject": subject, "lang": lang}
+        class_name="helm.benchmark.scenarios.mmlu_clinical_afr_scenario.MMLU_Clinical_Afr_Scenario",
+        args={"subject": subject, "lang": lang},
     )
 
     lang_map = {
-        'af': 'Afrikaans',
-        'zu': 'Zulu',
-        'xh': 'Xhosa',
-        'am': 'Amharic',
-        'bm': 'Bambara',
-        'ig': 'Igbo',
-        'nso': 'Sepedi',
-        'sn': 'Shona',
-        'st': 'Sesotho',
-        'tn': 'Setswana',
-        'ts': 'Tsonga',
+        "af": "Afrikaans",
+        "zu": "Zulu",
+        "xh": "Xhosa",
+        "am": "Amharic",
+        "bm": "Bambara",
+        "ig": "Igbo",
+        "nso": "Sepedi",
+        "sn": "Shona",
+        "st": "Sesotho",
+        "tn": "Setswana",
+        "ts": "Tsonga",
     }
 
     adapter_spec = get_multiple_choice_adapter_spec(
         method=method,
         instructions=f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')} "
-                     f"in {lang_map[lang]}.",
+        f"in {lang_map[lang]}.",
         input_noun="Question",
         output_noun="Answer",
     )

diff --git a/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py b/src/helm/benchmark/run_specs/winogrande_afr_run_specs.py
@@ -1,30 +1,13 @@
 """Run spec functions for Winogrande human-translated into 11 African languages
 
 Available langs: "af", "zu", "xh", "am", "bm", "ig", "nso", "sn", "st", "tn", "ts" (see lang_map below for language code mapping to language name, or here for ISO code reference: https://huggingface.co/languages)
-"""
+"""  # noqa: E501
 
-from helm.benchmark.adaptation.adapter_spec import (
-    ADAPT_GENERATION,
-    ADAPT_MULTIPLE_CHOICE_JOINT,
-    AdapterSpec,
-)
-from helm.benchmark.adaptation.common_adapter_specs import (
-    get_generation_adapter_spec,
-    get_machine_translation_adapter_spec,
-    get_multiple_choice_adapter_spec,
-)
-from helm.benchmark.metrics.common_metric_specs import (
-    get_basic_generation_metric_specs,
-    get_basic_metric_specs,
-    get_exact_match_metric_specs,
-    get_f1_metric_specs,
-    get_generative_harms_metric_specs,
-    get_generic_metric_specs,
-    get_open_ended_generation_metric_specs,
-)
+from helm.benchmark.adaptation.adapter_spec import ADAPT_MULTIPLE_CHOICE_JOINT
+from helm.benchmark.adaptation.common_adapter_specs import get_multiple_choice_adapter_spec
+from helm.benchmark.metrics.common_metric_specs import get_exact_match_metric_specs
 from helm.benchmark.run_spec import RunSpec, run_spec_function
-from helm.benchmark.runner import get_benchmark_output_path
-from helm.benchmark.scenarios.scenario import ScenarioSpec, get_scenario_cache_path
+from helm.benchmark.scenarios.scenario import ScenarioSpec
 
 
 @run_spec_function("winogrande_afr")
@@ -34,23 +17,23 @@ def get_winogrande_afr_spec(lang: str, method: str = ADAPT_MULTIPLE_CHOICE_JOINT
     )
 
     lang_map = {
-        'af': 'Afrikaans',
-        'zu': 'Zulu',
-        'xh': 'Xhosa',
-        'am': 'Amharic',
-        'bm': 'Bambara',
-        'ig': 'Igbo',
-        'nso': 'Sepedi',
-        'sn': 'Shona',
-        'st': 'Sesotho',
-        'tn': 'Setswana',
-        'ts': 'Tsonga',
+        "af": "Afrikaans",
+        "zu": "Zulu",
+        "xh": "Xhosa",
+        "am": "Amharic",
+        "bm": "Bambara",
+        "ig": "Igbo",
+        "nso": "Sepedi",
+        "sn": "Shona",
+        "st": "Sesotho",
+        "tn": "Setswana",
+        "ts": "Tsonga",
     }
 
     adapter_spec = get_multiple_choice_adapter_spec(
         method=method,
-        instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), requiring common sense reasoning "
-                     f"in {lang_map[lang]}.",
+        instructions=f"The following are binary choice fill-in-the-blank sentences (with answers), "
+        f"requiring common sense reasoning in {lang_map[lang]}.",
         input_noun="Question",
         output_noun="Answer",
     )

diff --git a/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py b/src/helm/benchmark/scenarios/mmlu_clinical_afr_scenario.py
@@ -23,10 +23,10 @@ def __init__(self, subject: str = "clinical_knowledge", lang: str = "af"):
 
     def download_mmlu_clinical_afr(self, path: str):
         ensure_file_downloaded(
-            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
+            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",  # noqa: E501
             target_path=path,
             unpack=True,
-            unpack_type='unzip'
+            unpack_type="unzip",
         )
 
     def process_csv(self, csv_path: str, split: str) -> List[Instance]:
@@ -53,7 +53,7 @@ def answer_to_reference(answer: str) -> Reference:
 
     def get_instances(self, output_path: str) -> List[Instance]:
         # Download the raw data
-        desired_dir = 'mmlu_cm_ck_vir'
+        desired_dir = "mmlu_cm_ck_vir"
         data_path: str = os.path.join(output_path, desired_dir)
         self.download_mmlu_clinical_afr(data_path)
 

diff --git a/src/helm/benchmark/scenarios/winogrande_afr_scenario.py b/src/helm/benchmark/scenarios/winogrande_afr_scenario.py
@@ -22,16 +22,16 @@ def __init__(self, lang: str = "af"):
 
     def download_winogrande_afr(self, path: str):
         ensure_file_downloaded(
-            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",
+            source_url="https://github.com/InstituteforDiseaseModeling/Bridging-the-Gap-Low-Resource-African-Languages/raw/refs/heads/main/data/evaluation_benchmarks_afr_release.zip",  # noqa: E501
             target_path=path,
             unpack=True,
-            unpack_type='unzip'
+            unpack_type="unzip",
         )
 
     def process_csv(self, csv_path: str, split: str, pseudo_split: str) -> List[Instance]:
         # Match naming in Winogrande
-        if pseudo_split == 'val':
-            pseudo_split = 'train_s'
+        if pseudo_split == "val":
+            pseudo_split = "train_s"
         instances: List[Instance] = []
         hlog(f"Reading {csv_path}")
         with open(csv_path) as f:
@@ -57,7 +57,7 @@ def answer_to_reference(answer: str) -> Reference:
 
     def get_instances(self, output_path: str) -> List[Instance]:
         # Download the raw data
-        desired_dir = 'winogrande_s'
+        desired_dir = "winogrande_s"
         data_path: str = os.path.join(output_path, desired_dir)
         self.download_winogrande_afr(data_path)