Skip to content

Commit

Permalink
Remove old task evals
Browse files (browse the repository at this point in the history)
  • Loading branch information
liujch1998 committed Nov 21, 2024
1 parent b79c6c0 commit 2a851bf
Showing 1 changed file with 27 additions and 27 deletions.
54 changes: 27 additions & 27 deletions scripts/ladder_peteish.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -339,33 +339,33 @@ def config_from_args(args: argparse.Namespace) -> TrainConfig:
 },
 ),
 ),
-EvaluatorConfig(label="piqa", type=EvaluatorType.downstream),
-EvaluatorConfig(label="hellaswag", type=EvaluatorType.downstream),
-EvaluatorConfig(label="winogrande", type=EvaluatorType.downstream),
-EvaluatorConfig(label="openbook_qa", type=EvaluatorType.downstream),
-EvaluatorConfig(label="boolq", type=EvaluatorType.downstream),
-EvaluatorConfig(label="sciq", type=EvaluatorType.downstream),
-EvaluatorConfig(label="arc_easy", type=EvaluatorType.downstream),
-EvaluatorConfig(label="arc_challenge", type=EvaluatorType.downstream),
-EvaluatorConfig(label="copa", type=EvaluatorType.downstream),
-EvaluatorConfig(label="commonsense_qa", type=EvaluatorType.downstream),
-EvaluatorConfig(label="social_iqa", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_stem_var", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_humanities_var", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_social_sciences_var", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_other_var", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_stem_mc_5shot", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_humanities_mc_5shot", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_social_sciences_mc_5shot", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_other_mc_5shot", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_stem_mc_5shot_test", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_humanities_mc_5shot_test", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_social_sciences_mc_5shot_test", type=EvaluatorType.downstream),
-EvaluatorConfig(label="mmlu_other_mc_5shot_test", type=EvaluatorType.downstream),
-EvaluatorConfig(label="basic_arithmetic", type=EvaluatorType.downstream),
-EvaluatorConfig(label="trivia_qa_wiki_ppl", type=EvaluatorType.downstream),
-EvaluatorConfig(label="natural_qs_open_ppl", type=EvaluatorType.downstream),
-EvaluatorConfig(label="arc_easy_ppl", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="piqa", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="hellaswag", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="winogrande", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="openbook_qa", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="boolq", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="sciq", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="arc_easy", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="arc_challenge", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="copa", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="commonsense_qa", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="social_iqa", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_stem_var", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_humanities_var", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_social_sciences_var", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_other_var", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_stem_mc_5shot", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_humanities_mc_5shot", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_social_sciences_mc_5shot", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_other_mc_5shot", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_stem_mc_5shot_test", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_humanities_mc_5shot_test", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_social_sciences_mc_5shot_test", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="mmlu_other_mc_5shot_test", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="basic_arithmetic", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="trivia_qa_wiki_ppl", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="natural_qs_open_ppl", type=EvaluatorType.downstream),
+# EvaluatorConfig(label="arc_easy_ppl", type=EvaluatorType.downstream),
 ]
 + [
 EvaluatorConfig(label=label, type=EvaluatorType.downstream)
Expand Down

0 comments on commit 2a851bf

Please sign in to comment.