Bump version to 3.0.0

wandb · Jul 1, 2024 · 09d66ec · 09d66ec
1 parent 4b75ac8
commit 09d66ec
Showing 1 changed file with 106 additions and 0 deletions.
diff --git a/configs/config-gpt-4-1106-preview.yaml b/configs/config-gpt-4-1106-preview.yaml
@@ -0,0 +1,106 @@
+wandb:
+  log: True
+  entity: "wandb-japan"
+  project: "llm-leaderboard-test"
+  run_name: "gpt-4-1106-preview" # use run_name defined above
+
+github_version: v3.0.0 #for recording
+
+testmode: true
+
+# if you don't use api, please set "api" as "false"
+# if you use api, please select from "openai", "anthropic", "google", "cohere", "vllm"
+api: openai
+
+model:
+  use_wandb_artifacts: false
+  artifacts_path: ""
+  pretrained_model_name_or_path: "gpt-4-1106-preview" #if you use openai api, put the name of model
+  #chat_template: "Swallow-MS-7b-v0.1"
+  dtype: 'float16'
+  max_model_len: 4096
+  trust_remote_code: true
+  device_map: "auto"
+  load_in_8bit: false
+  load_in_4bit: false
+
+generator:
+  top_p: 1.0
+  # top_k: 0
+  temperature: 0.1
+  # repetition_penalty: 1.0
+  max_tokens: 128
+
+mtbench:
+  temperature_override:
+    writing: 0.7
+    roleplay: 0.7
+    extraction: 0.0
+    math: 0.0
+    coding: 0.0
+    reasoning: 0.0
+    stem: 0.1
+    humanities": 0.1
+  question_artifacts_path: 'wandb-japan/llm-leaderboard/mtbench_ja_question:v0' # if testmode is true, small dataset will be used
+  question_artifacts_path_test: 'wandb-japan/llm-leaderboard/mtbench_ja_question_small_for_test:v0'
+  referenceanswer_artifacts_path: 'wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer:v0' # if testmode is true, small dataset will be used
+  referenceanswer_artifacts_path_test: 'wandb-japan/llm-leaderboard/mtbench_ja_referenceanswer_small_for_test:v0'
+  judge_prompt_artifacts_path: 'wandb-japan/llm-leaderboard/mtbench_ja_prompt:v1' 
+  bench_name: 'japanese_mt_bench'
+  model_id: null # cannot use '<', '>', ':', '"', '/', '\\', '|', '?', '*', '.'
+  system_message: "以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。"
+  question_begin: null 
+  question_end: null 
+  max_new_token: 1024
+  num_choices: 1
+  num_gpus_per_model: 1
+  num_gpus_total: 1
+  max_gpu_memory: null
+  dtype: bfloat16 # None or float32 or float16 or bfloat16
+  # for gen_judgment
+  judge_model: 'gpt-4'
+  judge_count: 1
+  mode: 'single'
+  baseline_model: null 
+  parallel: 1
+  first_n: null
+
+custom_prompt_template_ja: "以下に、あるタスクを説明する指示があり、それに付随する入力が更なる文脈を提供しています。リクエストを適切に完了するための回答を記述してください。\n\n ### 指示:\n{instruction}\n\n### 入力:\n{input}\n\n### 応答:"
+custom_prompt_template_en": The following text provides instructions for a certain task, along with accompanying input that offers further context. Please describe the appropriate response to complete the request.\n\n ### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
+
+num_few_shots: 4
+
+jaster:
+  artifacts_path: "wandb-japan/llm-leaderboard/jaster:v11"
+  dataset_dir: "jaster/1.2.6/evaluation"
+  language: ja
+
+jmmlu:
+  artifacts_path: "t-ibi/llm-leaderboard/jmmlu:v5"
+  dataset_dir: "jmmlu/1.3.0/evaluation"
+  language: ja
+
+mmlu:
+  artifacts_path: "wandb-japan/llm-leaderboard/jaster:v11"
+  dataset_dir: "jaster/1.2.6/evaluation"
+  language: en
+
+lctg-bench:
+  aaa: aaa
+
+bbq:
+  aaa: aaa
+
+sample_dataset:
+  artifacts_path: 't-ibi/llm-leaderboard/sample_dataset:v1'
+  dataset_dir: 'sample_dataset'
+  language: ja
+
+#jbbq:
+
+toxicity:
+  artifact_path: 'wandb-japan/toxicity-dataset-private/toxicity_dataset_subset:v1'
+  judge_prompts_path: 'wandb-japan/toxicity-dataset-private/toxicity_judge_prompts:v1'
+  max_workers: 5
+  judge_model: 'gpt-4o-2024-05-13'
+  visualize_ids: '[0, 1]'