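#!/usr/bin/env bash
# run_dpo.sh — launch DPO fine-tuning of Llama-2-7B (loaded in 4-bit, trained with
# PEFT adapters) on the gpt4_edits_high_to_low synthetic preference data, with
# logging to Weights & Biases. beta is the standard DPO KL-regularisation strength;
# alpha1/alpha2 appear to be loss weights specific to this repo's DPO_trainer.py.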
python DPO_trainer.py \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 2 \
--gradient_accumulation_steps 8 \
--model_name_or_path 'meta-llama/Llama-2-7b-hf' \
--load_in_4bit \
--use_peft \
--learning_rate 1e-3 \
--report_to 'wandb' \
--run_name 'llama_dpo_gpt4_high_to_low_beta_0.3' \
--max_length 1024 \
--max_prompt_length 768 \
--num_train_epochs 5 \
--max_steps -1 \
--evaluation_strategy 'epoch' \
--eval_steps -1 \
--logging_strategy 'steps' \
--log_steps 20 \
--logging_first_step \
--save_strategy 'epoch' \
--save_total_limit 3 \
--load_best_model_at_end \
--metric_for_best_model 'eval_metrics_policy_UMLS_cuis_f' \
--alignment_function 'dpo' \
--output_dir './results/DPO_model/gpt4_edits_high_to_low/DPO-LLaMA(1|1|0.3)' \
--alpha1 1.0 \
--alpha2 1.0 \
--beta 0.3 \
--synthetic_data_type 'gpt4_edits_high_to_low'
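
# Alternative run (kept commented out for reference): the same DPO configuration
# with a GPT-2 base model in full precision, without 4-bit loading or PEFT, and
# with a lower learning rate (1e-4).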
# python DPO_trainer.py \
# --per_device_train_batch_size 1 \
# --per_device_eval_batch_size 2 \
# --gradient_accumulation_steps 8 \
# --model_name_or_path 'gpt2' \
# --learning_rate 1e-4 \
# --report_to 'wandb' \
# --run_name 'gpt2_dpo_gpt4_high_to_low_beta_0.3' \
# --max_length 1024 \
# --max_prompt_length 768 \
# --num_train_epochs 5 \
# --max_steps -1 \
# --evaluation_strategy 'epoch' \
# --eval_steps -1 \
# --logging_strategy 'steps' \
# --log_steps 20 \
# --logging_first_step \
# --save_strategy 'epoch' \
# --save_total_limit 3 \
# --load_best_model_at_end \
# --metric_for_best_model 'eval_metrics_policy_UMLS_cuis_f' \
# --alignment_function 'dpo' \
# --output_dir './results/DPO_model/gpt4_edits_high_to_low/DPO-gpt2(1|1|0.3)' \
# --alpha1 1.0 \
# --alpha2 1.0 \
# --beta 0.3 \
# --synthetic_data_type 'gpt4_edits_high_to_low'