forked from mosaicml/llm-foundry
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhf_lora_eval.yml
50 lines (43 loc) · 1.38 KB
/
hf_lora_eval.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Maximum sequence length; reused further down through `${max_seq_len}`.
max_seq_len: 2048
# Random seed for the run.
seed: 1
# Precision setting for the run.
precision: amp_fp16

# Single-model setup: name or path of the model to evaluate.
model_name_or_path: EleutherAI/gpt-neo-125m

# Separate LoRA weights, given as an id or a path. They must be compatible
# with the model specified above.
# Example LoRA weights for gpt-neo-125m:
lora_id_or_path: edbeeching/gpt-neo-125M-imdb-lora
# For a single model, the entry below picks up `model_name_or_path` and
# `lora_id_or_path` through interpolation. Otherwise, write a block for each
# model you want to test in the `models` section.
models:
  - model_name: ${model_name_or_path}
    model:
      name: hf_causal_lm
      pretrained_model_name_or_path: ${model_name_or_path}
      init_device: mixed
      pretrained: true
      # Filled in from the top-level `lora_id_or_path`.
      pretrained_lora_id_or_path: ${lora_id_or_path}
    tokenizer:
      name: ${model_name_or_path}
      kwargs:
        model_max_length: ${max_seq_len}
# If you are evaluating more than one model, list them all as YAML blocks
# with the model name written out instead of interpolated. Stripping the
# leading "# " from the lines below yields a correctly indented entry:
#   - model_name: mosaicml/mpt-7b
#     model:
#       name: hf_causal_lm
#       pretrained_model_name_or_path: mosaicml/mpt-7b
#       init_device: cpu
#       pretrained: true
#       config_overrides:
#         max_seq_len: ${max_seq_len}
#     tokenizer:
#       name: mosaicml/mpt-7b
#       kwargs:
#         model_max_length: ${max_seq_len}
# Evaluation batch size per device.
device_eval_batch_size: 4

# FSDP config for model sharding
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: FULL
# YAML files describing the ICL tasks and the eval gauntlet to run.
# NOTE(review): both are relative paths; confirm which directory they are
# resolved against.
icl_tasks: eval/yamls/tasks_v0.2.yaml
eval_gauntlet: eval/yamls/eval_gauntlet_v0.2.yaml