This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit fe06a84: support extension

Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 11, 2024
1 parent 658e129

Showing 4 changed files with 26 additions and 22 deletions.
@@ -61,7 +61,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.85
```

@@ -115,7 +115,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.9
```

@@ -169,7 +169,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.5
```

@@ -340,7 +340,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.8
```

@@ -394,7 +394,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.9
```

@@ -500,7 +500,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.95
```

@@ -554,7 +554,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.65
```

@@ -662,7 +662,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.75
```

@@ -715,7 +715,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.9
```

@@ -768,7 +768,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.6
```

@@ -821,7 +821,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.7
```

@@ -874,7 +874,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.75
```

@@ -927,7 +927,7 @@ python run_generation_sq.py \
--tasks lambada_openai \
--sq \
--accuracy \
- --batch_size 56 \
+ --eval_batch_size 56 \
--alpha 0.75
```

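The example-command hunks above all make the same substitution: the accuracy commands now pass the lm-eval batch size through --eval_batch_size instead of --batch_size. For comparison, a hedged sketch of a benchmark-mode run of the same script using the flags this commit introduces or renames; the model placeholder and the specific values are illustrative, not taken from the documentation.

```bash
# Benchmark mode: latency/throughput measurement rather than lm-eval accuracy.
# --benchmark_iters replaces the old --iters, and --benchmark_batch_size
# controls how many copies of the prompt are generated per iteration.
python run_generation_sq.py \
    --model <MODEL_NAME_OR_PATH> \
    --sq \
    --alpha 0.85 \
    --benchmark \
    --benchmark_iters 100 \
    --benchmark_batch_size 1 \
    --num_warmup 10
```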
@@ -73,7 +73,7 @@ function run_benchmark {
extra_cmd=$extra_cmd" --tasks ${lm_eval_tasks}"
elif [[ ${mode} == "benchmark" ]]; then
mode_cmd=" --benchmark "
- extra_cmd=$extra_cmd" --iters ${iters}"
+ extra_cmd=$extra_cmd" --benchmark_iters ${iters}"
else
echo "Error: No such mode: ${mode}"
exit 1
@@ -248,7 +248,7 @@ function run_benchmark {
elif [ "${script}" == "run_generation_cpu_woq.py" ];then
python -u ./${script} \
--model ${model_name_or_path} \
- --batch_size ${batch_size} \
+ --eval_batch_size ${batch_size} \
${mode_cmd} \
${extra_cmd}
else
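For context, a rough sketch of the command lines the updated shell wrapper assembles in its run_generation_cpu_woq.py branch under the two modes. Only --eval_batch_size, the --tasks handling, and --benchmark/--benchmark_iters come from the hunks above; the accuracy mode_cmd and the variable values are assumptions about the surrounding script.

```bash
# mode=accuracy: the task list rides in extra_cmd, and the harness batch size
# is forwarded through the renamed --eval_batch_size flag.
python -u ./run_generation_cpu_woq.py \
    --model ${model_name_or_path} \
    --eval_batch_size ${batch_size} \
    --accuracy \
    --tasks ${lm_eval_tasks}

# mode=benchmark: mode_cmd switches to --benchmark and the iteration count is
# forwarded through --benchmark_iters (previously --iters).
python -u ./run_generation_cpu_woq.py \
    --model ${model_name_or_path} \
    --eval_batch_size ${batch_size} \
    --benchmark \
    --benchmark_iters ${iters}
```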
@@ -43,11 +43,12 @@
)
# ============Benchmark configs==============
parser.add_argument("--benchmark", action="store_true")
parser.add_argument("--iters", default=100, type=int, help="num iter")
parser.add_argument("--benchmark_iters", default=100, type=int, help="num iter")
parser.add_argument("--benchmark_batch_size", default=1, type=int, help="batch size for benchmark")
parser.add_argument("--num_warmup", default=10, type=int, help="num warmup")
# ============Accuracy configs==============
parser.add_argument("--accuracy", action="store_true")
parser.add_argument("--batch_size", default=56, type=int, help="batch size num.")
parser.add_argument("--eval_batch_size", default=56, type=int, help="batch size num.")
parser.add_argument(
"--tasks",
default="lambada_openai",
@@ -65,6 +66,7 @@
parser.add_argument(
"--seq_len", default=512, type=int, help="Smooth quant calibration input length."
)
parser.add_argument("--batch_size", default=1, type=int, help="batch size num.")
# sq alpha "auto" parameters
parser.add_argument("--scale_sharing", action="store_true")
parser.add_argument(
@@ -138,6 +140,7 @@
tokenizer=tokenizer,
seq_len=args.seq_len,
n_samples=args.n_samples,
+ batch_size=args.batch_size,
excluded_precisions=excluded_precisions,
alpha=args.alpha if args.alpha == "auto" else float(args.alpha),
scale_sharing=args.scale_sharing,
@@ -205,7 +208,7 @@

# start
total_time = 0.0
- num_iter = args.iters
+ num_iter = args.benchmark_iters
num_warmup = args.num_warmup
total_token_num = 0
eos_token_id = tokenizer.eos_token_id
@@ -215,7 +218,7 @@
# for chatglm2 only
if hasattr(tokenizer, "build_chat_input"):
input_ids = tokenizer.build_chat_input(prompt)["input_ids"]
- input_ids = input_ids.repeat(args.batch_size, 1)
+ input_ids = input_ids.repeat(args.benchmark_batch_size, 1)
eos_token_id = [
tokenizer.eos_token_id,
tokenizer.get_command("<|user|>"),
@@ -225,11 +228,11 @@
elif hasattr(tokenizer, "build_prompt"):
build_prompt = tokenizer.build_prompt(prompt)
input_ids = tokenizer(
- [build_prompt] * args.batch_size, return_tensors="pt"
+ [build_prompt] * args.benchmark_batch_size, return_tensors="pt"
).input_ids
else:
input_ids = tokenizer(
- [prompt] * args.batch_size, return_tensors="pt"
+ [prompt] * args.benchmark_batch_size, return_tensors="pt"
).input_ids
gen_ids = user_model.generate(
input_ids,
@@ -270,7 +273,7 @@
user_model=user_model,
tasks=args.tasks,
device="cpu",
- batch_size=args.batch_size,
+ batch_size=args.eval_batch_size,
)
results = evaluate(args)
for task_name in args.tasks.split(","):
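Taken together, the changes to this script split what used to be a single --batch_size flag into three independent knobs. A hedged end-to-end sketch follows; the model placeholder and the values are illustrative, and combining --accuracy and --benchmark in one invocation is an assumption rather than something the diff shows.

```bash
# --batch_size           -> SmoothQuant calibration dataloader (new argument, default 1)
# --eval_batch_size      -> lm-eval accuracy harness (renamed from the old --batch_size)
# --benchmark_batch_size -> prompt replication in the generation benchmark loop
python run_generation_sq.py \
    --model <MODEL_NAME_OR_PATH> \
    --sq \
    --alpha 0.85 \
    --batch_size 1 \
    --accuracy \
    --tasks lambada_openai \
    --eval_batch_size 56 \
    --benchmark \
    --benchmark_iters 100 \
    --benchmark_batch_size 1
```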
@@ -780,6 +780,7 @@ def __init__(
self.ipex_opt_llm = ipex_opt_llm
self.num_beams = num_beams
self.excluded_precisions = excluded_precisions
+ self.batch_size = kwargs.pop("batch_size", 1)


class RtnConfig(ITREXQuantizationConfigMixin):
