
Commit 634ef3e

update sq

Signed-off-by: changwangss <[email protected]>
Committed by changwangss on Jul 4, 2024
1 parent: ed478c1
Showing 4 changed files with 95 additions and 333 deletions.
Changed file 1:

@@ -170,10 +170,7 @@
     quantization_config.remove_redundant_parameters()
     config.quantization_config = quantization_config
     config.save_pretrained(args.output_dir)
-    torch.jit.save(user_model, args.output_dir + "/pytorch_model.bin")
-    with open(args.output_dir + "/best_configure.json", "w") as f:
-        json.dump(user_model.tune_cfg, f, indent=4)
-    # validate loading
+    user_model.save(args.output_dir)
     user_model = AutoModelForCausalLM.from_pretrained(
         args.output_dir,
         trust_remote_code=args.trust_remote_code,
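For context, the removed lines did this persistence step by hand: serialize the traced module with TorchScript and dump the tuning configuration as JSON, a pattern the new `user_model.save(args.output_dir)` call replaces with a single API. A minimal standalone sketch of that manual round trip, assuming a TorchScript `user_model` that carries a `tune_cfg` dict (the file names come from the diff; everything else is illustrative):

    import json
    import os

    import torch

    def save_quantized(user_model, output_dir):
        # Write the traced (TorchScript) module under the conventional name.
        torch.jit.save(user_model, os.path.join(output_dir, "pytorch_model.bin"))
        # Keep the tuning config next to it so the model can be recovered later.
        with open(os.path.join(output_dir, "best_configure.json"), "w") as f:
            json.dump(user_model.tune_cfg, f, indent=4)

    def load_quantized(output_dir):
        # Reverse of save_quantized: restore the module and its tuning config.
        model = torch.jit.load(os.path.join(output_dir, "pytorch_model.bin"))
        with open(os.path.join(output_dir, "best_configure.json")) as f:
            return model, json.load(f)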
Same file, second hunk:

@@ -188,7 +185,7 @@
     )
     user_model = recover_model_from_json(
         args.model,
-        os.path.join(args.output_dir, "best_configure.json"),
+        os.path.join(args.output_dir, "qconfig.json"),
         args.trust_remote_code,
     )
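The config file name changes from `best_configure.json` to `qconfig.json` to match the layout that `user_model.save` now writes. For orientation, a hedged sketch of what a JSON-based recovery path like `recover_model_from_json` generally involves; only the three-argument call pattern comes from the diff, and `apply_qconfig` is a hypothetical placeholder, not the library's API:

    import json

    from transformers import AutoModelForCausalLM

    def apply_qconfig(model, qconfig):
        # Hypothetical placeholder: a real implementation would rewrite the
        # quantizable modules according to the recorded per-op settings.
        return model

    def recover_from_json(model_name, qconfig_path, trust_remote_code=False):
        # Rebuild the original float model, then re-apply the recorded
        # quantization recipe instead of loading serialized int8 weights.
        model = AutoModelForCausalLM.from_pretrained(
            model_name, trust_remote_code=trust_remote_code
        )
        with open(qconfig_path) as f:
            qconfig = json.load(f)
        return apply_qconfig(model, qconfig)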
Changed file 2:

@@ -324,8 +324,7 @@ def _reorder_cache(
         This is required to match `past_key_values` with the correct beam_idx at every generation step.
         """
         if self.config.model_type == "bloom":
             return self._reorder_cache_bloom(past_key_values, beam_idx)
-
         if self.config.model_type == "chatglm":
             return tuple(
                 tuple(
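For readers outside the diff context: `_reorder_cache` runs once per beam-search step so that each beam inherits the key/value cache of the parent hypothesis it was expanded from. A minimal sketch of the generic case, assuming the standard per-layer `(key, value)` layout with the beam dimension first; BLOOM needs the dedicated `_reorder_cache_bloom` branch because its cache folds the attention heads into the batch dimension:

    import torch

    def reorder_cache(past_key_values, beam_idx):
        # past_key_values: tuple of per-layer (key, value) tensors shaped
        # [batch * num_beams, num_heads, seq_len, head_dim]. index_select on
        # dim 0 permutes the beams to follow the chosen parent hypotheses.
        return tuple(
            tuple(state.index_select(0, beam_idx) for state in layer)
            for layer in past_key_values
        )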
Changed file 3:

@@ -832,6 +832,7 @@ def calib_func(model):
                 alpha_step=quantization_config.alpha_step,
                 shared_criterion=quantization_config.shared_criterion,
                 do_blockwise=quantization_config.do_blockwise,
+                excluded_precisions=quantization_config.excluded_precisions,
             )
             # fallback
             if model_type in ["gptj", "gpt_neox", "mpt"]:
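These keyword arguments appear to drive SmoothQuant's automatic alpha tuning: `alpha_step` sets the granularity of the alpha grid, `shared_criterion` how per-layer scores are merged when one alpha is shared, `do_blockwise` whether tuning happens per block rather than per layer, and the newly forwarded `excluded_precisions` which dtypes to keep out of the mix. As a toy illustration of what alpha itself controls (the numbers are invented, not from the source):

    import numpy as np

    def smoothquant_scales(act_absmax, weight_absmax, alpha=0.5):
        # SmoothQuant's per-channel smoothing rule:
        #   s_j = max|X_j|**alpha / max|W_j|**(1 - alpha)
        # Activations are divided by s and weights multiplied by s, shifting
        # quantization difficulty from activations into the weights.
        return act_absmax**alpha / weight_absmax ** (1.0 - alpha)

    act = np.array([10.0, 80.0, 2.5])  # toy per-channel activation abs-max
    wgt = np.array([0.8, 0.5, 1.2])    # toy per-channel weight abs-max
    for alpha in np.arange(0.3, 0.71, 0.1):  # grid walked in alpha_step increments
        print(round(float(alpha), 2), smoothquant_scales(act, wgt, alpha))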