This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit 397d661
support layerwise
Signed-off-by: changwangss <[email protected]>
changwangss committed Jun 19, 2024
1 parent 9a5a578 commit 397d661
Showing 2 changed files with 16 additions and 7 deletions.
@@ -25,6 +25,7 @@
 from datasets import load_dataset
 from neural_compressor import quantization
 from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
+from neural_compressor.utils.pytorch import load
 from neural_compressor.utils.utility import LazyImport
 from neural_compressor.config import PostTrainingQuantConfig
 from intel_extension_for_transformers.tools.utils import (
@@ -583,6 +584,10 @@ def default_calib_func(model):
     inc_model = quantization.fit(
         model, conf, calib_func=calib_func, calib_dataloader=calib_dataloader
     )
+    if config.layer_wise:
+        inc_model.save("./tmp")
+        inc_model = load("./tmp", model, weight_only=True, layer_wise=True)
+        return inc_model.eval()
     inc_model.eval()
 
     if device == "xpu" or device == torch.device("xpu"):
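For context: in the layer-wise path the quantized model is usable only after a save/reload round trip, which is why the new branch returns early instead of falling through to inc_model.eval(). Below is a minimal sketch of that flow. The layer_wise_quant recipe key and the example model are assumptions for illustration; only the save()/load() calls mirror the hunk above.

# Minimal sketch of the save/reload round trip added in this hunk.
from transformers import AutoModelForCausalLM
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.utils.pytorch import load

# Example model, not from the commit; any causal LM follows the same path.
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")

# Assumed config: data-free RTN weight-only quantization with the layer-wise
# recipe enabled, so no calibration dataloader is needed.
conf = PostTrainingQuantConfig(
    approach="weight_only",
    recipes={"layer_wise_quant": True},
)
inc_model = quantization.fit(model, conf)

# Layer-wise results are serialized to disk first, then reloaded into the
# original module graph, exactly as the new branch does.
inc_model.save("./tmp")
inc_model = load("./tmp", model, weight_only=True, layer_wise=True)
inc_model.eval()

The second changed file (below) threads the same layer_wise flag into from_pretrained so that the FP32 weights are never fully materialized.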
@@ -74,6 +74,7 @@
 from accelerate import init_empty_weights
 from huggingface_hub import hf_hub_download
 from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
+from neural_compressor.adaptor.torch_utils.layer_wise_quant import load_empty_model
 from neural_compressor.model.torch_model import PyTorchFXModel
 from threading import Thread
 from transformers.configuration_utils import PretrainedConfig
@@ -778,13 +779,16 @@ def forward(self, input: torch.Tensor) -> tuple[torch.Tensor, None]:
         if quantization_config.quant_method.value in ["teq", "awq"]
         else False
     )
-    model = cls.ORIG_MODEL.from_pretrained(
-        pretrained_model_name_or_path,
-        *model_args,
-        config=config,
-        **kwargs,
-    )
-    model.config.update({"low_cpu_mem_usage": True})
+    if quantization_config.layer_wise:
+        model = load_empty_model(pretrained_model_name_or_path, torchscript=True)
+    else:
+        model = cls.ORIG_MODEL.from_pretrained(
+            pretrained_model_name_or_path,
+            *model_args,
+            config=config,
+            **kwargs,
+        )
+        model.config.update({"low_cpu_mem_usage": True})
     model.eval()
 
     if use_xpu:
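The branch above swaps from_pretrained for load_empty_model when layer_wise is set, so the model skeleton is built without allocating real weight tensors. A minimal sketch of what that buys, assuming load_empty_model initializes parameters on the meta device via accelerate (the model name is illustrative, not from the commit):

# Minimal sketch: build a weightless skeleton instead of a full FP32 model.
# Assumption: parameters land on the "meta" device, so host memory stays
# flat regardless of model size until real tensors are streamed in.
from neural_compressor.adaptor.torch_utils.layer_wise_quant import load_empty_model

model = load_empty_model("facebook/opt-125m", torchscript=True)  # example name
model.eval()

# No real storage has been allocated yet; the layer-wise quantizer loads
# tensors one layer at a time during quantization.
print(next(model.parameters()).device)  # expected: meta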
