From ef87c042f96935125e8f3a65c788e638d4b89ba6 Mon Sep 17 00:00:00 2001
From: "Wang, Chang"
Date: Thu, 18 Jul 2024 18:24:43 +0800
Subject: [PATCH] fix int4_fullrange dtype loading

Signed-off-by: Wang, Chang
---
 .../transformers/modeling/modeling_auto.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
index 63540e11a74..77c8008b063 100644
--- a/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
+++ b/intel_extension_for_transformers/transformers/modeling/modeling_auto.py
@@ -1833,6 +1833,7 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
             if quantization_config.weight_dtype not in [
                 "fp8_e5m2",
                 "fp8_e4m3",
+                "int4_fullrange"
             ]:
                 model = build_woq_model(model, quantization_config)
             else:
@@ -1949,6 +1950,7 @@ def replace_ipex_cpu_woq_linear(model, current_name=[]):
         if quantization_config.weight_dtype not in [
             "fp8_e5m2",
             "fp8_e4m3",
+            "int4_fullrange"
         ] and not quantization_config.use_ipex:
             model = replace_linear(
                 model,
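
Note for reviewers: both hunks extend the same skip list, so any weight dtype
named there bypasses the generic WOQ rebuild (build_woq_model / replace_linear)
during load_low_bit(). Below is a minimal, self-contained sketch of that
dispatch; everything except the weight_dtype strings is a simplified,
hypothetical stand-in for the library's real config and helpers:

    from dataclasses import dataclass

    # Dtypes that must NOT go through the generic WOQ rebuild path;
    # "int4_fullrange" is the entry this patch adds.
    SKIP_WOQ_REBUILD = {"fp8_e5m2", "fp8_e4m3", "int4_fullrange"}

    @dataclass
    class QuantizationConfig:  # hypothetical stand-in for the real config object
        weight_dtype: str
        use_ipex: bool = False

    def choose_path(cfg: QuantizationConfig) -> str:
        """Mirrors the `weight_dtype not in [...]` guards in the patch."""
        if cfg.weight_dtype not in SKIP_WOQ_REBUILD:
            return "build_woq_model"       # generic WOQ reconstruction
        return "dtype-specific loading"    # fp8_* / int4_fullrange path

    print(choose_path(QuantizationConfig("int4_clip")))       # build_woq_model
    print(choose_path(QuantizationConfig("int4_fullrange")))  # dtype-specific loading

Before this patch, "int4_fullrange" fell through to build_woq_model like an
ordinary dtype, which is the loading bug the fix addresses.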