This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit e788e8d
Merge branch 'main' into penghuic/support_hf_format
PenghuiCheng authored Jul 4, 2024
2 parents 9e438b5 + aecd109 commit e788e8d
Showing 33 changed files with 93 additions and 86 deletions.
98 changes: 49 additions & 49 deletions .github/checkgroup.yml
@@ -65,54 +65,54 @@ subprojects:
- "engine-unit-test-PR-test"
- "Genreate-Engine-Report"

- id: "Windows Binary Test"
paths:
- ".github/workflows/windows-test.yml"
- "requirements.txt"
- "setup.py"
- "intel_extension_for_transformers/transformers/runtime/**"
- "intel_extension_for_transformers/transformers/llm/operator/**"
- "!intel_extension_for_transformers/transformers/runtime/third_party/**"
- "!intel_extension_for_transformers/transformers/runtime/docs/**"
- "!intel_extension_for_transformers/transformers/runtime/test/**"
checks:
- "Windows-Binary-Test"
# - id: "Windows Binary Test"
# paths:
# - ".github/workflows/windows-test.yml"
# - "requirements.txt"
# - "setup.py"
# - "intel_extension_for_transformers/transformers/runtime/**"
# - "intel_extension_for_transformers/transformers/llm/operator/**"
# - "!intel_extension_for_transformers/transformers/runtime/third_party/**"
# - "!intel_extension_for_transformers/transformers/runtime/docs/**"
# - "!intel_extension_for_transformers/transformers/runtime/test/**"
# checks:
# - "Windows-Binary-Test"

- id: "LLM Model Test workflow"
paths:
- ".github/workflows/llm-test.yml"
- ".github/workflows/script/models/run_llm.sh"
- "intel_extension_for_transformers/transformers/runtime/**"
- "!intel_extension_for_transformers/transformers/runtime/kernels/**"
- "!intel_extension_for_transformers/transformers/runtime/test/**"
- "!intel_extension_for_transformers/transformers/runtime/third_party/**"
- "!intel_extension_for_transformers/transformers/runtime/docs/**"
checks:
- "LLM-Workflow (gpt-j-6b, engine, latency, bf16,int8,fp8)"
- "Generate-LLM-Report"
# - id: "LLM Model Test workflow"
# paths:
# - ".github/workflows/llm-test.yml"
# - ".github/workflows/script/models/run_llm.sh"
# - "intel_extension_for_transformers/transformers/runtime/**"
# - "!intel_extension_for_transformers/transformers/runtime/kernels/**"
# - "!intel_extension_for_transformers/transformers/runtime/test/**"
# - "!intel_extension_for_transformers/transformers/runtime/third_party/**"
# - "!intel_extension_for_transformers/transformers/runtime/docs/**"
# checks:
# - "LLM-Workflow (gpt-j-6b, engine, latency, bf16,int8,fp8)"
# - "Generate-LLM-Report"

- id: "Chat Bot Test workflow"
paths:
- ".github/workflows/chatbot-test.yml"
- ".github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml"
- ".github/workflows/chatbot-inference-mpt-7b-chat.yml"
- ".github/workflows/chatbot-finetune-mpt-7b-chat.yml"
- ".github/workflows/chatbot-inference-llama-2-7b-chat-hf-hpu.yml"
- ".github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml"
- ".github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml"
- ".github/workflows/script/chatbot/**"
- ".github/workflows/sample_data/**"
- "intel_extension_for_transformers/neural_chat/**"
- "intel_extension_for_transformers/transformers/llm/finetuning/**"
- "intel_extension_for_transformers/transformers/llm/quantization/**"
- "intel_extension_for_transformers/transformers/**"
- "workflows/chatbot/inference/**"
- "workflows/chatbot/fine_tuning/**"
- "!intel_extension_for_transformers/neural_chat/docs/**"
- "!intel_extension_for_transformers/neural_chat/tests/ci/**"
- "!intel_extension_for_transformers/neural_chat/examples/**"
- "!intel_extension_for_transformers/neural_chat/assets/**"
- "!intel_extension_for_transformers/neural_chat/README.md"
checks:
- "call-inference-llama-2-7b-chat-hf / inference test"
- "call-inference-mpt-7b-chat / inference test"
# - id: "Chat Bot Test workflow"
# paths:
# - ".github/workflows/chatbot-test.yml"
# - ".github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml"
# - ".github/workflows/chatbot-inference-mpt-7b-chat.yml"
# - ".github/workflows/chatbot-finetune-mpt-7b-chat.yml"
# - ".github/workflows/chatbot-inference-llama-2-7b-chat-hf-hpu.yml"
# - ".github/workflows/chatbot-inference-mpt-7b-chat-hpu.yml"
# - ".github/workflows/chatbot-finetune-mpt-7b-chat-hpu.yml"
# - ".github/workflows/script/chatbot/**"
# - ".github/workflows/sample_data/**"
# - "intel_extension_for_transformers/neural_chat/**"
# - "intel_extension_for_transformers/transformers/llm/finetuning/**"
# - "intel_extension_for_transformers/transformers/llm/quantization/**"
# - "intel_extension_for_transformers/transformers/**"
# - "workflows/chatbot/inference/**"
# - "workflows/chatbot/fine_tuning/**"
# - "!intel_extension_for_transformers/neural_chat/docs/**"
# - "!intel_extension_for_transformers/neural_chat/tests/ci/**"
# - "!intel_extension_for_transformers/neural_chat/examples/**"
# - "!intel_extension_for_transformers/neural_chat/assets/**"
# - "!intel_extension_for_transformers/neural_chat/README.md"
# checks:
# - "call-inference-llama-2-7b-chat-hf / inference test"
# - "call-inference-mpt-7b-chat / inference test"
1 change: 1 addition & 0 deletions .github/workflows/script/formatScan/nlp_dict.txt
@@ -1,5 +1,6 @@
 alse
 ans
+assertIn
 bu
 charactor
 daa
2 changes: 1 addition & 1 deletion .github/workflows/script/formatScan/pylint.sh
@@ -28,7 +28,7 @@ else
echo "Not found requirements.txt file."
fi
# install packages
pip install lm-eval
pip install lm-eval==0.4.2
pip install accelerate nlpaug nltk schema optimum-intel optimum peft
pip install --upgrade --force-reinstall transformers==4.36.2
pip install optimum-habana
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: debug-statements
       - id: mixed-line-ending
@@ -44,7 +44,7 @@ repos:
         )$
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
     hooks:
       - id: codespell
         args: [-w, --ignore-words=.github/workflows/script/formatScan/nlp_dict.txt]
2 changes: 1 addition & 1 deletion docs/code_of_conduct.md
@@ -14,7 +14,7 @@ In the interest of fostering an open and welcoming environment, we as
 contributors and maintainers pledge to making participation in our project and
 our community a harassment-free experience for everyone, regardless of age, body
 size, disability, ethnicity, sex characteristics, gender identity and expression,
-level of experience, education, socio-economic status, nationality, personal
+level of experience, education, socioeconomic status, nationality, personal
 appearance, race, religion, or sexual identity and orientation.
 
 ## Our Standards
@@ -1,4 +1,4 @@
 transformers
 accelerate
 sentencepiece != 0.1.92
-lm-eval
+lm-eval==0.4.2
@@ -7,5 +7,5 @@ transformers
 torch==2.0.1
 tqdm
 neural_compressor
-lm-eval
+lm-eval==0.4.2

@@ -9,5 +9,5 @@ wandb
 einops
 neural-compressor
 pytest==8.0.0
-lm-eval
+lm-eval==0.4.2
 git+https://github.com/huggingface/peft.git@6c44096c7b8d55a2ecf24be9bc68393467e1584a
@@ -74,7 +74,7 @@ namespace qsl {
     }
   }
 
-  // Splice them togather
+  // Splice them together
   Queue_t result;
   for (auto& q : Buckets)
     result.splice(result.end(), std::move(q));
@@ -295,7 +295,7 @@ def postprocess_qa_predictions_with_beam_search(

     assert len(predictions[0]) == len(
         features
-    ), f"Got {len(predictions[0])} predicitions and {len(features)} features."
+    ), f"Got {len(predictions[0])} predictions and {len(features)} features."
 
     # Build a map example to its corresponding features.
     example_id_to_index = {k: i for i, k in enumerate(examples["id"])}
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Pipeline Modificaiton based from the diffusers 0.12.1 StableDiffusionInstructPix2PixPipeline"""
+"""Pipeline Modification based from the diffusers 0.12.1 StableDiffusionInstructPix2PixPipeline"""
 
 import inspect
 from typing import Callable, List, Optional, Union
@@ -137,7 +137,7 @@ python run_executor.py --ir_path=./qat_int8_ir --mode=latency --input_model=runw
 ## 3. Accuracy
 Frechet Inception Distance(FID) metric is used to evaluate the accuracy. This case we check the FID scores between the pytorch image and engine image.
 
-By setting --accuracy to check FID socre.
+By setting --accuracy to check FID score.
 Python API command as follows:
 ```python
 # FP32 IR
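For readers unfamiliar with the metric in the hunk above: FID compares the feature statistics of two image sets, so a lower score means the engine images stay closer to the PyTorch ones. A generic sketch using torchmetrics — not this repo's `run_executor.py` path:

```python
import torch
from torchmetrics.image.fid import FrechetInceptionDistance

# Stand-ins for the two image sets being compared (e.g. PyTorch vs. engine
# outputs); FID expects uint8 image tensors of shape (N, 3, H, W).
reference = torch.randint(0, 255, (16, 3, 299, 299), dtype=torch.uint8)
candidate = torch.randint(0, 255, (16, 3, 299, 299), dtype=torch.uint8)

fid = FrechetInceptionDistance(feature=2048)
fid.update(reference, real=True)   # "real" distribution
fid.update(candidate, real=False)  # distribution under test
print(fid.compute())               # lower = closer distributions
```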
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Pipeline Modificaiton based from the diffusers 0.12.1 StableDiffusionImg2ImgPipeline"""
+"""Pipeline Modification based from the diffusers 0.12.1 StableDiffusionImg2ImgPipeline"""
 
 import inspect
 from typing import Callable, List, Optional, Union
@@ -11,4 +11,4 @@ neural-compressor
 optimum-intel > 1.12.0
 onnxruntime
 intel-extension-for-pytorch
-lm-eval
+lm-eval==0.4.2
2 changes: 1 addition & 1 deletion examples/modelscope/requirements.txt
@@ -1,6 +1,6 @@
 intel_extension_for_transformers
 neural-speed
-lm-eval
+lm-eval==0.4.2
 sentencepiece
 gguf
 --extra-index-url https://download.pytorch.org/whl/cpu
@@ -77,7 +77,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Install requirements that have denpendency on stock pytorch"
"Install requirements that have dependency on stock pytorch"
]
},
{
@@ -69,7 +69,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Install requirements that have denpendency on stock pytorch"
"Install requirements that have dependency on stock pytorch"
]
},
{
@@ -75,7 +75,7 @@ mkdir /home/nfs_images
 export IMAGE_SERVER_IP="your.server.ip"
 ```
 
-# Configurate photoai.yaml
+# Configure photoai.yaml
 
 You can customize the configuration file `photoai.yaml` to match your environment setup. Here's a table to help you understand the configurable options:
 
@@ -91,9 +91,9 @@ You can customize the configuration file `photoai.yaml` to match your environmen
 | tasks_list | ['voicechat', 'photoai'] |
 
 
-# Configurate Environment Variables
+# Configure Environment Variables
 
-Configurate all of the environment variables in file `run.sh` using `export XXX=xxx`. Here's a table of all the variables needed to configurate.
+Configure all of the environment variables in file `run.sh` using `export XXX=xxx`. Here's a table of all the variables needed to configure.
 
 | Variable | Value |
 | ------------------- | ---------------------------------------|
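As context for the hunk above: `run.sh` is driven entirely by exported variables. A minimal sketch of the `export XXX=xxx` pattern the README describes — only `IMAGE_SERVER_IP` appears in this excerpt; the other variable names are hypothetical placeholders, not the real table entries:

```bash
#!/usr/bin/env bash
# Hypothetical sketch of the export pattern described above;
# consult the variable table in the README for the real names.
export IMAGE_SERVER_IP="your.server.ip"    # shown earlier in this README
export IMAGE_ROOT_PATH="/home/nfs_images"  # placeholder name
export MYSQL_PASSWORD="changeme"           # placeholder name
```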
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Parse and Evalate."""
+"""Parse and Evaluate."""
 import os
 import json

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Parse and Evalate."""
+"""Parse and Evaluate."""
 import os
 import json
 import shlex
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Pipeline Modificaiton based from the diffusers 0.12.1 StableDiffusionInstructPix2PixPipeline."""
+"""Pipeline Modification based from the diffusers 0.12.1 StableDiffusionInstructPix2PixPipeline."""
 
 import inspect
 from typing import Callable, List, Optional, Union
@@ -7,7 +7,7 @@ fastapi
 fschat==0.2.32
 huggingface_hub
 intel_extension_for_pytorch==2.3.0
-lm-eval
+lm-eval==0.4.2
 neural-compressor
 neural_speed==1.0a0
 numpy==1.23.5
@@ -4,7 +4,7 @@ evaluate
 fastapi
 fschat==0.2.35
 huggingface_hub
-lm-eval
+lm-eval==0.4.2
 neural-compressor
 numpy==1.23.5
 optimum
@@ -6,7 +6,7 @@ fastapi
 fschat==0.2.35
 huggingface_hub
 intel-extension-for-transformers
-lm-eval
+lm-eval==0.4.2
 neural-compressor
 numpy==1.23.5
 optimum
@@ -38,7 +38,7 @@ langchain-community==0.0.27
 langchain_core==0.1.35
 langid
 librosa
-lm-eval
+lm-eval==0.4.2
 markdown
 neural-compressor
 neural_speed==1.0a0
@@ -545,7 +545,7 @@ def default_calib_func(model):
"autoround_args": {
"n_samples": config.nsamples,
"seqlen": config.calib_len,
"iters": config.iters,
"iters": config.calib_iters,
"scale_dtype": config.scale_dtype,
"enable_quanted_input": not config.disable_quanted_input,
"lr": config.lr,
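This hunk contains the one behavioral fix in the commit: AutoRound's iteration count now comes from `config.calib_iters` rather than `config.iters`. A minimal sketch of why the distinction matters — the config class below is illustrative, not the repo's actual class:

```python
from dataclasses import dataclass

@dataclass
class QuantConfig:
    """Illustrative stand-in for the real quantization config."""
    iters: int = 100        # generic tuning iterations
    calib_iters: int = 200  # calibration iterations AutoRound expects

config = QuantConfig()

# Before the fix, "iters" was filled from config.iters, silently running
# the wrong number of calibration steps; after the fix it uses calib_iters.
autoround_args = {"iters": config.calib_iters}
print(autoround_args)  # {'iters': 200}
```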
@@ -140,7 +140,7 @@ def get_environ_info():


 def search_straight_pattern(input_pattern, graph):
-    """Search user specified patterns on internal grpah structure.
+    """Search user specified patterns on internal graph structure.
     Attention: the input computation chain in the graph which can be called pattern, there must be
     straight (or sequence). It means it has not any subgraph nodes. Otherwise this
@@ -73,7 +73,7 @@ def get_initializer_children_names(model, initializer):
 def graph_node_names_details(model):
     """Parse the graph nodes ans get the graph_nodes_dict.
 
-    Be used for Grpah class with creating a new graph.
+    Be used for Graph class with creating a new graph.
     The node_name is the key, node in value is for getting the Const
     tensor value and the input_tensor source op; output_names in value
     is the node output name list; outputs in value is for output_tensor dest op
@@ -155,7 +155,7 @@ def bias_to_int32(bias_node, a_scale, b_scale):
         bias_node: bias_add in graph (from onnx framework)
         a_scale: matmul node input matrice a scale tensor
         b_scale: matmul node input matrice b scale tensor
-        model: Grpah class
+        model: Graph class
 
     Returns:
         int32 bias numpy array
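The docstrings in the two hunks above describe the dictionary that `graph_node_names_details` returns. A rough sketch of the shape they imply — keys inferred from the docstring wording, not read from the source:

```python
# Inferred from the docstring above; illustrative only.
graph_nodes_dict = {
    "node_name_0": {                        # node_name is the key
        "node": "<node object>",            # Const tensor value / input_tensor source op
        "output_names": ["out:0"],          # the node's output name list
        "outputs": ["dest_op_a", "dest_op_b"],  # dest op(s) of each output_tensor
    },
}
print(list(graph_nodes_dict["node_name_0"]))
```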
@@ -48,7 +48,7 @@ def create_tf_node(op, name, inputs):
 def graph_node_names_details(nodes):
     """Parse the graph nodes ans get the graph_nodes_dict.
 
-    Be used for Grpah class when converting a tensorflow computation graph to an engine graph.
+    Be used for Graph class when converting a tensorflow computation graph to an engine graph.
     The node_name is the key, node in value is for getting the Const
     tensor value and the input_tensor source op; outputs in value is for
     output_tensor dest op.
@@ -68,7 +68,7 @@ PI32_CONST256(inv_mant_mask, ~0x7f800000);

 PI32_CONST256(0x7f, 0x7f);
 
-// evaluation of 8 sines at once using AVX intrisics
+// evaluation of 8 sines at once using AVX intrinsics
 __m256 sinf(__m256 x) {
   __m256 sign_bit = x;
   // take the absolute value
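For context on the hunk above: a vectorized `sinf` lets a caller evaluate eight sines per call instead of one. A hypothetical caller, assuming the `__m256 sinf(__m256)` overload from this file is in scope (this will not compile against plain `<cmath>`):

```cpp
#include <immintrin.h>
#include <cstdio>

// __m256 sinf(__m256);  // provided by the file shown in the diff above

int main() {
  alignas(32) float in[8]  = {0.0f, 0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f};
  alignas(32) float out[8];
  __m256 x = _mm256_load_ps(in);   // pack 8 floats into one AVX register
  __m256 y = sinf(x);              // evaluate all 8 sines at once
  _mm256_store_ps(out, y);         // unpack the results
  for (float v : out) std::printf("%f\n", v);
  return 0;
}
```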
