diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/workshop/01_quickstart_neuralchat.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/workshop/01_quickstart_neuralchat.ipynb
index 50064c4a45a..9142899159a 100644
--- a/intel_extension_for_transformers/neural_chat/docs/notebooks/workshop/01_quickstart_neuralchat.ipynb
+++ b/intel_extension_for_transformers/neural_chat/docs/notebooks/workshop/01_quickstart_neuralchat.ipynb
@@ -57,16 +57,6 @@
     "Library imports"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b63fa0ac",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from time import time"
-   ]
-  },
   {
    "cell_type": "markdown",
    "id": "c3c6d1e2-61f1-4ee4-98c7-4f6202e7f2ea",
@@ -93,16 +83,19 @@
    "outputs": [],
    "source": [
     "# Build chatbot\n",
-    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig\n",
+    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, GenerationConfig\n",
     "config = PipelineConfig(model_name_or_path='Intel/neural-chat-7b-v3-1')\n",
     "chatbot = build_chatbot(config)\n",
     "\n",
     "# Perform inference/generate a response\n",
-    "start = time()\n",
-    "response = chatbot.predict(query=\"Tell me about Intel Xeon Scalable Processors.\")\n",
-    "end = time()\n",
-    "print(response)\n",
-    "print(\"%.5f seconds\" %(end-start))"
+    "\n",
+    "gen_config = GenerationConfig(return_stats=True, format_version=\"v2\")\n",
+    "results, _ = chatbot.predict_stream(\"Tell me about Intel Xeon Scalable Processors.\", config=gen_config)\n",
+    "stream_text = \"\"\n",
+    "for text in results:\n",
+    "    stream_text += text\n",
+    "print(stream_text)\n",
+    "\n"
    ]
   },
   {
@@ -124,18 +117,20 @@
    "outputs": [],
    "source": [
     "# Build chatbot in BF16\n",
-    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig\n",
+    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, GenerationConfig\n",
     "from intel_extension_for_transformers.transformers import MixedPrecisionConfig\n",
+    "mix_config = MixedPrecisionConfig(dtype=\"bfloat16\")\n",
     "config = PipelineConfig(model_name_or_path='Intel/neural-chat-7b-v3-1',\n",
-    "                        optimization_config=MixedPrecisionConfig(dtype='bfloat16'))\n",
+    "                        optimization_config=mix_config)\n",
     "chatbot = build_chatbot(config)\n",
     "\n",
     "# Perform inference/generate a response\n",
-    "start = time()\n",
-    "response = chatbot.predict(query=\"Tell me about Intel Xeon Scalable Processors.\")\n",
-    "end = time()\n",
-    "print(response)\n",
-    "print(\"%.5f seconds\" %(end-start))"
+    "gen_config = GenerationConfig(return_stats=True, format_version=\"v2\")\n",
+    "results, _ = chatbot.predict_stream(\"Tell me about Intel Xeon Scalable Processors.\", config=gen_config)\n",
+    "stream_text = \"\"\n",
+    "for text in results:\n",
+    "    stream_text += text\n",
+    "print(stream_text)\n"
    ]
   },
   {
@@ -155,16 +150,20 @@
    "source": [
     "# Build chatbot with INT4 weight-only quantization, computations in AMX INT8\n",
-    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig\n",
-    "from intel_extension_for_transformers.transformers import RtnConfig\n",
+    "from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig, GenerationConfig\n",
+    "from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig\n",
     "from intel_extension_for_transformers.neural_chat.config import LoadingModelConfig\n",
-    "config = PipelineConfig(model_name_or_path=\"Intel/neural-chat-7b-v3-1\",\n",
-    "                        optimization_config=RtnConfig(bits=4, compute_dtype=\"int8\", weight_dtype=\"int4_fullrange\"), \n",
+    "config = PipelineConfig(model_name_or_path='Intel/neural-chat-7b-v3-1',\n",
+    "                        optimization_config=WeightOnlyQuantConfig(compute_dtype=\"int8\", weight_dtype=\"int4_fullrange\"), \n",
     "                        loading_config=LoadingModelConfig(use_neural_speed=False))\n",
     "chatbot = build_chatbot(config)\n",
     "\n",
     "# Perform inference/generate a response\n",
-    "response = chatbot.predict(query=\"Tell me about Intel Xeon Scalable Processors.\")\n",
-    "print(response)"
+    "gen_config = GenerationConfig(return_stats=True, format_version=\"v2\")\n",
+    "results, _ = chatbot.predict_stream(\"Tell me about Intel Xeon Scalable Processors.\", config=gen_config)\n",
+    "stream_text = \"\"\n",
+    "for text in results:\n",
+    "    stream_text += text\n",
+    "print(stream_text)"
    ]
   },
   {
@@ -184,8 +183,7 @@
    "outputs": [],
    "source": [
     "# OPTIONAL: log in to HuggingFace to access Llama2\n",
-    "export HUGGINGFACE_TOKEN=None #@TODO: enter in HF token here\n",
-    "!huggingface-cli login --token $HUGGINGFACE_TOKEN --add-to-git-credential"
+    "#!huggingface-cli login --token <@TODO: enter in HF token here> --add-to-git-credential"
    ]
   },
   {
@@ -310,7 +308,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "103340b5-1b57-486c-9fba-3060de63ab42",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "# Build chatbot with AST and TTS plugin\n",
@@ -338,6 +338,17 @@
     "Open the audio files using your own audio player to hear the query and response. "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25157d11-15c5-4d56-a4f4-5294db04a332",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import IPython\n",
+    "IPython.display.Audio(\"response.wav\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e93a0779-8c26-4c62-a514-64bf9c58896f",
@@ -389,7 +400,7 @@
     "    TextGenerationFinetuningConfig,\n",
     ")\n",
     "from intel_extension_for_transformers.neural_chat.chatbot import finetune_model\n",
-    "model_args = ModelArguments(model_name_or_path=\"Intel/neural-chat-7b-v3-1\")\n",
+    "model_args = ModelArguments(model_name_or_path='Intel/neural-chat-7b-v3-1')\n",
     "data_args = DataArguments(train_file=\"alpaca_data.json\")\n",
     "training_args = TrainingArguments(\n",
     "    output_dir='./finetuned_model_path',\n",
@@ -434,7 +445,7 @@
     "from intel_extension_for_transformers.neural_chat import PipelineConfig\n",
     "from intel_extension_for_transformers.neural_chat.config import LoadingModelConfig\n",
     "\n",
-    "config = PipelineConfig(model_name_or_path=\"Intel/neural-chat-7b-v3-1\",\n",
+    "config = PipelineConfig(model_name_or_path='Intel/neural-chat-7b-v3-1',\n",
     "                        loading_config=LoadingModelConfig(peft_path=\"./finetuned_model_path\"))\n",
     "chatbot = build_chatbot(config)\n",
     "response = chatbot.predict(query=\"Tell me about Intel Xeon Scalable Processors.\")\n",
@@ -467,7 +478,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.10.13"
   }
  },
  "nbformat": 4,