From 3cddae3478b707d8e95f4c6248d5452b09599b79 Mon Sep 17 00:00:00 2001 From: kcz358 Date: Wed, 16 Oct 2024 11:39:22 +0800 Subject: [PATCH] Add multi-modal to args --- docs/LLaVA_OneVision_Tutorials.ipynb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/LLaVA_OneVision_Tutorials.ipynb b/docs/LLaVA_OneVision_Tutorials.ipynb index 4c77121d2..366868911 100644 --- a/docs/LLaVA_OneVision_Tutorials.ipynb +++ b/docs/LLaVA_OneVision_Tutorials.ipynb @@ -46,7 +46,11 @@ "model_name = \"llava_qwen\"\n", "device = \"cuda\"\n", "device_map = \"auto\"\n", - "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map) # Add any other thing you want to pass in llava_model_args\n", + "llava_model_args = {\n", + " \"multimodal\": True,\n", + " \"attn_implementation\": \"sdpa\",\n", + "}\n", + "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, **llava_model_args) # Add any other thing you want to pass in llava_model_args\n", "\n", "model.eval()\n", "\n", @@ -308,7 +312,10 @@ "model_name = \"llava_qwen\"\n", "device = \"cuda\"\n", "device_map = \"auto\"\n", - "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation=\"sdpa\")\n", + "llava_model_args = {\n", + " \"multimodal\": True,\n", + "}\n", + "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation=\"sdpa\", **llava_model_args)\n", "\n", "model.eval()\n", "\n",