diff --git a/docs/LLaVA_OneVision_Tutorials.ipynb b/docs/LLaVA_OneVision_Tutorials.ipynb index 4c77121d2..366868911 100644 --- a/docs/LLaVA_OneVision_Tutorials.ipynb +++ b/docs/LLaVA_OneVision_Tutorials.ipynb @@ -46,7 +46,11 @@ "model_name = \"llava_qwen\"\n", "device = \"cuda\"\n", "device_map = \"auto\"\n", - "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map) # Add any other thing you want to pass in llava_model_args\n", + "llava_model_args = {\n", + " \"multimodal\": True,\n", + " \"attn_implementation\": \"sdpa\",\n", + "}\n", + "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, **llava_model_args) # Add any other thing you want to pass in llava_model_args\n", "\n", "model.eval()\n", "\n", @@ -308,7 +312,10 @@ "model_name = \"llava_qwen\"\n", "device = \"cuda\"\n", "device_map = \"auto\"\n", - "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation=\"sdpa\")\n", + "llava_model_args = {\n", + " \"multimodal\": True,\n", + "}\n", + "tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, device_map=device_map, attn_implementation=\"sdpa\", **llava_model_args)\n", "\n", "model.eval()\n", "\n",