wip: integration of ip-adapter

brycedrennan · Jan 20, 2024 · c164dba · c164dba
1 parent 1bf53e4
commit c164dba
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 0 deletions.
diff --git a/imaginairy/cli/imagine.py b/imaginairy/cli/imagine.py
@@ -83,6 +83,8 @@ def imagine_cmd(
     prompt_strength,
     init_image,
     init_image_strength,
+    image_prompt,
+    image_prompt_strength,
     outdir,
     output_file_extension,
     repeats,
@@ -191,6 +193,8 @@ def imagine_cmd(
         prompt_strength=prompt_strength,
         init_image=init_image,
         init_image_strength=init_image_strength,
+        image_prompt=image_prompt,
+        image_prompt_strength=image_prompt_strength,
         outdir=outdir,
         output_file_extension=output_file_extension,
         repeats=repeats,

diff --git a/imaginairy/cli/shared.py b/imaginairy/cli/shared.py
@@ -35,6 +35,8 @@ def _imagine_cmd(
     prompt_strength,
     init_image,
     init_image_strength,
+    image_prompt,
+    image_prompt_strength,
     outdir,
     output_file_extension,
     repeats,
@@ -186,6 +188,8 @@ def _imagine_cmd(
                     prompt_strength=prompt_strength,
                     init_image=_init_image,
                     init_image_strength=init_image_strength,
+                    image_prompt=image_prompt,
+                    image_prompt_strength=image_prompt_strength,
                     control_inputs=control_inputs,
                     seed=seed,
                     solver_type=solver,
@@ -312,6 +316,19 @@ def temp_f():
         type=float,
         help="Starting image strength. Between 0 and 1.",
     ),
+    click.option(
+        "--image-prompt",
+        metavar="PATH|URL",
+        help="Starting image.",
+        multiple=True,
+    ),
+    click.option(
+        "--image-prompt-strength",
+        default=None,
+        show_default=False,
+        type=float,
+        help="Starting image strength. Between 0 and 1.",
+    ),
     click.option(
         "--outdir",
         default="./outputs",

diff --git a/imaginairy/image_prompts.py b/imaginairy/image_prompts.py
diff --git a/imaginairy/schema.py b/imaginairy/schema.py
@@ -333,6 +333,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
             prompt_strength (float, optional): Strength of the influence of the prompt on the output.
             init_image (LazyLoadingImage, optional): Initial image to base the generation on.
             init_image_strength (float, optional): Strength of the influence of the initial image.
+            image_prompt (LazyLoadingImage, optional): Image to be used as part of the prompt using IP-Adapter.
+            image_prompt_strength (float, optional): Strength of the influence of the image_prompt.
             control_inputs (List[ControlInput], optional): Additional control inputs for image generation.
             mask_prompt (str, optional): Mask prompt for selective area generation.
             mask_image (LazyLoadingImage, optional): Image used for masking.
@@ -370,6 +372,8 @@ class ImaginePrompt(BaseModel, protected_namespaces=()):
     init_image_strength: float | None = Field(
         ge=0, le=1, default=None, validate_default=True
     )
+    image_prompt: LazyLoadingImage | None = Field(None, validate_default=True)
+    image_prompt_strength: float | None = Field(ge=0, le=1, default=0.0)
     control_inputs: List[ControlInput] = Field(
         default_factory=list, validate_default=True
     )
@@ -411,6 +415,8 @@ def __init__(
         prompt_strength: float | None = 7.5,
         init_image: LazyLoadingImage | None = None,
         init_image_strength: float | None = None,
+        image_prompt: LazyLoadingImage | None = None,
+        image_prompt_strength: float | None = None,
         control_inputs: List[ControlInput] | None = None,
         mask_prompt: str | None = None,
         mask_image: LazyLoadingImage | None = None,
@@ -440,6 +446,8 @@ def __init__(
             prompt_strength=prompt_strength,
             init_image=init_image,
             init_image_strength=init_image_strength,
+            image_prompt=image_prompt,
+            image_prompt_strength=image_prompt_strength,
             control_inputs=control_inputs,
             mask_prompt=mask_prompt,
             mask_image=mask_image,

diff --git a/imaginairy/utils/model_manager.py b/imaginairy/utils/model_manager.py
@@ -204,6 +204,42 @@ def get_diffusion_model_refiners(
     # ensures a "fresh" copy that doesn't have additional injected parts
     sd = sd.structural_copy()
 
+    # inject ip-adapter (img to img prompt)
+    from PIL import Image
+
+    from imaginairy.vendored.refiners.fluxion.utils import (
+        load_from_safetensors,
+        no_grad,
+    )
+    from imaginairy.vendored.refiners.foundationals.latent_diffusion import (
+        SDXLIPAdapter,
+    )
+
+    image_prompt = Image.open(
+        "/imaginAIry/docs/assets/000032_337692011_PLMS40_PS7.5_a_photo_of_a_dog.jpg"
+    )
+
+    ip_adapter = SDXLIPAdapter(
+        target=sd.unet,
+        weights=load_from_safetensors(
+            "/imaginAIry/imaginairy/utils/ip-adapter_sdxl_vit-h.safetensors"
+        ),
+    )
+    ip_adapter.clip_image_encoder.load_from_safetensors(
+        "/imaginAIry/imaginairy/utils/clip_image.safetensors"
+    )
+    ip_adapter.inject()
+
+    scale = 0.4
+    ip_adapter.set_scale(scale)
+    print(f"SCALE: {scale}")
+
+    with no_grad():
+        clip_image_embedding = ip_adapter.compute_clip_image_embedding(
+            ip_adapter.preprocess_image(image_prompt)
+        )
+        ip_adapter.set_clip_image_embedding(clip_image_embedding)
+
     sd.set_self_attention_guidance(enable=True)
 
     return sd
@@ -222,6 +258,7 @@ def _get_diffusion_model_refiners(
 
     Weights location may also be shortcut name, e.g. "SD-1.5"
     """
+
     global MOST_RECENTLY_LOADED_MODEL
     _get_diffusion_model_refiners.cache_clear()
     clear_gpu_cache()