diff --git a/mmagic_qrcode_generator/README.md b/mmagic_qrcode_generator/README.md
new file mode 100644
index 00000000..891324e9
--- /dev/null
+++ b/mmagic_qrcode_generator/README.md
@@ -0,0 +1,35 @@
+# Introduction
+
+This is a QR code generator built with Stable Diffusion and ControlNet.
+
+Before running, change line 90 of `mmagic/models/archs/wrapper.py` to:
+
+`self.model = module_cls.from_pretrained(from_pretrained, use_safetensors=True, *args, **kwargs)`
+
+# Demo
+
+A simple demo is provided.
+
+```shell
+python demo/qrcode_inference_demo.py \
+    --config controlnet-brightness.py \
+    --qrcode_img 'test.png' \
+    --prompt 'dreamlikeart, a zebra' \
+    --negative_prompt 'ugly, bad quality' \
+    --resize 440 640 \
+    --output_size 440 640 \
+    --num_inference_steps 50 \
+    --guidance_scale 7.5 \
+    --unet_model 'dreamlike-art/dreamlike-diffusion-1.0' \
+    --vae_model 'dreamlike-art/dreamlike-diffusion-1.0' \
+    --controlnet_model 'ioclab/control_v1p_sd15_brightness' \
+    --controlnet_conditioning_scale 0.7 \
+    --num_generated_img 5 \
+    --save_path 'output'
+```
+
+The generated images will be saved as `output/[num]_sample.png`.
+
+If the generated QR code is not recognizable, try increasing `controlnet_conditioning_scale`.
+
+An example result, generated with the parameters above, is shown in `qrcode_example.png`.
diff --git a/mmagic_qrcode_generator/controlnet-brightness.py b/mmagic_qrcode_generator/controlnet-brightness.py
new file mode 100644
index 00000000..6480dc5f
--- /dev/null
+++ b/mmagic_qrcode_generator/controlnet-brightness.py
@@ -0,0 +1,53 @@
+# Config for a brightness-conditioned ControlNet on Stable Diffusion v1.5.
+stable_diffusion_v15_url = 'runwayml/stable-diffusion-v1-5'
+
+model = dict(
+    type='ControlStableDiffusion',
+    vae=dict(
+        type='AutoencoderKL',
+        from_pretrained=stable_diffusion_v15_url,
+        subfolder='vae'),
+    unet=dict(
+        type='UNet2DConditionModel',
+        subfolder='unet',
+        from_pretrained=stable_diffusion_v15_url),
+    text_encoder=dict(
+        type='ClipWrapper',
+        clip_type='huggingface',
+        pretrained_model_name_or_path=stable_diffusion_v15_url,
+        subfolder='text_encoder'),
+    tokenizer=stable_diffusion_v15_url,
+    controlnet=dict(
+        type='ControlNetModel',
+        attention_head_dim=8,
+        block_out_channels=[320, 640, 1280, 1280],
+        conditioning_embedding_out_channels=[16, 32, 96, 256],
+        controlnet_conditioning_channel_order='rgb',
+        cross_attention_dim=768,
+        down_block_types=[
+            'CrossAttnDownBlock2D', 'CrossAttnDownBlock2D',
+            'CrossAttnDownBlock2D', 'DownBlock2D'
+        ],
+        downsample_padding=1,
+        flip_sin_to_cos=True,
+        freq_shift=0,
+        in_channels=4,
+        layers_per_block=2,
+        mid_block_scale_factor=1,
+        norm_eps=1e-05,
+        norm_num_groups=32,
+        only_cross_attention=False,
+        resnet_time_scale_shift='default',
+        sample_size=32,
+        upcast_attention=False,
+        use_linear_projection=False),
+    scheduler=dict(
+        type='DDPMScheduler',
+        from_pretrained=stable_diffusion_v15_url,
+        subfolder='scheduler'),
+    test_scheduler=dict(
+        type='DDIMScheduler',
+        from_pretrained=stable_diffusion_v15_url,
+        subfolder='scheduler'),
+    data_preprocessor=dict(type='DataPreprocessor'),
+    init_cfg=dict(type='init_from_unet'))
diff --git a/mmagic_qrcode_generator/qrcode_example.png b/mmagic_qrcode_generator/qrcode_example.png
new file mode 100644
index 00000000..12249e9a
Binary files /dev/null and b/mmagic_qrcode_generator/qrcode_example.png differ
diff --git a/mmagic_qrcode_generator/qrcode_inference_demo.py b/mmagic_qrcode_generator/qrcode_inference_demo.py
new file mode 100644
index 00000000..bdcfa2d1
--- /dev/null
+++ b/mmagic_qrcode_generator/qrcode_inference_demo.py
@@ -0,0 +1,112 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+from argparse import ArgumentParser
+
+import cv2
+import mmcv
+import numpy as np
+from mmengine import Config
+from PIL import Image
+
+from mmagic.registry import MODELS
+from mmagic.utils import register_all_modules
+
+
+def parse_args():
+    parser = ArgumentParser()
+
+    # input
+    parser.add_argument(
+        '--qrcode_img', type=str, default=None, help='Input QR code image file.')
+    parser.add_argument(
+        '--prompt', type=str, default=None, help='Input prompt.')
+    parser.add_argument(
+        '--negative_prompt', type=str, default=None, help='Input negative prompt.')
+    parser.add_argument(
+        '--config', type=str, default=None, help='Path to the model config file.')
+
+    # parameters
+    parser.add_argument(
+        '--resize', nargs='+', type=int,
+        help='Size to resize the input QR code image to; must be a multiple of 8.')
+    parser.add_argument(
+        '--output_size', nargs='+', type=int,
+        help='Output image size; must be a multiple of 8.')
+    parser.add_argument(
+        '--num_inference_steps', type=int, default=50, help='Number of inference steps.')
+    parser.add_argument(
+        '--guidance_scale', type=float, default=7.5, help='Guidance scale.')
+    parser.add_argument(
+        '--controlnet_conditioning_scale', type=float, default=0.6,
+        help='ControlNet conditioning scale.')
+    parser.add_argument(
+        '--num_generated_img', type=int, default=5, help='Number of generated images.')
+    parser.add_argument(
+        '--save_path', type=str, default=None, help='Directory to save generated images.')
+
+    # models
+    parser.add_argument(
+        '--unet_model', type=str, default=None, help='UNet model to load.')
+    parser.add_argument(
+        '--vae_model', type=str, default=None, help='VAE model to load.')
+    parser.add_argument(
+        '--controlnet_model', type=str, default=None, help='ControlNet model to load.')
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+    register_all_modules()
+
+    cfg = Config.fromfile(args.config)
+    cfg.model.unet.from_pretrained = args.unet_model
+    cfg.model.vae.from_pretrained = args.vae_model
+    cfg.model.controlnet.from_pretrained = args.controlnet_model
+
+    cfg.model.init_cfg['type'] = 'convert_from_unet'
+    controlnet = MODELS.build(cfg.model).cuda()
+
+    # Call init_weights manually to convert the weights.
+    controlnet.init_weights()
+
+    prompt = args.prompt
+    negative_prompt = args.negative_prompt
+
+    # Read the QR code, resize it, and replicate its first channel into
+    # a 3-channel image so the brightness ControlNet sees a grayscale map.
+    control_img = mmcv.imread(args.qrcode_img)
+    control_img = cv2.resize(control_img, (args.resize[0], args.resize[1]))
+    control_img = control_img[:, :, 0:1]
+    control_img = np.concatenate([control_img] * 3, axis=2)
+    control = Image.fromarray(control_img)
+
+    num_inference_steps = args.num_inference_steps
+    guidance_scale = args.guidance_scale
+    num_images_per_prompt = 1
+    controlnet_conditioning_scale = args.controlnet_conditioning_scale
+    # Use the requested output size (width, height) for generation.
+    height = args.output_size[1]
+    width = args.output_size[0]
+
+    os.makedirs(args.save_path, exist_ok=True)
+
+    for i in range(args.num_generated_img):
+        output_dict = controlnet.infer(
+            prompt=prompt,
+            control=control,
+            height=height,
+            width=width,
+            controlnet_conditioning_scale=controlnet_conditioning_scale,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            num_images_per_prompt=num_images_per_prompt,
+            negative_prompt=negative_prompt)
+        samples = output_dict['samples']
+        samples[0].save(os.path.join(args.save_path, f'{i}_sample.png'))
+
+
+if __name__ == '__main__':
+    main()
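The demo expects an existing QR code image (`test.png` in the README command). For reference, here is a minimal sketch of producing a suitable input with the third-party `qrcode` package (`pip install qrcode[pil]`); the package choice, payload URL, and file name are illustrative assumptions, not part of this patch:

```python
# Minimal sketch (assumes the third-party `qrcode` package): create a
# high-error-correction QR code to feed the demo via --qrcode_img.
import qrcode

qr = qrcode.QRCode(
    error_correction=qrcode.constants.ERROR_CORRECT_H,  # max redundancy
    box_size=16,  # large modules are easier for the ControlNet to preserve
    border=4,     # quiet zone required by the QR spec
)
qr.add_data('https://example.com')  # placeholder payload
qr.make(fit=True)
qr.make_image(fill_color='black', back_color='white').save('test.png')
```

Level-H error correction tolerates roughly 30% damage, which leaves the most headroom for the diffusion model to stylize the pattern; the demo's `--resize` flag then scales the image to the required multiple-of-8 dimensions.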
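The README's advice about unrecognizable codes can also be checked mechanically. A minimal sketch using OpenCV's built-in `QRCodeDetector` (the demo already depends on `cv2`); the `output` directory matches the demo's `--save_path`, and everything else is illustrative:

```python
# Minimal sketch: report which generated samples still decode as QR codes.
import glob

import cv2

detector = cv2.QRCodeDetector()
for path in sorted(glob.glob('output/*_sample.png')):  # demo's save pattern
    data, points, _ = detector.detectAndDecode(cv2.imread(path))
    if data:
        print(f'{path}: decodes to {data}')
    else:
        print(f'{path}: not recognizable')
```

If most samples fail to decode, regenerate with a higher `--controlnet_conditioning_scale`; the trade-off is that the QR pattern shows through the artwork more strongly.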