-
Notifications
You must be signed in to change notification settings - Fork 36
/
Copy pathmake_warped_noise.py
93 lines (72 loc) · 4.59 KB
/
make_warped_noise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#Ryan Burgert 2024
#Setup:
# Run this in a Jupyter Notebook on a computer with at least one GPU
# `sudo apt install ffmpeg git`
# `pip install rp`
# The first time you run this it might be a bit slow (it will download necessary models)
# The `rp` package will take care of installing the rest of the python packages for you
#Dependency bootstrap. Keep these statements in this order: the two imports at the
#bottom come after the rp calls that are meant to make those packages available.
import rp
rp.r._pip_import_autoyes=True #Automatically install missing packages
rp.pip_import('fire') #Presumably installs `fire` if it is missing (see the setup notes above) - TODO confirm
rp.git_import('CommonSource') #If missing, installs code from https://github.com/RyannDaGreat/CommonSource
import rp.git.CommonSource.noise_warp as nw #Provides get_noise_from_video, used by main()
import fire #Used at the bottom of the file to run main()
def main(video:str, output_folder:str):
    """
    Compute warped noise for a video and save the results into a brand new folder.

    video:         A video URL or filepath.
    output_folder: Where to write the results. It must NOT exist yet - a RuntimeError is
                   raised if it does, so that nothing already on disk gets clobbered.

    What happens:
        1. The video is resized to 49 frames at height=480, width=720 (CogVideoX's dimensions)
        2. Warped noise is calculated at latent resolution (i.e. 1/8 of the width and height) with 16 channels
        3. The warped noise, optical flows, and related preview videos and images are saved to the output folder
        4. The first frame (first_frame.png) and the preprocessed video (input.mp4) are saved there too

    The main file you need is <output_folder>/noises.npy which is the gaussian noises in (H,W,C) form
    """
    #Refuse to touch a folder that is already there
    if rp.folder_exists(output_folder):
        raise RuntimeError(f"The given output_folder={repr(output_folder)} already exists! To avoid clobbering what might be in there, please specify a folder that doesn't exist so I can create one for you. Alternatively, you could delete that folder if you don't care whats in it.")

    #frame_scale: the input frames are immediately resized by this factor, before calculating optical flow
    #    The flow is calculated at (input size) x frame_scale resolution
    #    Higher values result in slower optical flow calculation and higher intermediate noise resolution
    #    Larger is not always better - watch the preview in Jupyter to see if it looks good!
    frame_scale = 0.5

    #flow_scale: the flow is then upscaled by this factor using bilinear interpolation
    #    The noise is warped at (input size) x frame_scale x flow_scale resolution, then downsampled back to (input size)
    #    Higher values result in more temporally consistent noise warping at the cost of higher VRAM usage and slower inference time
    flow_scale = 8

    #latent_factor: the outputs are further downsampled by this amount - because 8 pixels wide corresponds to one latent wide in Stable Diffusion
    #    The final output size is (input size) / latent_factor regardless of frame_scale and flow_scale
    #    Set this to 1 for a prettier visualization! But for latent diffusion models, use 8
    latent_factor = 8

    #Undo the intermediate upscaling and apply the latent downsampling in a single factor
    total_downscale = round(frame_scale * flow_scale) * latent_factor

    #CogVideoX's length and resolution
    num_frames = 49
    frame_size = dict(height=480, width=720)

    #Both video files and URLs work, for example:
    #    video = "https://www.shutterstock.com/shutterstock/videos/1100085499/preview/stock-footage-bremen-germany-october-old-style-carousel-moving-on-square-in-city-horses-on-traditional.webm"
    #    output_folder = "NoiseWarpOutputFolder"
    frames = rp.load_video(video) if isinstance(video, str) else video

    #Preprocess the video
    frames = rp.resize_list(frames, length=num_frames) #Stretch or squash video to 49 frames
    frames = rp.resize_images_to_hold(frames, **frame_size)
    frames = rp.crop_images(frames, origin='center', **frame_size) #Make the resolution 480x720
    frames = rp.as_numpy_array(frames)

    #See nw.get_noise_from_video's docstring for more information!
    result = nw.get_noise_from_video(
        frames,
        remove_background=False, #Set this to True to matte the foreground - and force the background to have no flow
        visualize=True,          #Generates nice visualization videos and previews in Jupyter notebook
        save_files=True,         #Set this to False if you just want the noises without saving to a numpy file
        noise_channels=16,
        output_folder=output_folder,
        resize_frames=frame_scale,
        resize_flow=flow_scale,
        downscale_factor=total_downscale,
    )

    #Keep the preprocessed input next to the generated noise
    first_frame_file = rp.path_join(output_folder, 'first_frame.png')
    result.first_frame_path = rp.save_image(frames[0], first_frame_file)

    input_video_file = rp.path_join(output_folder, 'input.mp4')
    rp.save_video_mp4(frames, input_video_file, framerate=12, video_bitrate='max')

    #NOTE: An extra 1/8 downsample of the noises (saved as noises_downsampled.npy) was once
    #      sketched here via nw.resize_noise, but it is disabled and was never finished.

    print("Noise shape:", result.numpy_noises.shape)
    print("Flow shape:", result.numpy_flows.shape)
    print("Output folder:", result.output_folder)
if __name__ == "__main__":
    #Only when run as a script: hand main() to fire, which takes its arguments from the command line
    fire.Fire(main)