
Commit

Feature/gradio demo (#190)
* [gradio] added demo app

* polish
FrankLeeeee authored Mar 22, 2024
1 parent 3875053 commit 0050800
Showing 9 changed files with 281 additions and 56 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -175,3 +175,4 @@ pretrained_models/
 
 # Secret files
 hostfile
+gradio_cached_examples/
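
Gradio writes cached example outputs to `gradio_cached_examples/`, which is why the directory joins the ignore list. A minimal sketch of how that directory comes about (the `echo` handler and example prompt are hypothetical, not taken from this commit):

```python
# Hedged sketch: with cache_examples=True, Gradio pre-computes the example
# outputs at startup and stores them under ./gradio_cached_examples/.
import gradio as gr

def echo(prompt: str) -> str:
    return prompt  # hypothetical stand-in for a real generation handler

demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Output"),
    examples=[["a serene waterfall in a forest"]],
    cache_examples=True,  # populates gradio_cached_examples/ at startup
)
```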
10 changes: 9 additions & 1 deletion README.md
@@ -134,7 +134,15 @@ Our model's weight is partially initialized from [PixArt-α](https://github.com/
 
 ## Inference
 
-To run inference with our provided weights, first download [T5](https://huggingface.co/DeepFloyd/t5-v1_1-xxl/tree/main) weights into `pretrained_models/t5_ckpts/t5-v1_1-xxl`. Then download the model weights from [huggingface](https://huggingface.co/hpcai-tech/Open-Sora/tree/main). Run the following commands to generate samples. To change sampling prompts, modify the txt file passed to `--prompt-path`. See [here](docs/structure.md#inference-config-demos) to customize the configuration.
+We provide a Gradio application in this repository. Use the following command to start an interactive web application and experience video generation with Open-Sora.
+
+```bash
+python scripts/demo.py
+```
+
+This launches a Gradio application on your localhost.
+
+We also provide an offline inference script. To run inference with our provided weights, first download [T5](https://huggingface.co/DeepFloyd/t5-v1_1-xxl/tree/main) weights into `pretrained_models/t5_ckpts/t5-v1_1-xxl`. Then download the model weights from [huggingface](https://huggingface.co/hpcai-tech/Open-Sora/tree/main). Run the following commands to generate samples. To change the sampling prompts, modify the txt file passed to `--prompt-path`. See [here](docs/structure.md#inference-config-demos) to customize the configuration.
 
 ```bash
 # Sample 16x256x256 (5s/sample, 100 time steps, 22 GB memory)
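
For orientation, here is a minimal sketch of a Gradio text-to-video app in the spirit of `scripts/demo.py` (whose full diff is not rendered on this page); `generate_video` is a hypothetical placeholder for the Open-Sora sampling pipeline, not the actual implementation:

```python
# Hedged sketch of a text-to-video Gradio app; only the Gradio wiring is shown.
import gradio as gr

def generate_video(prompt: str) -> str:
    # The real demo would run Open-Sora sampling here and return the path of
    # the saved video file (cf. the save_sample change in this commit).
    raise NotImplementedError("replace with the Open-Sora inference call")

demo = gr.Interface(
    fn=generate_video,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Video(label="Generated video"),
    title="Open-Sora",
)

if __name__ == "__main__":
    demo.launch()  # serves the app on http://localhost:7860 by default
```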
2 changes: 1 addition & 1 deletion configs/opensora/inference/16x256x256.py
@@ -18,7 +18,7 @@
 )
 text_encoder = dict(
     type="t5",
-    from_pretrained="./pretrained_models/t5_ckpts",
+    from_pretrained="DeepFloyd/t5-v1_1-xxl",
     model_max_length=120,
 )
 scheduler = dict(
2 changes: 1 addition & 1 deletion configs/opensora/inference/16x512x512.py
@@ -18,7 +18,7 @@
 )
 text_encoder = dict(
     type="t5",
-    from_pretrained="./pretrained_models/t5_ckpts",
+    from_pretrained="DeepFloyd/t5-v1_1-xxl",
     model_max_length=120,
 )
 scheduler = dict(
2 changes: 1 addition & 1 deletion configs/opensora/inference/64x512x512.py
@@ -18,7 +18,7 @@
 )
 text_encoder = dict(
     type="t5",
-    from_pretrained="./pretrained_models/t5_ckpts",
+    from_pretrained="DeepFloyd/t5-v1_1-xxl",
     model_max_length=120,
 )
 scheduler = dict(
2 changes: 1 addition & 1 deletion configs/opensora/train/16x256x256.py
@@ -29,7 +29,7 @@
 )
 text_encoder = dict(
     type="t5",
-    from_pretrained="./pretrained_models/t5_ckpts",
+    from_pretrained="DeepFloyd/t5-v1_1-xxl",
     model_max_length=120,
     shardformer=True,
 )
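
All four configs above make the same one-line swap: the text encoder now points at the Hugging Face Hub id instead of a local checkpoint directory, so the T5 files are downloaded and cached automatically. A quick way to see the effect, assuming `transformers` is installed and the Hub is reachable:

```python
# With the new config value, the tokenizer and encoder resolve straight from
# the Hub rather than ./pretrained_models/t5_ckpts; files land in the HF cache.
from transformers import AutoTokenizer, T5EncoderModel

tokenizer = AutoTokenizer.from_pretrained("DeepFloyd/t5-v1_1-xxl")
model = T5EncoderModel.from_pretrained("DeepFloyd/t5-v1_1-xxl", low_cpu_mem_usage=True)
```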
1 change: 1 addition & 0 deletions opensora/datasets/utils.py
@@ -33,6 +33,7 @@ def save_sample(x, fps=8, save_path=None, normalize=True, value_range=(-1, 1)):
     x = x.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 3, 0).to("cpu", torch.uint8)
     write_video(save_path, x, fps=fps, video_codec="h264")
     print(f"Saved to {save_path}")
+    return save_path
 
 
 class StatefulDistributedSampler(DistributedSampler):
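
Returning the path from `save_sample` lets a caller, such as a Gradio handler, pass the written file straight to a video output. A hedged usage sketch (the random tensor stands in for a real sampled clip, and the import path is assumed from this file's location):

```python
# Hypothetical callback: the path returned by save_sample is what a gr.Video
# output expects, so the saved clip can be displayed without extra glue code.
import os
import torch
from opensora.datasets.utils import save_sample

def on_generate(prompt: str) -> str:
    os.makedirs("outputs", exist_ok=True)
    video = torch.rand(3, 16, 256, 256) * 2 - 1  # stand-in clip in [-1, 1], shape (C, T, H, W)
    path = save_sample(video, fps=8, save_path="outputs/sample.mp4")
    return path  # final video location, as reported by save_sample
```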
63 changes: 12 additions & 51 deletions opensora/models/text_encoder/t5.py
@@ -37,17 +37,16 @@
 
 
 class T5Embedder:
-    available_models = ["t5-v1_1-xxl"]
+    available_models = ["DeepFloyd/t5-v1_1-xxl"]
     bad_punct_regex = re.compile(
         r"[" + "#®•©™&@·º½¾¿¡§~" + "\)" + "\(" + "\]" + "\[" + "\}" + "\{" + "\|" + "\\" + "\/" + "\*" + r"]{1,}"
     )  # noqa
 
     def __init__(
         self,
         device,
-        dir_or_name="t5-v1_1-xxl",
+        from_pretrained=None,
         *,
-        local_cache=False,
         cache_dir=None,
         hf_token=None,
         use_text_preprocessing=True,
@@ -58,8 +57,11 @@ def __init__(
     ):
         self.device = torch.device(device)
         self.torch_dtype = torch_dtype or torch.bfloat16
+        self.cache_dir = cache_dir
+
         if t5_model_kwargs is None:
             t5_model_kwargs = {"low_cpu_mem_usage": True, "torch_dtype": self.torch_dtype}
+
             if use_offload_folder is not None:
                 t5_model_kwargs["offload_folder"] = use_offload_folder
                 t5_model_kwargs["device_map"] = {
@@ -97,51 +99,10 @@ def __init__(
 
         self.use_text_preprocessing = use_text_preprocessing
         self.hf_token = hf_token
-        self.cache_dir = cache_dir or os.path.expanduser("~/.cache/IF_")
-        self.dir_or_name = dir_or_name
-        tokenizer_path, path = dir_or_name, dir_or_name
-        if local_cache:
-            cache_dir = os.path.join(self.cache_dir, dir_or_name)
-            tokenizer_path, path = cache_dir, cache_dir
-        elif dir_or_name in self.available_models:
-            cache_dir = os.path.join(self.cache_dir, dir_or_name)
-            for filename in [
-                "config.json",
-                "special_tokens_map.json",
-                "spiece.model",
-                "tokenizer_config.json",
-                "pytorch_model.bin.index.json",
-                "pytorch_model-00001-of-00002.bin",
-                "pytorch_model-00002-of-00002.bin",
-            ]:
-                hf_hub_download(
-                    repo_id=f"DeepFloyd/{dir_or_name}",
-                    filename=filename,
-                    cache_dir=cache_dir,
-                    force_filename=filename,
-                    token=self.hf_token,
-                )
-            tokenizer_path, path = cache_dir, cache_dir
-        else:
-            cache_dir = os.path.join(self.cache_dir, "t5-v1_1-xxl")
-            for filename in [
-                "config.json",
-                "special_tokens_map.json",
-                "spiece.model",
-                "tokenizer_config.json",
-            ]:
-                hf_hub_download(
-                    repo_id="DeepFloyd/t5-v1_1-xxl",
-                    filename=filename,
-                    cache_dir=cache_dir,
-                    force_filename=filename,
-                    token=self.hf_token,
-                )
-            tokenizer_path = cache_dir
-
-        print(tokenizer_path)
-        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
-        self.model = T5EncoderModel.from_pretrained(path, **t5_model_kwargs).eval()
+
+        assert from_pretrained in self.available_models
+        self.tokenizer = AutoTokenizer.from_pretrained(from_pretrained, cache_dir=cache_dir)
+        self.model = T5EncoderModel.from_pretrained(from_pretrained, cache_dir=cache_dir, **t5_model_kwargs).eval()
         self.model_max_length = model_max_length
 
     def get_text_embeddings(self, texts):
@@ -304,16 +265,16 @@ def __init__(
         model_max_length=120,
         device="cuda",
         dtype=torch.float,
-        local_cache=True,
+        cache_dir=None,
         shardformer=False,
     ):
         assert from_pretrained is not None, "Please specify the path to the T5 model"
 
         self.t5 = T5Embedder(
             device=device,
             torch_dtype=dtype,
-            local_cache=local_cache,
-            cache_dir=from_pretrained,
+            from_pretrained=from_pretrained,
+            cache_dir=cache_dir,
             model_max_length=model_max_length,
         )
         self.t5.model.to(dtype=dtype)
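
With the refactor above, the embedder is built from a Hub id instead of a local directory name: `dir_or_name` and `local_cache` are gone, and `from_pretrained` plus `cache_dir` take their place. A sketch of the new call path, using only names visible in this diff (the module import path is assumed from the file location):

```python
# Hedged construction example for the refactored T5Embedder.
import torch
from opensora.models.text_encoder.t5 import T5Embedder

embedder = T5Embedder(
    device="cuda" if torch.cuda.is_available() else "cpu",
    from_pretrained="DeepFloyd/t5-v1_1-xxl",  # must appear in available_models
    cache_dir=None,  # None falls through to the default Hugging Face cache
    model_max_length=120,
)
embeddings = embedder.get_text_embeddings(["a drone shot of coastal cliffs"])
```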