Branch trellis update #423

Open · wants to merge 6 commits into base: main
Owner


Please remove some of the unnecessary changes and switch this back to xformers, since xformers actually supports flash_attn.

@@ -1,6 +1,6 @@
 from typing import *

-BACKEND = 'xformers'
+BACKEND = 'flash_attn'
 DEBUG = False

 def __from_env():
2 changes: 1 addition & 1 deletion Gen_3D_Modules/TRELLIS/trellis/modules/sparse/__init__.py
@@ -2,7 +2,7 @@

 BACKEND = 'spconv'
 DEBUG = False
-ATTN = 'xformers'
+ATTN = 'flash_attn'

 def __from_env():
     import os
Owner


What's the reason for removing the comfy progress bar?

@@ -2,7 +2,6 @@
 import torch
 import numpy as np
 from tqdm import tqdm
-import comfy.utils
 from easydict import EasyDict as edict
 from .base import Sampler
 from .classifier_free_guidance_mixin import ClassifierFreeGuidanceSamplerMixin
@@ -109,13 +108,11 @@ def sample(
         t_seq = rescale_t * t_seq / (1 + (rescale_t - 1) * t_seq)
         t_pairs = list((t_seq[i], t_seq[i + 1]) for i in range(steps))
         ret = edict({"samples": None, "pred_x_t": [], "pred_x_0": []})
-        comfy_pbar = comfy.utils.ProgressBar(steps)
-        for i, (t, t_prev) in enumerate(tqdm(t_pairs, desc="Sampling", disable=not verbose)):
+        for t, t_prev in tqdm(t_pairs, desc="Sampling", disable=not verbose):
             out = self.sample_once(model, sample, t, t_prev, cond, **kwargs)
             sample = out.pred_x_prev
             ret.pred_x_t.append(out.pred_x_prev)
             ret.pred_x_0.append(out.pred_x_0)
-            comfy_pbar.update_absolute(i + 1)
         ret.samples = sample
         return ret

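One way to address this comment without hard-wiring ComfyUI into the sampler is an optional progress-bar argument. In the sketch below, `sample_loop`, `step_fn`, and `pbar` are hypothetical names illustrating the idea; only the `update_absolute(i + 1)` call matches the ComfyUI-style API the diff removed.

```python
def sample_loop(t_pairs, step_fn, pbar=None):
    """Minimal sketch: run sampling steps, reporting progress to an
    optional external bar (e.g. comfy.utils.ProgressBar) if provided."""
    sample = None
    for i, (t, t_prev) in enumerate(t_pairs):
        sample = step_fn(sample, t, t_prev)
        if pbar is not None:
            # Absolute step index, as in the removed comfy_pbar call.
            pbar.update_absolute(i + 1)
    return sample
```

Passing `pbar=None` keeps the tqdm-only behavior of this PR, while ComfyUI callers can opt back in without the sampler importing `comfy.utils` at module level.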
95 changes: 94 additions & 1 deletion Gen_3D_Modules/TRELLIS/trellis/pipelines/trellis_image_to_3d.py
Owner


Where in the code did you use the newly added function run_multi_image?

@@ -1,4 +1,5 @@
 from typing import *
+from contextlib import contextmanager
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -7,6 +8,7 @@
 from easydict import EasyDict as edict
 from torchvision import transforms
 from PIL import Image
+import rembg
 from .base import Pipeline
 from . import samplers
 from ..modules import sparse as sp
@@ -93,7 +95,6 @@ def preprocess_image(self, input: Image.Image) -> Image.Image:
         if has_alpha:
             output = input
         else:
-            import rembg
             input = input.convert('RGB')
             max_size = max(input.size)
             scale = min(1, 1024 / max_size)
@@ -281,3 +282,95 @@ def run(
        coords = self.sample_sparse_structure(cond, num_samples, sparse_structure_sampler_params)
        slat = self.sample_slat(cond, coords, slat_sampler_params)
        return self.decode_slat(slat, formats)

    @contextmanager
    def inject_sampler_multi_image(
        self,
        sampler_name: str,
        num_images: int,
        num_steps: int,
        mode: Literal['stochastic', 'multidiffusion'] = 'stochastic',
    ):
        """
        Inject a sampler with multiple images as condition.

        Args:
            sampler_name (str): The name of the sampler to inject.
            num_images (int): The number of images to condition on.
            num_steps (int): The number of steps to run the sampler for.
            mode (str): How to combine the conditions ('stochastic' or 'multidiffusion').
        """
        sampler = getattr(self, sampler_name)
        setattr(sampler, '_old_inference_model', sampler._inference_model)

        if mode == 'stochastic':
            if num_images > num_steps:
                print(f"\033[93mWarning: number of conditioning images is greater than number of steps for {sampler_name}. "
                      "This may lead to performance degradation.\033[0m")

            cond_indices = (np.arange(num_steps) % num_images).tolist()

            def _new_inference_model(self, model, x_t, t, cond, **kwargs):
                cond_idx = cond_indices.pop(0)
                cond_i = cond[cond_idx:cond_idx + 1]
                return self._old_inference_model(model, x_t, t, cond=cond_i, **kwargs)

        elif mode == 'multidiffusion':
            from .samplers import FlowEulerSampler

            def _new_inference_model(self, model, x_t, t, cond, neg_cond, cfg_strength, cfg_interval, **kwargs):
                # Average the predictions over all conditioning images.
                preds = []
                for i in range(len(cond)):
                    preds.append(FlowEulerSampler._inference_model(self, model, x_t, t, cond[i:i + 1], **kwargs))
                pred = sum(preds) / len(preds)
                # Apply classifier-free guidance only inside the CFG interval.
                if cfg_interval[0] <= t <= cfg_interval[1]:
                    neg_pred = FlowEulerSampler._inference_model(self, model, x_t, t, neg_cond, **kwargs)
                    return (1 + cfg_strength) * pred - cfg_strength * neg_pred
                return pred

        else:
            raise ValueError(f"Unsupported mode: {mode}")

        sampler._inference_model = _new_inference_model.__get__(sampler, type(sampler))

        yield

        sampler._inference_model = sampler._old_inference_model
        delattr(sampler, '_old_inference_model')

    @torch.no_grad()
    def run_multi_image(
        self,
        images: List[Image.Image],
        num_samples: int = 1,
        seed: int = 42,
        sparse_structure_sampler_params: dict = {},
        slat_sampler_params: dict = {},
        formats: List[str] = ['mesh', 'gaussian', 'radiance_field'],
        preprocess_image: bool = True,
        mode: Literal['stochastic', 'multidiffusion'] = 'stochastic',
    ) -> dict:
        """
        Run the pipeline with multiple images as condition.

        Args:
            images (List[Image.Image]): The multi-view images of the asset.
            num_samples (int): The number of samples to generate.
            seed (int): The random seed.
            sparse_structure_sampler_params (dict): Additional parameters for the sparse structure sampler.
            slat_sampler_params (dict): Additional parameters for the structured latent sampler.
            formats (List[str]): The output formats to decode to.
            preprocess_image (bool): Whether to preprocess the images.
            mode (str): How to combine the conditions ('stochastic' or 'multidiffusion').
        """
        if preprocess_image:
            images = [self.preprocess_image(image) for image in images]
        cond = self.get_cond(images)
        cond['neg_cond'] = cond['neg_cond'][:1]
        torch.manual_seed(seed)
        ss_steps = {**self.sparse_structure_sampler_params, **sparse_structure_sampler_params}.get('steps')
        with self.inject_sampler_multi_image('sparse_structure_sampler', len(images), ss_steps, mode=mode):
            coords = self.sample_sparse_structure(cond, num_samples, sparse_structure_sampler_params)
        slat_steps = {**self.slat_sampler_params, **slat_sampler_params}.get('steps')
        with self.inject_sampler_multi_image('slat_sampler', len(images), slat_steps, mode=mode):
            slat = self.sample_slat(cond, coords, slat_sampler_params)
        return self.decode_slat(slat, formats)
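To answer the question above: `run_multi_image` wraps both of its sampling calls in `inject_sampler_multi_image`. The save/bind-with-`__get__`/restore pattern that context manager relies on can be sketched in isolation; `DummySampler` and `inject_round_robin` below are hypothetical stand-ins, not TRELLIS code.

```python
from contextlib import contextmanager

class DummySampler:
    """Hypothetical stand-in for a TRELLIS sampler."""
    def _inference_model(self, cond):
        return cond

@contextmanager
def inject_round_robin(sampler, num_images, num_steps):
    # Same idea as inject_sampler_multi_image in 'stochastic' mode:
    # cycle through the conditioning images, one per sampler step.
    cond_indices = [i % num_images for i in range(num_steps)]
    old = sampler._inference_model
    def _new(self, cond):
        idx = cond_indices.pop(0)
        return old(cond[idx:idx + 1])
    # __get__ binds the plain function to this instance, as the diff does.
    sampler._inference_model = _new.__get__(sampler, type(sampler))
    try:
        yield
    finally:
        # try/finally makes the restore robust even if sampling raises;
        # the diff restores after a bare yield instead.
        sampler._inference_model = old
```

One design note: restoring inside `finally` guarantees the sampler is un-patched even when an exception escapes the `with` block, which the bare-`yield` version in the diff does not.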
14 changes: 5 additions & 9 deletions Gen_3D_Modules/TRELLIS/trellis/renderers/gaussian_render.py
Owner


Those changes were made to support xformers; please revert them.

@@ -52,10 +52,6 @@ def render(viewpoint_camera, pc : Gaussian, pipe, bg_color : torch.Tensor, scali
     Render the scene.

     Background tensor (bg_color) must be on GPU!
-
-    Original code use the Differential Gaussian Rasterization from https://github.com/autonomousvision/mip-splatting/tree/main/submodules/diff-gaussian-rasterization
-    Modified to use the GaussianRasterizer from https://github.com/ashawkey/diff-gaussian-rasterization
-    Only changes are the inputs to GaussianRasterizationSettings: kernel_size and subpixel_offset are commented out.
     """
     # lazy import
     if 'GaussianRasterizer' not in globals():
@@ -71,16 +67,16 @@ def render(viewpoint_camera, pc : Gaussian, pipe, bg_color : torch.Tensor, scali
     tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
     tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

-    #kernel_size = pipe.kernel_size
-    #subpixel_offset = torch.zeros((int(viewpoint_camera.image_height), int(viewpoint_camera.image_width), 2), dtype=torch.float32, device="cuda")
+    kernel_size = pipe.kernel_size
+    subpixel_offset = torch.zeros((int(viewpoint_camera.image_height), int(viewpoint_camera.image_width), 2), dtype=torch.float32, device="cuda")

     raster_settings = GaussianRasterizationSettings(
         image_height=int(viewpoint_camera.image_height),
         image_width=int(viewpoint_camera.image_width),
         tanfovx=tanfovx,
         tanfovy=tanfovy,
-        #kernel_size=kernel_size,
-        #subpixel_offset=subpixel_offset,
+        kernel_size=kernel_size,
+        subpixel_offset=subpixel_offset,
         bg=bg_color,
         scale_modifier=scaling_modifier,
         viewmatrix=viewpoint_camera.world_view_transform,
@@ -125,7 +121,7 @@ def render(viewpoint_camera, pc : Gaussian, pipe, bg_color : torch.Tensor, scali
     colors_precomp = override_color

     # Rasterize visible Gaussians to image, obtain their radii (on screen).
-    rendered_image, radii, rendered_depth, rendered_alpha = rasterizer(
+    rendered_image, radii = rasterizer(
         means3D = means3D,
         means2D = means2D,
         shs = shs,
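The disagreement in this hunk comes down to rasterizer return arity: the ashawkey fork of diff-gaussian-rasterization returns (image, radii, depth, alpha), while other builds return only (image, radii). Rather than hard-coding either signature, a hedged shim could normalize the output; `unpack_raster_output` is a hypothetical helper for illustration, not TRELLIS code.

```python
def unpack_raster_output(out):
    """Normalize rasterizer output: some diff-gaussian-rasterization forks
    return (image, radii), others (image, radii, depth, alpha)."""
    if len(out) == 2:
        image, radii = out
        depth, alpha = None, None
    else:
        image, radii, depth, alpha = out
    return image, radii, depth, alpha
```

A shim like this would let the renderer work against either fork, sidestepping the revert-vs-keep question for this particular line.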
@@ -2,6 +2,7 @@
 import numpy as np
 from plyfile import PlyData, PlyElement
 from .general_utils import inverse_sigmoid, strip_symmetric, build_scaling_rotation
+import utils3d


 class Gaussian:
@@ -120,14 +121,21 @@ def construct_list_of_attributes(self):
         for i in range(self._rotation.shape[1]):
             l.append('rot_{}'.format(i))
         return l

-    def save_ply(self, path):
+    def save_ply(self, path, transform=[[1, 0, 0], [0, 0, -1], [0, 1, 0]]):
         xyz = self.get_xyz.detach().cpu().numpy()
         normals = np.zeros_like(xyz)
         f_dc = self._features_dc.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
         opacities = inverse_sigmoid(self.get_opacity).detach().cpu().numpy()
         scale = torch.log(self.get_scaling).detach().cpu().numpy()
         rotation = (self._rotation + self.rots_bias[None, :]).detach().cpu().numpy()

+        if transform is not None:
+            transform = np.array(transform)
+            xyz = np.matmul(xyz, transform.T)
+            rotation = utils3d.numpy.quaternion_to_matrix(rotation)
+            rotation = np.matmul(transform, rotation)
+            rotation = utils3d.numpy.matrix_to_quaternion(rotation)
+
         dtype_full = [(attribute, 'f4') for attribute in self.construct_list_of_attributes()]

@@ -137,7 +145,7 @@ def save_ply(self, path):
         el = PlyElement.describe(elements, 'vertex')
         PlyData([el]).write(path)

-    def load_ply(self, path):
+    def load_ply(self, path, transform=[[1, 0, 0], [0, 0, -1], [0, 1, 0]]):
         plydata = PlyData.read(path)

         xyz = np.stack((np.asarray(plydata.elements[0]["x"]),
@@ -172,6 +180,13 @@ def load_ply(self, path):
         for idx, attr_name in enumerate(rot_names):
             rots[:, idx] = np.asarray(plydata.elements[0][attr_name])

+        if transform is not None:
+            transform = np.array(transform)
+            xyz = np.matmul(xyz, transform)
+            rotation = utils3d.numpy.quaternion_to_matrix(rots)
+            rotation = np.matmul(rotation, transform)
+            rots = utils3d.numpy.matrix_to_quaternion(rotation)
+
         # convert to actual gaussian attributes
         xyz = torch.tensor(xyz, dtype=torch.float, device=self.device)
         features_dc = torch.tensor(features_dc, dtype=torch.float, device=self.device).transpose(1, 2).contiguous()
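The default `transform` added to save_ply is an axis swap between coordinate conventions, mapping (x, y, z) to (x, -z, y). Its effect on point positions can be checked in isolation; `transform_point` below is an illustrative helper written without numpy so the sketch is dependency-free, with the matrix taken from the diff.

```python
# Default transform from the diff: maps (x, y, z) -> (x, -z, y).
TRANSFORM = [[1, 0, 0], [0, 0, -1], [0, 1, 0]]

def transform_point(p, transform=TRANSFORM):
    """Apply the same position transform save_ply uses (p @ transform.T),
    one point at a time."""
    return [sum(p[k] * transform[r][k] for k in range(3)) for r in range(3)]
```

Note that load_ply multiplies by `transform` (not `transform.T`), so for this orthogonal matrix the load path inverts the save path, round-tripping points back to the original frame.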
@@ -2,10 +2,7 @@
 from ...modules.sparse import SparseTensor
 from easydict import EasyDict as edict
 from .utils_cube import *
-try:
-    from .flexicubes.flexicubes import FlexiCubes
-except:
-    print("Please install kaolin and diso to use the mesh extractor.")
+from .flexicubes.flexicubes import FlexiCubes


 class MeshExtractResult:
Owner


Please remove all the unnecessary files from flexicubes; this project is already big enough.

@@ -0,0 +1,90 @@
Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.


NVIDIA Source Code License for FlexiCubes


=======================================================================

1. Definitions

“Licensor” means any person or entity that distributes its Work.

“Work” means (a) the original work of authorship made available under
this license, which may include software, documentation, or other files,
and (b) any additions to or derivative works thereof that are made
available under this license.

The terms “reproduce,” “reproduction,” “derivative works,” and
“distribution” have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this license, derivative works
shall not include works that remain separable from, or merely link
(or bind by name) to the interfaces of, the Work.

Works are “made available” under this license by including in or with
the Work either (a) a copyright notice referencing the applicability of
this license to the Work, or (b) a copy of this license.

2. License Grant

2.1 Copyright Grant. Subject to the terms and conditions of this license,
each Licensor grants to you a perpetual, worldwide, non-exclusive,
royalty-free, copyright license to use, reproduce, prepare derivative
works of, publicly display, publicly perform, sublicense and distribute
its Work and any resulting derivative works in any form.

3. Limitations

3.1 Redistribution. You may reproduce or distribute the Work only if
(a) you do so under this license, (b) you include a complete copy of
this license with your distribution, and (c) you retain without
modification any copyright, patent, trademark, or attribution notices
that are present in the Work.

3.2 Derivative Works. You may specify that additional or different terms
apply to the use, reproduction, and distribution of your derivative
works of the Work (“Your Terms”) only if (a) Your Terms provide that the
use limitation in Section 3.3 applies to your derivative works, and (b)
you identify the specific derivative works that are subject to Your Terms.
Notwithstanding Your Terms, this license (including the redistribution
requirements in Section 3.1) will continue to apply to the Work itself.

3.3 Use Limitation. The Work and any derivative works thereof only may be
used or intended for use non-commercially. Notwithstanding the foregoing,
NVIDIA Corporation and its affiliates may use the Work and any derivative
works commercially. As used herein, “non-commercially” means for research
or evaluation purposes only.

3.4 Patent Claims. If you bring or threaten to bring a patent claim against
any Licensor (including any claim, cross-claim or counterclaim in a lawsuit)
to enforce any patents that you allege are infringed by any Work, then your
rights under this license from such Licensor (including the grant in
Section 2.1) will terminate immediately.

3.5 Trademarks. This license does not grant any rights to use any Licensor’s
or its affiliates’ names, logos, or trademarks, except as necessary to
reproduce the notices described in this license.

3.6 Termination. If you violate any term of this license, then your rights
under this license (including the grant in Section 2.1) will terminate
immediately.

4. Disclaimer of Warranty.

THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT.
YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE.

5. Limitation of Liability.

EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY,
WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY
LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE,
THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF
GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR
MALFUNCTION, OR ANY OTHER DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

=======================================================================