From b51fb15583eff67c27ee9b05aca11538f550043b Mon Sep 17 00:00:00 2001 From: "Nathan V. Morrical" Date: Sun, 23 May 2021 17:59:07 -0600 Subject: [PATCH] Development (#97) * removing unified memory for framebuffers, as this hurts multigpu performance in non-nvlink configurations * some fixes for multigpu setups. Some issues with different gpus rendering different images: * changing how max path bounces are handled to avoid long tails. * working on multigpu perf * some steps forward towards dynamic load balancing * fixes to load balancing. Fixes to frame buffer when cuda visible devices set to non-zero value * adding some sync points * Adding support for constant material parameters with import_scene function * updating owl submodule * adding more visibility flags. * assigning visibility masks to entities * forcing denoiser configure to wait * fixing bug where renderToImage wasnt saving the correct buffer * working on a volume sample * simplifying download_content script a bit * adding a volumes example. Some bugs need to be worked out * working on refactoring volume codepath to improve efficiency * refacting volume rendering code. Now much cheaper to render surfaces without any volumes present * upgrades to volume example * ray visibility flags now working. Added an example demonstrating feature * fixing some tests. 
Normal map test is failing * fixing race condition with textures and materials * adding a get_center function to entities, which behaves differently to the aabb center * changing scene importer to allow for degenerate transforms with a warning * adding support for point lights * updating docs * improvements to how import_scene handles normal maps * removing debug print in device code --- CMakeLists.txt | 3 + examples/14.normal_map.py | 6 +- examples/22.volumes.py | 192 +++++ examples/23.ray_visibility.py | 282 ++++++++ examples/content.txt | 12 + examples/download_content.sh | 21 +- examples/requirements.txt | 4 +- externals/owl | 2 +- include/nvisii/entity.h | 22 +- include/nvisii/entity_struct.h | 9 +- include/nvisii/light.h | 4 +- include/nvisii/mesh.h | 10 +- include/nvisii/nvisii.h | 45 +- include/nvisii/texture.h | 3 + include/nvisii/volume.h | 9 +- src/nvisii/devicecode/buffer.h | 2 +- src/nvisii/devicecode/launch_params.h | 14 +- src/nvisii/devicecode/path_tracer.cu | 669 +++++++---------- src/nvisii/entity.cpp | 47 +- src/nvisii/light.cpp | 6 +- src/nvisii/mesh.cpp | 30 +- src/nvisii/nvisii.cpp | 986 ++++++++++++++------------ src/nvisii/nvisii.cu | 2 +- src/nvisii/nvisii_import_scene.cpp | 187 ++++- src/nvisii/texture.cpp | 5 + src/nvisii/volume.cpp | 7 +- 26 files changed, 1615 insertions(+), 964 deletions(-) create mode 100644 examples/22.volumes.py create mode 100644 examples/23.ray_visibility.py create mode 100644 examples/content.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 9055fc1e..6b86d441 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -293,6 +293,9 @@ find_program(BIN2C bin2c /usr/local/cuda/bin) # optix 7 +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/sutil/) +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/) + if ($ENV{OPTIX_VERSION}) set(OPTION_OPTIX_VERSION $ENV{OPTIX_VERSION}) else() diff --git a/examples/14.normal_map.py b/examples/14.normal_map.py index 
dce409ca..f0518799 100644 --- a/examples/14.normal_map.py +++ b/examples/14.normal_map.py @@ -103,9 +103,9 @@ mat.set_specular(0) # load an example brick texture -color_tex = nvisii.texture.create_from_file("color",'content/Bricks051_2K_Color.jpg') -normal_tex = nvisii.texture.create_from_file("normal",'content/Bricks051_2K_Normal.jpg', linear = True) -rough_tex = nvisii.texture.create_from_file("rough",'content/Bricks051_2K_Roughness.jpg', linear = True) +color_tex = nvisii.texture.create_from_file("color",'./content/Bricks051_2K_Color.jpg') +normal_tex = nvisii.texture.create_from_file("normal",'./content/Bricks051_2K_Normal.jpg', linear = True) +rough_tex = nvisii.texture.create_from_file("rough",'./content/Bricks051_2K_Roughness.jpg', linear = True) color_tex.set_scale((.1,.1)) normal_tex.set_scale((.1,.1)) diff --git a/examples/22.volumes.py b/examples/22.volumes.py new file mode 100644 index 00000000..3b1878df --- /dev/null +++ b/examples/22.volumes.py @@ -0,0 +1,192 @@ +#%% + +# 22.volumes.py +# +# This shows an example of several volumes. Some volume uses the NanoVDB format, +# others use a raw volume, and then some are generated procedurally. +# This scene tests how volumes can be lit up with light sources, and how they can +# overlap. + +# Note, the API here is subject to change with future versions... + +import nvisii +import numpy as np +opt = lambda: None +opt.spp = 512 +opt.width = 1024 +opt.height = 1024 +opt.out = '22_volumes.png' + +nvisii.initialize(headless = False, verbose = True, window_on_top = True) +nvisii.enable_denoiser() + +# Configuring the denoiser here to not use albedo and normal guides, which are +# noisy for volumes +nvisii.configure_denoiser(False, False, True) + +# Make a camera... 
+camera = nvisii.entity.create(name = "camera") +camera.set_transform(nvisii.transform.create(name = "camera_transform")) +camera.set_camera( + nvisii.camera.create_from_fov( + name = "camera_camera", + field_of_view = 0.785398, # note, this is in radians + aspect = opt.width / float(opt.height) + ) +) +nvisii.set_camera_entity(camera) +camera.get_transform().look_at(at = (0, 0, .5), up = (0, 0, 1), eye = (0, 5, 2)) + +# Make a dome light +env_tex = nvisii.texture.create_from_file("env_tex", "./content/kiara_4_mid-morning_4k.hdr") +nvisii.enable_dome_light_sampling() +nvisii.set_dome_light_texture(env_tex, enable_cdf=True) +nvisii.set_dome_light_exposure(-2.0) + + +# Make a textured floor +floor = nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) +mat = floor.get_material() +floor_tex = nvisii.texture.create_from_file("floor_tex", "./content/salle_de_bain_separated/textures/WoodFloor_BaseColor.jpg") +mat.set_base_color_texture(floor_tex) +trans = floor.get_transform() +trans.set_scale((5,5,1)) + +# Make a procedural torus volume +torus = nvisii.entity.create( + name="torus", + volume = nvisii.volume.create_torus("torus"), + transform = nvisii.transform.create("torus"), + material = nvisii.material.create("torus") +) +torus.get_transform().set_position((0.8,2,.2)) +torus.get_transform().set_scale((0.003, 0.003, 0.003)) +torus.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) +torus.get_material().set_base_color((1.,1.,1.0)) +# The gradient factor here controls how "surface like" the volume is. 
+# Higher values mean "more surface like" in areas where there is a strong +# gradient in the scalar field of the volume (which occurs near surfaces defined +# by high density regions) +torus.get_volume().set_gradient_factor(10) + +# Absorption controls the probability of light being absorbed by the volume +torus.get_volume().set_absorption(1.) +# Absorption controls the probability of light bouncing off one of the particles in the volume +torus.get_volume().set_scattering(.0) +# The scale here controls how "big" a voxel is, where "1" means a voxel is 1cm wide. +# Larger scales result in particles being distributed over longer distances, +# causing the volume to appear less dense +torus.get_volume().set_scale(100) + +# Create a procedural octahedron +octahedron = nvisii.entity.create( + name="octahedron", + volume = nvisii.volume.create_octahedron("octahedron"), + transform = nvisii.transform.create("octahedron"), + material = nvisii.material.create("octahedron") +) +octahedron.get_transform().set_position((.80,2.0,0.2)) # Note that this octahedron is inside the torus +octahedron.get_transform().set_scale((0.01, 0.01, 0.01)) +octahedron.get_transform().set_angle_axis(nvisii.pi() * .25, (0,0,1)) +octahedron.get_material().set_base_color((1.0,0.0,0)) +octahedron.get_volume().set_gradient_factor(10) +octahedron.get_volume().set_absorption(0) +octahedron.get_volume().set_scattering(1) +octahedron.get_volume().set_scale(15) + +# Create a procedural sphere +sphere = nvisii.entity.create( + name="sphere", + volume = nvisii.volume.create_sphere("sphere"), + transform = nvisii.transform.create("sphere"), + material = nvisii.material.create("sphere") +) +sphere.get_transform().set_position((-1.0,2,0.25)) +sphere.get_transform().set_scale((0.0025, 0.0025, 0.0025)) +sphere.get_material().set_base_color((0.2,0.2,1.0)) +sphere.get_volume().set_gradient_factor(10) +sphere.get_volume().set_absorption(0) +sphere.get_volume().set_scattering(1) +sphere.get_volume().set_scale(100) + 
+# Create a procedural box +box = nvisii.entity.create( + name="box", + volume = nvisii.volume.create_box("box"), + transform = nvisii.transform.create("box"), + material = nvisii.material.create("box") +) +box.get_transform().set_position((-1.0,2,0.25)) +box.get_transform().set_scale((0.005, 0.005, 0.005)) +box.get_transform().set_angle_axis(.3, (0,0,1)) +box.get_material().set_base_color((1.0,1.0,1.0)) +box.get_volume().set_gradient_factor(10) +box.get_volume().set_absorption(0) +box.get_volume().set_scattering(1) +box.get_volume().set_scale(100) + +# Create a cloudy bunny using a nanovdb file +bunny = nvisii.entity.create( + name="bunny", + volume = nvisii.volume.create_from_file("bunny", "./content/bunny_cloud.nvdb"), + transform = nvisii.transform.create("bunny"), + material = nvisii.material.create("bunny") +) +bunny.get_transform().set_position((-.8,.5,0.75)) +bunny.get_transform().set_scale((0.003, 0.003, 0.003)) +bunny.get_material().set_base_color((0.1,0.9,0.08)) +bunny.get_material().set_roughness(0.7) +bunny.get_volume().set_gradient_factor(10) +bunny.get_volume().set_absorption(1) +bunny.get_volume().set_scattering(0) +bunny.get_volume().set_scale(4) +bunny.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) +bunny.get_transform().add_angle_axis(nvisii.pi(), (0,1,0)) + +# Create a boston teapot using a raw CT scanned volume +voxels = np.fromfile("./content/boston_teapot_256x256x178_uint8.raw", dtype=np.uint8).astype(np.float32) +teapot = nvisii.entity.create( + name="teapot", + volume = nvisii.volume.create_from_data("teapot", width = 256, height = 256, depth = 178, data = voxels, background = 0.0), + transform = nvisii.transform.create("teapot"), + material = nvisii.material.create("teapot") +) +teapot.get_transform().set_position((1,0,0.7)) +teapot.get_transform().set_scale((0.005, 0.005, 0.005)) +teapot.get_material().set_base_color((1.0,1.0,1.0)) +teapot.get_material().set_roughness(0.0) +teapot.get_material().set_metallic(1.0) 
+teapot.get_volume().set_gradient_factor(100) +teapot.get_volume().set_absorption(1) +teapot.get_volume().set_scattering(0) +teapot.get_volume().set_scale(250) +teapot.get_transform().set_angle_axis(-nvisii.pi() * .5, (1,0,0)) +teapot.get_transform().add_angle_axis(nvisii.pi() * 1.1, (0,1,0)) + +# Volumes can be lit up using light sources +light = nvisii.entity.create( + name="light", + mesh = nvisii.mesh.create_sphere("light"), + transform = nvisii.transform.create("light"), + light = nvisii.light.create("light") +) +light.get_transform().set_position((0,1,2.5)) +light.get_transform().set_scale((.2,.2,.2)) +light.get_light().set_temperature(4000) +light.get_light().set_intensity(20) + +# Render out the image +print("rendering to", "22_volumes.png") +nvisii.render_to_file( + width = opt.width, + height = opt.height, + samples_per_pixel = opt.spp, + file_path = "22_volumes.png" +) + +nvisii.deinitialize() diff --git a/examples/23.ray_visibility.py b/examples/23.ray_visibility.py new file mode 100644 index 00000000..5311621f --- /dev/null +++ b/examples/23.ray_visibility.py @@ -0,0 +1,282 @@ +import nvisii +import math +import PySide2 +import colorsys +from PySide2.QtCore import * +from PySide2.QtWidgets import * + +nvisii.initialize() +nvisii.resize_window(1000,1000) +nvisii.enable_denoiser() +# nvisii.configure_denoiser(False, False, True) +nvisii.set_max_bounce_depth(diffuse_depth=2, glossy_depth = 8, transparency_depth = 8, transmission_depth = 12, volume_depth = 2) + +# Set the sky +nvisii.disable_dome_light_sampling() +nvisii.set_dome_light_color((0,0,0)) + +# Set camera +camera = nvisii.entity.create( + name = "camera", + transform = nvisii.transform.create(name = "camera_transform"), + camera = nvisii.camera.create( + name = "camera_camera", + aspect = 1.0 + ) +) +camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (0, 5, 2) # eye position +) +nvisii.set_camera_entity(camera) + +# Floor +floor = 
nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) +floor.get_material().set_base_color((0.19,0.16,0.19)) +floor.get_material().set_metallic(0) +floor.get_material().set_roughness(1) +floor.get_transform().set_scale((5,5,1)) + +# Mirror 1 +mirror1 = nvisii.entity.create( + name = "mirror1", + mesh = nvisii.mesh.create_box("mesh_mirror1"), + transform = nvisii.transform.create("transform_mirror1"), + material = nvisii.material.create("material_mirror1") +) +mirror1.get_transform().look_at(eye = (-1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror1.get_material().set_base_color((1.,1.,1.)) +mirror1.get_material().set_metallic(1) +mirror1.get_material().set_roughness(0) +mirror1.get_transform().set_scale((.7,.7,.1)) + +# Glass 1 +glass1 = nvisii.entity.create( + name = "glass1", + mesh = nvisii.mesh.create_box("mesh_glass1"), + transform = nvisii.transform.create("transform_glass1"), + material = nvisii.material.create("material_glass1") +) +glass1.get_transform().look_at(eye = (1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass1.get_material().set_base_color((1.,1.,1.)) +glass1.get_material().set_transmission(1) +glass1.get_material().set_roughness(0) +glass1.get_transform().set_scale((.7,.7,.1)) + +# Mirror 2 +mirror2 = nvisii.entity.create( + name = "mirror2", + mesh = nvisii.mesh.create_box("mesh_mirror2"), + transform = nvisii.transform.create("transform_mirror2"), + material = nvisii.material.create("material_mirror2") +) +mirror2.get_transform().look_at(eye = (1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror2.get_material().set_base_color((1.,1.,1.)) +mirror2.get_material().set_metallic(1) +mirror2.get_material().set_roughness(0) +mirror2.get_transform().set_scale((.7,.7,.1)) + +# Glass 2 +glass2 = nvisii.entity.create( + name = "glass2", + mesh = nvisii.mesh.create_box("mesh_glass2"), + transform = 
nvisii.transform.create("transform_glass2"), + material = nvisii.material.create("material_glass2") +) +glass2.get_transform().look_at(eye = (-1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass2.get_material().set_base_color((1.,1.,1.)) +glass2.get_material().set_transmission(1) +glass2.get_material().set_roughness(0) +glass2.get_transform().set_scale((.7,.7,.1)) + +# Fog +fog = nvisii.entity.create( + name = "fog", + volume = nvisii.volume.create_box("mesh_fog"), + transform = nvisii.transform.create("transform_fog"), + material = nvisii.material.create("material_fog") +) +fog.get_material().set_base_color((1.,1.,1.)) +fog.get_material().set_transmission(1) +fog.get_material().set_roughness(0) +fog.get_volume().set_scale(100) + +# Light +light = nvisii.entity.create( + name = "light", + light = nvisii.light.create("light"), + transform = nvisii.transform.create("light"), + mesh = nvisii.mesh.create_sphere("light") +) +light.get_transform().set_position((0,0,5)) +light.get_transform().set_scale((.1,.1,.1)) +light.get_light().set_exposure(7) + +# Light blocker +blocker = nvisii.entity.create( + name = "blocker", + mesh = nvisii.mesh.create_capped_tube("blocker", innerRadius = .04), + transform = nvisii.transform.create("blocker"), + material = nvisii.material.create("blocker") +) +blocker.get_transform().set_scale((10,10,.01)) +blocker.get_transform().set_position((0,0,3.0)) + +# Teapot +teapotahedron = nvisii.entity.create( + name="teapotahedron", + mesh = nvisii.mesh.create_teapotahedron("teapotahedron", segments = 32), + transform = nvisii.transform.create("teapotahedron"), + material = nvisii.material.create("teapotahedron") +) +teapotahedron.get_transform().set_rotation(nvisii.angleAxis(nvisii.pi() / 4.0, (0,0,1))) +teapotahedron.get_transform().set_position((0,0,0)) +teapotahedron.get_transform().set_scale((0.4, 0.4, 0.4)) +teapotahedron.get_material().set_base_color((255.0 / 255.0, 100.0 / 255.0, 2.0 / 256.0)) +teapotahedron.get_material().set_roughness(0.0) 
+teapotahedron.get_material().set_specular(1.0) +teapotahedron.get_material().set_metallic(1.0) + +# Make a QT window to demonstrate the difference between alpha transparency and transmission +app = QApplication([]) # Start an application. +window = QWidget() # Create a window. +layout = QVBoxLayout() # Create a layout. + +def rotateCamera(value): + value = value / 100.0 + cam_pos = camera.get_transform().get_position() + + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (5 * math.cos(value * 2 * nvisii.pi()), 5 * math.sin(value * 2 * nvisii.pi()), cam_pos[2]) # eye position + ) +rotateCamera(0) +dial = QDial() +dial.setWrapping(True) +dial.valueChanged[int].connect(rotateCamera) +layout.addWidget(QLabel('Camera rotation')) +layout.addWidget(dial) + +def rotateCameraElevation(value): + # print(value) + value = value / 100 + cam_pos = camera.get_transform().get_position() + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (cam_pos[0], cam_pos[1], 0.1 + 2.5*value) # eye position + ) + # print(value, 2 * math.cos(value * 2 * nvisii.pi())) + +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(rotateCameraElevation) +slider.setValue(40) +layout.addWidget(QLabel('Camera Elevation')) +layout.addWidget(slider) + +# Add some toggles to demonstrate how the set_visibility function works + +camera_visibility = True +diffuse_visibility = True +glossy_visibility = True +transmission_visibility = True +scatter_visibility = True +shadow_visibility = True +def updateVisibility(): + global camera_visibility + global diffuse_visibility + global glossy_visibility + global transmission_visibility + global scatter_visibility + global shadow_visibility + + teapotahedron.set_visibility( + camera = camera_visibility, + diffuse = diffuse_visibility, + glossy = glossy_visibility, + transmission = transmission_visibility, + volume_scatter = scatter_visibility, + shadow 
= shadow_visibility) + +def toggleCamera(): + global camera_visibility + camera_visibility = not camera_visibility + updateVisibility() +button = QPushButton("toggleCamera") +button.clicked.connect(toggleCamera) +layout.addWidget(button) + +def toggleDiffuse(): + global diffuse_visibility + diffuse_visibility = not diffuse_visibility + updateVisibility() +button = QPushButton("toggleDiffuse") +button.clicked.connect(toggleDiffuse) +layout.addWidget(button) + +def toggleGlossy(): + global glossy_visibility + glossy_visibility = not glossy_visibility + updateVisibility() +button = QPushButton("toggleGlossy") +button.clicked.connect(toggleGlossy) +layout.addWidget(button) + +def toggleTransmission(): + global transmission_visibility + transmission_visibility = not transmission_visibility + updateVisibility() +button = QPushButton("toggleTransmission") +button.clicked.connect(toggleTransmission) +layout.addWidget(button) + +def toggleScattering(): + global scatter_visibility + scatter_visibility = not scatter_visibility + updateVisibility() +button = QPushButton("toggleScattering") +button.clicked.connect(toggleScattering) +layout.addWidget(button) + +def toggleShadows(): + global shadow_visibility + shadow_visibility = not shadow_visibility + updateVisibility() +button = QPushButton("toggleShadows") +button.clicked.connect(toggleShadows) +layout.addWidget(button) + +def setFogStrength(value): + value = (100 - value) * 2 + 10 + fog.get_volume().set_scale(value) +setFogStrength(100) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setFogStrength) +slider.setValue(100) +layout.addWidget(QLabel('Fog Strength')) +layout.addWidget(slider) + + +def setLightHeight(value): + value = value / 100.0 + light.get_transform().set_position((0,0,3 + value * 2)) +setLightHeight(50) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setLightHeight) +slider.setValue(50) +layout.addWidget(QLabel('Light Height')) +layout.addWidget(slider) + + 
+window.setLayout(layout) +window.show() +app.exec_() + +nvisii.deinitialize() \ No newline at end of file diff --git a/examples/content.txt b/examples/content.txt new file mode 100644 index 00000000..232b1289 --- /dev/null +++ b/examples/content.txt @@ -0,0 +1,12 @@ +https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip +https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg +https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png +https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip +https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr +https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip +https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl +https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip +https://www.dropbox.com/s/nim7jsjiumei4f9/boston_teapot_256x256x178_uint8.zip +https://www.dropbox.com/s/yybckz6sawq5nbw/TestNormalMap.png diff --git a/examples/download_content.sh b/examples/download_content.sh index 02b0fe46..c6ca856f 100644 --- a/examples/download_content.sh +++ b/examples/download_content.sh @@ -1,26 +1,15 @@ mkdir content cd content -wget https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip -unzip dragon.zip -d dragon/ -rm dragon.zip - -wget https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg -wget https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +wget -i ../content.txt -wget https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +unzip dragon.zip -d dragon/ +rm dragon.zip unzip salle_de_bain_separated.zip rm salle_de_bain_separated.zip - -wget https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png -wget https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip unzip Bricks051_2K-JPG.zip - -wget https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr - mkdir bmw -wget 
https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip unzip bmw.zip -d bmw/ rm bmw.zip - -wget https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl \ No newline at end of file +unzip bunny_cloud.zip +unzip boston_teapot_256x256x178_uint8.zip diff --git a/examples/requirements.txt b/examples/requirements.txt index ebf9dd38..317f777d 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -4,6 +4,6 @@ noise numpy pillow scipy -pygame open3d -PySide2 \ No newline at end of file +PySide2 +opencv-python diff --git a/externals/owl b/externals/owl index da110c5e..0f82536c 160000 --- a/externals/owl +++ b/externals/owl @@ -1 +1 @@ -Subproject commit da110c5e1453c17c8e3567d187a69b9bc6082943 +Subproject commit 0f82536cd56668ca786d4f5e8eb796e6b03752c9 diff --git a/include/nvisii/entity.h b/include/nvisii/entity.h index 5120184a..a9d5220c 100644 --- a/include/nvisii/entity.h +++ b/include/nvisii/entity.h @@ -211,9 +211,21 @@ class Entity : public StaticFactory { /** * Objects can be set to be invisible to particular ray types: - * @param camera Makes the object visible to camera rays + * @param camera Makes the object visible to camera rays (the first rays to be traced from the camera). + * @param diffuse Makes the object visible to diffuse rays (eg for diffuse GI) + * @param glossy Makes the object visible to glossy rays (eg in reflections) + * @param transmission Makes the object visible to transmission rays (eg from inside glass) + * @param volume_scatter Makes the object visible to volume scatter rays (eg from light simulation inside a volume) + * @param shadow Enables the object to cast shadows. */ - void setVisibility(bool camera = true); + void setVisibility( + bool camera = true, + bool diffuse = true, + bool glossy = true, + bool transmission = true, + bool volume_scatter = true, + bool shadow = true + ); /** @returns the minimum axis aligned bounding box position. Requires a transform and mesh component to be attached. 
*/ glm::vec3 getMinAabbCorner(); @@ -224,6 +236,12 @@ class Entity : public StaticFactory { /** @returns the center of the aligned bounding box. Requires a transform and mesh component to be attached. */ glm::vec3 getAabbCenter(); + /** + * @returns the average of the vertices of the mesh in world space, which will lay roughly in the center. Requires + * a transform and mesh component to be attached. + */ + glm::vec3 getCenter(); + /** For internal use. Returns the mutex used to lock entities for processing by the renderer. */ static std::shared_ptr getEditMutex(); diff --git a/include/nvisii/entity_struct.h b/include/nvisii/entity_struct.h index 41658fa4..86b0d396 100644 --- a/include/nvisii/entity_struct.h +++ b/include/nvisii/entity_struct.h @@ -6,7 +6,12 @@ #ifndef ENTITY_VISIBILITY_FLAGS #define ENTITY_VISIBILITY_FLAGS -#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) +#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) // object is visible to direct camera rays +#define ENTITY_VISIBILITY_DIFFUSE_RAYS (1<<1) // object is visible to diffuse rays +#define ENTITY_VISIBILITY_GLOSSY_RAYS (1<<2) // object is visible to glossy rays +#define ENTITY_VISIBILITY_TRANSMISSION_RAYS (1<<3) // object is visible to transmission rays +#define ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS (1<<4) // object is visible to multiple-scattering volume rays +#define ENTITY_VISIBILITY_SHADOW_RAYS (1<<5) // object is visible to shadow rays (ie, casts shadows) #endif struct EntityStruct { @@ -17,7 +22,7 @@ struct EntityStruct { int32_t light_id = -1; int32_t mesh_id = -1; int32_t volume_id = -1; - int32_t flags = 1; + uint32_t flags = (uint32_t)-1; glm::vec4 bbmin = glm::vec4(0.f); glm::vec4 bbmax = glm::vec4(0.f); }; \ No newline at end of file diff --git a/include/nvisii/light.h b/include/nvisii/light.h index 4b957628..99dd7969 100644 --- a/include/nvisii/light.h +++ b/include/nvisii/light.h @@ -11,7 +11,9 @@ class Texture; /** * A "Light" component illuminates objects in a scene. 
Light components must * be added to an entity with a transform component to have a visible - * impact on the scene. + * impact on the scene. Lights attached to entities with no mesh components + * act like point lights. Otherwise, lights attached to entities with meshes + * will act like mesh lights. */ class Light : public StaticFactory { friend class StaticFactory; diff --git a/include/nvisii/mesh.h b/include/nvisii/mesh.h index 6bb73cca..91980cc4 100644 --- a/include/nvisii/mesh.h +++ b/include/nvisii/mesh.h @@ -699,6 +699,8 @@ class Mesh : public StaticFactory * @param position_dimensions The number of floats per position. Valid numbers are 3 or 4. * @param normals A list of vertex normals. If indices aren't supplied, this must be a multiple of 3. * @param normal_dimensions The number of floats per normal. Valid numbers are 3 or 4. + * @param tangents A list of vertex tangents. If indices aren't supplied, this must be a multiple of 3. + * @param tangent_dimensions The number of floats per tangent. Valid numbers are 3 or 4. * @param colors A list of per-vertex colors. If indices aren't supplied, this must be a multiple of 3. * @param color_dimensions The number of floats per color. Valid numbers are 3 or 4. * @param texcoords A list of 2D per-vertex texture coordinates. If indices aren't supplied, this must be a multiple of 3. @@ -712,6 +714,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions = 3, std::vector normals = std::vector(), uint32_t normal_dimensions = 3, + std::vector tangents = std::vector(), + uint32_t tangent_dimensions = 3, std::vector colors = std::vector(), uint32_t color_dimensions = 4, std::vector texcoords = std::vector(), @@ -843,8 +847,8 @@ class Mesh : public StaticFactory // /* TODO: Explain this */ // void save_tetrahedralization(float quality_bound, float maximum_volume); - /** @returns the last computed mesh centroid. 
*/ - glm::vec3 getCentroid(); + /** @returns the average of the vertices of the mesh, which will lay roughly in the center. */ + glm::vec3 getCenter(); /** @returns the minimum axis aligned bounding box position */ glm::vec3 getMinAabbCorner(); @@ -1012,6 +1016,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index e7126249..635ad212 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -207,7 +207,7 @@ void setDirectLightingClamp(float clamp); */ void setMaxBounceDepth( uint32_t diffuse_depth = 2, - uint32_t glossy_depth = 2, + uint32_t glossy_depth = 8, uint32_t transparency_depth = 8, uint32_t transmission_depth = 12, uint32_t volume_depth = 2 @@ -300,10 +300,21 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samples_per_pixel, s * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. 
+ * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "tangent" for rendering out the world space tangent of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param seed A seed used to initialize the random number generator. */ std::vector renderData( @@ -319,10 +330,20 @@ std::vector renderData( * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. 
Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. + * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param file_path The path to use to save the file, including the extension. Supported extensions are EXR, HDR, and PNG * @param seed A seed used to initialize the random number generator. 
*/ @@ -358,11 +379,11 @@ struct Scene { * * @param filepath The path for the file to load * @param position A change in position to apply to all entities generated by this function - * @param position A change in scale to apply to all entities generated by this function - * @param position A change in rotation to apply to all entities generated by this function + * @param scale A change in scale to apply to all entities generated by this function + * @param rotation A change in rotation to apply to all entities generated by this function * @param args A list of optional arguments that can effect the importer. * Possible options include: - * "verbose" - print out information related to loading the scene. + * "verbose" - print out information related to loading the scene. Useful for debugging! */ Scene importScene( std::string file_path, diff --git a/include/nvisii/texture.h b/include/nvisii/texture.h index 107cb231..6bb64faa 100644 --- a/include/nvisii/texture.h +++ b/include/nvisii/texture.h @@ -209,6 +209,9 @@ class Texture : public StaticFactory /** @returns True if the texture is represented linearly. Otherwise, the texture is in sRGB space */ bool isLinear(); + /** @param is_linear If True, texels will be interpreted as linear space. Otherwise, the texels will be interpreed as sRGB space */ + void setLinear(bool is_linear); + private: /* TODO */ static std::shared_ptr editMutex; diff --git a/include/nvisii/volume.h b/include/nvisii/volume.h index d71236d3..7ceddeb8 100644 --- a/include/nvisii/volume.h +++ b/include/nvisii/volume.h @@ -78,8 +78,15 @@ class Volume : public StaticFactory * is 0 and inactive, the interior is active with values varying * smoothly from 0 at the surface of the box to 1 at the half width * and interior of the box. + * @param name The name of the volume to create. + * @param size The width, height, and depth of the box in local units. 
+ * @param center The center of the box in local units + * @param half_width The half-width of the narrow band in voxel units */ - static Volume *createBox(std::string name); + static Volume *createBox(std::string name, + glm::vec3 size = glm::vec3(100.f), + glm::vec3 center = glm::vec3(0.f), + float half_width = 3.f); /** * Creates a sparse fog volume of an octahedron such that the exterior diff --git a/src/nvisii/devicecode/buffer.h b/src/nvisii/devicecode/buffer.h index 5464326e..8354d66f 100644 --- a/src/nvisii/devicecode/buffer.h +++ b/src/nvisii/devicecode/buffer.h @@ -35,4 +35,4 @@ class Buffer : public owl::device::Buffer #define GET(RETURN, TYPE, BUFFER, ADDRESS) \ if (BUFFER.data == nullptr) {::printf("Device Side Error on Line %d: buffer was nullptr.\n", __LINE__); asm("trap;");} \ if (ADDRESS >= BUFFER.count) {::printf("Device Side Error on Line %d: out of bounds access (address: %d, size %d).\n", __LINE__, ADDRESS, uint32_t(BUFFER.count)); asm("trap;");} \ -RETURN = ((TYPE*)BUFFER.data)[ADDRESS];\ +RETURN = ((TYPE*)BUFFER.data)[ADDRESS]; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index ea2c2d51..5da1324b 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -19,16 +19,17 @@ #include "./buffer.h" struct LaunchParams { + Buffer assignmentBuffer; + glm::ivec2 frameSize; uint64_t frameID = 0; glm::vec4 *frameBuffer; - glm::vec4 *albedoBuffer; + uchar4 *albedoBuffer; glm::vec4 *normalBuffer; glm::vec4 *scratchBuffer; glm::vec4 *mvecBuffer; glm::vec4 *accumPtr; - OptixTraversableHandle surfacesIAS; - OptixTraversableHandle volumesIAS; + OptixTraversableHandle IAS; float domeLightIntensity = 1.f; float domeLightExposure = 0.f; glm::vec3 domeLightColor = glm::vec3(-1.f); @@ -58,8 +59,7 @@ struct LaunchParams { Buffer textures; Buffer volumes; Buffer lightEntities; - Buffer surfaceInstanceToEntity; - Buffer volumeInstanceToEntity; + Buffer instanceToEntity; uint32_t 
numLightEntities = 0; Buffer> vertexLists; @@ -111,7 +111,9 @@ enum RenderDataFlags : uint32_t { TRANSMISSION_INDIRECT_LIGHTING = 17, RAY_DIRECTION = 18, HEATMAP = 19, - TEXTURE_COORDINATES = 20 + TEXTURE_COORDINATES = 20, + DEVICE_ID = 21, + TANGENT = 22 }; #define MAX_LIGHT_SAMPLES 10 diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index ed8344db..4b5c896a 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -244,217 +244,39 @@ void SampleDeltaTracking( } } -// bool debug = (prd.primitiveID == -2); -// if (debug) { -// if (! ((mn[0] < x[0]) && (x[0] < mx[0]) && -// (mn[1] < x[1]) && (x[1] < mx[1]) && -// (mn[2] < x[2]) && (x[2] < mx[2])) -// ) { -// printf("X"); -// } else { -// printf("O"); -// } -// } -// if (debug) { -// printf("\n"); -// } - OPTIX_CLOSEST_HIT_PROGRAM(VolumeMesh)() { auto &LP = optixLaunchParams; RayPayload &prd = owl::getPRD(); - const auto &self = owl::getProgramData(); - LCGRand rng = prd.rng; - - // Load the volume we hit - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = 
t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } - - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } + optixGetObjectToWorldTransformMatrix(prd.localToWorld); - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. - t1 = t1 - t; - } - } + // If we don't need motion vectors, (or in the future if an object + // doesn't have motion blur) then return. + if (LP.renderDataMode == RenderDataFlags::NONE) return; - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - - auto sampler = nanovdb::SampleFromVoxels, /*Interpolation Degree*/1, /*UseCache*/false>(acc); - auto coord_pos = nanovdb::Coord::Floor( nanovdb::Vec3f(x.x, x.y, x.z) ); - float densityValue = acc.getValue(coord_pos); - auto g = sampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); - - prd.mp = make_float3(x - offset); // not super confident about this offset... - prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY - prd.density = densityValue; - optixGetObjectToWorldTransformMatrix(prd.localToWorld); - - // If we don't need motion vectors, (or in the future if an object - // doesn't have motion blur) then return. 
- if (LP.renderDataMode == RenderDataFlags::NONE) return; + OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); + float4 trf00, trf01, trf02; + float4 trf10, trf11, trf12; - OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); - float4 trf00, trf01, trf02; - float4 trf10, trf11, trf12; - - optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); - optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); - memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); - memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); - memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); - memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); - memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); - memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); - } + optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); + optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); + memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); + memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); + memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); + memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); + memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); + memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); } OPTIX_CLOSEST_HIT_PROGRAM(VolumeShadowRay)() { - auto &LP = optixLaunchParams; - const auto &self = owl::getProgramData(); - RayPayload &prd = owl::getPRD(); - LCGRand rng = prd.rng; - - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = 
bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } - - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } - - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. - t1 = t1 - t; - } - } - - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - } } OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() { - // float old_tmax = optixGetRayTmax(); - - // const int primID = optixGetPrimitiveIndex(); + auto &LP = optixLaunchParams; const auto &self = owl::getProgramData(); + RayPayload &prd = owl::getPRD(); float3 origin = optixGetObjectRayOrigin(); // note, this is _not_ normalized. 
Useful for computing world space tmin/mmax @@ -492,12 +314,78 @@ OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() // clip hit to near position thit0 = max(thit0, optixGetRayTmin()); - RayPayload &prd = owl::getPRD(); - if (optixReportIntersection(thit0, /* hit kind */ 0)) { - prd.objectSpaceRayOrigin = origin; - prd.objectSpaceRayDirection = direction; - prd.t0 = max(prd.t0, thit0); - prd.t1 = min(prd.t1, thit1); + // Load the volume we hit + GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); + uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; + const auto grid = reinterpret_cast(hdl); + const auto& tree = grid->tree(); + auto acc = tree.getAccessor(); + auto nvdbSampler = nanovdb::SampleFromVoxels, + /*Interpolation Degree*/1, /*UseCache*/false>(acc); + + float majorant_extinction = acc.root().valueMax(); + float gradient_factor = volume.gradient_factor; + float linear_attenuation_unit = volume.scale; + float absorption = volume.absorption; + float scattering = volume.scattering; + + auto bbox = acc.root().bbox(); + auto mx = bbox.max(); + auto mn = bbox.min(); + float3 offset = make_float3(glm::vec3(mn[0], mn[1], mn[2]) + + (glm::vec3(mx[0], mx[1], mx[2]) - + glm::vec3(mn[0], mn[1], mn[2])) * .5f); + + // Sample the free path distance to see if our ray makes it to the boundary + float t = thit0; + int event; + bool hitVolume = false; + float unit = volume.scale / length(direction); + #define MAX_NULL_COLLISIONS 1000 + for (int i = 0; i < MAX_NULL_COLLISIONS; ++i) { + // Sample a distance + t = t - (log(1.0f - lcg_randomf(prd.rng)) / majorant_extinction) * unit; + + // A boundary has been hit, no intersection + if (t >= thit1) return; + + // Update current position + float3 x = offset + origin + t * direction; + + // Sample heterogeneous media + float densityValue = nvdbSampler(nanovdb::Vec3f(x.x, x.y, x.z)); + + float a = densityValue * absorption; + float s = densityValue * scattering; + float e = a + s; + float n = 
majorant_extinction - e; + + a = a / majorant_extinction; + s = s / majorant_extinction; + n = n / majorant_extinction; + + float event = lcg_randomf(prd.rng); + // An absorption/emission collision occured + if (event < (a + s)) { + if (optixReportIntersection(t, /* hit kind */ 0)) { + auto g = nvdbSampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); + prd.objectSpaceRayOrigin = origin; + prd.objectSpaceRayDirection = direction; + prd.eventID = (event < a) ? 1 : 2; + prd.instanceID = optixGetInstanceIndex(); + prd.tHit = t; + prd.mp = x - offset; // not super confident about this offset... + prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY + prd.density = densityValue; + } + return; + } + + // A null collision occurred + else { + event = 3; + continue; + } } } @@ -644,7 +532,7 @@ float sampleTime(float xi) { } inline __device__ -owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, ivec2 pixelID, ivec2 frameSize, LCGRand &rng, float time) +owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, int2 pixelID, float2 frameSize, LCGRand &rng, float time) { auto &LP = optixLaunchParams; /* Generate camera rays */ @@ -665,7 +553,7 @@ owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transfor - vec2(LP.xPixelSamplingInterval[0], LP.yPixelSamplingInterval[0]) ) * vec2(lcg_randomf(rng),lcg_randomf(rng)); - vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / vec2(frameSize); + vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / make_vec2(frameSize); vec3 right = normalize(glm::column(viewinv, 0)); vec3 up = normalize(glm::column(viewinv, 1)); vec3 origin = glm::column(viewinv, 3); @@ -704,16 +592,19 @@ void initializeRenderData(float3 &renderData) auto &LP = optixLaunchParams; // these might change in the future... 
if (LP.renderDataMode == RenderDataFlags::NONE) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::DEPTH) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::POSITION) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::NORMAL) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); + } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { renderData = make_float3(0.0f); @@ -811,7 +702,7 @@ __device__ void saveGeometricRenderData( float3 &renderData, int bounce, float depth, - float3 w_p, float3 w_n, float3 w_o, float2 uv, + float3 w_p, float3 w_n, float3 w_x, float3 w_o, float2 uv, int entity_id, float3 diffuse_mvec, float time, DisneyMaterial &mat) { @@ -828,6 +719,9 @@ void saveGeometricRenderData( else if (LP.renderDataMode == RenderDataFlags::NORMAL) { renderData = w_n; } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = w_x; + } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { glm::quat r0 = glm::quat_cast(LP.viewT0); glm::quat r1 = glm::quat_cast(LP.viewT1); @@ -872,6 +766,18 @@ void saveHeatmapRenderData( renderData = make_float3(relClock); } +__device__ +void saveDeviceAssignment( + float3 &renderData, + int bounce, + uint32_t deviceIndex +) +{ + auto &LP = optixLaunchParams; + if (LP.renderDataMode != RenderDataFlags::DEVICE_ID) return; + renderData = make_float3(deviceIndex); +} + __device__ bool debugging() { #ifndef DEBUGGING @@ -885,23 +791,28 @@ bool debugging() { OPTIX_RAYGEN_PROGRAM(rayGen)() { const RayGenData &self = owl::getProgramData(); - cudaTextureObject_t envTex = getEnvironmentTexture(); - auto &LP = optixLaunchParams; auto launchIndex = 
optixGetLaunchIndex().x; auto launchDim = optixGetLaunchDimensions().x; - auto pixelID = ivec2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); - bool debug = (pixelID.x == int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); + auto pixelID = make_int2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); - float tmax = 1e20f; //todo: customize depending on scene bounds //glm::distance(LP.sceneBBMin, LP.sceneBBMax); + // Terminate thread if current pixel not assigned to this device + GET(float start, float, LP.assignmentBuffer, self.deviceIndex); + GET(float stop, float, LP.assignmentBuffer, self.deviceIndex + 1); + start *= (LP.frameSize.x * LP.frameSize.y); + stop *= (LP.frameSize.x * LP.frameSize.y); - /* compute who is repsonible for a given group of pixels */ - /* and if it's not us, just return. */ - /* (some other device will compute these pixels) */ - int deviceThatIsResponsible = (pixelID.x>>5) % self.deviceCount; - if (self.deviceIndex != deviceThatIsResponsible) { - return; - } + // if (launchIndex == 0) { + // printf("device %d start %f stop %f\n", self.deviceIndex, start, stop); + // } + + if( pixelID.x > LP.frameSize.x-1 || pixelID.y > LP.frameSize.y-1 ) return; + if( (launchIndex < start) || (stop <= launchIndex) ) return; + // if (self.deviceIndex == 1) return; + + cudaTextureObject_t envTex = getEnvironmentTexture(); + bool debug = (pixelID.x == int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); + float tmax = 1e20f; //todo: customize depending on scene bounds //glm::distance(LP.sceneBBMin, LP.sceneBBMax); auto dims = ivec2(LP.frameSize.x, LP.frameSize.x); uint64_t start_clock = clock(); @@ -913,7 +824,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float time = sampleTime(lcg_randomf(rng)); // If no camera is in use, just display some random noise... 
- owl::Ray surfRay; + owl::Ray ray; EntityStruct camera_entity; TransformStruct camera_transform; CameraStruct camera; @@ -924,8 +835,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Trace an initial ray through the scene - surfRay = generateRay(camera, camera_transform, pixelID, LP.frameSize, rng, time); - surfRay.tmax = tmax; + ray = generateRay(camera, camera_transform, pixelID, make_float2(LP.frameSize), rng, time); + ray.tmax = tmax; float3 accum_illum = make_float3(0.f); float3 pathThroughput = make_float3(1.f); @@ -940,30 +851,20 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() uint8_t transparencyDepth = 0; uint8_t transmissionDepth = 0; uint8_t volumeDepth = 0; + int sampledBsdf = -1; + bool useBRDF = true; // direct here is used for final image clamping float3 directIllum = make_float3(0.f); float3 illum = make_float3(0.f); - RayPayload surfPayload; - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( /*accel to trace against*/ LP.surfacesIAS, - /*the ray to trace*/ surfRay, - /*prd*/ surfPayload, - OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - owl::Ray volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - RayPayload volPayload; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-2 : -1; - owl::traceRay( /*accel to trace against*/ LP.volumesIAS, - /*the ray to trace*/ volRay, - /*prd*/ volPayload, + RayPayload payload; + payload.tHit = -1.f; + ray.time = time; + ray.visibilityMask = ENTITY_VISIBILITY_CAMERA_RAYS; + owl::traceRay( /*accel to trace against*/ LP.IAS, + /*the ray to trace*/ ray, + /*prd*/ payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Shade each hit point on a path using NEE with MIS @@ -971,22 +872,22 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha = 0.f; // If ray misses, terminate the ray - if ((surfPayload.tHit <= 0.f) && (volPayload.tHit <= 0.f)) { + if (payload.tHit <= 0.f) { // Compute lighting from environment if (depth == 0) { - float3 col = missColor(surfRay, envTex); + float3 col = missColor(ray, envTex); illum = illum + pathThroughput * (col * LP.domeLightIntensity); directIllum = illum; primaryAlbedo = col; } else if (enableDomeSampling) - illum = illum + pathThroughput * (missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); + illum = illum + pathThroughput * (missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); const float envDist = 10000.0f; // large value /* Compute miss motion vector */ float3 mvec; // Point far away - float3 pFar = surfRay.origin + surfRay.direction * envDist; + float3 pFar = ray.origin + ray.direction * envDist; // TODO: account for motion from rotating dome light vec4 tmp1 = LP.proj * LP.viewT0 * /*xfmt0 **/ make_vec4(pFar, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -997,64 +898,40 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() break; } - bool isVolume = (volPayload.tHit >= 0.f); // Load the object we hit. 
- int entityID; - if (isVolume) { GET(entityID, int, LP.volumeInstanceToEntity, volPayload.instanceID); } - else { GET(entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } - + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); GET(EntityStruct entity, EntityStruct, LP.entities, entityID); GET(TransformStruct transform, TransformStruct, LP.transforms, entity.transform_id); + + bool isVolume = (entity.volume_id != -1); MeshStruct mesh; VolumeStruct volume; if (!isVolume) { GET(mesh, MeshStruct, LP.meshes, entity.mesh_id); } else { GET(volume, VolumeStruct, LP.volumes, entity.volume_id); } - // Skip forward if the hit object is invisible for this ray type, skip it. - if (((entity.flags & ENTITY_VISIBILITY_CAMERA_RAYS) == 0)) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -3 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - transparencyDepth++; - if (transparencyDepth > LP.maxTransparencyDepth) break; - continue; - } - // Set new outgoing light direction and hit position. 
- const float3 w_o = -surfRay.direction; - float3 hit_p; - if (volPayload.tHit >= 0.f) hit_p = volRay.origin + volPayload.tHit * volRay.direction; - else hit_p = surfRay.origin + surfPayload.tHit * surfRay.direction; + const float3 w_o = -ray.direction; + float3 hit_p = ray.origin + payload.tHit * ray.direction; // Load geometry data for the hit object float3 mp, p, v_x, v_y, v_z, v_gz, v_bz; float2 uv; float3 diffuseMotion; - if (volPayload.tHit >= 0.f) { + if (isVolume) { v_x = v_y = make_float3(0.f); // Perhaps I could use divergence / curl here? - v_z = v_gz = normalize(volPayload.gradient); + v_z = v_gz = normalize(payload.gradient); if (any(isnan(make_vec3(v_z)))) v_z = v_gz = make_float3(0.f); - mp = volPayload.mp; - uv = make_float2(volPayload.density, length(volPayload.gradient)); + mp = payload.mp; + uv = make_float2(payload.density, length(payload.gradient)); } else { int3 indices; - loadMeshTriIndices(entity.mesh_id, mesh.numTris, surfPayload.primitiveID, indices); - loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, mp, v_gz); - loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, uv); - loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_z); - loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_x); + loadMeshTriIndices(entity.mesh_id, mesh.numTris, payload.primitiveID, indices); + loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, mp, v_gz); + loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, uv); + loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_z); + loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_x); } // Load material data for the hit object @@ -1066,7 +943,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Transform geometry data into world space { - glm::mat4 xfm = to_mat4((volPayload.tHit >= 0.f) ? 
volPayload.localToWorld : surfPayload.localToWorld); + glm::mat4 xfm = to_mat4(payload.localToWorld); p = make_float3(xfm * make_vec4(mp, 1.0f)); hit_p = p; glm::mat3 nxfm = transpose(glm::inverse(glm::mat3(xfm))); @@ -1074,11 +951,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() v_z = make_float3(normalize(nxfm * make_vec3(v_z))); v_x = make_float3(normalize(nxfm * make_vec3(v_x))); v_y = cross(v_z, v_x); - v_x = cross(v_y, v_z); + // v_x = cross(v_y, v_z); if (LP.renderDataMode != RenderDataFlags::NONE) { - glm::mat4 xfmt0 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT0 : surfPayload.localToWorldT0); - glm::mat4 xfmt1 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT1 : surfPayload.localToWorldT1); + glm::mat4 xfmt0 = to_mat4(payload.localToWorldT0); + glm::mat4 xfmt1 = to_mat4(payload.localToWorldT1); vec4 tmp1 = LP.proj * LP.viewT0 * xfmt0 * make_vec4(mp, 1.0f); vec4 tmp2 = LP.proj * LP.viewT1 * xfmt1 * make_vec4(mp, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -1157,7 +1034,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // For segmentations, save geometric metadata - saveGeometricRenderData(renderData, depth, surfPayload.tHit, hit_p, v_z, w_o, uv, entityID, diffuseMotion, time, mat); + saveGeometricRenderData(renderData, depth, payload.tHit, hit_p, v_z, v_x, w_o, uv, entityID, diffuseMotion, time, mat); if (depth == 0) { primaryAlbedo = mat.base_color; primaryNormal = v_z; @@ -1168,20 +1045,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha_rnd = lcg_randomf(rng); if (alpha_rnd > mat.alpha) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + // ray.visibilityMask reuses the last visibility mask here + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); ++depth; transparencyDepth++; continue; @@ -1192,14 +1060,14 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Note that NEE/MIS will also potentially terminate the path, preventing double-counting. // todo: account for volumetric emission here... if (entity.light_id >= 0 && entity.light_id < LP.lights.count) { - float dotNWi = max(dot(surfRay.direction, v_z), 0.f); + float dotNWi = max(dot(ray.direction, v_z), 0.f); if ((dotNWi > EPSILON) && (depth != 0)) break; GET(LightStruct entityLight, LightStruct, LP.lights, entity.light_id); float3 lightEmission; if (entityLight.color_texture_id == -1) lightEmission = make_float3(entityLight.r, entityLight.g, entityLight.b); else lightEmission = sampleTexture(entityLight.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)); - float dist = surfPayload.tHit; + float dist = payload.tHit; lightEmission = (lightEmission * entityLight.intensity); if (depth != 0) lightEmission = (lightEmission * pow(2.f, entityLight.exposure)) / max((dist * dist), 1.f); float3 contribution = pathThroughput * lightEmission; @@ -1214,8 +1082,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 irradiance = make_float3(0.f); // If we hit a volume, use hybrid scattering to determine whether or not to use a BRDF or a phase function. 
- bool useBRDF = true; - if (volPayload.tHit >= 0.f) { + if (isVolume) { float opacity = mat.alpha; // would otherwise be sampled from a transfer function float grad_len = uv.y; float p_brdf = opacity * (1.f - exp(-25.f * pow(volume.gradient_factor, 3.f) * grad_len)); @@ -1232,7 +1099,6 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // First, sample the BRDF / phase function so that we can use the sampled direction for MIS float3 w_i; float bsdfPDF; - int sampledBsdf = -1; float3 bsdf; if (useBRDF) { sample_disney_brdf( @@ -1240,7 +1106,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() w_i, bsdfPDF, sampledBsdf, bsdf); // outputs } else { /* a scatter event occurred */ - if (volPayload.eventID == 2) { + if (payload.eventID == 2) { // currently isotropic. Todo: implement henyey greenstien... float rand1 = lcg_randomf(rng); float rand2 = lcg_randomf(rng); @@ -1256,7 +1122,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } /* An absorption / emission event occurred */ - if (volPayload.eventID == 1) { + if (payload.eventID == 1) { bsdfPDF = 1.f / (4.0 * M_PI); bsdf = make_float3(1.f / (4.0 * M_PI)); w_i = -w_o; @@ -1269,19 +1135,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // At this point, if we are refracting and we ran out of transmission bounces, skip forward. // This avoids creating black regions on glass objects due to bounce limits if (sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth >= LP.maxTransmissionDepth) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + // ray.visibilityMask reuses the last visibility mask here + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Count this as a "transparent" bounce. ++depth; @@ -1346,47 +1204,58 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // sample light sources else { + // Sample the light to compute an incident light ray to this point if (numLights == 0) continue; GET( sampledLightID, int, LP.lightEntities, randomID ); GET( EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID ); GET( LightStruct light_light, LightStruct, LP.lights, light_entity.light_id ); GET( TransformStruct transform, TransformStruct, LP.transforms, light_entity.transform_id ); - GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); - uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); - GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); - GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); - GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); - GET( Buffer texCoords, Buffer, LP.texCoordLists, light_entity.mesh_id ); - GET( int3 triIndex, int3, indices, random_tri_id ); - - // Sample the light to compute an incident light ray to this point + auto ltw = transform.localToWorld; float3 dir; float2 uv; float3 pos = hit_p; - GET(float3 n1, float3, normals, triIndex.x ); - GET(float3 n2, float3, normals, triIndex.y ); - GET(float3 n3, float3, normals, triIndex.z ); - GET(float3 v1, float3, vertices, triIndex.x ); - GET(float3 v2, float3, vertices, triIndex.y ); - GET(float3 v3, float3, vertices, triIndex.z ); - GET(float2 uv1, float2, texCoords, triIndex.x ); - GET(float2 uv2, float2, texCoords, triIndex.y ); - GET(float2 uv3, float2, texCoords, triIndex.z ); - - // Might be 
a bug here with normal transform... - n1 = make_float3(ltw * make_float4(n1, 0.0f)); - n2 = make_float3(ltw * make_float4(n2, 0.0f)); - n3 = make_float3(ltw * make_float4(n3, 0.0f)); - v1 = make_float3(ltw * make_float4(v1, 1.0f)); - v2 = make_float3(ltw * make_float4(v2, 1.0f)); - v3 = make_float3(ltw * make_float4(v3, 1.0f)); - sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, - lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, - /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + // The sampled light is a point light + if ((light_entity.mesh_id < 0) || (light_entity.mesh_id >= LP.meshes.count)) { + numTris = 1.f; + float3 tmp = make_float3(ltw[3]) - pos; + lightDistance = length(tmp); + dir = tmp / lightDistance; + lightPDF = PdfAtoW(1.f/(4.f * M_PI), lightDistance * lightDistance, 1.f); + uv = make_float2(0.f, 0.f); + } + // The sampled light is a mesh light + else { + GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); + uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); + GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); + GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); + GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); + GET( Buffer texCoords, Buffer, LP.texCoordLists, light_entity.mesh_id ); + GET( int3 triIndex, int3, indices, random_tri_id ); + GET(float3 n1, float3, normals, triIndex.x ); + GET(float3 n2, float3, normals, triIndex.y ); + GET(float3 n3, float3, normals, triIndex.z ); + GET(float3 v1, float3, vertices, triIndex.x ); + GET(float3 v2, float3, vertices, triIndex.y ); + GET(float3 v3, float3, vertices, triIndex.z ); + GET(float2 uv1, float2, texCoords, triIndex.x ); + GET(float2 uv2, float2, texCoords, triIndex.y ); + GET(float2 uv3, float2, texCoords, triIndex.z ); + // Might be a bug here with normal transform... 
+ n1 = make_float3(ltw * make_float4(n1, 0.0f)); + n2 = make_float3(ltw * make_float4(n2, 0.0f)); + n3 = make_float3(ltw * make_float4(n3, 0.0f)); + v1 = make_float3(ltw * make_float4(v1, 1.0f)); + v2 = make_float3(ltw * make_float4(v2, 1.0f)); + v3 = make_float3(ltw * make_float4(v3, 1.0f)); + sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, + lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, + /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + numTris = mesh.numTris; + } falloff = light_light.falloff; - numTris = mesh.numTris; lightDir = make_float3(dir.x, dir.y, dir.z); if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); else lightEmission = sampleTexture(light_light.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)) * (light_light.intensity * pow(2.f, light_light.exposure)); @@ -1409,32 +1278,28 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } lightPDF *= (1.f / float(numLights + 1.f)) * (1.f / float(numTris)); if ((lightPDF > 0.0) && (dotNWi > EPSILON)) { - RayPayload surfPayload; surfPayload.instanceID = -2; - RayPayload volPayload = surfPayload; + RayPayload payload; payload.instanceID = -2; + RayPayload volPayload = payload; owl::RayT ray; // shadow ray ray.tmin = EPSILON * 10.f; ray.tmax = lightDistance + EPSILON; // needs to be distance to light, else anyhit logic breaks. ray.origin = hit_p; ray.direction = lightDir; ray.time = time; - owl::traceRay( LP.surfacesIAS, ray, surfPayload, occlusion_flags); - ray.tmax = (surfPayload.instanceID == -2) ? ray.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-5 : -1; - owl::traceRay( LP.volumesIAS, ray, volPayload, occlusion_flags); + ray.visibilityMask = ENTITY_VISIBILITY_SHADOW_RAYS; + owl::traceRay( LP.IAS, ray, payload, occlusion_flags); + ray.tmax = (payload.instanceID == -2) ? ray.tmax : payload.tHit; bool visible; if (randomID == numLights) { // If we sampled the dome light, just check to see if we hit anything - visible = (surfPayload.instanceID == -2) && (volPayload.instanceID == -2); + visible = (payload.instanceID == -2); } else { // If we sampled a light source, then check to see if we hit something other than the light int surfEntity; - if (surfPayload.instanceID == -2) surfEntity = -1; - else { GET(surfEntity, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } - visible = (volPayload.instanceID == -2) && (surfPayload.instanceID == -2 || surfEntity == sampledLightID); + if (payload.instanceID == -2) surfEntity = -1; + else { GET(surfEntity, int, LP.instanceToEntity, payload.instanceID); } + visible = (payload.instanceID == -2 || surfEntity == sampledLightID); } if (visible) { - if (randomID != numLights) lightEmission = lightEmission / max(pow(surfPayload.tHit, falloff),1.f); + if (randomID != numLights) lightEmission = lightEmission / max(pow(payload.tHit, falloff),1.f); float w = power_heuristic(1.f, lightPDF, 1.f, bsdfPDF); float3 Li = (lightEmission * w) / lightPDF; irradiance = irradiance + (l_bsdf * Li); @@ -1452,33 +1317,30 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Next, sample a light source using the importance sampled BDRF direction. - surfRay.origin = hit_p; - surfRay.direction = w_i; - surfRay.tmin = EPSILON;//* 100.f; - surfPayload.instanceID = -1; - surfPayload.tHit = -1.f; - surfRay.time = sampleTime(lcg_randomf(rng)); - owl::traceRay(LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -6 : -1; - owl::traceRay(LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = hit_p; + ray.direction = w_i; + ray.tmin = EPSILON;//* 100.f; + payload.instanceID = -1; + payload.tHit = -1.f; + ray.time = sampleTime(lcg_randomf(rng)); + if (isVolume) ray.visibilityMask = ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + else if (sampledBsdf == DISNEY_TRANSMISSION_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_TRANSMISSION_RAYS; + else if (sampledBsdf == DISNEY_DIFFUSE_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_DIFFUSE_RAYS; + else if (sampledBsdf == DISNEY_GLOSSY_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; + else if (sampledBsdf == DISNEY_CLEARCOAT_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; + owl::traceRay(LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Check if we hit any of the previously sampled lights bool hitLight = false; if (lightPDF > EPSILON) { - float dotNWi = (useBRDF) ? max(dot(surfRay.direction, v_gz), 0.f) : 1.f; // geometry term + float dotNWi = (useBRDF) ? max(dot(ray.direction, v_gz), 0.f) : 1.f; // geometry term // if by sampling the brdf we also hit the dome light... 
- if ((surfPayload.instanceID == -1) && (volPayload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { + if ((payload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { // Case where we hit the background, and also previously sampled the background float w = power_heuristic(1.f, bsdfPDF, 1.f, lightPDF); - float3 lightEmission = missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); + float3 lightEmission = missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); float3 Li = (lightEmission * w) / bsdfPDF; if (dotNWi > 0.f) { @@ -1488,8 +1350,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // else if by sampling the brdf we also hit an area light // TODO: consider hitting emissive voxels? - else if (surfPayload.instanceID != -1 && volPayload.instanceID == -1) { - GET(int entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); + else if (payload.instanceID != -1) { + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); bool visible = (entityID == sampledLightID); // We hit the light we sampled previously if (visible) { @@ -1497,10 +1359,10 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() GET(EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID); GET(MeshStruct light_mesh, MeshStruct, LP.meshes, light_entity.mesh_id); GET(LightStruct light_light, LightStruct, LP.lights, light_entity.light_id); - loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, surfPayload.primitiveID, indices); - loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, surfPayload.barycentrics, uv); + loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, payload.primitiveID, indices); + loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, payload.barycentrics, uv); - float dist = surfPayload.tHit; + float dist = payload.tHit; float3 lightEmission; if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, 
light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); @@ -1554,16 +1416,22 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // of terminating, just so that we don't get black regions in our glass if (transmissionDepth >= LP.maxTransmissionDepth) continue; } while ( - diffuseDepth < LP.maxDiffuseDepth && - glossyDepth < LP.maxGlossyDepth && - // transmissionDepth < LP.maxTransmissionDepth && // see comment above - transparencyDepth < LP.maxTransparencyDepth && - volumeDepth < LP.maxVolumeDepth + // Terminate the path if the sampled BRDF's corresponding bounce depth exceeds the max bounce for that bounce type minus the overall path depth. + // This prevents long tails that can otherwise occur from mixing BRDF events + (!(sampledBsdf == DISNEY_DIFFUSE_BRDF && diffuseDepth > (LP.maxDiffuseDepth - (depth - 1)))) && + (!(sampledBsdf == DISNEY_GLOSSY_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(sampledBsdf == DISNEY_CLEARCOAT_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(useBRDF == false && volumeDepth > LP.maxVolumeDepth - (depth - 1))) && + (!(transparencyDepth > LP.maxTransparencyDepth - (depth - 1))) + // (!(sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth < LP.maxTransmissionDepth - (depth - 1)) ) && // see comment above ); // For segmentations, save heatmap metadata saveHeatmapRenderData(renderData, depth, start_clock); + // Device assignment data + saveDeviceAssignment(renderData, depth, self.deviceIndex); + // clamp out any extreme fireflies glm::vec3 gillum = vec3(illum.x, illum.y, illum.z); glm::vec3 dillum = vec3(directIllum.x, directIllum.y, directIllum.z); @@ -1608,7 +1476,18 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() else { // Override framebuffer output if user requested to render metadata accum_illum = make_float3(renderData.x, renderData.y, renderData.z); + if (isnan(renderData.x) || isnan(renderData.y) || isnan(renderData.z) || + isinf(renderData.x) || isinf(renderData.y) || isinf(renderData.z) || + 
isnan(prev_color.x) || isnan(prev_color.y) || isnan(prev_color.z) || + isinf(prev_color.x) || isinf(prev_color.y) || isinf(prev_color.z)) { + accum_illum = make_float3(0.f, 0.f, 0.f); + prev_color = make_float4(0.f, 0.f, 0.f, 1.f); + } accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); + + // if (debug) { + // printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); + // } } diff --git a/src/nvisii/entity.cpp b/src/nvisii/entity.cpp index 52987292..e04df6ae 100644 --- a/src/nvisii/entity.cpp +++ b/src/nvisii/entity.cpp @@ -32,6 +32,7 @@ Entity::Entity(std::string name, uint32_t id) { entity.material_id = -1; entity.light_id = -1; entity.mesh_id = -1; + entity.flags = (uint32_t)-1; } std::string Entity::toString() @@ -253,7 +254,14 @@ Mesh* Entity::getMesh() return &mesh; } -void Entity::setVisibility(bool camera) +void Entity::setVisibility( + bool camera, + bool diffuse, + bool glossy, + bool transmission, + bool volume_scatter, + bool shadow +) { std::lock_guard lock(*Entity::getEditMutex().get()); @@ -263,6 +271,36 @@ void Entity::setVisibility(bool camera) } else { entity.flags &= (~ENTITY_VISIBILITY_CAMERA_RAYS); } + + if (diffuse) { + entity.flags |= ENTITY_VISIBILITY_DIFFUSE_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_DIFFUSE_RAYS); + } + + if (glossy) { + entity.flags |= ENTITY_VISIBILITY_GLOSSY_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_GLOSSY_RAYS); + } + + if (transmission) { + entity.flags |= ENTITY_VISIBILITY_TRANSMISSION_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_TRANSMISSION_RAYS); + } + + if (volume_scatter) { + entity.flags |= ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS); + } + + if (shadow) { + entity.flags |= ENTITY_VISIBILITY_SHADOW_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_SHADOW_RAYS); + } markDirty(); } @@ -281,6 +319,13 @@ glm::vec3 
Entity::getAabbCenter() return entityStructs[id].bbmin + (entityStructs[id].bbmax - entityStructs[id].bbmin) * .5f; } +glm::vec3 Entity::getCenter() +{ + if (!getTransform()) throw std::runtime_error("Error: no transform attached to entity"); + if (!getMesh()) throw std::runtime_error("Error: no mesh attached to entity"); + return glm::vec3(getTransform()->getLocalToWorldMatrix() * glm::vec4(getMesh()->getCenter(), 1.f)); +} + void Entity::initializeFactory(uint32_t max_components) { if (isFactoryInitialized()) return; diff --git a/src/nvisii/light.cpp b/src/nvisii/light.cpp index 01e3ab02..842981b8 100644 --- a/src/nvisii/light.cpp +++ b/src/nvisii/light.cpp @@ -46,9 +46,9 @@ LightStruct &Light::getStruct() { void Light::setColor(glm::vec3 color) { auto &light = getStruct(); - light.r = max(0.f, min(color.r, 1.f)); - light.g = max(0.f, min(color.g, 1.f)); - light.b = max(0.f, min(color.b, 1.f)); + light.r = max(0.f, color.r); + light.g = max(0.f, color.g); + light.b = max(0.f, color.b); markDirty(); } diff --git a/src/nvisii/mesh.cpp b/src/nvisii/mesh.cpp index 26fb96dc..83447ad5 100644 --- a/src/nvisii/mesh.cpp +++ b/src/nvisii/mesh.cpp @@ -141,7 +141,7 @@ void Mesh::computeMetadata() this->meshStructs[id].numVerts = uint32_t(positions.size()); } -glm::vec3 Mesh::getCentroid() +glm::vec3 Mesh::getCenter() { return vec3(meshStructs[id].center); } @@ -211,6 +211,8 @@ void Mesh::loadData( uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, @@ -219,6 +221,7 @@ void Mesh::loadData( ) { bool readingNormals = normals_.size() > 0; + bool readingTangents = tangents_.size() > 0; bool readingColors = colors_.size() > 0; bool readingTexCoords = texcoords_.size() > 0; bool readingIndices = indices_.size() > 0; @@ -228,6 +231,9 @@ void Mesh::loadData( if ((normal_dimensions != 3) && (normal_dimensions != 4)) throw 
std::runtime_error( std::string("Error, invalid normal dimensions. Possible normal dimensions are 3 or 4.")); + + if ((tangent_dimensions != 3) && (tangent_dimensions != 4)) + throw std::runtime_error( std::string("Error, invalid tangent dimensions. Possible tangent dimensions are 3 or 4.")); if ((color_dimensions != 3) && (color_dimensions != 4)) throw std::runtime_error( std::string("Error, invalid color dimensions. Possible color dimensions are 3 or 4.")); @@ -247,6 +253,9 @@ void Mesh::loadData( if (readingNormals && ((normals_.size() / normal_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. Total normals: " + std::to_string(normals_.size() / normal_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingTangents && ((tangents_.size() / tangent_dimensions) != (positions_.size() / position_dimensions))) + throw std::runtime_error( std::string("Error, length mismatch. Total tangents: " + std::to_string(tangents_.size() / tangent_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingColors && ((colors_.size() / color_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. Total colors: " + std::to_string(colors_.size() / color_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); @@ -275,6 +284,12 @@ void Mesh::loadData( vertex.normal.z = normals_[i * normal_dimensions + 2]; vertex.normal.w = (normal_dimensions == 4) ? normals_[i * normal_dimensions + 3] : 0.f; } + if (readingTangents) { + vertex.tangent.x = tangents_[i * tangent_dimensions + 0]; + vertex.tangent.y = tangents_[i * tangent_dimensions + 1]; + vertex.tangent.z = tangents_[i * tangent_dimensions + 2]; + vertex.tangent.w = (tangent_dimensions == 4) ? 
tangents_[i * tangent_dimensions + 3] : 0.f; + } if (readingColors) { vertex.color.x = colors_[i * color_dimensions + 0]; vertex.color.y = colors_[i * color_dimensions + 1]; @@ -316,6 +331,7 @@ void Mesh::loadData( this->positions.resize(uniqueVertices.size()); this->colors.resize(uniqueVertices.size()); this->normals.resize(uniqueVertices.size()); + this->tangents.resize(uniqueVertices.size()); this->texCoords.resize(uniqueVertices.size()); for (int i = 0; i < uniqueVertices.size(); ++i) { @@ -323,6 +339,7 @@ void Mesh::loadData( this->positions[i] = {v.point.x, v.point.y, v.point.z}; this->colors[i] = v.color; this->normals[i] = v.normal; + this->tangents[i] = v.tangent; this->texCoords[i] = v.texcoord; } @@ -330,6 +347,10 @@ void Mesh::loadData( generateSmoothNormals(); } + if (!readingTangents) { + generateSmoothTangents(); + } + computeMetadata(); } @@ -1339,19 +1360,20 @@ Mesh* Mesh::createFromData( uint32_t position_dimensions, std::vector normals_, uint32_t normal_dimensions, + std::vector tangents_, + uint32_t tangent_dimensions, std::vector colors_, uint32_t color_dimensions, std::vector texcoords_, uint32_t texcoord_dimensions, std::vector indices_ ) { - auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, + auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, &tangents_, tangent_dimensions, &colors_, color_dimensions, &texcoords_, texcoord_dimensions, &indices_] (Mesh* mesh) { - mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, + mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, tangents_, tangent_dimensions, colors_, color_dimensions, texcoords_, texcoord_dimensions, indices_); - mesh->generateSmoothTangents(); dirtyMeshes.insert(mesh); }; diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 0e9079c5..287065fb 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -89,6 +89,9 @@ static struct OptixData { LaunchParams LP; 
GLuint imageTexID = -1; cudaGraphicsResource_t cudaResourceTex; + bool resourceSharingSuccessful = true; + OWLBuffer assignmentBuffer; + OWLBuffer frameBuffer; OWLBuffer normalBuffer; OWLBuffer albedoBuffer; @@ -96,6 +99,10 @@ static struct OptixData { OWLBuffer mvecBuffer; OWLBuffer accumBuffer; + OWLBuffer combinedFrameBuffer; + OWLBuffer combinedNormalBuffer; + OWLBuffer combinedAlbedoBuffer; + OWLBuffer entityBuffer; OWLBuffer transformBuffer; OWLBuffer cameraBuffer; @@ -105,8 +112,7 @@ static struct OptixData { OWLBuffer textureBuffer; OWLBuffer volumeBuffer; OWLBuffer lightEntitiesBuffer; - OWLBuffer surfaceInstanceToEntityBuffer; - OWLBuffer volumeInstanceToEntityBuffer; + OWLBuffer instanceToEntityBuffer; OWLBuffer vertexListsBuffer; OWLBuffer normalListsBuffer; OWLBuffer tangentListsBuffer; @@ -137,8 +143,7 @@ static struct OptixData { std::vector volumeGeomList; std::vector volumeBlasList; - OWLGroup surfacesIAS = nullptr; - OWLGroup volumesIAS = nullptr; + OWLGroup IAS = nullptr; std::vector lightEntities; @@ -185,6 +190,9 @@ static struct NVISII { std::function callback; std::recursive_mutex callbackMutex; + std::vector> events; + std::vector times; + std::vector weights; } NVISII; void applyStyle() @@ -263,62 +271,6 @@ int getDeviceCount() { return owlGetDeviceCount(OptixData.context); } -OWLModule moduleCreate(OWLContext context, const char* ptxCode) -{ - return owlModuleCreate(context, ptxCode); -} - -OWLBuffer managedMemoryBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlManagedMemoryBufferCreate(context, type, count, init); -} - -OWLBuffer deviceBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlDeviceBufferCreate(context, type, count, init); -} - -void bufferDestroy(OWLBuffer buffer) -{ - owlBufferDestroy(buffer); -} - -void bufferResize(OWLBuffer buffer, size_t newItemCount) { - owlBufferResize(buffer, newItemCount); -} - -const void* bufferGetPointer(OWLBuffer 
buffer, int deviceId) -{ - return owlBufferGetPointer(buffer, deviceId); -} - -void bufferUpload(OWLBuffer buffer, const void *hostPtr) -{ - owlBufferUpload(buffer, hostPtr); -} - -CUstream getStream(OWLContext context, int deviceId) -{ - return owlContextGetStream(context, deviceId); -} - -OptixDeviceContext getOptixContext(OWLContext context, int deviceID) -{ - return owlContextGetOptixContext(context, deviceID); -} - -void buildPrograms(OWLContext context) { - owlBuildPrograms(context); -} - -void buildPipeline(OWLContext context) { - owlBuildPipeline(context); -} - -void buildSBT(OWLContext context) { - owlBuildSBT(context); -} - OWLMissProg missProgCreate(OWLContext context, OWLModule module, const char *programName, size_t sizeOfVarStruct, OWLVarDecl *vars, size_t numVars) { return owlMissProgCreate(context, module, programName, sizeOfVarStruct, vars, numVars); @@ -400,38 +352,14 @@ owl4x3f glmToOWL(glm::mat4 &xfm){ return oxfm; } -OWLLaunchParams launchParamsCreate(OWLContext context, size_t size, OWLVarDecl *vars, size_t numVars) -{ - return owlParamsCreate(context, size, vars, numVars); -} - -void launchParamsSetBuffer(OWLLaunchParams params, const char* varName, OWLBuffer buffer) -{ - owlParamsSetBuffer(params, varName, buffer); -} - -void launchParamsSetRaw(OWLLaunchParams params, const char* varName, const void* data) -{ - owlParamsSetRaw(params, varName, data); -} - -void launchParamsSetTexture(OWLLaunchParams params, const char* varName, OWLTexture texture) -{ - owlParamsSetTexture(params, varName, texture); -} - -void launchParamsSetGroup(OWLLaunchParams params, const char *varName, OWLGroup group) { - owlParamsSetGroup(params, varName, group); -} - -void synchronizeDevices() +void synchronizeDevices(std::string error_string = "") { for (int i = 0; i < getDeviceCount(); i++) { cudaSetDevice(i); cudaDeviceSynchronize(); cudaError_t err = cudaPeekAtLastError(); if (err != 0) { - std::cout<< "ERROR: " << cudaGetErrorString(err)< 1) { - OD.frameBuffer = 
managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - } else { - OD.frameBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.assignmentBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(float), owlGetDeviceCount(OD.context) + 1, nullptr); + owlParamsSetBuffer(OD.launchParams, "assignmentBuffer", OD.assignmentBuffer); + + // If we only have one GPU, framebuffer pixels can stay on device 0. 
+ if (numGPUsFound == 1) { + OD.frameBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.accumBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.normalBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.albedoBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.scratchBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.mvecBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); } + // Otherwise, multiple GPUs must use host pinned memory to merge partial framebuffers together + else { + OD.frameBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.accumBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.normalBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.albedoBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.scratchBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.mvecBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + } + + // For multiGPU denoising, its best to denoise using something other than zero-copy memory. 
+ OD.combinedFrameBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.combinedNormalBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.combinedAlbedoBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.LP.frameSize = glm::ivec2(512, 512); - launchParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); - launchParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); - launchParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); - launchParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); - launchParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); - launchParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); - launchParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); + owlParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); + owlParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); + owlParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); + owlParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); + owlParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); + owlParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); + owlParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); /* Create Component Buffers */ // note, extra textures reserved for internal use - OD.entityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); - OD.transformBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); - OD.cameraBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); - OD.materialBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); - OD.meshBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), 
Mesh::getCount(), nullptr); - OD.lightBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); - OD.textureBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - OD.volumeBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); - OD.volumeHandlesBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); - OD.lightEntitiesBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.surfaceInstanceToEntityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.volumeInstanceToEntityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.vertexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.normalListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.tangentListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.texCoordListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.indexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.textureObjectsBuffer = deviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - - launchParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); - launchParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); - launchParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); - launchParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); - launchParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); - launchParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); - launchParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); - launchParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); 
- launchParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); - launchParamsSetBuffer(OD.launchParams, "surfaceInstanceToEntity", OD.surfaceInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeInstanceToEntity", OD.volumeInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); - launchParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); - launchParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); - launchParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); + OD.entityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); + OD.transformBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); + OD.cameraBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); + OD.materialBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); + OD.meshBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), Mesh::getCount(), nullptr); + OD.lightBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); + OD.textureBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + OD.volumeBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); + OD.volumeHandlesBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); + OD.lightEntitiesBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, 
nullptr); + OD.instanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.vertexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.normalListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.tangentListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.texCoordListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.indexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.textureObjectsBuffer = owlDeviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + + owlParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); + owlParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); + owlParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); + owlParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); + owlParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); + owlParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); + owlParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); + owlParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); + owlParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); + owlParamsSetBuffer(OD.launchParams, "instanceToEntity", OD.instanceToEntityBuffer); + owlParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); + owlParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); + owlParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); + owlParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); + owlParamsSetBuffer(OD.launchParams, "volumeHandles", 
OD.volumeHandlesBuffer); uint32_t meshCount = Mesh::getCount(); OD.vertexLists.resize(meshCount); @@ -712,48 +676,31 @@ void initializeOptix(bool headless) OD.LP.environmentMapID = -1; OD.LP.environmentMapRotation = glm::quat(1,0,0,0); - launchParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); - launchParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); - - launchParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); - launchParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); - launchParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); - launchParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); - - // OWLTexture GGX_E_AVG_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_avg_size,1, - // GGX_E_avg, - // OWL_TEXTURE_LINEAR, - // OWL_COLOR_SPACE_LINEAR, - // OWL_TEXTURE_CLAMP); - // OWLTexture GGX_E_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_size[0],GGX_E_size[1], - // GGX_E, - // OWL_TEXTURE_LINEAR, - // OWL_TEXTURE_CLAMP, - // OWL_COLOR_SPACE_LINEAR); - // launchParamsSetTexture(OD.launchParams, "GGX_E_AVG_LOOKUP", GGX_E_AVG_LOOKUP); - // launchParamsSetTexture(OD.launchParams, "GGX_E_LOOKUP", GGX_E_LOOKUP); - + owlParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); + owlParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); + + owlParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); + owlParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); + owlParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); + owlParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); + OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - 
launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); - launchParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); - launchParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); - launchParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); - launchParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); - launchParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); - launchParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); - launchParamsSetRaw(OD.launchParams, "maxGlossyDepth", &OD.LP.maxGlossyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); - launchParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); - launchParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); - launchParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); - launchParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "timeSamplingInterval", &OD.LP.timeSamplingInterval); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); + owlParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); + owlParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); + owlParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); + owlParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); + owlParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); + owlParamsSetRaw(OD.launchParams, "maxGlossyDepth", &OD.LP.maxGlossyDepth); + 
owlParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); + owlParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); + owlParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); + owlParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); + owlParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); + owlParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "timeSamplingInterval", &OD.LP.timeSamplingInterval); OWLVarDecl trianglesGeomVars[] = {{/* sentinel to mark end of list */}}; OD.trianglesGeomType = geomTypeCreate(OD.context, OWL_GEOM_TRIANGLES, sizeof(TrianglesGeomData), trianglesGeomVars,-1); @@ -784,15 +731,15 @@ void initializeOptix(bool headless) OD.rayGen = rayGenCreate(OD.context,OD.module,"rayGen", sizeof(RayGenData), rayGenVars,-1); owlRayGenSet1i(OD.rayGen, "deviceCount", numGPUsFound); - buildPrograms(OD.context); + owlBuildPrograms(OD.context); /* Temporary GAS. Required for certain older driver versions. 
*/ const int NUM_VERTICES = 1; vec3 vertices[NUM_VERTICES] = {{ 0.f, 0.f, 0.f }}; const int NUM_INDICES = 1; ivec3 indices[NUM_INDICES] = {{ 0, 0, 0 }}; - OWLBuffer vertexBuffer = deviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); - OWLBuffer indexBuffer = deviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); + OWLBuffer vertexBuffer = owlDeviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); + OWLBuffer indexBuffer = owlDeviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); OWLGeom trianglesGeom = geomCreate(OD.context,OD.trianglesGeomType); trianglesSetVertices(trianglesGeom,vertexBuffer,NUM_VERTICES,sizeof(vec4),0); trianglesSetIndices(trianglesGeom,indexBuffer, NUM_INDICES,sizeof(ivec3),0); @@ -800,10 +747,10 @@ void initializeOptix(bool headless) groupBuildAccel(OD.placeholderGroup); // build IAS - OWLGroup surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", surfacesIAS); + OWLGroup IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(IAS, 0, OD.placeholderGroup); + groupBuildAccel(IAS); + owlParamsSetGroup(OD.launchParams, "IAS", IAS); OWLGeom userGeom = owlGeomCreate(OD.context, OD.volumeGeomType); owlGeomSetPrimCount(userGeom, 1); @@ -813,14 +760,9 @@ void initializeOptix(bool headless) OD.placeholderUserGroup = owlUserGeomGroupCreate(OD.context, 1, &userGeom); groupBuildAccel(OD.placeholderUserGroup); - OWLGroup volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); - // Build *SBT* required to trace the groups - buildPipeline(OD.context); - buildSBT(OD.context); + owlBuildPipeline(OD.context); + owlBuildSBT(OD.context); // Setup denoiser configureDenoiser(OD.enableAlbedoGuide, OD.enableNormalGuide, 
OD.enableKernelPrediction); @@ -830,6 +772,19 @@ void initializeOptix(bool headless) setDomeLightSky(glm::vec3(0,0,10)); OptixData.LP.sceneBBMin = OptixData.LP.sceneBBMax = glm::vec3(0.f); + + // To measure how long each card takes to trace for load balancing + int numGPUs = owlGetDeviceCount(OptixData.context); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + NVISII.events.push_back({start, stop}); + NVISII.times.push_back(1.f); + NVISII.weights.push_back(1.f / float(numGPUs)); + } + cudaSetDevice(0); } void initializeImgui() @@ -895,6 +850,41 @@ void processCommandQueue() } } +void updateGPUWeights() +{ + int num_gpus = owlGetDeviceCount(OptixData.context); + float target = 1.f / float(num_gpus); + + std::vector signals(num_gpus); + float total_time = 0.f; + for (uint32_t i = 0; i < num_gpus; ++i) total_time += NVISII.times[i]; + for (uint32_t i = 0; i < num_gpus; ++i) signals[i] = NVISII.times[i] / float(total_time); + + std::vector p_error(num_gpus); + for (uint32_t i = 0; i < num_gpus; ++i) p_error[i] = target - signals[i]; + + // update weights + float pK = 1.f; + for (uint32_t i = 0; i < num_gpus; ++i) { + NVISII.weights[i] = max(NVISII.weights[i] + p_error[i], .001f); + } + + std::vector scan; + for (size_t i = 0; i <= num_gpus; ++i) { + if (i == 0) scan.push_back(0.f); + else scan.push_back(scan[i - 1] + NVISII.weights[i - 1]); + } + + // std::cout<<"Scan: "; + for (size_t i = 0; i <= num_gpus; ++i) { + scan[i] /= scan[num_gpus]; + // std::cout<(height); - // auto cols = std::vector(width * height); - // for (int y = 0, i = 0; y < height; y++) { - // for (int x = 0; x < width; x++, i++) { - // cols[i] = std::max(texels[i].r, std::max(texels[i].g, texels[i].b)) + ((x > 0) ? cols[i - 1] : 0.f); - // } - // rows[y] = cols[i - 1] + ((y > 0) ? 
rows[y - 1] : 0.0f); - // // normalize the pdf for this scanline (if it was non-zero) - // if (cols[i - 1] > 0) { - // for (int x = 0; x < width; x++) { - // cols[i - width + x] /= cols[i - 1]; - // } - // } - // } - - // // normalize the pdf across all scanlines - // for (int y = 0; y < height; y++) - // rows[y] /= rows[height - 1]; - - // if (OptixData.environmentMapRowsBuffer) owlBufferRelease(OptixData.environmentMapRowsBuffer); - // if (OptixData.environmentMapColsBuffer) owlBufferRelease(OptixData.environmentMapColsBuffer); - // OptixData.environmentMapRowsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), height, rows.data()); - // OptixData.environmentMapColsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), width * height, cols.data()); - // OptixData.LP.environmentMapWidth = width; - // OptixData.LP.environmentMapHeight = height; - OptixData.LP.environmentMapWidth = 0; OptixData.LP.environmentMapHeight = 0; resetAccumulation(); @@ -1102,7 +1062,7 @@ void setIndirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.indirectClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); + owlParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); resetAccumulation(); } @@ -1110,7 +1070,7 @@ void setDirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.directClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); + owlParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); resetAccumulation(); } @@ -1127,11 +1087,11 @@ void setMaxBounceDepth( OptixData.LP.maxTransmissionDepth = transmissionDepth; OptixData.LP.maxVolumeDepth = volumeDepth; - launchParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", &OptixData.LP.maxDiffuseDepth); - launchParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", 
&OptixData.LP.maxGlossyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); - launchParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); + owlParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", &OptixData.LP.maxDiffuseDepth); + owlParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", &OptixData.LP.maxGlossyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); + owlParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); resetAccumulation(); } @@ -1146,7 +1106,7 @@ void setLightSampleCount(uint32_t count) std::string("Error: number of light samples must be between 1 and ") + std::to_string(MAX_LIGHT_SAMPLES)); OptixData.LP.numLightSamples = count; - launchParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); + owlParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); resetAccumulation(); } @@ -1154,15 +1114,15 @@ void samplePixelArea(vec2 xSampleInterval, vec2 ySampleInterval) { OptixData.LP.xPixelSamplingInterval = xSampleInterval; OptixData.LP.yPixelSamplingInterval = ySampleInterval; - launchParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); - launchParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); resetAccumulation(); } void sampleTimeInterval(vec2 sampleTimeInterval) { 
OptixData.LP.timeSamplingInterval = sampleTimeInterval; - launchParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); resetAccumulation(); } @@ -1188,8 +1148,8 @@ void updateComponents() anyUpdated |= Texture::areAnyDirty(); anyUpdated |= Entity::areAnyDirty(); anyUpdated |= Volume::areAnyDirty(); - if (!anyUpdated) return; + resetAccumulation(); std::recursive_mutex dummyMutex; @@ -1222,11 +1182,11 @@ void updateComponents() if (m->getTriangleIndices().size() == 0) throw std::runtime_error("ERROR: indices is 0"); // Next, allocate resources for the new mesh. - OD.vertexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); - OD.normalLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); - OD.tangentLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); - OD.texCoordLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); - OD.indexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); + OD.vertexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); + OD.normalLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); + OD.tangentLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); + OD.texCoordLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); 
+ OD.indexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); // Create geometry and build BLAS OD.surfaceGeomList[m->getAddress()] = geomCreate(OD.context, OD.trianglesGeomType); @@ -1236,13 +1196,13 @@ void updateComponents() groupBuildAccel(OD.surfaceBlasList[m->getAddress()]); } - bufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); - bufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); - bufferUpload(OD.indexListsBuffer, OD.indexLists.data()); - bufferUpload(OD.normalListsBuffer, OD.normalLists.data()); - bufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); + owlBufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); + owlBufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); + owlBufferUpload(OD.indexListsBuffer, OD.indexLists.data()); + owlBufferUpload(OD.normalListsBuffer, OD.normalLists.data()); + owlBufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); Mesh::updateComponents(); - bufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); + owlBufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); } // Manage Volumes: Build / Rebuild BLAS @@ -1260,24 +1220,20 @@ void updateComponents() // Next, allocate resources for the new volume. 
auto gridHdlPtr = v->getNanoVDBGridHandle(); const nanovdb::FloatGrid* grid = reinterpret_cast(gridHdlPtr.get()->data()); - std::cout<checksum()<tree().getAccessor(); // auto bbox = tree.root().bbox(); auto bbox = grid->tree().bbox().asReal(); // int nodecount = grid->tree().nodeCount(3); - // std::cout<getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint8_t), gridHdlPtr.get()->size(), nullptr); owlBufferUpload(OD.volumeHandles[v->getAddress()], gridHdlPtr.get()->data()); - printf("%hhx\n",gridHdlPtr.get()->data()[0]); + // printf("%hhx\n",gridHdlPtr.get()->data()[0]); const void* d_gridData = owlBufferGetPointer(OD.volumeHandles[v->getAddress()], 0); uint8_t first_byte; cudaMemcpy((void*)&first_byte, d_gridData, 1, cudaMemcpyDeviceToHost); - printf("%hhx\n",first_byte); - + // printf("%hhx\n",first_byte); // Create geometry and build BLAS uint32_t volumeID = v->getAddress(); @@ -1299,19 +1255,11 @@ void updateComponents() // Manage Entities: Build / Rebuild TLAS auto dirtyEntities = Entity::getDirtyEntities(); if (dirtyEntities.size() > 0) { - // Surface instances - std::vector surfaceInstances; - std::vector t0SurfaceTransforms; - std::vector t1SurfaceTransforms; - std::vector surfaceInstanceToEntity; - - // Volume instances - std::vector volumeInstances; - std::vector t0VolumeTransforms; - std::vector t1VolumeTransforms; - std::vector volumeInstanceToEntity; - - // Todo: curves... 
+ std::vector instances; + std::vector t0Transforms; + std::vector t1Transforms; + std::vector masks; + std::vector instanceToEntity; // Aggregate instanced geometry and transformations Entity* entities = Entity::getFront(); @@ -1330,6 +1278,14 @@ void updateComponents() // Get instance transformation glm::mat4 prevLocalToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */true); glm::mat4 localToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */false); + t0Transforms.push_back(prevLocalToWorld); + t1Transforms.push_back(localToWorld); + + // Get instance mask + masks.push_back(entities[eid].getStruct().flags); + + // Indirection from instance back to entity ID + instanceToEntity.push_back(eid); // Add any instanced mesh geometry to the list if (entities[eid].getMesh()) { @@ -1340,90 +1296,63 @@ void updateComponents() // Mark it as dirty. It should be available in a subsequent frame entities[eid].getMesh()->markDirty(); return; } - surfaceInstances.push_back(blas); - surfaceInstanceToEntity.push_back(eid); - t0SurfaceTransforms.push_back(prevLocalToWorld); - t1SurfaceTransforms.push_back(localToWorld); + instances.push_back(blas); } // Add any instanced volume geometry to the list - if (entities[eid].getVolume()) { + else if (entities[eid].getVolume()) { uint32_t address = entities[eid].getVolume()->getAddress(); OWLGroup blas = OD.volumeBlasList[address]; if (!blas) { // Same as meshes, if BLAS doesn't exist, force BLAS build and try again. 
entities[eid].getMesh()->markDirty(); return; } - volumeInstances.push_back(blas); - volumeInstanceToEntity.push_back(eid); - t0VolumeTransforms.push_back(prevLocalToWorld); - t1VolumeTransforms.push_back(localToWorld); - } + instances.push_back(blas); + } + + else { + throw std::runtime_error("Internal Error, renderable entity has no mesh or volume components!?"); + } } - std::vector t0OwlSurfaceTransforms; - std::vector t1OwlSurfaceTransforms; - std::vector t0OwlVolumeTransforms; - std::vector t1OwlVolumeTransforms; - auto oldSurfaceIAS = OD.surfacesIAS; - auto oldVolumeIAS = OD.volumesIAS; + std::vector owlVisibilityMasks; + std::vector t0OwlTransforms; + std::vector t1OwlTransforms; + auto oldIAS = OD.IAS; - // If no surfaces instanced, insert an unhittable placeholder. - // (required for certain older driver versions) - if (surfaceInstances.size() == 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(OD.surfacesIAS); - } - - // If no volumes instanced, insert an unhittable placeholder. + // If no objects are instanced, insert an unhittable placeholder. 
// (required for certain older driver versions) - if (volumeInstances.size() == 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(OD.volumesIAS); + if (instances.size() == 0) { + OD.IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(OD.IAS, 0, OD.placeholderGroup); + groupBuildAccel(OD.IAS); } - // Set surface transforms to IAS, upload surface instance to entity map - if (surfaceInstances.size() > 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, surfaceInstances.size()); - for (uint32_t iid = 0; iid < surfaceInstances.size(); ++iid) { - instanceGroupSetChild(OD.surfacesIAS, iid, surfaceInstances[iid]); - t0OwlSurfaceTransforms.push_back(glmToOWL(t0SurfaceTransforms[iid])); - t1OwlSurfaceTransforms.push_back(glmToOWL(t1SurfaceTransforms[iid])); + // Set instance transforms and masks, upload instance to entity map + if (instances.size() > 0) { + OD.IAS = instanceGroupCreate(OD.context, instances.size()); + for (uint32_t iid = 0; iid < instances.size(); ++iid) { + instanceGroupSetChild(OD.IAS, iid, instances[iid]); + t0OwlTransforms.push_back(glmToOWL(t0Transforms[iid])); + t1OwlTransforms.push_back(glmToOWL(t1Transforms[iid])); + owlVisibilityMasks.push_back(masks[iid]); } - owlInstanceGroupSetTransforms(OD.surfacesIAS,0,(const float*)t0OwlSurfaceTransforms.data()); - owlInstanceGroupSetTransforms(OD.surfacesIAS,1,(const float*)t1OwlSurfaceTransforms.data()); - bufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); - bufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); + owlInstanceGroupSetTransforms(OD.IAS,0,(const float*)t0OwlTransforms.data()); + owlInstanceGroupSetTransforms(OD.IAS,1,(const float*)t1OwlTransforms.data()); + owlInstanceGroupSetVisibilityMasks(OD.IAS, owlVisibilityMasks.data()); + owlBufferResize(OD.instanceToEntityBuffer, instanceToEntity.size()); + 
owlBufferUpload(OD.instanceToEntityBuffer, instanceToEntity.data()); } - // Set volume transforms to IAS, upload volume instance to entity map - if (volumeInstances.size() > 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, volumeInstances.size()); - for (uint32_t iid = 0; iid < volumeInstances.size(); ++iid) { - instanceGroupSetChild(OD.volumesIAS, iid, volumeInstances[iid]); - t0OwlVolumeTransforms.push_back(glmToOWL(t0VolumeTransforms[iid])); - t1OwlVolumeTransforms.push_back(glmToOWL(t1VolumeTransforms[iid])); - } - owlInstanceGroupSetTransforms(OD.volumesIAS,0,(const float*)t0OwlVolumeTransforms.data()); - owlInstanceGroupSetTransforms(OD.volumesIAS,1,(const float*)t1OwlVolumeTransforms.data()); - bufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); - bufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); - } - // Build IAS - groupBuildAccel(OD.volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", OD.volumesIAS); - groupBuildAccel(OD.surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); - + groupBuildAccel(OD.IAS); + owlParamsSetGroup(OD.launchParams, "IAS", OD.IAS); + // Now that IAS have changed, we need to rebuild SBT - buildSBT(OD.context); + owlBuildSBT(OD.context); // Release any old IAS (TODO, don't rebuild if entity edit doesn't effect IAS...) - if (oldSurfaceIAS) {owlGroupRelease(oldSurfaceIAS);} - if (oldVolumeIAS) {owlGroupRelease(oldVolumeIAS);} + if (oldIAS) {owlGroupRelease(oldIAS);} // Aggregate entities that are light sources (todo: consider emissive volumes...) 
OD.lightEntities.resize(0); @@ -1431,22 +1360,24 @@ void updateComponents() if (!entities[eid].isInitialized()) continue; if (!entities[eid].getTransform()) continue; if (!entities[eid].getLight()) continue; - if (!entities[eid].getMesh()) continue; + // Edit: adding support for "point" lights that have no meshes + // if (!entities[eid].getMesh()) continue; OD.lightEntities.push_back(eid); } - bufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); - bufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); + owlBufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); + owlBufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); // Finally, upload entity structs to the GPU. Entity::updateComponents(); - bufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); + owlBufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); } // Manage textures and materials if (Texture::areAnyDirty() || Material::areAnyDirty()) { - std::lock_guard material_lock(Material::areAnyDirty() ? 
*Material::getEditMutex().get() : dummyMutex); + std::lock_guard material_lock(*Material::getEditMutex().get()); + std::lock_guard texture_lock(*Texture::getEditMutex().get()); // Allocate cuda textures for all texture components auto dirtyTextures = Texture::getDirtyTextures(); @@ -1492,7 +1423,6 @@ void updateComponents() colorSpace ); } - } // Create additional cuda textures for material constants @@ -1568,32 +1498,19 @@ void updateComponents() } Material::updateComponents(); - bufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); + owlBufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); } - bufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); + owlBufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); Texture::updateComponents(); memcpy(OptixData.textureStructs.data(), Texture::getFrontStruct(), Texture::getCount() * sizeof(TextureStruct)); - bufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); + owlBufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); } // Manage transforms auto dirtyTransforms = Transform::getDirtyTransforms(); if (dirtyTransforms.size() > 0) { Transform::updateComponents(); - - // // for each device - // for (uint32_t id = 0; id < owlGetDeviceCount(OptixData.context); ++id) - // { - // cudaSetDevice(id); - - // TransformStruct* devTransforms = (TransformStruct*)owlBufferGetPointer(OptixData.transformBuffer, id); - // TransformStruct* transformStructs = Transform::getFrontStruct(); - // for (auto &t : dirtyTransforms) { - // if (!t->isInitialized()) continue; - // CUDA_CHECK(cudaMemcpy(&devTransforms[t->getAddress()], &transformStructs[t->getAddress()], sizeof(TransformStruct), cudaMemcpyHostToDevice)); - // } - // } // cudaSetDevice(0); owlBufferUpload(OptixData.transformBuffer, Transform::getFrontStruct()); @@ -1602,51 +1519,67 @@ void updateComponents() // Manage Cameras if (Camera::areAnyDirty()) { Camera::updateComponents(); - 
bufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); + owlBufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); } // Manage lights if (Light::areAnyDirty()) { Light::updateComponents(); - bufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); + owlBufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); } } void updateLaunchParams() { - launchParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); - launchParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); - launchParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); - launchParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); - launchParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); - launchParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); - launchParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); - launchParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); - launchParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); - launchParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); - launchParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); - launchParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); - launchParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); - - launchParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); - launchParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); - launchParamsSetRaw(OptixData.launchParams, 
"environmentMapWidth", &OptixData.LP.environmentMapWidth); - launchParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); - launchParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); - launchParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); + owlParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); + owlParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); + owlParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); + owlParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); + owlParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); + owlParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); + owlParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); + owlParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); + owlParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); + owlParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); + owlParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); + owlParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); + owlParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); + + owlParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); + owlParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); + owlParamsSetRaw(OptixData.launchParams, "environmentMapWidth", &OptixData.LP.environmentMapWidth); + 
owlParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); OptixData.LP.frameID ++; } +// Update: This is still prohibitively slow. Official OptiX samples use host pinned memory. +// Moving to that approach... +// // Different GPUs have different local framebuffers. +// // This function combines those framebuffers on the CPU, then uploads results to device 0. +void mergeFrameBuffers() { + // For multigpu setups, we currently render to zero-copy memory to merge on the host. + // So for now, just upload those results to device 0's combined unified frame buffers on the device + owlBufferUpload(OptixData.combinedFrameBuffer, owlBufferGetPointer(OptixData.frameBuffer, 0)); + + if (OptixData.enableAlbedoGuide) { + owlBufferUpload(OptixData.combinedAlbedoBuffer, owlBufferGetPointer(OptixData.albedoBuffer, 0)); + } + + if (OptixData.enableNormalGuide) { + owlBufferUpload(OptixData.combinedNormalBuffer, owlBufferGetPointer(OptixData.normalBuffer, 0)); + } +} + void denoiseImage() { synchronizeDevices(); auto &OD = OptixData; - auto cudaStream = getStream(OD.context, 0); - - CUdeviceptr frameBuffer = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + auto cudaStream = owlContextGetStream(OD.context, 0); std::vector inputLayers; OptixImage2D colorLayer; @@ -1655,7 +1588,7 @@ void denoiseImage() { colorLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; colorLayer.pixelStrideInBytes = 4 * sizeof(float); colorLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - colorLayer.data = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + colorLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedFrameBuffer, 0); inputLayers.push_back(colorLayer); OptixImage2D albedoLayer; @@ -1664,7 +1597,7 @@ void denoiseImage() { albedoLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; 
albedoLayer.pixelStrideInBytes = 4 * sizeof(float); albedoLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - albedoLayer.data = (CUdeviceptr) bufferGetPointer(OD.albedoBuffer, 0); + albedoLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedAlbedoBuffer, 0); if (OD.enableAlbedoGuide) inputLayers.push_back(albedoLayer); OptixImage2D normalLayer; @@ -1673,7 +1606,7 @@ void denoiseImage() { normalLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; normalLayer.pixelStrideInBytes = 4 * sizeof(float); normalLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - normalLayer.data = (CUdeviceptr) bufferGetPointer(OD.normalBuffer, 0); + normalLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedNormalBuffer, 0); if (OD.enableNormalGuide) inputLayers.push_back(normalLayer); OptixImage2D outputLayer = colorLayer; // can I get away with this? @@ -1692,8 +1625,8 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } @@ -1703,50 +1636,115 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } #endif params.denoiseAlpha = 0; // Don't touch alpha. params.blendFactor = 0.0f; // Show the denoised image only. 
- params.hdrIntensity = (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0); + params.hdrIntensity = (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0); #ifdef USE_OPTIX72 - params.hdrAverageColor = (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0); + params.hdrAverageColor = (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0); #endif OPTIX_CHECK(optixDenoiserInvoke( OD.denoiser, cudaStream, ¶ms, - (CUdeviceptr) bufferGetPointer(OD.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserStateBuffer, 0), OD.denoiserSizes.stateSizeInBytes, inputLayers.data(), inputLayers.size(), /* inputOffsetX */ 0, /* inputOffsetY */ 0, &outputLayer, - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes )); +} - synchronizeDevices(); +inline const char* getGLErrorString( GLenum error ) +{ + switch( error ) + { + case GL_NO_ERROR: return "No error"; + case GL_INVALID_ENUM: return "Invalid enum"; + case GL_INVALID_VALUE: return "Invalid value"; + case GL_INVALID_OPERATION: return "Invalid operation"; + //case GL_STACK_OVERFLOW: return "Stack overflow"; + //case GL_STACK_UNDERFLOW: return "Stack underflow"; + case GL_OUT_OF_MEMORY: return "Out of memory"; + //case GL_TABLE_TOO_LARGE: return "Table too large"; + default: return "Unknown GL error"; + } } +#define DO_GL_CHECK +#ifdef DO_GL_CHECK +# define GL_CHECK( call ) \ + do \ + { \ + call; \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) << " at " \ + << __FILE__ << "(" << __LINE__ << "): " << #call \ + << std::endl; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + + +# define GL_CHECK_ERRORS( ) \ + do \ + { \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) 
<< " at " \ + << __FILE__ << "(" << __LINE__ << ")"; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + +#else +# define GL_CHECK( call ) do { call; } while(0) +# define GL_CHECK_ERRORS( ) do { ; } while(0) +#endif + void drawFrameBufferToWindow() { synchronizeDevices(); glFlush(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); auto &OD = OptixData; - cudaGraphicsMapResources(1, &OD.cudaResourceTex); - const void* fbdevptr = bufferGetPointer(OD.frameBuffer,0); - cudaArray_t array; - cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); - cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); - cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); - + const void* fbdevptr = owlBufferGetPointer(OD.combinedFrameBuffer,0); + + if (OD.resourceSharingSuccessful) { + cudaGraphicsMapResources(1, &OD.cudaResourceTex); + cudaArray_t array; + cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); + cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); + cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); + } else { + GL_CHECK(glBindTexture(GL_TEXTURE_2D, OD.imageTexID)); + glEnable(GL_TEXTURE_2D); + GL_CHECK(glTexSubImage2D(GL_TEXTURE_2D,0, + 0, 0, + OD.LP.frameSize.x, OD.LP.frameSize.y, + GL_RGBA, GL_FLOAT, fbdevptr)); + } + // Draw pixels from optix frame buffer glEnable(GL_FRAMEBUFFER_SRGB); glViewport(0, 0, OD.LP.frameSize.x, OD.LP.frameSize.y); @@ -1759,7 +1757,6 @@ void drawFrameBufferToWindow() glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); glDisable(GL_DEPTH_TEST); - glBindTexture(GL_TEXTURE_2D, OD.imageTexID); // Draw texture to screen via immediate mode glEnable(GL_TEXTURE_2D); @@ -1839,7 +1836,7 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP "If normal guide is enabled, albedo guide must also be 
enabled."); } - enqueueCommand([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ + enqueueCommandAndWait([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ OptixData.enableAlbedoGuide = useAlbedoGuide; OptixData.enableNormalGuide = useNormalGuide; #ifdef USE_OPTIX70 @@ -1857,9 +1854,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (!OptixData.colorAvgBuffer) OptixData.colorAvgBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), 4, nullptr); if (!OptixData.denoiserScratchBuffer) - OptixData.denoiserScratchBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserScratchBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); if (!OptixData.denoiserStateBuffer) - OptixData.denoiserStateBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserStateBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); // Setup denoiser OptixDenoiserOptions options; @@ -1873,8 +1870,8 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (OptixData.denoiser) optixDenoiserDestroy(OptixData.denoiser); - auto optixContext = getOptixContext(OptixData.context, 0); - auto cudaStream = getStream(OptixData.context, 0); + auto optixContext = owlContextGetOptixContext(OptixData.context, 0); + auto cudaStream = owlContextGetStream(OptixData.context, 0); OPTIX_CHECK(optixDenoiserCreate(optixContext, &options, &OptixData.denoiser)); OptixDenoiserModelKind kind; @@ -1907,9 +1904,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP (cudaStream_t) cudaStream, (unsigned int) OptixData.LP.frameSize.x, (unsigned int) OptixData.LP.frameSize.y, - (CUdeviceptr) bufferGetPointer(OptixData.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserStateBuffer, 0), OptixData.denoiserSizes.stateSizeInBytes, - 
(CUdeviceptr) bufferGetPointer(OptixData.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserScratchBuffer, 0), scratchSizeInBytes ); }); @@ -1922,7 +1919,7 @@ std::vector readFrameBuffer() { int num_devices = getDeviceCount(); synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*)bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*)owlBufferGetPointer(OptixData.combinedFrameBuffer,0); for (uint32_t test = 0; test < frameBuffer.size(); test += 4) { frameBuffer[test + 0] = fb[test / 4].r; frameBuffer[test + 1] = fb[test / 4].g; @@ -1939,6 +1936,8 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi if ((width < 1) || (height < 1)) throw std::runtime_error("Error, invalid width/height"); std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, samplesPerPixel, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -1955,6 +1954,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi resizeOptixFrameBuffer(width, height); resetAccumulation(); updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = 0; i < samplesPerPixel; ++i) { // std::cout< render(uint32_t width, uint32_t height, uint32_t samplesPerPi } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - if (OptixData.enableDenoiser) - { - denoiseImage(); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) 
{ + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); } + updateGPUWeights(); + mergeFrameBuffers(); if (!NVISII.headlessMode) { + if (OptixData.enableDenoiser) + { + denoiseImage(); + } + drawFrameBufferToWindow(); glfwSetWindowTitle(WindowData.window, (std::to_string(i) + std::string("/") + std::to_string(samplesPerPixel)).c_str()); @@ -1993,12 +2005,14 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi std::cout<<"\r "<< samplesPerPixel << "/" << samplesPerPixel <<" - done!" << std::endl; } - synchronizeDevices(); - - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); - cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + if (OptixData.enableDenoiser) + { + denoiseImage(); + } synchronizeDevices(); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); + cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); }); return frameBuffer; @@ -2016,6 +2030,8 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra { std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, startFrame, frameCount, bounce, _option, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -2046,6 +2062,9 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } @@ -2091,6 +2110,9 @@ std::vector 
renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw std::runtime_error(std::string("Error, unknown option : \"") + _option + std::string("\". ") + std::string("See documentation for available options")); @@ -2101,6 +2123,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra OptixData.LP.renderDataBounce = bounce; OptixData.LP.seed = seed; updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = startFrame; i < frameCount; ++i) { // std::cout< renderData(uint32_t width, uint32_t height, uint32_t startFra } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); + + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); + } + updateGPUWeights(); + mergeFrameBuffers(); + // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) // { @@ -2127,7 +2163,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height 
* sizeof(glm::vec4), cudaMemcpyDeviceToHost); OptixData.LP.renderDataMode = 0; @@ -2309,20 +2345,6 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std } } -// void renderDataToPNG(uint32_t width, uint32_t height, uint32_t startFrame, uint32_t frameCount, uint32_t bounce, std::string field, std::string imagePath) -// { -// std::vector fb = renderData(width, height, startFrame, frameCount, bounce, field); -// std::vector colors(4 * width * height); -// for (size_t i = 0; i < (width * height); ++i) { -// colors[i * 4 + 0] = uint8_t(glm::clamp(fb[i * 4 + 0] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 1] = uint8_t(glm::clamp(fb[i * 4 + 1] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 2] = uint8_t(glm::clamp(fb[i * 4 + 2] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 3] = uint8_t(glm::clamp(fb[i * 4 + 3] * 255.f, 0.f, 255.f)); -// } -// stbi_flip_vertically_on_write(true); -// stbi_write_png(imagePath.c_str(), width, height, /* num channels*/ 4, colors.data(), /* stride in bytes */ width * 4); -// } - void initializeComponentFactories( uint32_t maxEntities, uint32_t maxCameras, @@ -2388,9 +2410,10 @@ void initializeInteractive( glfw->poll_events(); initializeOptix(/*headless = */ false); - initializeImgui(); + int numGPUs = owlGetDeviceCount(OptixData.context); + while (!stopped) { /* Poll events from the window */ @@ -2412,18 +2435,30 @@ void initializeInteractive( updateFrameBuffer(); updateComponents(); updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - if (OptixData.enableDenoiser) - { + + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + 
cudaEventRecord(NVISII.events[deviceID].second, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); + } + owlLaunchSync(OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); + } + updateGPUWeights(); + mergeFrameBuffers(); + + if (OptixData.enableDenoiser) { denoiseImage(); - } + } } - // glm::vec4* samplePtr = (glm::vec4*) bufferGetPointer(OptixData.accumBuffer,0); - // glm::vec4* mvecPtr = (glm::vec4*) bufferGetPointer(OptixData.mvecBuffer,0); - // glm::vec4* t0AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.scratchBuffer,0); - // glm::vec4* t1AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.albedoBuffer,0); - // glm::vec4* fbPtr = (glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); - // glm::vec4* sPtr = (glm::vec4*) bufferGetPointer(OptixData.normalBuffer,0); + // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); + // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); + // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); + // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); + // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); // int width = OptixData.LP.frameSize.x; // int height = OptixData.LP.frameSize.y; // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); @@ -2442,7 +2477,10 @@ void initializeInteractive( OPTIX_CHECK(optixDenoiserDestroy(OptixData.denoiser)); if (OptixData.imageTexID != -1) { - cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + if (OptixData.cudaResourceTex) { + cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + OptixData.cudaResourceTex = 0; + } glDeleteTextures(1, &OptixData.imageTexID); } @@ 
-2604,12 +2642,12 @@ void updateSceneAabb(Entity* entity) void enableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = false; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = false; }); } void disableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = true; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = true; }); } bool areUpdatesEnabled() @@ -2731,6 +2769,9 @@ void __test__(std::vector args) { else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } @@ -2776,6 +2817,9 @@ void __test__(std::vector args) { else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw std::runtime_error(std::string("Error, unknown option : \"") + option + std::string("\". 
") + std::string("See documentation for available options")); diff --git a/src/nvisii/nvisii.cu b/src/nvisii/nvisii.cu index 6a94855c..15f41939 100644 --- a/src/nvisii/nvisii.cu +++ b/src/nvisii/nvisii.cu @@ -53,4 +53,4 @@ void reproject(glm::vec4 *sampleBuffer, glm::vec4 *t0AlbedoBuffer, glm::vec4 *t1 dim3 gridSize = dim3 (bx, by); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, true, width, height); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, false, width, height); -} \ No newline at end of file +} diff --git a/src/nvisii/nvisii_import_scene.cpp b/src/nvisii/nvisii_import_scene.cpp index 9b557d69..aed71804 100644 --- a/src/nvisii/nvisii_import_scene.cpp +++ b/src/nvisii/nvisii_import_scene.cpp @@ -42,9 +42,6 @@ std::string dirnameOf(const std::string& fname) Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::quat rotation, std::vector args) { - bool updatesEnabled = areUpdatesEnabled(); - - disableUpdates(); std::string directory = dirnameOf(path); bool verbose = false; bool max_quality = false; @@ -101,7 +98,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu nvisiiScene.materials.push_back(mat); material_light_map[mat] = nullptr; aiString Path; - + // Diffuse/specular workflow if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -204,7 +201,61 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto name = std::string(material->GetName().C_Str()); auto mat = nvisiiScene.materials[materialIdx]; aiString Path; - + + if (verbose) std::cout<<"Creating material : " << name << std::endl; + + aiColor3D color (0.f,0.f,0.f); + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_DIFFUSE, color)) { + mat->setBaseColor(glm::vec3(color.r, color.g, color.b)); + if 
(verbose) std::cout<<"Assigning base color : " << color.r << " " << color.g << " " << color.b << std::endl; + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_EMISSIVE, color)) { + if (color.r > 0.f || color.g > 0.f || color.b > 0.f) { + if (verbose) std::cout<<"Assigning emissive color : " << color.r << " " << color.g << " " << color.b << std::endl; + if (Light::get(mat->getName()) == nullptr) { + Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + material_light_map[mat] = Light::get(mat->getName()); + material_light_map[mat]->setColor(glm::vec3(color.r, color.g, color.b)); + } + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_SPECULAR, color)) { + if (color.r == color.b && color.r == color.g) { + if (verbose) std::cout<<"Setting constant specular: " << color.r << std::endl; + mat->setSpecular(color.r); + } + else if (verbose) { + std::cout<<"Error, colored specular found (not supported)" << std::endl; + } + } + + float scalar; + if(AI_SUCCESS == material->Get(AI_MATKEY_SHININESS, scalar)) { + if (scalar != 0.f) { + if (verbose) std::cout<<"Interpreting shininess as 2/roughness^4 - 2: " << powf(2.f / (scalar + 2.f), 1.f/4.f) << std::endl; + mat->setRoughness(powf(2.f / (scalar + 2.f), 1.f/4.f)); + } + } + float ior = 1.f; + if(AI_SUCCESS == material->Get(AI_MATKEY_REFRACTI, ior)) { + if (verbose) std::cout<<"Assigning index of refraction " << ior << std::endl; + mat->setIor(ior); + } + + if(AI_SUCCESS == material->Get(AI_MATKEY_OPACITY, scalar)) { + if (scalar != 1.f) { + if (ior == 1) { + if (verbose) std::cout<<"Assigning opacity " << scalar << std::endl; + mat->setAlpha(scalar); + } + else { + if (verbose) std::cout<<"IOR != 1.0, interpreting dissolve as transmission " << scalar << std::endl; + mat->setTransmission(scalar); + } + } + } + // todo, add texture paths to map above, load later and connect if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, 
NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -228,6 +279,10 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) mat->setNormalMapTexture(texture_map[path]); + if (!texture_map[path]->isLinear()) { + if (verbose) std::cout<<"WARNING: normal map texture " << path << " not marked as linear! Forcing texture into linear mode..." << std::endl; + texture_map[path]->setLinear(true); + } } } @@ -236,9 +291,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) { - material_light_map[mat] = Light::create(mat->getName()); + if (Light::get(mat->getName()) == nullptr) { + Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + material_light_map[mat] = Light::get(mat->getName()); material_light_map[mat]->setColorTexture(texture_map[path]); - nvisiiScene.lights.push_back(material_light_map[mat]); } } } @@ -267,11 +325,13 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto &aiMesh = scene->mMeshes[meshIdx]; auto &aiVertices = aiMesh->mVertices; auto &aiNormals = aiMesh->mNormals; + auto &aiTangents = aiMesh->mTangents; auto &aiFaces = aiMesh->mFaces; auto &aiTextureCoords = aiMesh->mTextureCoords; std::vector positions; std::vector normals; + std::vector tangents; std::vector texCoords; std::vector indices; @@ -291,6 +351,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu if (!aiMesh->HasNormals()) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no normals" << std::endl; } + if (!aiMesh->HasTangentsAndBitangents()) { + if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no tangents" << std::endl; + } if 
(!aiMesh->HasTextureCoords(0)) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no texture coordinates" << std::endl; } @@ -310,6 +373,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu v.normal.y = normal.y; v.normal.z = normal.z; } + if (aiMesh->HasTangentsAndBitangents()) { + auto tangent = aiTangents[vid]; + v.tangent.x = tangent.x; + v.tangent.y = tangent.y; + v.tangent.z = tangent.z; + } if (aiMesh->HasTextureCoords(0)) { // just try to take the first texcoord auto texCoord = aiTextureCoords[0][vid]; @@ -322,6 +391,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu normals.push_back(v.normal.x); normals.push_back(v.normal.y); normals.push_back(v.normal.z); + tangents.push_back(v.tangent.x); + tangents.push_back(v.tangent.y); + tangents.push_back(v.tangent.z); texCoords.push_back(v.texcoord.x); texCoords.push_back(v.texcoord.y); } @@ -352,6 +424,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu meshName, positions, 3, normals, 3, + tangents, 3, /*colors*/{}, 3, texCoords, 2, indices @@ -363,33 +436,6 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } } - // load lights - for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { - auto light = scene->mLights[lightIdx]; - if (verbose) { - std::cout<<"Found light: " << std::string(light->mName.C_Str()) << std::endl; - if (light->mType == aiLightSource_DIRECTIONAL) { - std::cout<<"Directional"<mType == aiLightSource_POINT) { - std::cout<<"Point"<mType == aiLightSource_SPOT) { - std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { - std::cout<<"Ambient"<mType == aiLightSource_AREA) { - std::cout<<"Area"<mNumCameras; ++cameraIdx) { - auto camera = scene->mCameras[cameraIdx]; - if (verbose) { - std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; - } - } - std::function addNode; addNode = [&scene, &nvisiiScene, 
&material_light_map, &addNode, position, rotation, scale, verbose] (aiNode* node, Transform* parentTransform, int level) @@ -403,7 +449,16 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } if (verbose) std::cout<< std::string(level, '\t') << "Creating transform " << transformName << std::endl; auto transform = Transform::create(transformName); - transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + try { + transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + } catch(...) { + if (verbose) std::cout<< std::string(level, '\t') << "Warning! transform " << transformName << " Decomposition failed! Is the product of the 4x4 with the determinant of the upper left 3x3 nonzero? See Graphics Gems II: Decomposing a Matrix into Simple Transformations" << std::endl; + Transform::remove(transformName); + return; + + // transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation), false); + // transform->setScale({0.f, 0.f, 0.f}); + } if (parentTransform == nullptr) { transform->setScale(transform->getScale() * scale); transform->addRotation(rotation); @@ -451,11 +506,69 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu for (uint32_t cid = 0; cid < node->mNumChildren; ++cid) addNode(node->mChildren[cid], transform, level+1); }; - addNode(scene->mRootNode, nullptr, 0); + + + // load lights + for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { + auto &aiLight = scene->mLights[lightIdx]; + std::string lightName = std::string(aiLight->mName.C_Str()); + // if (verbose) + { + if (verbose) std::cout<<"Found light: " << lightName << std::endl; + if (aiLight->mType == aiLightSource_DIRECTIONAL) { + if (verbose) std::cout<<"Directional"<mType == aiLightSource_POINT) { + if (verbose) std::cout<<"Point"<setTransform(Transform::get(lightName)); + if (verbose) std::cout<< std::string(1, '\t') << "transform: " << lightName <setLight(light); + 
light->setColor({aiLight->mColorDiffuse.r, aiLight->mColorDiffuse.g, aiLight->mColorDiffuse.b}); + + nvisiiScene.entities.push_back(entity); + } else if (aiLight->mType == aiLightSource_SPOT) { + std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { + std::cout<<"Ambient"<mType == aiLightSource_AREA) { + std::cout<<"Area"<mNumCameras; ++cameraIdx) { + auto camera = scene->mCameras[cameraIdx]; + if (verbose) { + std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; + } + } + aiReleaseImport(scene); - if (updatesEnabled) enableUpdates(); if (verbose) std::cout<<"Done!"< gridHdl = nanovdb::createFogVolumeBox(); + auto create = [size, center, halfWidth] (Volume* v) { + nanovdb::GridHandle<> gridHdl = nanovdb::createFogVolumeBox(size.x, size.y, size.z, ((nanovdb::Vec3R)(0)), 1.0f, halfWidth); v->gridHdlPtr = std::make_shared>(std::move(gridHdl)); v->markDirty(); };