From b51fb15583eff67c27ee9b05aca11538f550043b Mon Sep 17 00:00:00 2001 From: "Nathan V. Morrical" Date: Sun, 23 May 2021 17:59:07 -0600 Subject: [PATCH] Development (#97) * removing unified memory for framebuffers, as this hurts multigpu performance in non-nvlink configurations * some fixes for multigpu setups. Some issues with different gpus rendering different images: * changing how max path bounces are handled to avoid long tails. * working on multigpu perf * some steps forward towards dynamic load balancing * fixes to load balancing. Fixes to frame buffer when cuda visible devices set to non-zero value * adding some sync points * Adding support for constant material parameters with import_scene function * updating owl submodule * adding more visibility flags. * assigning visibility masks to entities * forcing denoiser configure to wait * fixing bug where renderToImage wasnt saving the correct buffer * working on a volume sample * simplifying download_content script a bit * adding a volumes example. Some bugs need to be worked out * working on refactoring volume codepath to improve efficiency * refacting volume rendering code. Now much cheaper to render surfaces without any volumes present * upgrades to volume example * ray visibility flags now working. Added an example demonstrating feature * fixing some tests. 
Normal map test is failing * fixing race condition with textures and materials * adding a get_center function to entities, which behaves differently to the aabb center * changing scene importer to allow for degenerate transforms with a warning * adding support for point lights * updating docs * improvements to how import_scene handles normal maps * removing debug print in device code --- CMakeLists.txt | 3 + examples/14.normal_map.py | 6 +- examples/22.volumes.py | 192 +++++ examples/23.ray_visibility.py | 282 ++++++++ examples/content.txt | 12 + examples/download_content.sh | 21 +- examples/requirements.txt | 4 +- externals/owl | 2 +- include/nvisii/entity.h | 22 +- include/nvisii/entity_struct.h | 9 +- include/nvisii/light.h | 4 +- include/nvisii/mesh.h | 10 +- include/nvisii/nvisii.h | 45 +- include/nvisii/texture.h | 3 + include/nvisii/volume.h | 9 +- src/nvisii/devicecode/buffer.h | 2 +- src/nvisii/devicecode/launch_params.h | 14 +- src/nvisii/devicecode/path_tracer.cu | 669 +++++++---------- src/nvisii/entity.cpp | 47 +- src/nvisii/light.cpp | 6 +- src/nvisii/mesh.cpp | 30 +- src/nvisii/nvisii.cpp | 986 ++++++++++++++------------ src/nvisii/nvisii.cu | 2 +- src/nvisii/nvisii_import_scene.cpp | 187 ++++- src/nvisii/texture.cpp | 5 + src/nvisii/volume.cpp | 7 +- 26 files changed, 1615 insertions(+), 964 deletions(-) create mode 100644 examples/22.volumes.py create mode 100644 examples/23.ray_visibility.py create mode 100644 examples/content.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 9055fc1e..6b86d441 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -293,6 +293,9 @@ find_program(BIN2C bin2c /usr/local/cuda/bin) # optix 7 +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/sutil/) +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/) + if ($ENV{OPTIX_VERSION}) set(OPTION_OPTIX_VERSION $ENV{OPTIX_VERSION}) else() diff --git a/examples/14.normal_map.py b/examples/14.normal_map.py index 
dce409ca..f0518799 100644 --- a/examples/14.normal_map.py +++ b/examples/14.normal_map.py @@ -103,9 +103,9 @@ mat.set_specular(0) # load an example brick texture -color_tex = nvisii.texture.create_from_file("color",'content/Bricks051_2K_Color.jpg') -normal_tex = nvisii.texture.create_from_file("normal",'content/Bricks051_2K_Normal.jpg', linear = True) -rough_tex = nvisii.texture.create_from_file("rough",'content/Bricks051_2K_Roughness.jpg', linear = True) +color_tex = nvisii.texture.create_from_file("color",'./content/Bricks051_2K_Color.jpg') +normal_tex = nvisii.texture.create_from_file("normal",'./content/Bricks051_2K_Normal.jpg', linear = True) +rough_tex = nvisii.texture.create_from_file("rough",'./content/Bricks051_2K_Roughness.jpg', linear = True) color_tex.set_scale((.1,.1)) normal_tex.set_scale((.1,.1)) diff --git a/examples/22.volumes.py b/examples/22.volumes.py new file mode 100644 index 00000000..3b1878df --- /dev/null +++ b/examples/22.volumes.py @@ -0,0 +1,192 @@ +#%% + +# 22.volumes.py +# +# This shows an example of several volumes. Some volume uses the NanoVDB format, +# others use a raw volume, and then some are generated procedurally. +# This scene tests how volumes can be lit up with light sources, and how they can +# overlap. + +# Note, the API here is subject to change with future versions... + +import nvisii +import numpy as np +opt = lambda: None +opt.spp = 512 +opt.width = 1024 +opt.height = 1024 +opt.out = '22_volumes.png' + +nvisii.initialize(headless = False, verbose = True, window_on_top = True) +nvisii.enable_denoiser() + +# Configuring the denoiser here to not use albedo and normal guides, which are +# noisy for volumes +nvisii.configure_denoiser(False, False, True) + +# Make a camera... 
+camera = nvisii.entity.create(name = "camera") +camera.set_transform(nvisii.transform.create(name = "camera_transform")) +camera.set_camera( + nvisii.camera.create_from_fov( + name = "camera_camera", + field_of_view = 0.785398, # note, this is in radians + aspect = opt.width / float(opt.height) + ) +) +nvisii.set_camera_entity(camera) +camera.get_transform().look_at(at = (0, 0, .5), up = (0, 0, 1), eye = (0, 5, 2)) + +# Make a dome light +env_tex = nvisii.texture.create_from_file("env_tex", "./content/kiara_4_mid-morning_4k.hdr") +nvisii.enable_dome_light_sampling() +nvisii.set_dome_light_texture(env_tex, enable_cdf=True) +nvisii.set_dome_light_exposure(-2.0) + + +# Make a textured floor +floor = nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) +mat = floor.get_material() +floor_tex = nvisii.texture.create_from_file("floor_tex", "./content/salle_de_bain_separated/textures/WoodFloor_BaseColor.jpg") +mat.set_base_color_texture(floor_tex) +trans = floor.get_transform() +trans.set_scale((5,5,1)) + +# Make a procedural torus volume +torus = nvisii.entity.create( + name="torus", + volume = nvisii.volume.create_torus("torus"), + transform = nvisii.transform.create("torus"), + material = nvisii.material.create("torus") +) +torus.get_transform().set_position((0.8,2,.2)) +torus.get_transform().set_scale((0.003, 0.003, 0.003)) +torus.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) +torus.get_material().set_base_color((1.,1.,1.0)) +# The gradient factor here controls how "surface like" the volume is. 
+# Higher values mean "more surface like" in areas where there is a strong +# gradient in the scalar field of the volume (which occurs near surfaces defined +# by high density regions) +torus.get_volume().set_gradient_factor(10) + +# Absorption controls the probability of light being absorbed by the volume +torus.get_volume().set_absorption(1.) +# Absorption controls the probability of light bouncing off one of the particles in the volume +torus.get_volume().set_scattering(.0) +# The scale here controls how "big" a voxel is, where "1" means a voxel is 1cm wide. +# Larger scales result in particles being distributed over longer distances, +# causing the volume to appear less dense +torus.get_volume().set_scale(100) + +# Create a procedural octahedron +octahedron = nvisii.entity.create( + name="octahedron", + volume = nvisii.volume.create_octahedron("octahedron"), + transform = nvisii.transform.create("octahedron"), + material = nvisii.material.create("octahedron") +) +octahedron.get_transform().set_position((.80,2.0,0.2)) # Note that this octahedron is inside the torus +octahedron.get_transform().set_scale((0.01, 0.01, 0.01)) +octahedron.get_transform().set_angle_axis(nvisii.pi() * .25, (0,0,1)) +octahedron.get_material().set_base_color((1.0,0.0,0)) +octahedron.get_volume().set_gradient_factor(10) +octahedron.get_volume().set_absorption(0) +octahedron.get_volume().set_scattering(1) +octahedron.get_volume().set_scale(15) + +# Create a procedural sphere +sphere = nvisii.entity.create( + name="sphere", + volume = nvisii.volume.create_sphere("sphere"), + transform = nvisii.transform.create("sphere"), + material = nvisii.material.create("sphere") +) +sphere.get_transform().set_position((-1.0,2,0.25)) +sphere.get_transform().set_scale((0.0025, 0.0025, 0.0025)) +sphere.get_material().set_base_color((0.2,0.2,1.0)) +sphere.get_volume().set_gradient_factor(10) +sphere.get_volume().set_absorption(0) +sphere.get_volume().set_scattering(1) +sphere.get_volume().set_scale(100) + 
+# Create a procedural box +box = nvisii.entity.create( + name="box", + volume = nvisii.volume.create_box("box"), + transform = nvisii.transform.create("box"), + material = nvisii.material.create("box") +) +box.get_transform().set_position((-1.0,2,0.25)) +box.get_transform().set_scale((0.005, 0.005, 0.005)) +box.get_transform().set_angle_axis(.3, (0,0,1)) +box.get_material().set_base_color((1.0,1.0,1.0)) +box.get_volume().set_gradient_factor(10) +box.get_volume().set_absorption(0) +box.get_volume().set_scattering(1) +box.get_volume().set_scale(100) + +# Create a cloudy bunny using a nanovdb file +bunny = nvisii.entity.create( + name="bunny", + volume = nvisii.volume.create_from_file("bunny", "./content/bunny_cloud.nvdb"), + transform = nvisii.transform.create("bunny"), + material = nvisii.material.create("bunny") +) +bunny.get_transform().set_position((-.8,.5,0.75)) +bunny.get_transform().set_scale((0.003, 0.003, 0.003)) +bunny.get_material().set_base_color((0.1,0.9,0.08)) +bunny.get_material().set_roughness(0.7) +bunny.get_volume().set_gradient_factor(10) +bunny.get_volume().set_absorption(1) +bunny.get_volume().set_scattering(0) +bunny.get_volume().set_scale(4) +bunny.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) +bunny.get_transform().add_angle_axis(nvisii.pi(), (0,1,0)) + +# Create a boston teapot using a raw CT scanned volume +voxels = np.fromfile("./content/boston_teapot_256x256x178_uint8.raw", dtype=np.uint8).astype(np.float32) +teapot = nvisii.entity.create( + name="teapot", + volume = nvisii.volume.create_from_data("teapot", width = 256, height = 256, depth = 178, data = voxels, background = 0.0), + transform = nvisii.transform.create("teapot"), + material = nvisii.material.create("teapot") +) +teapot.get_transform().set_position((1,0,0.7)) +teapot.get_transform().set_scale((0.005, 0.005, 0.005)) +teapot.get_material().set_base_color((1.0,1.0,1.0)) +teapot.get_material().set_roughness(0.0) +teapot.get_material().set_metallic(1.0) 
+teapot.get_volume().set_gradient_factor(100) +teapot.get_volume().set_absorption(1) +teapot.get_volume().set_scattering(0) +teapot.get_volume().set_scale(250) +teapot.get_transform().set_angle_axis(-nvisii.pi() * .5, (1,0,0)) +teapot.get_transform().add_angle_axis(nvisii.pi() * 1.1, (0,1,0)) + +# Volumes can be lit up using light sources +light = nvisii.entity.create( + name="light", + mesh = nvisii.mesh.create_sphere("light"), + transform = nvisii.transform.create("light"), + light = nvisii.light.create("light") +) +light.get_transform().set_position((0,1,2.5)) +light.get_transform().set_scale((.2,.2,.2)) +light.get_light().set_temperature(4000) +light.get_light().set_intensity(20) + +# Render out the image +print("rendering to", "22_volumes.png") +nvisii.render_to_file( + width = opt.width, + height = opt.height, + samples_per_pixel = opt.spp, + file_path = "22_volumes.png" +) + +nvisii.deinitialize() diff --git a/examples/23.ray_visibility.py b/examples/23.ray_visibility.py new file mode 100644 index 00000000..5311621f --- /dev/null +++ b/examples/23.ray_visibility.py @@ -0,0 +1,282 @@ +import nvisii +import math +import PySide2 +import colorsys +from PySide2.QtCore import * +from PySide2.QtWidgets import * + +nvisii.initialize() +nvisii.resize_window(1000,1000) +nvisii.enable_denoiser() +# nvisii.configure_denoiser(False, False, True) +nvisii.set_max_bounce_depth(diffuse_depth=2, glossy_depth = 8, transparency_depth = 8, transmission_depth = 12, volume_depth = 2) + +# Set the sky +nvisii.disable_dome_light_sampling() +nvisii.set_dome_light_color((0,0,0)) + +# Set camera +camera = nvisii.entity.create( + name = "camera", + transform = nvisii.transform.create(name = "camera_transform"), + camera = nvisii.camera.create( + name = "camera_camera", + aspect = 1.0 + ) +) +camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (0, 5, 2) # eye position +) +nvisii.set_camera_entity(camera) + +# Floor +floor = 
nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) +floor.get_material().set_base_color((0.19,0.16,0.19)) +floor.get_material().set_metallic(0) +floor.get_material().set_roughness(1) +floor.get_transform().set_scale((5,5,1)) + +# Mirror 1 +mirror1 = nvisii.entity.create( + name = "mirror1", + mesh = nvisii.mesh.create_box("mesh_mirror1"), + transform = nvisii.transform.create("transform_mirror1"), + material = nvisii.material.create("material_mirror1") +) +mirror1.get_transform().look_at(eye = (-1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror1.get_material().set_base_color((1.,1.,1.)) +mirror1.get_material().set_metallic(1) +mirror1.get_material().set_roughness(0) +mirror1.get_transform().set_scale((.7,.7,.1)) + +# Glass 1 +glass1 = nvisii.entity.create( + name = "glass1", + mesh = nvisii.mesh.create_box("mesh_glass1"), + transform = nvisii.transform.create("transform_glass1"), + material = nvisii.material.create("material_glass1") +) +glass1.get_transform().look_at(eye = (1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass1.get_material().set_base_color((1.,1.,1.)) +glass1.get_material().set_transmission(1) +glass1.get_material().set_roughness(0) +glass1.get_transform().set_scale((.7,.7,.1)) + +# Mirror 2 +mirror2 = nvisii.entity.create( + name = "mirror2", + mesh = nvisii.mesh.create_box("mesh_mirror2"), + transform = nvisii.transform.create("transform_mirror2"), + material = nvisii.material.create("material_mirror2") +) +mirror2.get_transform().look_at(eye = (1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror2.get_material().set_base_color((1.,1.,1.)) +mirror2.get_material().set_metallic(1) +mirror2.get_material().set_roughness(0) +mirror2.get_transform().set_scale((.7,.7,.1)) + +# Glass 2 +glass2 = nvisii.entity.create( + name = "glass2", + mesh = nvisii.mesh.create_box("mesh_glass2"), + transform = 
nvisii.transform.create("transform_glass2"), + material = nvisii.material.create("material_glass2") +) +glass2.get_transform().look_at(eye = (-1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass2.get_material().set_base_color((1.,1.,1.)) +glass2.get_material().set_transmission(1) +glass2.get_material().set_roughness(0) +glass2.get_transform().set_scale((.7,.7,.1)) + +# Fog +fog = nvisii.entity.create( + name = "fog", + volume = nvisii.volume.create_box("mesh_fog"), + transform = nvisii.transform.create("transform_fog"), + material = nvisii.material.create("material_fog") +) +fog.get_material().set_base_color((1.,1.,1.)) +fog.get_material().set_transmission(1) +fog.get_material().set_roughness(0) +fog.get_volume().set_scale(100) + +# Light +light = nvisii.entity.create( + name = "light", + light = nvisii.light.create("light"), + transform = nvisii.transform.create("light"), + mesh = nvisii.mesh.create_sphere("light") +) +light.get_transform().set_position((0,0,5)) +light.get_transform().set_scale((.1,.1,.1)) +light.get_light().set_exposure(7) + +# Light blocker +blocker = nvisii.entity.create( + name = "blocker", + mesh = nvisii.mesh.create_capped_tube("blocker", innerRadius = .04), + transform = nvisii.transform.create("blocker"), + material = nvisii.material.create("blocker") +) +blocker.get_transform().set_scale((10,10,.01)) +blocker.get_transform().set_position((0,0,3.0)) + +# Teapot +teapotahedron = nvisii.entity.create( + name="teapotahedron", + mesh = nvisii.mesh.create_teapotahedron("teapotahedron", segments = 32), + transform = nvisii.transform.create("teapotahedron"), + material = nvisii.material.create("teapotahedron") +) +teapotahedron.get_transform().set_rotation(nvisii.angleAxis(nvisii.pi() / 4.0, (0,0,1))) +teapotahedron.get_transform().set_position((0,0,0)) +teapotahedron.get_transform().set_scale((0.4, 0.4, 0.4)) +teapotahedron.get_material().set_base_color((255.0 / 255.0, 100.0 / 255.0, 2.0 / 256.0)) +teapotahedron.get_material().set_roughness(0.0) 
+teapotahedron.get_material().set_specular(1.0) +teapotahedron.get_material().set_metallic(1.0) + +# Make a QT window to demonstrate the difference between alpha transparency and transmission +app = QApplication([]) # Start an application. +window = QWidget() # Create a window. +layout = QVBoxLayout() # Create a layout. + +def rotateCamera(value): + value = value / 100.0 + cam_pos = camera.get_transform().get_position() + + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (5 * math.cos(value * 2 * nvisii.pi()), 5 * math.sin(value * 2 * nvisii.pi()), cam_pos[2]) # eye position + ) +rotateCamera(0) +dial = QDial() +dial.setWrapping(True) +dial.valueChanged[int].connect(rotateCamera) +layout.addWidget(QLabel('Camera rotation')) +layout.addWidget(dial) + +def rotateCameraElevation(value): + # print(value) + value = value / 100 + cam_pos = camera.get_transform().get_position() + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (cam_pos[0], cam_pos[1], 0.1 + 2.5*value) # eye position + ) + # print(value, 2 * math.cos(value * 2 * nvisii.pi())) + +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(rotateCameraElevation) +slider.setValue(40) +layout.addWidget(QLabel('Camera Elevation')) +layout.addWidget(slider) + +# Add some toggles to demonstrate how the set_visibility function works + +camera_visibility = True +diffuse_visibility = True +glossy_visibility = True +transmission_visibility = True +scatter_visibility = True +shadow_visibility = True +def updateVisibility(): + global camera_visibility + global diffuse_visibility + global glossy_visibility + global transmission_visibility + global scatter_visibility + global shadow_visibility + + teapotahedron.set_visibility( + camera = camera_visibility, + diffuse = diffuse_visibility, + glossy = glossy_visibility, + transmission = transmission_visibility, + volume_scatter = scatter_visibility, + shadow 
= shadow_visibility) + +def toggleCamera(): + global camera_visibility + camera_visibility = not camera_visibility + updateVisibility() +button = QPushButton("toggleCamera") +button.clicked.connect(toggleCamera) +layout.addWidget(button) + +def toggleDiffuse(): + global diffuse_visibility + diffuse_visibility = not diffuse_visibility + updateVisibility() +button = QPushButton("toggleDiffuse") +button.clicked.connect(toggleDiffuse) +layout.addWidget(button) + +def toggleGlossy(): + global glossy_visibility + glossy_visibility = not glossy_visibility + updateVisibility() +button = QPushButton("toggleGlossy") +button.clicked.connect(toggleGlossy) +layout.addWidget(button) + +def toggleTransmission(): + global transmission_visibility + transmission_visibility = not transmission_visibility + updateVisibility() +button = QPushButton("toggleTransmission") +button.clicked.connect(toggleTransmission) +layout.addWidget(button) + +def toggleScattering(): + global scatter_visibility + scatter_visibility = not scatter_visibility + updateVisibility() +button = QPushButton("toggleScattering") +button.clicked.connect(toggleScattering) +layout.addWidget(button) + +def toggleShadows(): + global shadow_visibility + shadow_visibility = not shadow_visibility + updateVisibility() +button = QPushButton("toggleShadows") +button.clicked.connect(toggleShadows) +layout.addWidget(button) + +def setFogStrength(value): + value = (100 - value) * 2 + 10 + fog.get_volume().set_scale(value) +setFogStrength(100) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setFogStrength) +slider.setValue(100) +layout.addWidget(QLabel('Fog Strength')) +layout.addWidget(slider) + + +def setLightHeight(value): + value = value / 100.0 + light.get_transform().set_position((0,0,3 + value * 2)) +setLightHeight(50) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setLightHeight) +slider.setValue(50) +layout.addWidget(QLabel('Light Height')) +layout.addWidget(slider) + + 
+window.setLayout(layout) +window.show() +app.exec_() + +nvisii.deinitialize() \ No newline at end of file diff --git a/examples/content.txt b/examples/content.txt new file mode 100644 index 00000000..232b1289 --- /dev/null +++ b/examples/content.txt @@ -0,0 +1,12 @@ +https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip +https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg +https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png +https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip +https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr +https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip +https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl +https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip +https://www.dropbox.com/s/nim7jsjiumei4f9/boston_teapot_256x256x178_uint8.zip +https://www.dropbox.com/s/yybckz6sawq5nbw/TestNormalMap.png diff --git a/examples/download_content.sh b/examples/download_content.sh index 02b0fe46..c6ca856f 100644 --- a/examples/download_content.sh +++ b/examples/download_content.sh @@ -1,26 +1,15 @@ mkdir content cd content -wget https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip -unzip dragon.zip -d dragon/ -rm dragon.zip - -wget https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg -wget https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +wget -i ../content.txt -wget https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +unzip dragon.zip -d dragon/ +rm dragon.zip unzip salle_de_bain_separated.zip rm salle_de_bain_separated.zip - -wget https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png -wget https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip unzip Bricks051_2K-JPG.zip - -wget https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr - mkdir bmw -wget 
https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip unzip bmw.zip -d bmw/ rm bmw.zip - -wget https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl \ No newline at end of file +unzip bunny_cloud.zip +unzip boston_teapot_256x256x178_uint8.zip diff --git a/examples/requirements.txt b/examples/requirements.txt index ebf9dd38..317f777d 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -4,6 +4,6 @@ noise numpy pillow scipy -pygame open3d -PySide2 \ No newline at end of file +PySide2 +opencv-python diff --git a/externals/owl b/externals/owl index da110c5e..0f82536c 160000 --- a/externals/owl +++ b/externals/owl @@ -1 +1 @@ -Subproject commit da110c5e1453c17c8e3567d187a69b9bc6082943 +Subproject commit 0f82536cd56668ca786d4f5e8eb796e6b03752c9 diff --git a/include/nvisii/entity.h b/include/nvisii/entity.h index 5120184a..a9d5220c 100644 --- a/include/nvisii/entity.h +++ b/include/nvisii/entity.h @@ -211,9 +211,21 @@ class Entity : public StaticFactory { /** * Objects can be set to be invisible to particular ray types: - * @param camera Makes the object visible to camera rays + * @param camera Makes the object visible to camera rays (the first rays to be traced from the camera). + * @param diffuse Makes the object visible to diffuse rays (eg for diffuse GI) + * @param glossy Makes the object visible to glossy rays (eg in reflections) + * @param transmission Makes the object visible to transmission rays (eg from inside glass) + * @param volume_scatter Makes the object visible to volume scatter rays (eg from light simulation inside a volume) + * @param shadow Enables the object to cast shadows. */ - void setVisibility(bool camera = true); + void setVisibility( + bool camera = true, + bool diffuse = true, + bool glossy = true, + bool transmission = true, + bool volume_scatter = true, + bool shadow = true + ); /** @returns the minimum axis aligned bounding box position. Requires a transform and mesh component to be attached. 
*/ glm::vec3 getMinAabbCorner(); @@ -224,6 +236,12 @@ class Entity : public StaticFactory { /** @returns the center of the aligned bounding box. Requires a transform and mesh component to be attached. */ glm::vec3 getAabbCenter(); + /** + * @returns the average of the vertices of the mesh in world space, which will lay roughly in the center. Requires + * a transform and mesh component to be attached. + */ + glm::vec3 getCenter(); + /** For internal use. Returns the mutex used to lock entities for processing by the renderer. */ static std::shared_ptr getEditMutex(); diff --git a/include/nvisii/entity_struct.h b/include/nvisii/entity_struct.h index 41658fa4..86b0d396 100644 --- a/include/nvisii/entity_struct.h +++ b/include/nvisii/entity_struct.h @@ -6,7 +6,12 @@ #ifndef ENTITY_VISIBILITY_FLAGS #define ENTITY_VISIBILITY_FLAGS -#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) +#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) // object is visible to direct camera rays +#define ENTITY_VISIBILITY_DIFFUSE_RAYS (1<<1) // object is visible to diffuse rays +#define ENTITY_VISIBILITY_GLOSSY_RAYS (1<<2) // object is visible to glossy rays +#define ENTITY_VISIBILITY_TRANSMISSION_RAYS (1<<3) // object is visible to transmission rays +#define ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS (1<<4) // object is visible to multiple-scattering volume rays +#define ENTITY_VISIBILITY_SHADOW_RAYS (1<<5) // object is visible to shadow rays (ie, casts shadows) #endif struct EntityStruct { @@ -17,7 +22,7 @@ struct EntityStruct { int32_t light_id = -1; int32_t mesh_id = -1; int32_t volume_id = -1; - int32_t flags = 1; + uint32_t flags = (uint32_t)-1; glm::vec4 bbmin = glm::vec4(0.f); glm::vec4 bbmax = glm::vec4(0.f); }; \ No newline at end of file diff --git a/include/nvisii/light.h b/include/nvisii/light.h index 4b957628..99dd7969 100644 --- a/include/nvisii/light.h +++ b/include/nvisii/light.h @@ -11,7 +11,9 @@ class Texture; /** * A "Light" component illuminates objects in a scene. 
Light components must * be added to an entity with a transform component to have a visible - * impact on the scene. + * impact on the scene. Lights attached to entities with no mesh components + * act like point lights. Otherwise, lights attached to entities with meshes + * will act like mesh lights. */ class Light : public StaticFactory { friend class StaticFactory; diff --git a/include/nvisii/mesh.h b/include/nvisii/mesh.h index 6bb73cca..91980cc4 100644 --- a/include/nvisii/mesh.h +++ b/include/nvisii/mesh.h @@ -699,6 +699,8 @@ class Mesh : public StaticFactory * @param position_dimensions The number of floats per position. Valid numbers are 3 or 4. * @param normals A list of vertex normals. If indices aren't supplied, this must be a multiple of 3. * @param normal_dimensions The number of floats per normal. Valid numbers are 3 or 4. + * @param tangents A list of vertex tangents. If indices aren't supplied, this must be a multiple of 3. + * @param tangent_dimensions The number of floats per tangent. Valid numbers are 3 or 4. * @param colors A list of per-vertex colors. If indices aren't supplied, this must be a multiple of 3. * @param color_dimensions The number of floats per color. Valid numbers are 3 or 4. * @param texcoords A list of 2D per-vertex texture coordinates. If indices aren't supplied, this must be a multiple of 3. @@ -712,6 +714,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions = 3, std::vector normals = std::vector(), uint32_t normal_dimensions = 3, + std::vector tangents = std::vector(), + uint32_t tangent_dimensions = 3, std::vector colors = std::vector(), uint32_t color_dimensions = 4, std::vector texcoords = std::vector(), @@ -843,8 +847,8 @@ class Mesh : public StaticFactory // /* TODO: Explain this */ // void save_tetrahedralization(float quality_bound, float maximum_volume); - /** @returns the last computed mesh centroid. 
*/ - glm::vec3 getCentroid(); + /** @returns the average of the vertices of the mesh, which will lay roughly in the center. */ + glm::vec3 getCenter(); /** @returns the minimum axis aligned bounding box position */ glm::vec3 getMinAabbCorner(); @@ -1012,6 +1016,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index e7126249..635ad212 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -207,7 +207,7 @@ void setDirectLightingClamp(float clamp); */ void setMaxBounceDepth( uint32_t diffuse_depth = 2, - uint32_t glossy_depth = 2, + uint32_t glossy_depth = 8, uint32_t transparency_depth = 8, uint32_t transmission_depth = 12, uint32_t volume_depth = 2 @@ -300,10 +300,21 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samples_per_pixel, s * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. 
+ * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "tangent" for rendering out the world space tangent of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param seed A seed used to initialize the random number generator. */ std::vector renderData( @@ -319,10 +330,20 @@ std::vector renderData( * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. 
Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. + * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param file_path The path to use to save the file, including the extension. Supported extensions are EXR, HDR, and PNG * @param seed A seed used to initialize the random number generator. 
*/ @@ -358,11 +379,11 @@ struct Scene { * * @param filepath The path for the file to load * @param position A change in position to apply to all entities generated by this function - * @param position A change in scale to apply to all entities generated by this function - * @param position A change in rotation to apply to all entities generated by this function + * @param scale A change in scale to apply to all entities generated by this function + * @param rotation A change in rotation to apply to all entities generated by this function * @param args A list of optional arguments that can effect the importer. * Possible options include: - * "verbose" - print out information related to loading the scene. + * "verbose" - print out information related to loading the scene. Useful for debugging! */ Scene importScene( std::string file_path, diff --git a/include/nvisii/texture.h b/include/nvisii/texture.h index 107cb231..6bb64faa 100644 --- a/include/nvisii/texture.h +++ b/include/nvisii/texture.h @@ -209,6 +209,9 @@ class Texture : public StaticFactory /** @returns True if the texture is represented linearly. Otherwise, the texture is in sRGB space */ bool isLinear(); + /** @param is_linear If True, texels will be interpreted as linear space. Otherwise, the texels will be interpreed as sRGB space */ + void setLinear(bool is_linear); + private: /* TODO */ static std::shared_ptr editMutex; diff --git a/include/nvisii/volume.h b/include/nvisii/volume.h index d71236d3..7ceddeb8 100644 --- a/include/nvisii/volume.h +++ b/include/nvisii/volume.h @@ -78,8 +78,15 @@ class Volume : public StaticFactory * is 0 and inactive, the interior is active with values varying * smoothly from 0 at the surface of the box to 1 at the half width * and interior of the box. + * @param name The name of the volume to create. + * @param size The width, height, and depth of the box in local units. 
+ * @param center The center of the box in local units + * @param half_width The half-width of the narrow band in voxel units */ - static Volume *createBox(std::string name); + static Volume *createBox(std::string name, + glm::vec3 size = glm::vec3(100.f), + glm::vec3 center = glm::vec3(0.f), + float half_width = 3.f); /** * Creates a sparse fog volume of an octahedron such that the exterior diff --git a/src/nvisii/devicecode/buffer.h b/src/nvisii/devicecode/buffer.h index 5464326e..8354d66f 100644 --- a/src/nvisii/devicecode/buffer.h +++ b/src/nvisii/devicecode/buffer.h @@ -35,4 +35,4 @@ class Buffer : public owl::device::Buffer #define GET(RETURN, TYPE, BUFFER, ADDRESS) \ if (BUFFER.data == nullptr) {::printf("Device Side Error on Line %d: buffer was nullptr.\n", __LINE__); asm("trap;");} \ if (ADDRESS >= BUFFER.count) {::printf("Device Side Error on Line %d: out of bounds access (address: %d, size %d).\n", __LINE__, ADDRESS, uint32_t(BUFFER.count)); asm("trap;");} \ -RETURN = ((TYPE*)BUFFER.data)[ADDRESS];\ +RETURN = ((TYPE*)BUFFER.data)[ADDRESS]; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index ea2c2d51..5da1324b 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -19,16 +19,17 @@ #include "./buffer.h" struct LaunchParams { + Buffer assignmentBuffer; + glm::ivec2 frameSize; uint64_t frameID = 0; glm::vec4 *frameBuffer; - glm::vec4 *albedoBuffer; + uchar4 *albedoBuffer; glm::vec4 *normalBuffer; glm::vec4 *scratchBuffer; glm::vec4 *mvecBuffer; glm::vec4 *accumPtr; - OptixTraversableHandle surfacesIAS; - OptixTraversableHandle volumesIAS; + OptixTraversableHandle IAS; float domeLightIntensity = 1.f; float domeLightExposure = 0.f; glm::vec3 domeLightColor = glm::vec3(-1.f); @@ -58,8 +59,7 @@ struct LaunchParams { Buffer textures; Buffer volumes; Buffer lightEntities; - Buffer surfaceInstanceToEntity; - Buffer volumeInstanceToEntity; + Buffer instanceToEntity; uint32_t 
numLightEntities = 0; Buffer> vertexLists; @@ -111,7 +111,9 @@ enum RenderDataFlags : uint32_t { TRANSMISSION_INDIRECT_LIGHTING = 17, RAY_DIRECTION = 18, HEATMAP = 19, - TEXTURE_COORDINATES = 20 + TEXTURE_COORDINATES = 20, + DEVICE_ID = 21, + TANGENT = 22 }; #define MAX_LIGHT_SAMPLES 10 diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index ed8344db..4b5c896a 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -244,217 +244,39 @@ void SampleDeltaTracking( } } -// bool debug = (prd.primitiveID == -2); -// if (debug) { -// if (! ((mn[0] < x[0]) && (x[0] < mx[0]) && -// (mn[1] < x[1]) && (x[1] < mx[1]) && -// (mn[2] < x[2]) && (x[2] < mx[2])) -// ) { -// printf("X"); -// } else { -// printf("O"); -// } -// } -// if (debug) { -// printf("\n"); -// } - OPTIX_CLOSEST_HIT_PROGRAM(VolumeMesh)() { auto &LP = optixLaunchParams; RayPayload &prd = owl::getPRD(); - const auto &self = owl::getProgramData(); - LCGRand rng = prd.rng; - - // Load the volume we hit - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = 
t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } - - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } + optixGetObjectToWorldTransformMatrix(prd.localToWorld); - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. - t1 = t1 - t; - } - } + // If we don't need motion vectors, (or in the future if an object + // doesn't have motion blur) then return. + if (LP.renderDataMode == RenderDataFlags::NONE) return; - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - - auto sampler = nanovdb::SampleFromVoxels, /*Interpolation Degree*/1, /*UseCache*/false>(acc); - auto coord_pos = nanovdb::Coord::Floor( nanovdb::Vec3f(x.x, x.y, x.z) ); - float densityValue = acc.getValue(coord_pos); - auto g = sampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); - - prd.mp = make_float3(x - offset); // not super confident about this offset... - prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY - prd.density = densityValue; - optixGetObjectToWorldTransformMatrix(prd.localToWorld); - - // If we don't need motion vectors, (or in the future if an object - // doesn't have motion blur) then return. 
- if (LP.renderDataMode == RenderDataFlags::NONE) return; + OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); + float4 trf00, trf01, trf02; + float4 trf10, trf11, trf12; - OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); - float4 trf00, trf01, trf02; - float4 trf10, trf11, trf12; - - optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); - optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); - memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); - memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); - memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); - memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); - memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); - memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); - } + optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); + optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); + memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); + memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); + memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); + memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); + memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); + memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); } OPTIX_CLOSEST_HIT_PROGRAM(VolumeShadowRay)() { - auto &LP = optixLaunchParams; - const auto &self = owl::getProgramData(); - RayPayload &prd = owl::getPRD(); - LCGRand rng = prd.rng; - - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = 
bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } - - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } - - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. - t1 = t1 - t; - } - } - - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - } } OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() { - // float old_tmax = optixGetRayTmax(); - - // const int primID = optixGetPrimitiveIndex(); + auto &LP = optixLaunchParams; const auto &self = owl::getProgramData(); + RayPayload &prd = owl::getPRD(); float3 origin = optixGetObjectRayOrigin(); // note, this is _not_ normalized. 
Useful for computing world space tmin/mmax @@ -492,12 +314,78 @@ OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() // clip hit to near position thit0 = max(thit0, optixGetRayTmin()); - RayPayload &prd = owl::getPRD(); - if (optixReportIntersection(thit0, /* hit kind */ 0)) { - prd.objectSpaceRayOrigin = origin; - prd.objectSpaceRayDirection = direction; - prd.t0 = max(prd.t0, thit0); - prd.t1 = min(prd.t1, thit1); + // Load the volume we hit + GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); + uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; + const auto grid = reinterpret_cast(hdl); + const auto& tree = grid->tree(); + auto acc = tree.getAccessor(); + auto nvdbSampler = nanovdb::SampleFromVoxels, + /*Interpolation Degree*/1, /*UseCache*/false>(acc); + + float majorant_extinction = acc.root().valueMax(); + float gradient_factor = volume.gradient_factor; + float linear_attenuation_unit = volume.scale; + float absorption = volume.absorption; + float scattering = volume.scattering; + + auto bbox = acc.root().bbox(); + auto mx = bbox.max(); + auto mn = bbox.min(); + float3 offset = make_float3(glm::vec3(mn[0], mn[1], mn[2]) + + (glm::vec3(mx[0], mx[1], mx[2]) - + glm::vec3(mn[0], mn[1], mn[2])) * .5f); + + // Sample the free path distance to see if our ray makes it to the boundary + float t = thit0; + int event; + bool hitVolume = false; + float unit = volume.scale / length(direction); + #define MAX_NULL_COLLISIONS 1000 + for (int i = 0; i < MAX_NULL_COLLISIONS; ++i) { + // Sample a distance + t = t - (log(1.0f - lcg_randomf(prd.rng)) / majorant_extinction) * unit; + + // A boundary has been hit, no intersection + if (t >= thit1) return; + + // Update current position + float3 x = offset + origin + t * direction; + + // Sample heterogeneous media + float densityValue = nvdbSampler(nanovdb::Vec3f(x.x, x.y, x.z)); + + float a = densityValue * absorption; + float s = densityValue * scattering; + float e = a + s; + float n = 
majorant_extinction - e; + + a = a / majorant_extinction; + s = s / majorant_extinction; + n = n / majorant_extinction; + + float event = lcg_randomf(prd.rng); + // An absorption/emission collision occured + if (event < (a + s)) { + if (optixReportIntersection(t, /* hit kind */ 0)) { + auto g = nvdbSampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); + prd.objectSpaceRayOrigin = origin; + prd.objectSpaceRayDirection = direction; + prd.eventID = (event < a) ? 1 : 2; + prd.instanceID = optixGetInstanceIndex(); + prd.tHit = t; + prd.mp = x - offset; // not super confident about this offset... + prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY + prd.density = densityValue; + } + return; + } + + // A null collision occurred + else { + event = 3; + continue; + } } } @@ -644,7 +532,7 @@ float sampleTime(float xi) { } inline __device__ -owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, ivec2 pixelID, ivec2 frameSize, LCGRand &rng, float time) +owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, int2 pixelID, float2 frameSize, LCGRand &rng, float time) { auto &LP = optixLaunchParams; /* Generate camera rays */ @@ -665,7 +553,7 @@ owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transfor - vec2(LP.xPixelSamplingInterval[0], LP.yPixelSamplingInterval[0]) ) * vec2(lcg_randomf(rng),lcg_randomf(rng)); - vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / vec2(frameSize); + vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / make_vec2(frameSize); vec3 right = normalize(glm::column(viewinv, 0)); vec3 up = normalize(glm::column(viewinv, 1)); vec3 origin = glm::column(viewinv, 3); @@ -704,16 +592,19 @@ void initializeRenderData(float3 &renderData) auto &LP = optixLaunchParams; // these might change in the future... 
if (LP.renderDataMode == RenderDataFlags::NONE) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::DEPTH) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::POSITION) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::NORMAL) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); + } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { renderData = make_float3(0.0f); @@ -811,7 +702,7 @@ __device__ void saveGeometricRenderData( float3 &renderData, int bounce, float depth, - float3 w_p, float3 w_n, float3 w_o, float2 uv, + float3 w_p, float3 w_n, float3 w_x, float3 w_o, float2 uv, int entity_id, float3 diffuse_mvec, float time, DisneyMaterial &mat) { @@ -828,6 +719,9 @@ void saveGeometricRenderData( else if (LP.renderDataMode == RenderDataFlags::NORMAL) { renderData = w_n; } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = w_x; + } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { glm::quat r0 = glm::quat_cast(LP.viewT0); glm::quat r1 = glm::quat_cast(LP.viewT1); @@ -872,6 +766,18 @@ void saveHeatmapRenderData( renderData = make_float3(relClock); } +__device__ +void saveDeviceAssignment( + float3 &renderData, + int bounce, + uint32_t deviceIndex +) +{ + auto &LP = optixLaunchParams; + if (LP.renderDataMode != RenderDataFlags::DEVICE_ID) return; + renderData = make_float3(deviceIndex); +} + __device__ bool debugging() { #ifndef DEBUGGING @@ -885,23 +791,28 @@ bool debugging() { OPTIX_RAYGEN_PROGRAM(rayGen)() { const RayGenData &self = owl::getProgramData(); - cudaTextureObject_t envTex = getEnvironmentTexture(); - auto &LP = optixLaunchParams; auto launchIndex = 
optixGetLaunchIndex().x; auto launchDim = optixGetLaunchDimensions().x; - auto pixelID = ivec2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); - bool debug = (pixelID.x == int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); + auto pixelID = make_int2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); - float tmax = 1e20f; //todo: customize depending on scene bounds //glm::distance(LP.sceneBBMin, LP.sceneBBMax); + // Terminate thread if current pixel not assigned to this device + GET(float start, float, LP.assignmentBuffer, self.deviceIndex); + GET(float stop, float, LP.assignmentBuffer, self.deviceIndex + 1); + start *= (LP.frameSize.x * LP.frameSize.y); + stop *= (LP.frameSize.x * LP.frameSize.y); - /* compute who is repsonible for a given group of pixels */ - /* and if it's not us, just return. */ - /* (some other device will compute these pixels) */ - int deviceThatIsResponsible = (pixelID.x>>5) % self.deviceCount; - if (self.deviceIndex != deviceThatIsResponsible) { - return; - } + // if (launchIndex == 0) { + // printf("device %d start %f stop %f\n", self.deviceIndex, start, stop); + // } + + if( pixelID.x > LP.frameSize.x-1 || pixelID.y > LP.frameSize.y-1 ) return; + if( (launchIndex < start) || (stop <= launchIndex) ) return; + // if (self.deviceIndex == 1) return; + + cudaTextureObject_t envTex = getEnvironmentTexture(); + bool debug = (pixelID.x == int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); + float tmax = 1e20f; //todo: customize depending on scene bounds //glm::distance(LP.sceneBBMin, LP.sceneBBMax); auto dims = ivec2(LP.frameSize.x, LP.frameSize.x); uint64_t start_clock = clock(); @@ -913,7 +824,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float time = sampleTime(lcg_randomf(rng)); // If no camera is in use, just display some random noise... 
- owl::Ray surfRay; + owl::Ray ray; EntityStruct camera_entity; TransformStruct camera_transform; CameraStruct camera; @@ -924,8 +835,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Trace an initial ray through the scene - surfRay = generateRay(camera, camera_transform, pixelID, LP.frameSize, rng, time); - surfRay.tmax = tmax; + ray = generateRay(camera, camera_transform, pixelID, make_float2(LP.frameSize), rng, time); + ray.tmax = tmax; float3 accum_illum = make_float3(0.f); float3 pathThroughput = make_float3(1.f); @@ -940,30 +851,20 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() uint8_t transparencyDepth = 0; uint8_t transmissionDepth = 0; uint8_t volumeDepth = 0; + int sampledBsdf = -1; + bool useBRDF = true; // direct here is used for final image clamping float3 directIllum = make_float3(0.f); float3 illum = make_float3(0.f); - RayPayload surfPayload; - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( /*accel to trace against*/ LP.surfacesIAS, - /*the ray to trace*/ surfRay, - /*prd*/ surfPayload, - OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - owl::Ray volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - RayPayload volPayload; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-2 : -1; - owl::traceRay( /*accel to trace against*/ LP.volumesIAS, - /*the ray to trace*/ volRay, - /*prd*/ volPayload, + RayPayload payload; + payload.tHit = -1.f; + ray.time = time; + ray.visibilityMask = ENTITY_VISIBILITY_CAMERA_RAYS; + owl::traceRay( /*accel to trace against*/ LP.IAS, + /*the ray to trace*/ ray, + /*prd*/ payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Shade each hit point on a path using NEE with MIS @@ -971,22 +872,22 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha = 0.f; // If ray misses, terminate the ray - if ((surfPayload.tHit <= 0.f) && (volPayload.tHit <= 0.f)) { + if (payload.tHit <= 0.f) { // Compute lighting from environment if (depth == 0) { - float3 col = missColor(surfRay, envTex); + float3 col = missColor(ray, envTex); illum = illum + pathThroughput * (col * LP.domeLightIntensity); directIllum = illum; primaryAlbedo = col; } else if (enableDomeSampling) - illum = illum + pathThroughput * (missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); + illum = illum + pathThroughput * (missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); const float envDist = 10000.0f; // large value /* Compute miss motion vector */ float3 mvec; // Point far away - float3 pFar = surfRay.origin + surfRay.direction * envDist; + float3 pFar = ray.origin + ray.direction * envDist; // TODO: account for motion from rotating dome light vec4 tmp1 = LP.proj * LP.viewT0 * /*xfmt0 **/ make_vec4(pFar, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -997,64 +898,40 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() break; } - bool isVolume = (volPayload.tHit >= 0.f); // Load the object we hit. 
- int entityID; - if (isVolume) { GET(entityID, int, LP.volumeInstanceToEntity, volPayload.instanceID); } - else { GET(entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } - + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); GET(EntityStruct entity, EntityStruct, LP.entities, entityID); GET(TransformStruct transform, TransformStruct, LP.transforms, entity.transform_id); + + bool isVolume = (entity.volume_id != -1); MeshStruct mesh; VolumeStruct volume; if (!isVolume) { GET(mesh, MeshStruct, LP.meshes, entity.mesh_id); } else { GET(volume, VolumeStruct, LP.volumes, entity.volume_id); } - // Skip forward if the hit object is invisible for this ray type, skip it. - if (((entity.flags & ENTITY_VISIBILITY_CAMERA_RAYS) == 0)) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -3 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - transparencyDepth++; - if (transparencyDepth > LP.maxTransparencyDepth) break; - continue; - } - // Set new outgoing light direction and hit position. 
- const float3 w_o = -surfRay.direction; - float3 hit_p; - if (volPayload.tHit >= 0.f) hit_p = volRay.origin + volPayload.tHit * volRay.direction; - else hit_p = surfRay.origin + surfPayload.tHit * surfRay.direction; + const float3 w_o = -ray.direction; + float3 hit_p = ray.origin + payload.tHit * ray.direction; // Load geometry data for the hit object float3 mp, p, v_x, v_y, v_z, v_gz, v_bz; float2 uv; float3 diffuseMotion; - if (volPayload.tHit >= 0.f) { + if (isVolume) { v_x = v_y = make_float3(0.f); // Perhaps I could use divergence / curl here? - v_z = v_gz = normalize(volPayload.gradient); + v_z = v_gz = normalize(payload.gradient); if (any(isnan(make_vec3(v_z)))) v_z = v_gz = make_float3(0.f); - mp = volPayload.mp; - uv = make_float2(volPayload.density, length(volPayload.gradient)); + mp = payload.mp; + uv = make_float2(payload.density, length(payload.gradient)); } else { int3 indices; - loadMeshTriIndices(entity.mesh_id, mesh.numTris, surfPayload.primitiveID, indices); - loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, mp, v_gz); - loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, uv); - loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_z); - loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_x); + loadMeshTriIndices(entity.mesh_id, mesh.numTris, payload.primitiveID, indices); + loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, mp, v_gz); + loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, uv); + loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_z); + loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_x); } // Load material data for the hit object @@ -1066,7 +943,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Transform geometry data into world space { - glm::mat4 xfm = to_mat4((volPayload.tHit >= 0.f) ? 
volPayload.localToWorld : surfPayload.localToWorld); + glm::mat4 xfm = to_mat4(payload.localToWorld); p = make_float3(xfm * make_vec4(mp, 1.0f)); hit_p = p; glm::mat3 nxfm = transpose(glm::inverse(glm::mat3(xfm))); @@ -1074,11 +951,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() v_z = make_float3(normalize(nxfm * make_vec3(v_z))); v_x = make_float3(normalize(nxfm * make_vec3(v_x))); v_y = cross(v_z, v_x); - v_x = cross(v_y, v_z); + // v_x = cross(v_y, v_z); if (LP.renderDataMode != RenderDataFlags::NONE) { - glm::mat4 xfmt0 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT0 : surfPayload.localToWorldT0); - glm::mat4 xfmt1 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT1 : surfPayload.localToWorldT1); + glm::mat4 xfmt0 = to_mat4(payload.localToWorldT0); + glm::mat4 xfmt1 = to_mat4(payload.localToWorldT1); vec4 tmp1 = LP.proj * LP.viewT0 * xfmt0 * make_vec4(mp, 1.0f); vec4 tmp2 = LP.proj * LP.viewT1 * xfmt1 * make_vec4(mp, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -1157,7 +1034,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // For segmentations, save geometric metadata - saveGeometricRenderData(renderData, depth, surfPayload.tHit, hit_p, v_z, w_o, uv, entityID, diffuseMotion, time, mat); + saveGeometricRenderData(renderData, depth, payload.tHit, hit_p, v_z, v_x, w_o, uv, entityID, diffuseMotion, time, mat); if (depth == 0) { primaryAlbedo = mat.base_color; primaryNormal = v_z; @@ -1168,20 +1045,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha_rnd = lcg_randomf(rng); if (alpha_rnd > mat.alpha) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + // ray.visibilityMask reuses the last visibility mask here + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); ++depth; transparencyDepth++; continue; @@ -1192,14 +1060,14 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Note that NEE/MIS will also potentially terminate the path, preventing double-counting. // todo: account for volumetric emission here... if (entity.light_id >= 0 && entity.light_id < LP.lights.count) { - float dotNWi = max(dot(surfRay.direction, v_z), 0.f); + float dotNWi = max(dot(ray.direction, v_z), 0.f); if ((dotNWi > EPSILON) && (depth != 0)) break; GET(LightStruct entityLight, LightStruct, LP.lights, entity.light_id); float3 lightEmission; if (entityLight.color_texture_id == -1) lightEmission = make_float3(entityLight.r, entityLight.g, entityLight.b); else lightEmission = sampleTexture(entityLight.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)); - float dist = surfPayload.tHit; + float dist = payload.tHit; lightEmission = (lightEmission * entityLight.intensity); if (depth != 0) lightEmission = (lightEmission * pow(2.f, entityLight.exposure)) / max((dist * dist), 1.f); float3 contribution = pathThroughput * lightEmission; @@ -1214,8 +1082,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 irradiance = make_float3(0.f); // If we hit a volume, use hybrid scattering to determine whether or not to use a BRDF or a phase function. 
- bool useBRDF = true; - if (volPayload.tHit >= 0.f) { + if (isVolume) { float opacity = mat.alpha; // would otherwise be sampled from a transfer function float grad_len = uv.y; float p_brdf = opacity * (1.f - exp(-25.f * pow(volume.gradient_factor, 3.f) * grad_len)); @@ -1232,7 +1099,6 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // First, sample the BRDF / phase function so that we can use the sampled direction for MIS float3 w_i; float bsdfPDF; - int sampledBsdf = -1; float3 bsdf; if (useBRDF) { sample_disney_brdf( @@ -1240,7 +1106,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() w_i, bsdfPDF, sampledBsdf, bsdf); // outputs } else { /* a scatter event occurred */ - if (volPayload.eventID == 2) { + if (payload.eventID == 2) { // currently isotropic. Todo: implement henyey greenstien... float rand1 = lcg_randomf(rng); float rand2 = lcg_randomf(rng); @@ -1256,7 +1122,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } /* An absorption / emission event occurred */ - if (volPayload.eventID == 1) { + if (payload.eventID == 1) { bsdfPDF = 1.f / (4.0 * M_PI); bsdf = make_float3(1.f / (4.0 * M_PI)); w_i = -w_o; @@ -1269,19 +1135,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // At this point, if we are refracting and we ran out of transmission bounces, skip forward. // This avoids creating black regions on glass objects due to bounce limits if (sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth >= LP.maxTransmissionDepth) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + // ray.visibilityMask reuses the last visibility mask here + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Count this as a "transparent" bounce. ++depth; @@ -1346,47 +1204,58 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // sample light sources else { + // Sample the light to compute an incident light ray to this point if (numLights == 0) continue; GET( sampledLightID, int, LP.lightEntities, randomID ); GET( EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID ); GET( LightStruct light_light, LightStruct, LP.lights, light_entity.light_id ); GET( TransformStruct transform, TransformStruct, LP.transforms, light_entity.transform_id ); - GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); - uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); - GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); - GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); - GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); - GET( Buffer texCoords, Buffer, LP.texCoordLists, light_entity.mesh_id ); - GET( int3 triIndex, int3, indices, random_tri_id ); - - // Sample the light to compute an incident light ray to this point + auto ltw = transform.localToWorld; float3 dir; float2 uv; float3 pos = hit_p; - GET(float3 n1, float3, normals, triIndex.x ); - GET(float3 n2, float3, normals, triIndex.y ); - GET(float3 n3, float3, normals, triIndex.z ); - GET(float3 v1, float3, vertices, triIndex.x ); - GET(float3 v2, float3, vertices, triIndex.y ); - GET(float3 v3, float3, vertices, triIndex.z ); - GET(float2 uv1, float2, texCoords, triIndex.x ); - GET(float2 uv2, float2, texCoords, triIndex.y ); - GET(float2 uv3, float2, texCoords, triIndex.z ); - - // Might be 
a bug here with normal transform... - n1 = make_float3(ltw * make_float4(n1, 0.0f)); - n2 = make_float3(ltw * make_float4(n2, 0.0f)); - n3 = make_float3(ltw * make_float4(n3, 0.0f)); - v1 = make_float3(ltw * make_float4(v1, 1.0f)); - v2 = make_float3(ltw * make_float4(v2, 1.0f)); - v3 = make_float3(ltw * make_float4(v3, 1.0f)); - sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, - lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, - /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + // The sampled light is a point light + if ((light_entity.mesh_id < 0) || (light_entity.mesh_id >= LP.meshes.count)) { + numTris = 1.f; + float3 tmp = make_float3(ltw[3]) - pos; + lightDistance = length(tmp); + dir = tmp / lightDistance; + lightPDF = PdfAtoW(1.f/(4.f * M_PI), lightDistance * lightDistance, 1.f); + uv = make_float2(0.f, 0.f); + } + // The sampled light is a mesh light + else { + GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); + uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); + GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); + GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); + GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); + GET( Buffer texCoords, Buffer, LP.texCoordLists, light_entity.mesh_id ); + GET( int3 triIndex, int3, indices, random_tri_id ); + GET(float3 n1, float3, normals, triIndex.x ); + GET(float3 n2, float3, normals, triIndex.y ); + GET(float3 n3, float3, normals, triIndex.z ); + GET(float3 v1, float3, vertices, triIndex.x ); + GET(float3 v2, float3, vertices, triIndex.y ); + GET(float3 v3, float3, vertices, triIndex.z ); + GET(float2 uv1, float2, texCoords, triIndex.x ); + GET(float2 uv2, float2, texCoords, triIndex.y ); + GET(float2 uv3, float2, texCoords, triIndex.z ); + // Might be a bug here with normal transform... 
+ n1 = make_float3(ltw * make_float4(n1, 0.0f)); + n2 = make_float3(ltw * make_float4(n2, 0.0f)); + n3 = make_float3(ltw * make_float4(n3, 0.0f)); + v1 = make_float3(ltw * make_float4(v1, 1.0f)); + v2 = make_float3(ltw * make_float4(v2, 1.0f)); + v3 = make_float3(ltw * make_float4(v3, 1.0f)); + sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, + lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, + /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + numTris = mesh.numTris; + } falloff = light_light.falloff; - numTris = mesh.numTris; lightDir = make_float3(dir.x, dir.y, dir.z); if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); else lightEmission = sampleTexture(light_light.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)) * (light_light.intensity * pow(2.f, light_light.exposure)); @@ -1409,32 +1278,28 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } lightPDF *= (1.f / float(numLights + 1.f)) * (1.f / float(numTris)); if ((lightPDF > 0.0) && (dotNWi > EPSILON)) { - RayPayload surfPayload; surfPayload.instanceID = -2; - RayPayload volPayload = surfPayload; + RayPayload payload; payload.instanceID = -2; + RayPayload volPayload = payload; owl::RayT ray; // shadow ray ray.tmin = EPSILON * 10.f; ray.tmax = lightDistance + EPSILON; // needs to be distance to light, else anyhit logic breaks. ray.origin = hit_p; ray.direction = lightDir; ray.time = time; - owl::traceRay( LP.surfacesIAS, ray, surfPayload, occlusion_flags); - ray.tmax = (surfPayload.instanceID == -2) ? ray.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-5 : -1; - owl::traceRay( LP.volumesIAS, ray, volPayload, occlusion_flags); + ray.visibilityMask = ENTITY_VISIBILITY_SHADOW_RAYS; + owl::traceRay( LP.IAS, ray, payload, occlusion_flags); + ray.tmax = (payload.instanceID == -2) ? ray.tmax : payload.tHit; bool visible; if (randomID == numLights) { // If we sampled the dome light, just check to see if we hit anything - visible = (surfPayload.instanceID == -2) && (volPayload.instanceID == -2); + visible = (payload.instanceID == -2); } else { // If we sampled a light source, then check to see if we hit something other than the light int surfEntity; - if (surfPayload.instanceID == -2) surfEntity = -1; - else { GET(surfEntity, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } - visible = (volPayload.instanceID == -2) && (surfPayload.instanceID == -2 || surfEntity == sampledLightID); + if (payload.instanceID == -2) surfEntity = -1; + else { GET(surfEntity, int, LP.instanceToEntity, payload.instanceID); } + visible = (payload.instanceID == -2 || surfEntity == sampledLightID); } if (visible) { - if (randomID != numLights) lightEmission = lightEmission / max(pow(surfPayload.tHit, falloff),1.f); + if (randomID != numLights) lightEmission = lightEmission / max(pow(payload.tHit, falloff),1.f); float w = power_heuristic(1.f, lightPDF, 1.f, bsdfPDF); float3 Li = (lightEmission * w) / lightPDF; irradiance = irradiance + (l_bsdf * Li); @@ -1452,33 +1317,30 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Next, sample a light source using the importance sampled BDRF direction. - surfRay.origin = hit_p; - surfRay.direction = w_i; - surfRay.tmin = EPSILON;//* 100.f; - surfPayload.instanceID = -1; - surfPayload.tHit = -1.f; - surfRay.time = sampleTime(lcg_randomf(rng)); - owl::traceRay(LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -6 : -1; - owl::traceRay(LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = hit_p; + ray.direction = w_i; + ray.tmin = EPSILON;//* 100.f; + payload.instanceID = -1; + payload.tHit = -1.f; + ray.time = sampleTime(lcg_randomf(rng)); + if (isVolume) ray.visibilityMask = ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + else if (sampledBsdf == DISNEY_TRANSMISSION_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_TRANSMISSION_RAYS; + else if (sampledBsdf == DISNEY_DIFFUSE_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_DIFFUSE_RAYS; + else if (sampledBsdf == DISNEY_GLOSSY_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; + else if (sampledBsdf == DISNEY_CLEARCOAT_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; + owl::traceRay(LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Check if we hit any of the previously sampled lights bool hitLight = false; if (lightPDF > EPSILON) { - float dotNWi = (useBRDF) ? max(dot(surfRay.direction, v_gz), 0.f) : 1.f; // geometry term + float dotNWi = (useBRDF) ? max(dot(ray.direction, v_gz), 0.f) : 1.f; // geometry term // if by sampling the brdf we also hit the dome light... 
- if ((surfPayload.instanceID == -1) && (volPayload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { + if ((payload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { // Case where we hit the background, and also previously sampled the background float w = power_heuristic(1.f, bsdfPDF, 1.f, lightPDF); - float3 lightEmission = missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); + float3 lightEmission = missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); float3 Li = (lightEmission * w) / bsdfPDF; if (dotNWi > 0.f) { @@ -1488,8 +1350,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // else if by sampling the brdf we also hit an area light // TODO: consider hitting emissive voxels? - else if (surfPayload.instanceID != -1 && volPayload.instanceID == -1) { - GET(int entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); + else if (payload.instanceID != -1) { + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); bool visible = (entityID == sampledLightID); // We hit the light we sampled previously if (visible) { @@ -1497,10 +1359,10 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() GET(EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID); GET(MeshStruct light_mesh, MeshStruct, LP.meshes, light_entity.mesh_id); GET(LightStruct light_light, LightStruct, LP.lights, light_entity.light_id); - loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, surfPayload.primitiveID, indices); - loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, surfPayload.barycentrics, uv); + loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, payload.primitiveID, indices); + loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, payload.barycentrics, uv); - float dist = surfPayload.tHit; + float dist = payload.tHit; float3 lightEmission; if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, 
light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); @@ -1554,16 +1416,22 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // of terminating, just so that we don't get black regions in our glass if (transmissionDepth >= LP.maxTransmissionDepth) continue; } while ( - diffuseDepth < LP.maxDiffuseDepth && - glossyDepth < LP.maxGlossyDepth && - // transmissionDepth < LP.maxTransmissionDepth && // see comment above - transparencyDepth < LP.maxTransparencyDepth && - volumeDepth < LP.maxVolumeDepth + // Terminate the path if the sampled BRDF's corresponding bounce depth exceeds the max bounce for that bounce type minus the overall path depth. + // This prevents long tails that can otherwise occur from mixing BRDF events + (!(sampledBsdf == DISNEY_DIFFUSE_BRDF && diffuseDepth > (LP.maxDiffuseDepth - (depth - 1)))) && + (!(sampledBsdf == DISNEY_GLOSSY_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(sampledBsdf == DISNEY_CLEARCOAT_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(useBRDF == false && volumeDepth > LP.maxVolumeDepth - (depth - 1))) && + (!(transparencyDepth > LP.maxTransparencyDepth - (depth - 1))) + // (!(sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth < LP.maxTransmissionDepth - (depth - 1)) ) && // see comment above ); // For segmentations, save heatmap metadata saveHeatmapRenderData(renderData, depth, start_clock); + // Device assignment data + saveDeviceAssignment(renderData, depth, self.deviceIndex); + // clamp out any extreme fireflies glm::vec3 gillum = vec3(illum.x, illum.y, illum.z); glm::vec3 dillum = vec3(directIllum.x, directIllum.y, directIllum.z); @@ -1608,7 +1476,18 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() else { // Override framebuffer output if user requested to render metadata accum_illum = make_float3(renderData.x, renderData.y, renderData.z); + if (isnan(renderData.x) || isnan(renderData.y) || isnan(renderData.z) || + isinf(renderData.x) || isinf(renderData.y) || isinf(renderData.z) || + 
isnan(prev_color.x) || isnan(prev_color.y) || isnan(prev_color.z) || + isinf(prev_color.x) || isinf(prev_color.y) || isinf(prev_color.z)) { + accum_illum = make_float3(0.f, 0.f, 0.f); + prev_color = make_float4(0.f, 0.f, 0.f, 1.f); + } accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); + + // if (debug) { + // printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); + // } } diff --git a/src/nvisii/entity.cpp b/src/nvisii/entity.cpp index 52987292..e04df6ae 100644 --- a/src/nvisii/entity.cpp +++ b/src/nvisii/entity.cpp @@ -32,6 +32,7 @@ Entity::Entity(std::string name, uint32_t id) { entity.material_id = -1; entity.light_id = -1; entity.mesh_id = -1; + entity.flags = (uint32_t)-1; } std::string Entity::toString() @@ -253,7 +254,14 @@ Mesh* Entity::getMesh() return &mesh; } -void Entity::setVisibility(bool camera) +void Entity::setVisibility( + bool camera, + bool diffuse, + bool glossy, + bool transmission, + bool volume_scatter, + bool shadow +) { std::lock_guard lock(*Entity::getEditMutex().get()); @@ -263,6 +271,36 @@ void Entity::setVisibility(bool camera) } else { entity.flags &= (~ENTITY_VISIBILITY_CAMERA_RAYS); } + + if (diffuse) { + entity.flags |= ENTITY_VISIBILITY_DIFFUSE_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_DIFFUSE_RAYS); + } + + if (glossy) { + entity.flags |= ENTITY_VISIBILITY_GLOSSY_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_GLOSSY_RAYS); + } + + if (transmission) { + entity.flags |= ENTITY_VISIBILITY_TRANSMISSION_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_TRANSMISSION_RAYS); + } + + if (volume_scatter) { + entity.flags |= ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS); + } + + if (shadow) { + entity.flags |= ENTITY_VISIBILITY_SHADOW_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_SHADOW_RAYS); + } markDirty(); } @@ -281,6 +319,13 @@ glm::vec3 
Entity::getAabbCenter() return entityStructs[id].bbmin + (entityStructs[id].bbmax - entityStructs[id].bbmin) * .5f; } +glm::vec3 Entity::getCenter() +{ + if (!getTransform()) throw std::runtime_error("Error: no transform attached to entity"); + if (!getMesh()) throw std::runtime_error("Error: no mesh attached to entity"); + return glm::vec3(getTransform()->getLocalToWorldMatrix() * glm::vec4(getMesh()->getCenter(), 1.f)); +} + void Entity::initializeFactory(uint32_t max_components) { if (isFactoryInitialized()) return; diff --git a/src/nvisii/light.cpp b/src/nvisii/light.cpp index 01e3ab02..842981b8 100644 --- a/src/nvisii/light.cpp +++ b/src/nvisii/light.cpp @@ -46,9 +46,9 @@ LightStruct &Light::getStruct() { void Light::setColor(glm::vec3 color) { auto &light = getStruct(); - light.r = max(0.f, min(color.r, 1.f)); - light.g = max(0.f, min(color.g, 1.f)); - light.b = max(0.f, min(color.b, 1.f)); + light.r = max(0.f, color.r); + light.g = max(0.f, color.g); + light.b = max(0.f, color.b); markDirty(); } diff --git a/src/nvisii/mesh.cpp b/src/nvisii/mesh.cpp index 26fb96dc..83447ad5 100644 --- a/src/nvisii/mesh.cpp +++ b/src/nvisii/mesh.cpp @@ -141,7 +141,7 @@ void Mesh::computeMetadata() this->meshStructs[id].numVerts = uint32_t(positions.size()); } -glm::vec3 Mesh::getCentroid() +glm::vec3 Mesh::getCenter() { return vec3(meshStructs[id].center); } @@ -211,6 +211,8 @@ void Mesh::loadData( uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, @@ -219,6 +221,7 @@ void Mesh::loadData( ) { bool readingNormals = normals_.size() > 0; + bool readingTangents = tangents_.size() > 0; bool readingColors = colors_.size() > 0; bool readingTexCoords = texcoords_.size() > 0; bool readingIndices = indices_.size() > 0; @@ -228,6 +231,9 @@ void Mesh::loadData( if ((normal_dimensions != 3) && (normal_dimensions != 4)) throw 
std::runtime_error( std::string("Error, invalid normal dimensions. Possible normal dimensions are 3 or 4.")); + + if ((tangent_dimensions != 3) && (tangent_dimensions != 4)) + throw std::runtime_error( std::string("Error, invalid tangent dimensions. Possible tangent dimensions are 3 or 4.")); if ((color_dimensions != 3) && (color_dimensions != 4)) throw std::runtime_error( std::string("Error, invalid color dimensions. Possible color dimensions are 3 or 4.")); @@ -247,6 +253,9 @@ void Mesh::loadData( if (readingNormals && ((normals_.size() / normal_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. Total normals: " + std::to_string(normals_.size() / normal_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingTangents && ((tangents_.size() / tangent_dimensions) != (positions_.size() / position_dimensions))) + throw std::runtime_error( std::string("Error, length mismatch. Total tangents: " + std::to_string(tangents_.size() / tangent_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingColors && ((colors_.size() / color_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. Total colors: " + std::to_string(colors_.size() / color_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); @@ -275,6 +284,12 @@ void Mesh::loadData( vertex.normal.z = normals_[i * normal_dimensions + 2]; vertex.normal.w = (normal_dimensions == 4) ? normals_[i * normal_dimensions + 3] : 0.f; } + if (readingTangents) { + vertex.tangent.x = tangents_[i * tangent_dimensions + 0]; + vertex.tangent.y = tangents_[i * tangent_dimensions + 1]; + vertex.tangent.z = tangents_[i * tangent_dimensions + 2]; + vertex.tangent.w = (tangent_dimensions == 4) ? 
tangents_[i * tangent_dimensions + 3] : 0.f; + } if (readingColors) { vertex.color.x = colors_[i * color_dimensions + 0]; vertex.color.y = colors_[i * color_dimensions + 1]; @@ -316,6 +331,7 @@ void Mesh::loadData( this->positions.resize(uniqueVertices.size()); this->colors.resize(uniqueVertices.size()); this->normals.resize(uniqueVertices.size()); + this->tangents.resize(uniqueVertices.size()); this->texCoords.resize(uniqueVertices.size()); for (int i = 0; i < uniqueVertices.size(); ++i) { @@ -323,6 +339,7 @@ void Mesh::loadData( this->positions[i] = {v.point.x, v.point.y, v.point.z}; this->colors[i] = v.color; this->normals[i] = v.normal; + this->tangents[i] = v.tangent; this->texCoords[i] = v.texcoord; } @@ -330,6 +347,10 @@ void Mesh::loadData( generateSmoothNormals(); } + if (!readingTangents) { + generateSmoothTangents(); + } + computeMetadata(); } @@ -1339,19 +1360,20 @@ Mesh* Mesh::createFromData( uint32_t position_dimensions, std::vector normals_, uint32_t normal_dimensions, + std::vector tangents_, + uint32_t tangent_dimensions, std::vector colors_, uint32_t color_dimensions, std::vector texcoords_, uint32_t texcoord_dimensions, std::vector indices_ ) { - auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, + auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, &tangents_, tangent_dimensions, &colors_, color_dimensions, &texcoords_, texcoord_dimensions, &indices_] (Mesh* mesh) { - mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, + mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, tangents_, tangent_dimensions, colors_, color_dimensions, texcoords_, texcoord_dimensions, indices_); - mesh->generateSmoothTangents(); dirtyMeshes.insert(mesh); }; diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 0e9079c5..287065fb 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -89,6 +89,9 @@ static struct OptixData { LaunchParams LP; 
GLuint imageTexID = -1; cudaGraphicsResource_t cudaResourceTex; + bool resourceSharingSuccessful = true; + OWLBuffer assignmentBuffer; + OWLBuffer frameBuffer; OWLBuffer normalBuffer; OWLBuffer albedoBuffer; @@ -96,6 +99,10 @@ static struct OptixData { OWLBuffer mvecBuffer; OWLBuffer accumBuffer; + OWLBuffer combinedFrameBuffer; + OWLBuffer combinedNormalBuffer; + OWLBuffer combinedAlbedoBuffer; + OWLBuffer entityBuffer; OWLBuffer transformBuffer; OWLBuffer cameraBuffer; @@ -105,8 +112,7 @@ static struct OptixData { OWLBuffer textureBuffer; OWLBuffer volumeBuffer; OWLBuffer lightEntitiesBuffer; - OWLBuffer surfaceInstanceToEntityBuffer; - OWLBuffer volumeInstanceToEntityBuffer; + OWLBuffer instanceToEntityBuffer; OWLBuffer vertexListsBuffer; OWLBuffer normalListsBuffer; OWLBuffer tangentListsBuffer; @@ -137,8 +143,7 @@ static struct OptixData { std::vector volumeGeomList; std::vector volumeBlasList; - OWLGroup surfacesIAS = nullptr; - OWLGroup volumesIAS = nullptr; + OWLGroup IAS = nullptr; std::vector lightEntities; @@ -185,6 +190,9 @@ static struct NVISII { std::function callback; std::recursive_mutex callbackMutex; + std::vector> events; + std::vector times; + std::vector weights; } NVISII; void applyStyle() @@ -263,62 +271,6 @@ int getDeviceCount() { return owlGetDeviceCount(OptixData.context); } -OWLModule moduleCreate(OWLContext context, const char* ptxCode) -{ - return owlModuleCreate(context, ptxCode); -} - -OWLBuffer managedMemoryBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlManagedMemoryBufferCreate(context, type, count, init); -} - -OWLBuffer deviceBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlDeviceBufferCreate(context, type, count, init); -} - -void bufferDestroy(OWLBuffer buffer) -{ - owlBufferDestroy(buffer); -} - -void bufferResize(OWLBuffer buffer, size_t newItemCount) { - owlBufferResize(buffer, newItemCount); -} - -const void* bufferGetPointer(OWLBuffer 
buffer, int deviceId) -{ - return owlBufferGetPointer(buffer, deviceId); -} - -void bufferUpload(OWLBuffer buffer, const void *hostPtr) -{ - owlBufferUpload(buffer, hostPtr); -} - -CUstream getStream(OWLContext context, int deviceId) -{ - return owlContextGetStream(context, deviceId); -} - -OptixDeviceContext getOptixContext(OWLContext context, int deviceID) -{ - return owlContextGetOptixContext(context, deviceID); -} - -void buildPrograms(OWLContext context) { - owlBuildPrograms(context); -} - -void buildPipeline(OWLContext context) { - owlBuildPipeline(context); -} - -void buildSBT(OWLContext context) { - owlBuildSBT(context); -} - OWLMissProg missProgCreate(OWLContext context, OWLModule module, const char *programName, size_t sizeOfVarStruct, OWLVarDecl *vars, size_t numVars) { return owlMissProgCreate(context, module, programName, sizeOfVarStruct, vars, numVars); @@ -400,38 +352,14 @@ owl4x3f glmToOWL(glm::mat4 &xfm){ return oxfm; } -OWLLaunchParams launchParamsCreate(OWLContext context, size_t size, OWLVarDecl *vars, size_t numVars) -{ - return owlParamsCreate(context, size, vars, numVars); -} - -void launchParamsSetBuffer(OWLLaunchParams params, const char* varName, OWLBuffer buffer) -{ - owlParamsSetBuffer(params, varName, buffer); -} - -void launchParamsSetRaw(OWLLaunchParams params, const char* varName, const void* data) -{ - owlParamsSetRaw(params, varName, data); -} - -void launchParamsSetTexture(OWLLaunchParams params, const char* varName, OWLTexture texture) -{ - owlParamsSetTexture(params, varName, texture); -} - -void launchParamsSetGroup(OWLLaunchParams params, const char *varName, OWLGroup group) { - owlParamsSetGroup(params, varName, group); -} - -void synchronizeDevices() +void synchronizeDevices(std::string error_string = "") { for (int i = 0; i < getDeviceCount(); i++) { cudaSetDevice(i); cudaDeviceSynchronize(); cudaError_t err = cudaPeekAtLastError(); if (err != 0) { - std::cout<< "ERROR: " << cudaGetErrorString(err)< 1) { - OD.frameBuffer = 
managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - } else { - OD.frameBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.assignmentBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(float), owlGetDeviceCount(OD.context) + 1, nullptr); + owlParamsSetBuffer(OD.launchParams, "assignmentBuffer", OD.assignmentBuffer); + + // If we only have one GPU, framebuffer pixels can stay on device 0. 
+ if (numGPUsFound == 1) { + OD.frameBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.accumBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.normalBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.albedoBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.scratchBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.mvecBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); } + // Otherwise, multiple GPUs must use host pinned memory to merge partial framebuffers together + else { + OD.frameBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.accumBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.normalBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.albedoBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.scratchBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.mvecBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + } + + // For multiGPU denoising, its best to denoise using something other than zero-copy memory. 
+ OD.combinedFrameBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.combinedNormalBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.combinedAlbedoBuffer = owlManagedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.LP.frameSize = glm::ivec2(512, 512); - launchParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); - launchParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); - launchParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); - launchParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); - launchParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); - launchParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); - launchParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); + owlParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); + owlParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); + owlParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); + owlParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); + owlParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); + owlParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); + owlParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); /* Create Component Buffers */ // note, extra textures reserved for internal use - OD.entityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); - OD.transformBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); - OD.cameraBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); - OD.materialBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); - OD.meshBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), 
Mesh::getCount(), nullptr); - OD.lightBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); - OD.textureBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - OD.volumeBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); - OD.volumeHandlesBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); - OD.lightEntitiesBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.surfaceInstanceToEntityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.volumeInstanceToEntityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.vertexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.normalListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.tangentListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.texCoordListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.indexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.textureObjectsBuffer = deviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - - launchParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); - launchParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); - launchParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); - launchParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); - launchParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); - launchParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); - launchParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); - launchParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); 
- launchParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); - launchParamsSetBuffer(OD.launchParams, "surfaceInstanceToEntity", OD.surfaceInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeInstanceToEntity", OD.volumeInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); - launchParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); - launchParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); - launchParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); + OD.entityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); + OD.transformBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); + OD.cameraBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); + OD.materialBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); + OD.meshBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), Mesh::getCount(), nullptr); + OD.lightBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); + OD.textureBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + OD.volumeBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); + OD.volumeHandlesBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); + OD.lightEntitiesBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, 
nullptr); + OD.instanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.vertexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.normalListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.tangentListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.texCoordListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.indexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.textureObjectsBuffer = owlDeviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + + owlParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); + owlParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); + owlParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); + owlParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); + owlParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); + owlParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); + owlParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); + owlParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); + owlParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); + owlParamsSetBuffer(OD.launchParams, "instanceToEntity", OD.instanceToEntityBuffer); + owlParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); + owlParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); + owlParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); + owlParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); + owlParamsSetBuffer(OD.launchParams, "volumeHandles", 
OD.volumeHandlesBuffer); uint32_t meshCount = Mesh::getCount(); OD.vertexLists.resize(meshCount); @@ -712,48 +676,31 @@ void initializeOptix(bool headless) OD.LP.environmentMapID = -1; OD.LP.environmentMapRotation = glm::quat(1,0,0,0); - launchParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); - launchParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); - - launchParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); - launchParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); - launchParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); - launchParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); - - // OWLTexture GGX_E_AVG_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_avg_size,1, - // GGX_E_avg, - // OWL_TEXTURE_LINEAR, - // OWL_COLOR_SPACE_LINEAR, - // OWL_TEXTURE_CLAMP); - // OWLTexture GGX_E_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_size[0],GGX_E_size[1], - // GGX_E, - // OWL_TEXTURE_LINEAR, - // OWL_TEXTURE_CLAMP, - // OWL_COLOR_SPACE_LINEAR); - // launchParamsSetTexture(OD.launchParams, "GGX_E_AVG_LOOKUP", GGX_E_AVG_LOOKUP); - // launchParamsSetTexture(OD.launchParams, "GGX_E_LOOKUP", GGX_E_LOOKUP); - + owlParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); + owlParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); + + owlParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); + owlParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); + owlParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); + owlParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); + OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - 
launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); - launchParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); - launchParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); - launchParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); - launchParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); - launchParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); - launchParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); - launchParamsSetRaw(OD.launchParams, "maxGlossyDepth", &OD.LP.maxGlossyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); - launchParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); - launchParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); - launchParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); - launchParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "timeSamplingInterval", &OD.LP.timeSamplingInterval); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); + owlParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); + owlParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); + owlParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); + owlParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); + owlParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); + owlParamsSetRaw(OD.launchParams, "maxGlossyDepth", &OD.LP.maxGlossyDepth); + 
owlParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); + owlParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); + owlParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); + owlParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); + owlParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); + owlParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "timeSamplingInterval", &OD.LP.timeSamplingInterval); OWLVarDecl trianglesGeomVars[] = {{/* sentinel to mark end of list */}}; OD.trianglesGeomType = geomTypeCreate(OD.context, OWL_GEOM_TRIANGLES, sizeof(TrianglesGeomData), trianglesGeomVars,-1); @@ -784,15 +731,15 @@ void initializeOptix(bool headless) OD.rayGen = rayGenCreate(OD.context,OD.module,"rayGen", sizeof(RayGenData), rayGenVars,-1); owlRayGenSet1i(OD.rayGen, "deviceCount", numGPUsFound); - buildPrograms(OD.context); + owlBuildPrograms(OD.context); /* Temporary GAS. Required for certain older driver versions. 
*/ const int NUM_VERTICES = 1; vec3 vertices[NUM_VERTICES] = {{ 0.f, 0.f, 0.f }}; const int NUM_INDICES = 1; ivec3 indices[NUM_INDICES] = {{ 0, 0, 0 }}; - OWLBuffer vertexBuffer = deviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); - OWLBuffer indexBuffer = deviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); + OWLBuffer vertexBuffer = owlDeviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); + OWLBuffer indexBuffer = owlDeviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); OWLGeom trianglesGeom = geomCreate(OD.context,OD.trianglesGeomType); trianglesSetVertices(trianglesGeom,vertexBuffer,NUM_VERTICES,sizeof(vec4),0); trianglesSetIndices(trianglesGeom,indexBuffer, NUM_INDICES,sizeof(ivec3),0); @@ -800,10 +747,10 @@ void initializeOptix(bool headless) groupBuildAccel(OD.placeholderGroup); // build IAS - OWLGroup surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", surfacesIAS); + OWLGroup IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(IAS, 0, OD.placeholderGroup); + groupBuildAccel(IAS); + owlParamsSetGroup(OD.launchParams, "IAS", IAS); OWLGeom userGeom = owlGeomCreate(OD.context, OD.volumeGeomType); owlGeomSetPrimCount(userGeom, 1); @@ -813,14 +760,9 @@ void initializeOptix(bool headless) OD.placeholderUserGroup = owlUserGeomGroupCreate(OD.context, 1, &userGeom); groupBuildAccel(OD.placeholderUserGroup); - OWLGroup volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); - // Build *SBT* required to trace the groups - buildPipeline(OD.context); - buildSBT(OD.context); + owlBuildPipeline(OD.context); + owlBuildSBT(OD.context); // Setup denoiser configureDenoiser(OD.enableAlbedoGuide, OD.enableNormalGuide, 
OD.enableKernelPrediction); @@ -830,6 +772,19 @@ void initializeOptix(bool headless) setDomeLightSky(glm::vec3(0,0,10)); OptixData.LP.sceneBBMin = OptixData.LP.sceneBBMax = glm::vec3(0.f); + + // To measure how long each card takes to trace for load balancing + int numGPUs = owlGetDeviceCount(OptixData.context); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + NVISII.events.push_back({start, stop}); + NVISII.times.push_back(1.f); + NVISII.weights.push_back(1.f / float(numGPUs)); + } + cudaSetDevice(0); } void initializeImgui() @@ -895,6 +850,41 @@ void processCommandQueue() } } +void updateGPUWeights() +{ + int num_gpus = owlGetDeviceCount(OptixData.context); + float target = 1.f / float(num_gpus); + + std::vector signals(num_gpus); + float total_time = 0.f; + for (uint32_t i = 0; i < num_gpus; ++i) total_time += NVISII.times[i]; + for (uint32_t i = 0; i < num_gpus; ++i) signals[i] = NVISII.times[i] / float(total_time); + + std::vector p_error(num_gpus); + for (uint32_t i = 0; i < num_gpus; ++i) p_error[i] = target - signals[i]; + + // update weights + float pK = 1.f; + for (uint32_t i = 0; i < num_gpus; ++i) { + NVISII.weights[i] = max(NVISII.weights[i] + p_error[i], .001f); + } + + std::vector scan; + for (size_t i = 0; i <= num_gpus; ++i) { + if (i == 0) scan.push_back(0.f); + else scan.push_back(scan[i - 1] + NVISII.weights[i - 1]); + } + + // std::cout<<"Scan: "; + for (size_t i = 0; i <= num_gpus; ++i) { + scan[i] /= scan[num_gpus]; + // std::cout<(height); - // auto cols = std::vector(width * height); - // for (int y = 0, i = 0; y < height; y++) { - // for (int x = 0; x < width; x++, i++) { - // cols[i] = std::max(texels[i].r, std::max(texels[i].g, texels[i].b)) + ((x > 0) ? cols[i - 1] : 0.f); - // } - // rows[y] = cols[i - 1] + ((y > 0) ? 
rows[y - 1] : 0.0f); - // // normalize the pdf for this scanline (if it was non-zero) - // if (cols[i - 1] > 0) { - // for (int x = 0; x < width; x++) { - // cols[i - width + x] /= cols[i - 1]; - // } - // } - // } - - // // normalize the pdf across all scanlines - // for (int y = 0; y < height; y++) - // rows[y] /= rows[height - 1]; - - // if (OptixData.environmentMapRowsBuffer) owlBufferRelease(OptixData.environmentMapRowsBuffer); - // if (OptixData.environmentMapColsBuffer) owlBufferRelease(OptixData.environmentMapColsBuffer); - // OptixData.environmentMapRowsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), height, rows.data()); - // OptixData.environmentMapColsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), width * height, cols.data()); - // OptixData.LP.environmentMapWidth = width; - // OptixData.LP.environmentMapHeight = height; - OptixData.LP.environmentMapWidth = 0; OptixData.LP.environmentMapHeight = 0; resetAccumulation(); @@ -1102,7 +1062,7 @@ void setIndirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.indirectClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); + owlParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); resetAccumulation(); } @@ -1110,7 +1070,7 @@ void setDirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.directClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); + owlParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); resetAccumulation(); } @@ -1127,11 +1087,11 @@ void setMaxBounceDepth( OptixData.LP.maxTransmissionDepth = transmissionDepth; OptixData.LP.maxVolumeDepth = volumeDepth; - launchParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", &OptixData.LP.maxDiffuseDepth); - launchParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", 
&OptixData.LP.maxGlossyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); - launchParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); + owlParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", &OptixData.LP.maxDiffuseDepth); + owlParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", &OptixData.LP.maxGlossyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); + owlParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); resetAccumulation(); } @@ -1146,7 +1106,7 @@ void setLightSampleCount(uint32_t count) std::string("Error: number of light samples must be between 1 and ") + std::to_string(MAX_LIGHT_SAMPLES)); OptixData.LP.numLightSamples = count; - launchParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); + owlParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); resetAccumulation(); } @@ -1154,15 +1114,15 @@ void samplePixelArea(vec2 xSampleInterval, vec2 ySampleInterval) { OptixData.LP.xPixelSamplingInterval = xSampleInterval; OptixData.LP.yPixelSamplingInterval = ySampleInterval; - launchParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); - launchParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); resetAccumulation(); } void sampleTimeInterval(vec2 sampleTimeInterval) { 
OptixData.LP.timeSamplingInterval = sampleTimeInterval; - launchParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); resetAccumulation(); } @@ -1188,8 +1148,8 @@ void updateComponents() anyUpdated |= Texture::areAnyDirty(); anyUpdated |= Entity::areAnyDirty(); anyUpdated |= Volume::areAnyDirty(); - if (!anyUpdated) return; + resetAccumulation(); std::recursive_mutex dummyMutex; @@ -1222,11 +1182,11 @@ void updateComponents() if (m->getTriangleIndices().size() == 0) throw std::runtime_error("ERROR: indices is 0"); // Next, allocate resources for the new mesh. - OD.vertexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); - OD.normalLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); - OD.tangentLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); - OD.texCoordLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); - OD.indexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); + OD.vertexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); + OD.normalLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); + OD.tangentLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); + OD.texCoordLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); 
+ OD.indexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); // Create geometry and build BLAS OD.surfaceGeomList[m->getAddress()] = geomCreate(OD.context, OD.trianglesGeomType); @@ -1236,13 +1196,13 @@ void updateComponents() groupBuildAccel(OD.surfaceBlasList[m->getAddress()]); } - bufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); - bufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); - bufferUpload(OD.indexListsBuffer, OD.indexLists.data()); - bufferUpload(OD.normalListsBuffer, OD.normalLists.data()); - bufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); + owlBufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); + owlBufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); + owlBufferUpload(OD.indexListsBuffer, OD.indexLists.data()); + owlBufferUpload(OD.normalListsBuffer, OD.normalLists.data()); + owlBufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); Mesh::updateComponents(); - bufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); + owlBufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); } // Manage Volumes: Build / Rebuild BLAS @@ -1260,24 +1220,20 @@ void updateComponents() // Next, allocate resources for the new volume. 
auto gridHdlPtr = v->getNanoVDBGridHandle(); const nanovdb::FloatGrid* grid = reinterpret_cast(gridHdlPtr.get()->data()); - std::cout<checksum()<tree().getAccessor(); // auto bbox = tree.root().bbox(); auto bbox = grid->tree().bbox().asReal(); // int nodecount = grid->tree().nodeCount(3); - // std::cout<getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint8_t), gridHdlPtr.get()->size(), nullptr); owlBufferUpload(OD.volumeHandles[v->getAddress()], gridHdlPtr.get()->data()); - printf("%hhx\n",gridHdlPtr.get()->data()[0]); + // printf("%hhx\n",gridHdlPtr.get()->data()[0]); const void* d_gridData = owlBufferGetPointer(OD.volumeHandles[v->getAddress()], 0); uint8_t first_byte; cudaMemcpy((void*)&first_byte, d_gridData, 1, cudaMemcpyDeviceToHost); - printf("%hhx\n",first_byte); - + // printf("%hhx\n",first_byte); // Create geometry and build BLAS uint32_t volumeID = v->getAddress(); @@ -1299,19 +1255,11 @@ void updateComponents() // Manage Entities: Build / Rebuild TLAS auto dirtyEntities = Entity::getDirtyEntities(); if (dirtyEntities.size() > 0) { - // Surface instances - std::vector surfaceInstances; - std::vector t0SurfaceTransforms; - std::vector t1SurfaceTransforms; - std::vector surfaceInstanceToEntity; - - // Volume instances - std::vector volumeInstances; - std::vector t0VolumeTransforms; - std::vector t1VolumeTransforms; - std::vector volumeInstanceToEntity; - - // Todo: curves... 
+ std::vector instances; + std::vector t0Transforms; + std::vector t1Transforms; + std::vector masks; + std::vector instanceToEntity; // Aggregate instanced geometry and transformations Entity* entities = Entity::getFront(); @@ -1330,6 +1278,14 @@ void updateComponents() // Get instance transformation glm::mat4 prevLocalToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */true); glm::mat4 localToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */false); + t0Transforms.push_back(prevLocalToWorld); + t1Transforms.push_back(localToWorld); + + // Get instance mask + masks.push_back(entities[eid].getStruct().flags); + + // Indirection from instance back to entity ID + instanceToEntity.push_back(eid); // Add any instanced mesh geometry to the list if (entities[eid].getMesh()) { @@ -1340,90 +1296,63 @@ void updateComponents() // Mark it as dirty. It should be available in a subsequent frame entities[eid].getMesh()->markDirty(); return; } - surfaceInstances.push_back(blas); - surfaceInstanceToEntity.push_back(eid); - t0SurfaceTransforms.push_back(prevLocalToWorld); - t1SurfaceTransforms.push_back(localToWorld); + instances.push_back(blas); } // Add any instanced volume geometry to the list - if (entities[eid].getVolume()) { + else if (entities[eid].getVolume()) { uint32_t address = entities[eid].getVolume()->getAddress(); OWLGroup blas = OD.volumeBlasList[address]; if (!blas) { // Same as meshes, if BLAS doesn't exist, force BLAS build and try again. 
entities[eid].getMesh()->markDirty(); return; } - volumeInstances.push_back(blas); - volumeInstanceToEntity.push_back(eid); - t0VolumeTransforms.push_back(prevLocalToWorld); - t1VolumeTransforms.push_back(localToWorld); - } + instances.push_back(blas); + } + + else { + throw std::runtime_error("Internal Error, renderable entity has no mesh or volume components!?"); + } } - std::vector t0OwlSurfaceTransforms; - std::vector t1OwlSurfaceTransforms; - std::vector t0OwlVolumeTransforms; - std::vector t1OwlVolumeTransforms; - auto oldSurfaceIAS = OD.surfacesIAS; - auto oldVolumeIAS = OD.volumesIAS; + std::vector owlVisibilityMasks; + std::vector t0OwlTransforms; + std::vector t1OwlTransforms; + auto oldIAS = OD.IAS; - // If no surfaces instanced, insert an unhittable placeholder. - // (required for certain older driver versions) - if (surfaceInstances.size() == 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(OD.surfacesIAS); - } - - // If no volumes instanced, insert an unhittable placeholder. + // If no objects are instanced, insert an unhittable placeholder. 
// (required for certain older driver versions) - if (volumeInstances.size() == 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(OD.volumesIAS); + if (instances.size() == 0) { + OD.IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(OD.IAS, 0, OD.placeholderGroup); + groupBuildAccel(OD.IAS); } - // Set surface transforms to IAS, upload surface instance to entity map - if (surfaceInstances.size() > 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, surfaceInstances.size()); - for (uint32_t iid = 0; iid < surfaceInstances.size(); ++iid) { - instanceGroupSetChild(OD.surfacesIAS, iid, surfaceInstances[iid]); - t0OwlSurfaceTransforms.push_back(glmToOWL(t0SurfaceTransforms[iid])); - t1OwlSurfaceTransforms.push_back(glmToOWL(t1SurfaceTransforms[iid])); + // Set instance transforms and masks, upload instance to entity map + if (instances.size() > 0) { + OD.IAS = instanceGroupCreate(OD.context, instances.size()); + for (uint32_t iid = 0; iid < instances.size(); ++iid) { + instanceGroupSetChild(OD.IAS, iid, instances[iid]); + t0OwlTransforms.push_back(glmToOWL(t0Transforms[iid])); + t1OwlTransforms.push_back(glmToOWL(t1Transforms[iid])); + owlVisibilityMasks.push_back(masks[iid]); } - owlInstanceGroupSetTransforms(OD.surfacesIAS,0,(const float*)t0OwlSurfaceTransforms.data()); - owlInstanceGroupSetTransforms(OD.surfacesIAS,1,(const float*)t1OwlSurfaceTransforms.data()); - bufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); - bufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); + owlInstanceGroupSetTransforms(OD.IAS,0,(const float*)t0OwlTransforms.data()); + owlInstanceGroupSetTransforms(OD.IAS,1,(const float*)t1OwlTransforms.data()); + owlInstanceGroupSetVisibilityMasks(OD.IAS, owlVisibilityMasks.data()); + owlBufferResize(OD.instanceToEntityBuffer, instanceToEntity.size()); + 
owlBufferUpload(OD.instanceToEntityBuffer, instanceToEntity.data()); } - // Set volume transforms to IAS, upload volume instance to entity map - if (volumeInstances.size() > 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, volumeInstances.size()); - for (uint32_t iid = 0; iid < volumeInstances.size(); ++iid) { - instanceGroupSetChild(OD.volumesIAS, iid, volumeInstances[iid]); - t0OwlVolumeTransforms.push_back(glmToOWL(t0VolumeTransforms[iid])); - t1OwlVolumeTransforms.push_back(glmToOWL(t1VolumeTransforms[iid])); - } - owlInstanceGroupSetTransforms(OD.volumesIAS,0,(const float*)t0OwlVolumeTransforms.data()); - owlInstanceGroupSetTransforms(OD.volumesIAS,1,(const float*)t1OwlVolumeTransforms.data()); - bufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); - bufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); - } - // Build IAS - groupBuildAccel(OD.volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", OD.volumesIAS); - groupBuildAccel(OD.surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); - + groupBuildAccel(OD.IAS); + owlParamsSetGroup(OD.launchParams, "IAS", OD.IAS); + // Now that IAS have changed, we need to rebuild SBT - buildSBT(OD.context); + owlBuildSBT(OD.context); // Release any old IAS (TODO, don't rebuild if entity edit doesn't effect IAS...) - if (oldSurfaceIAS) {owlGroupRelease(oldSurfaceIAS);} - if (oldVolumeIAS) {owlGroupRelease(oldVolumeIAS);} + if (oldIAS) {owlGroupRelease(oldIAS);} // Aggregate entities that are light sources (todo: consider emissive volumes...) 
OD.lightEntities.resize(0); @@ -1431,22 +1360,24 @@ void updateComponents() if (!entities[eid].isInitialized()) continue; if (!entities[eid].getTransform()) continue; if (!entities[eid].getLight()) continue; - if (!entities[eid].getMesh()) continue; + // Edit: adding support for "point" lights that have no meshes + // if (!entities[eid].getMesh()) continue; OD.lightEntities.push_back(eid); } - bufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); - bufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); + owlBufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); + owlBufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); // Finally, upload entity structs to the GPU. Entity::updateComponents(); - bufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); + owlBufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); } // Manage textures and materials if (Texture::areAnyDirty() || Material::areAnyDirty()) { - std::lock_guard material_lock(Material::areAnyDirty() ? 
*Material::getEditMutex().get() : dummyMutex); + std::lock_guard material_lock(*Material::getEditMutex().get()); + std::lock_guard texture_lock(*Texture::getEditMutex().get()); // Allocate cuda textures for all texture components auto dirtyTextures = Texture::getDirtyTextures(); @@ -1492,7 +1423,6 @@ void updateComponents() colorSpace ); } - } // Create additional cuda textures for material constants @@ -1568,32 +1498,19 @@ void updateComponents() } Material::updateComponents(); - bufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); + owlBufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); } - bufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); + owlBufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); Texture::updateComponents(); memcpy(OptixData.textureStructs.data(), Texture::getFrontStruct(), Texture::getCount() * sizeof(TextureStruct)); - bufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); + owlBufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); } // Manage transforms auto dirtyTransforms = Transform::getDirtyTransforms(); if (dirtyTransforms.size() > 0) { Transform::updateComponents(); - - // // for each device - // for (uint32_t id = 0; id < owlGetDeviceCount(OptixData.context); ++id) - // { - // cudaSetDevice(id); - - // TransformStruct* devTransforms = (TransformStruct*)owlBufferGetPointer(OptixData.transformBuffer, id); - // TransformStruct* transformStructs = Transform::getFrontStruct(); - // for (auto &t : dirtyTransforms) { - // if (!t->isInitialized()) continue; - // CUDA_CHECK(cudaMemcpy(&devTransforms[t->getAddress()], &transformStructs[t->getAddress()], sizeof(TransformStruct), cudaMemcpyHostToDevice)); - // } - // } // cudaSetDevice(0); owlBufferUpload(OptixData.transformBuffer, Transform::getFrontStruct()); @@ -1602,51 +1519,67 @@ void updateComponents() // Manage Cameras if (Camera::areAnyDirty()) { Camera::updateComponents(); - 
bufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); + owlBufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); } // Manage lights if (Light::areAnyDirty()) { Light::updateComponents(); - bufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); + owlBufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); } } void updateLaunchParams() { - launchParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); - launchParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); - launchParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); - launchParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); - launchParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); - launchParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); - launchParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); - launchParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); - launchParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); - launchParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); - launchParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); - launchParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); - launchParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); - - launchParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); - launchParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); - launchParamsSetRaw(OptixData.launchParams, 
"environmentMapWidth", &OptixData.LP.environmentMapWidth); - launchParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); - launchParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); - launchParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); + owlParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); + owlParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); + owlParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); + owlParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); + owlParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); + owlParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); + owlParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); + owlParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); + owlParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); + owlParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); + owlParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); + owlParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); + owlParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); + + owlParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); + owlParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); + owlParamsSetRaw(OptixData.launchParams, "environmentMapWidth", &OptixData.LP.environmentMapWidth); + 
owlParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); OptixData.LP.frameID ++; } +// Update: This is still prohibitively slow. Official OptiX samples use host pinned memory. +// Moving to that approach... +// // Different GPUs have different local framebuffers. +// // This function combines those framebuffers on the CPU, then uploads results to device 0. +void mergeFrameBuffers() { + // For multigpu setups, we currently render to zero-copy memory to merge on the host. + // So for now, just upload those results to device 0's combined unified frame buffers on the device + owlBufferUpload(OptixData.combinedFrameBuffer, owlBufferGetPointer(OptixData.frameBuffer, 0)); + + if (OptixData.enableAlbedoGuide) { + owlBufferUpload(OptixData.combinedAlbedoBuffer, owlBufferGetPointer(OptixData.albedoBuffer, 0)); + } + + if (OptixData.enableNormalGuide) { + owlBufferUpload(OptixData.combinedNormalBuffer, owlBufferGetPointer(OptixData.normalBuffer, 0)); + } +} + void denoiseImage() { synchronizeDevices(); auto &OD = OptixData; - auto cudaStream = getStream(OD.context, 0); - - CUdeviceptr frameBuffer = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + auto cudaStream = owlContextGetStream(OD.context, 0); std::vector inputLayers; OptixImage2D colorLayer; @@ -1655,7 +1588,7 @@ void denoiseImage() { colorLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; colorLayer.pixelStrideInBytes = 4 * sizeof(float); colorLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - colorLayer.data = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + colorLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedFrameBuffer, 0); inputLayers.push_back(colorLayer); OptixImage2D albedoLayer; @@ -1664,7 +1597,7 @@ void denoiseImage() { albedoLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; 
albedoLayer.pixelStrideInBytes = 4 * sizeof(float); albedoLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - albedoLayer.data = (CUdeviceptr) bufferGetPointer(OD.albedoBuffer, 0); + albedoLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedAlbedoBuffer, 0); if (OD.enableAlbedoGuide) inputLayers.push_back(albedoLayer); OptixImage2D normalLayer; @@ -1673,7 +1606,7 @@ void denoiseImage() { normalLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; normalLayer.pixelStrideInBytes = 4 * sizeof(float); normalLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - normalLayer.data = (CUdeviceptr) bufferGetPointer(OD.normalBuffer, 0); + normalLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedNormalBuffer, 0); if (OD.enableNormalGuide) inputLayers.push_back(normalLayer); OptixImage2D outputLayer = colorLayer; // can I get away with this? @@ -1692,8 +1625,8 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } @@ -1703,50 +1636,115 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } #endif params.denoiseAlpha = 0; // Don't touch alpha. params.blendFactor = 0.0f; // Show the denoised image only. 
- params.hdrIntensity = (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0); + params.hdrIntensity = (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0); #ifdef USE_OPTIX72 - params.hdrAverageColor = (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0); + params.hdrAverageColor = (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0); #endif OPTIX_CHECK(optixDenoiserInvoke( OD.denoiser, cudaStream, ¶ms, - (CUdeviceptr) bufferGetPointer(OD.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserStateBuffer, 0), OD.denoiserSizes.stateSizeInBytes, inputLayers.data(), inputLayers.size(), /* inputOffsetX */ 0, /* inputOffsetY */ 0, &outputLayer, - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes )); +} - synchronizeDevices(); +inline const char* getGLErrorString( GLenum error ) +{ + switch( error ) + { + case GL_NO_ERROR: return "No error"; + case GL_INVALID_ENUM: return "Invalid enum"; + case GL_INVALID_VALUE: return "Invalid value"; + case GL_INVALID_OPERATION: return "Invalid operation"; + //case GL_STACK_OVERFLOW: return "Stack overflow"; + //case GL_STACK_UNDERFLOW: return "Stack underflow"; + case GL_OUT_OF_MEMORY: return "Out of memory"; + //case GL_TABLE_TOO_LARGE: return "Table too large"; + default: return "Unknown GL error"; + } } +#define DO_GL_CHECK +#ifdef DO_GL_CHECK +# define GL_CHECK( call ) \ + do \ + { \ + call; \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) << " at " \ + << __FILE__ << "(" << __LINE__ << "): " << #call \ + << std::endl; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + + +# define GL_CHECK_ERRORS( ) \ + do \ + { \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) 
<< " at " \ + << __FILE__ << "(" << __LINE__ << ")"; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + +#else +# define GL_CHECK( call ) do { call; } while(0) +# define GL_CHECK_ERRORS( ) do { ; } while(0) +#endif + void drawFrameBufferToWindow() { synchronizeDevices(); glFlush(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); auto &OD = OptixData; - cudaGraphicsMapResources(1, &OD.cudaResourceTex); - const void* fbdevptr = bufferGetPointer(OD.frameBuffer,0); - cudaArray_t array; - cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); - cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); - cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); - + const void* fbdevptr = owlBufferGetPointer(OD.combinedFrameBuffer,0); + + if (OD.resourceSharingSuccessful) { + cudaGraphicsMapResources(1, &OD.cudaResourceTex); + cudaArray_t array; + cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); + cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); + cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); + } else { + GL_CHECK(glBindTexture(GL_TEXTURE_2D, OD.imageTexID)); + glEnable(GL_TEXTURE_2D); + GL_CHECK(glTexSubImage2D(GL_TEXTURE_2D,0, + 0, 0, + OD.LP.frameSize.x, OD.LP.frameSize.y, + GL_RGBA, GL_FLOAT, fbdevptr)); + } + // Draw pixels from optix frame buffer glEnable(GL_FRAMEBUFFER_SRGB); glViewport(0, 0, OD.LP.frameSize.x, OD.LP.frameSize.y); @@ -1759,7 +1757,6 @@ void drawFrameBufferToWindow() glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); glDisable(GL_DEPTH_TEST); - glBindTexture(GL_TEXTURE_2D, OD.imageTexID); // Draw texture to screen via immediate mode glEnable(GL_TEXTURE_2D); @@ -1839,7 +1836,7 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP "If normal guide is enabled, albedo guide must also be 
enabled."); } - enqueueCommand([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ + enqueueCommandAndWait([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ OptixData.enableAlbedoGuide = useAlbedoGuide; OptixData.enableNormalGuide = useNormalGuide; #ifdef USE_OPTIX70 @@ -1857,9 +1854,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (!OptixData.colorAvgBuffer) OptixData.colorAvgBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), 4, nullptr); if (!OptixData.denoiserScratchBuffer) - OptixData.denoiserScratchBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserScratchBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); if (!OptixData.denoiserStateBuffer) - OptixData.denoiserStateBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserStateBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); // Setup denoiser OptixDenoiserOptions options; @@ -1873,8 +1870,8 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (OptixData.denoiser) optixDenoiserDestroy(OptixData.denoiser); - auto optixContext = getOptixContext(OptixData.context, 0); - auto cudaStream = getStream(OptixData.context, 0); + auto optixContext = owlContextGetOptixContext(OptixData.context, 0); + auto cudaStream = owlContextGetStream(OptixData.context, 0); OPTIX_CHECK(optixDenoiserCreate(optixContext, &options, &OptixData.denoiser)); OptixDenoiserModelKind kind; @@ -1907,9 +1904,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP (cudaStream_t) cudaStream, (unsigned int) OptixData.LP.frameSize.x, (unsigned int) OptixData.LP.frameSize.y, - (CUdeviceptr) bufferGetPointer(OptixData.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserStateBuffer, 0), OptixData.denoiserSizes.stateSizeInBytes, - 
(CUdeviceptr) bufferGetPointer(OptixData.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserScratchBuffer, 0), scratchSizeInBytes ); }); @@ -1922,7 +1919,7 @@ std::vector readFrameBuffer() { int num_devices = getDeviceCount(); synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*)bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*)owlBufferGetPointer(OptixData.combinedFrameBuffer,0); for (uint32_t test = 0; test < frameBuffer.size(); test += 4) { frameBuffer[test + 0] = fb[test / 4].r; frameBuffer[test + 1] = fb[test / 4].g; @@ -1939,6 +1936,8 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi if ((width < 1) || (height < 1)) throw std::runtime_error("Error, invalid width/height"); std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, samplesPerPixel, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -1955,6 +1954,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi resizeOptixFrameBuffer(width, height); resetAccumulation(); updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = 0; i < samplesPerPixel; ++i) { // std::cout< render(uint32_t width, uint32_t height, uint32_t samplesPerPi } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - if (OptixData.enableDenoiser) - { - denoiseImage(); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) 
{ + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); } + updateGPUWeights(); + mergeFrameBuffers(); if (!NVISII.headlessMode) { + if (OptixData.enableDenoiser) + { + denoiseImage(); + } + drawFrameBufferToWindow(); glfwSetWindowTitle(WindowData.window, (std::to_string(i) + std::string("/") + std::to_string(samplesPerPixel)).c_str()); @@ -1993,12 +2005,14 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi std::cout<<"\r "<< samplesPerPixel << "/" << samplesPerPixel <<" - done!" << std::endl; } - synchronizeDevices(); - - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); - cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + if (OptixData.enableDenoiser) + { + denoiseImage(); + } synchronizeDevices(); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); + cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); }); return frameBuffer; @@ -2016,6 +2030,8 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra { std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, startFrame, frameCount, bounce, _option, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -2046,6 +2062,9 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } @@ -2091,6 +2110,9 @@ std::vector 
renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw std::runtime_error(std::string("Error, unknown option : \"") + _option + std::string("\". ") + std::string("See documentation for available options")); @@ -2101,6 +2123,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra OptixData.LP.renderDataBounce = bounce; OptixData.LP.seed = seed; updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = startFrame; i < frameCount; ++i) { // std::cout< renderData(uint32_t width, uint32_t height, uint32_t startFra } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); + + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); + } + updateGPUWeights(); + mergeFrameBuffers(); + // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) // { @@ -2127,7 +2163,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height 
* sizeof(glm::vec4), cudaMemcpyDeviceToHost); OptixData.LP.renderDataMode = 0; @@ -2309,20 +2345,6 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std } } -// void renderDataToPNG(uint32_t width, uint32_t height, uint32_t startFrame, uint32_t frameCount, uint32_t bounce, std::string field, std::string imagePath) -// { -// std::vector fb = renderData(width, height, startFrame, frameCount, bounce, field); -// std::vector colors(4 * width * height); -// for (size_t i = 0; i < (width * height); ++i) { -// colors[i * 4 + 0] = uint8_t(glm::clamp(fb[i * 4 + 0] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 1] = uint8_t(glm::clamp(fb[i * 4 + 1] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 2] = uint8_t(glm::clamp(fb[i * 4 + 2] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 3] = uint8_t(glm::clamp(fb[i * 4 + 3] * 255.f, 0.f, 255.f)); -// } -// stbi_flip_vertically_on_write(true); -// stbi_write_png(imagePath.c_str(), width, height, /* num channels*/ 4, colors.data(), /* stride in bytes */ width * 4); -// } - void initializeComponentFactories( uint32_t maxEntities, uint32_t maxCameras, @@ -2388,9 +2410,10 @@ void initializeInteractive( glfw->poll_events(); initializeOptix(/*headless = */ false); - initializeImgui(); + int numGPUs = owlGetDeviceCount(OptixData.context); + while (!stopped) { /* Poll events from the window */ @@ -2412,18 +2435,30 @@ void initializeInteractive( updateFrameBuffer(); updateComponents(); updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - if (OptixData.enableDenoiser) - { + + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + 
cudaEventRecord(NVISII.events[deviceID].second, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); + } + owlLaunchSync(OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); + } + updateGPUWeights(); + mergeFrameBuffers(); + + if (OptixData.enableDenoiser) { denoiseImage(); - } + } } - // glm::vec4* samplePtr = (glm::vec4*) bufferGetPointer(OptixData.accumBuffer,0); - // glm::vec4* mvecPtr = (glm::vec4*) bufferGetPointer(OptixData.mvecBuffer,0); - // glm::vec4* t0AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.scratchBuffer,0); - // glm::vec4* t1AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.albedoBuffer,0); - // glm::vec4* fbPtr = (glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); - // glm::vec4* sPtr = (glm::vec4*) bufferGetPointer(OptixData.normalBuffer,0); + // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); + // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); + // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); + // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); + // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); // int width = OptixData.LP.frameSize.x; // int height = OptixData.LP.frameSize.y; // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); @@ -2442,7 +2477,10 @@ void initializeInteractive( OPTIX_CHECK(optixDenoiserDestroy(OptixData.denoiser)); if (OptixData.imageTexID != -1) { - cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + if (OptixData.cudaResourceTex) { + cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + OptixData.cudaResourceTex = 0; + } glDeleteTextures(1, &OptixData.imageTexID); } @@ 
-2604,12 +2642,12 @@ void updateSceneAabb(Entity* entity) void enableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = false; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = false; }); } void disableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = true; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = true; }); } bool areUpdatesEnabled() @@ -2731,6 +2769,9 @@ void __test__(std::vector args) { else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } @@ -2776,6 +2817,9 @@ void __test__(std::vector args) { else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw std::runtime_error(std::string("Error, unknown option : \"") + option + std::string("\". 
") + std::string("See documentation for available options")); diff --git a/src/nvisii/nvisii.cu b/src/nvisii/nvisii.cu index 6a94855c..15f41939 100644 --- a/src/nvisii/nvisii.cu +++ b/src/nvisii/nvisii.cu @@ -53,4 +53,4 @@ void reproject(glm::vec4 *sampleBuffer, glm::vec4 *t0AlbedoBuffer, glm::vec4 *t1 dim3 gridSize = dim3 (bx, by); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, true, width, height); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, false, width, height); -} \ No newline at end of file +} diff --git a/src/nvisii/nvisii_import_scene.cpp b/src/nvisii/nvisii_import_scene.cpp index 9b557d69..aed71804 100644 --- a/src/nvisii/nvisii_import_scene.cpp +++ b/src/nvisii/nvisii_import_scene.cpp @@ -42,9 +42,6 @@ std::string dirnameOf(const std::string& fname) Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::quat rotation, std::vector args) { - bool updatesEnabled = areUpdatesEnabled(); - - disableUpdates(); std::string directory = dirnameOf(path); bool verbose = false; bool max_quality = false; @@ -101,7 +98,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu nvisiiScene.materials.push_back(mat); material_light_map[mat] = nullptr; aiString Path; - + // Diffuse/specular workflow if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -204,7 +201,61 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto name = std::string(material->GetName().C_Str()); auto mat = nvisiiScene.materials[materialIdx]; aiString Path; - + + if (verbose) std::cout<<"Creating material : " << name << std::endl; + + aiColor3D color (0.f,0.f,0.f); + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_DIFFUSE, color)) { + mat->setBaseColor(glm::vec3(color.r, color.g, color.b)); + if 
(verbose) std::cout<<"Assigning base color : " << color.r << " " << color.g << " " << color.b << std::endl; + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_EMISSIVE, color)) { + if (color.r > 0.f || color.g > 0.f || color.b > 0.f) { + if (verbose) std::cout<<"Assigning emissive color : " << color.r << " " << color.g << " " << color.b << std::endl; + if (Light::get(mat->getName()) == nullptr) { + Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + material_light_map[mat] = Light::get(mat->getName()); + material_light_map[mat]->setColor(glm::vec3(color.r, color.g, color.b)); + } + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_SPECULAR, color)) { + if (color.r == color.b && color.r == color.g) { + if (verbose) std::cout<<"Setting constant specular: " << color.r << std::endl; + mat->setSpecular(color.r); + } + else if (verbose) { + std::cout<<"Error, colored specular found (not supported)" << std::endl; + } + } + + float scalar; + if(AI_SUCCESS == material->Get(AI_MATKEY_SHININESS, scalar)) { + if (scalar != 0.f) { + if (verbose) std::cout<<"Interpreting shininess as 2/roughness^4 - 2: " << powf(2.f / (scalar + 2.f), 1.f/4.f) << std::endl; + mat->setRoughness(powf(2.f / (scalar + 2.f), 1.f/4.f)); + } + } + float ior = 1.f; + if(AI_SUCCESS == material->Get(AI_MATKEY_REFRACTI, ior)) { + if (verbose) std::cout<<"Assigning index of refraction " << ior << std::endl; + mat->setIor(ior); + } + + if(AI_SUCCESS == material->Get(AI_MATKEY_OPACITY, scalar)) { + if (scalar != 1.f) { + if (ior == 1) { + if (verbose) std::cout<<"Assigning opacity " << scalar << std::endl; + mat->setAlpha(scalar); + } + else { + if (verbose) std::cout<<"IOR != 1.0, interpreting dissolve as transmission " << scalar << std::endl; + mat->setTransmission(scalar); + } + } + } + // todo, add texture paths to map above, load later and connect if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, 
NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -228,6 +279,10 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) mat->setNormalMapTexture(texture_map[path]); + if (!texture_map[path]->isLinear()) { + if (verbose) std::cout<<"WARNING: normal map texture " << path << " not marked as linear! Forcing texture into linear mode..." << std::endl; + texture_map[path]->setLinear(true); + } } } @@ -236,9 +291,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) { - material_light_map[mat] = Light::create(mat->getName()); + if (Light::get(mat->getName()) == nullptr) { + Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + material_light_map[mat] = Light::get(mat->getName()); material_light_map[mat]->setColorTexture(texture_map[path]); - nvisiiScene.lights.push_back(material_light_map[mat]); } } } @@ -267,11 +325,13 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto &aiMesh = scene->mMeshes[meshIdx]; auto &aiVertices = aiMesh->mVertices; auto &aiNormals = aiMesh->mNormals; + auto &aiTangents = aiMesh->mTangents; auto &aiFaces = aiMesh->mFaces; auto &aiTextureCoords = aiMesh->mTextureCoords; std::vector positions; std::vector normals; + std::vector tangents; std::vector texCoords; std::vector indices; @@ -291,6 +351,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu if (!aiMesh->HasNormals()) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no normals" << std::endl; } + if (!aiMesh->HasTangentsAndBitangents()) { + if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no tangents" << std::endl; + } if 
(!aiMesh->HasTextureCoords(0)) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no texture coordinates" << std::endl; } @@ -310,6 +373,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu v.normal.y = normal.y; v.normal.z = normal.z; } + if (aiMesh->HasTangentsAndBitangents()) { + auto tangent = aiTangents[vid]; + v.tangent.x = tangent.x; + v.tangent.y = tangent.y; + v.tangent.z = tangent.z; + } if (aiMesh->HasTextureCoords(0)) { // just try to take the first texcoord auto texCoord = aiTextureCoords[0][vid]; @@ -322,6 +391,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu normals.push_back(v.normal.x); normals.push_back(v.normal.y); normals.push_back(v.normal.z); + tangents.push_back(v.tangent.x); + tangents.push_back(v.tangent.y); + tangents.push_back(v.tangent.z); texCoords.push_back(v.texcoord.x); texCoords.push_back(v.texcoord.y); } @@ -352,6 +424,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu meshName, positions, 3, normals, 3, + tangents, 3, /*colors*/{}, 3, texCoords, 2, indices @@ -363,33 +436,6 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } } - // load lights - for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { - auto light = scene->mLights[lightIdx]; - if (verbose) { - std::cout<<"Found light: " << std::string(light->mName.C_Str()) << std::endl; - if (light->mType == aiLightSource_DIRECTIONAL) { - std::cout<<"Directional"<mType == aiLightSource_POINT) { - std::cout<<"Point"<mType == aiLightSource_SPOT) { - std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { - std::cout<<"Ambient"<mType == aiLightSource_AREA) { - std::cout<<"Area"<mNumCameras; ++cameraIdx) { - auto camera = scene->mCameras[cameraIdx]; - if (verbose) { - std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; - } - } - std::function addNode; addNode = [&scene, &nvisiiScene, 
&material_light_map, &addNode, position, rotation, scale, verbose] (aiNode* node, Transform* parentTransform, int level) @@ -403,7 +449,16 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } if (verbose) std::cout<< std::string(level, '\t') << "Creating transform " << transformName << std::endl; auto transform = Transform::create(transformName); - transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + try { + transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + } catch(...) { + if (verbose) std::cout<< std::string(level, '\t') << "Warning! transform " << transformName << " Decomposition failed! Is the product of the 4x4 with the determinant of the upper left 3x3 nonzero? See Graphics Gems II: Decomposing a Matrix into Simple Transformations" << std::endl; + Transform::remove(transformName); + return; + + // transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation), false); + // transform->setScale({0.f, 0.f, 0.f}); + } if (parentTransform == nullptr) { transform->setScale(transform->getScale() * scale); transform->addRotation(rotation); @@ -451,11 +506,69 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu for (uint32_t cid = 0; cid < node->mNumChildren; ++cid) addNode(node->mChildren[cid], transform, level+1); }; - addNode(scene->mRootNode, nullptr, 0); + + + // load lights + for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { + auto &aiLight = scene->mLights[lightIdx]; + std::string lightName = std::string(aiLight->mName.C_Str()); + // if (verbose) + { + if (verbose) std::cout<<"Found light: " << lightName << std::endl; + if (aiLight->mType == aiLightSource_DIRECTIONAL) { + if (verbose) std::cout<<"Directional"<mType == aiLightSource_POINT) { + if (verbose) std::cout<<"Point"<setTransform(Transform::get(lightName)); + if (verbose) std::cout<< std::string(1, '\t') << "transform: " << lightName <setLight(light); + 
light->setColor({aiLight->mColorDiffuse.r, aiLight->mColorDiffuse.g, aiLight->mColorDiffuse.b}); + + nvisiiScene.entities.push_back(entity); + } else if (aiLight->mType == aiLightSource_SPOT) { + std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { + std::cout<<"Ambient"<mType == aiLightSource_AREA) { + std::cout<<"Area"<mNumCameras; ++cameraIdx) { + auto camera = scene->mCameras[cameraIdx]; + if (verbose) { + std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; + } + } + aiReleaseImport(scene); - if (updatesEnabled) enableUpdates(); if (verbose) std::cout<<"Done!"< gridHdl = nanovdb::createFogVolumeBox(); + auto create = [size, center, halfWidth] (Volume* v) { + nanovdb::GridHandle<> gridHdl = nanovdb::createFogVolumeBox(size.x, size.y, size.z, ((nanovdb::Vec3R)(0)), 1.0f, halfWidth); v->gridHdlPtr = std::make_shared>(std::move(gridHdl)); v->markDirty(); };