From 464e38dcfff09f1ff55b7a0a3ba6cc2fbee51930 Mon Sep 17 00:00:00 2001
From: Mohsen <36326627+mohsenD98@users.noreply.github.com>
Date: Tue, 22 Oct 2024 06:51:15 +0330
Subject: [PATCH] Enable Qt Quick3D support (#5752)
---
CMakeLists.txt | 2 +-
cmake/qgis-cmake-wrapper.cmake | 2 +
src/core/CMakeLists.txt | 1 +
src/qml/Dummy.qml | 9 +
src/qml/qgismobileapp.qml | 3 +-
src/qml/qml.qrc | 1 +
vcpkg.json | 1 +
vcpkg/ports/qgis/meshoptimizer.patch | 60 +
vcpkg/ports/qgis/portfile.cmake | 2 +
vcpkg/ports/qgis/vcpkg.json | 1 +
.../0001-devendor-meshoptimizer.patch | 8977 +++++++++++++++++
vcpkg/ports/qtquick3d/portfile.cmake | 30 +
vcpkg/ports/qtquick3d/vcpkg.json | 39 +
13 files changed, 9125 insertions(+), 3 deletions(-)
create mode 100644 src/qml/Dummy.qml
create mode 100644 vcpkg/ports/qgis/meshoptimizer.patch
create mode 100644 vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch
create mode 100644 vcpkg/ports/qtquick3d/portfile.cmake
create mode 100644 vcpkg/ports/qtquick3d/vcpkg.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8845f610ab..5f6c08bd4e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -117,7 +117,7 @@ if (ANDROID)
set(ANDROID_PACKAGE_SOURCE_DIR ${CMAKE_BINARY_DIR}/android-template)
endif()
-find_package(Qt6 COMPONENTS Concurrent Core Qml Gui Xml Positioning Widgets Network Quick Svg Sql Sensors WebView Multimedia Bluetooth Nfc WebSockets REQUIRED)
+find_package(Qt6 COMPONENTS Concurrent Core Qml Gui Xml Positioning Widgets Network Quick Svg Sql Sensors WebView Multimedia Bluetooth Nfc WebSockets Quick3D REQUIRED)
if(NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
find_package(Qt6 COMPONENTS PrintSupport REQUIRED)
diff --git a/cmake/qgis-cmake-wrapper.cmake b/cmake/qgis-cmake-wrapper.cmake
index 75f92c31e6..c28e619076 100644
--- a/cmake/qgis-cmake-wrapper.cmake
+++ b/cmake/qgis-cmake-wrapper.cmake
@@ -134,6 +134,8 @@ if(TRUE) # Should possibly have a "static only" check
endif()
find_package(poly2tri CONFIG)
target_link_libraries(QGIS::Core INTERFACE poly2tri::poly2tri)
+ find_package(meshoptimizer CONFIG REQUIRED)
+ target_link_libraries(QGIS::Core INTERFACE meshoptimizer::meshoptimizer)
pkg_check_modules(freexl REQUIRED IMPORTED_TARGET freexl)
target_link_libraries(QGIS::Core INTERFACE PkgConfig::freexl)
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index da39d2766e..db9ae84ab7 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -354,6 +354,7 @@ target_link_libraries(
Qt::WebView
Qt::Multimedia
Qt::WebSockets
+ Qt::Quick3D
QGIS::Core
QGIS::Analysis
ZXing::ZXing
diff --git a/src/qml/Dummy.qml b/src/qml/Dummy.qml
new file mode 100644
index 0000000000..38ec82db14
--- /dev/null
+++ b/src/qml/Dummy.qml
@@ -0,0 +1,9 @@
+// This file adds extra imports so the qml import scanner picks up these additional modules
+import QtQuick3D
+import QtQuick3D.AssetUtils
+import QtQuick3D.Helpers
+import QtQuick3D.Particles3D
+import QtWebSockets
+
+Item {
+}
diff --git a/src/qml/qgismobileapp.qml b/src/qml/qgismobileapp.qml
index e046e3ec3a..ba87f5c057 100644
--- a/src/qml/qgismobileapp.qml
+++ b/src/qml/qgismobileapp.qml
@@ -14,6 +14,7 @@
* (at your option) any later version. *
* *
***************************************************************************/
+import QtCore
import QtQuick
import QtQuick.Controls
import QtQuick.Controls.Material
@@ -21,8 +22,6 @@ import QtQuick.Effects
import QtQuick.Window
import QtQml
import QtSensors
-import QtCore
-import QtWebSockets // Not used here but added so QML registers its dependencies for plugins to use
import org.qgis
import org.qfield
import Theme
diff --git a/src/qml/qml.qrc b/src/qml/qml.qrc
index d753b191f4..1381aab6fc 100644
--- a/src/qml/qml.qrc
+++ b/src/qml/qml.qrc
@@ -13,6 +13,7 @@
CoordinateLocator.qml
DashBoard.qml
DigitizingToolbar.qml
+ Dummy.qml
ElevationProfile.qml
FeatureForm.qml
FeatureListForm.qml
diff --git a/vcpkg.json b/vcpkg.json
index 94a161e1bd..b5b3425798 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -84,6 +84,7 @@
"qml"
]
},
+ "qtquick3d",
{
"name": "qtsensors",
"features": [
diff --git a/vcpkg/ports/qgis/meshoptimizer.patch b/vcpkg/ports/qgis/meshoptimizer.patch
new file mode 100644
index 0000000000..7401ce3213
--- /dev/null
+++ b/vcpkg/ports/qgis/meshoptimizer.patch
@@ -0,0 +1,60 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index e299cb1d617..ae6482b4e90 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -255,6 +255,7 @@ if(WITH_CORE)
+
+ # try to configure and build POLY2TRI support
+ set (WITH_INTERNAL_POLY2TRI TRUE CACHE BOOL "Determines whether POLY2TRI should be built from internal copy")
++ set (WITH_INTERNAL_MESHOPTIMIZER TRUE CACHE BOOL "Determines whether MESHOPTIMIZER should be built from internal copy")
+
+ # try to configure and build POSTGRESQL support
+ set (WITH_POSTGRESQL TRUE CACHE BOOL "Determines whether POSTGRESQL support should be built")
+diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
+index 94c0e143001..777bd4bcb21 100644
+--- a/src/core/CMakeLists.txt
++++ b/src/core/CMakeLists.txt
+@@ -19,6 +19,18 @@ if (WITH_PDF4QT)
+ SUBDIRS(${CMAKE_SOURCE_DIR}/external/PDF4QT)
+ endif()
+
++if(WITH_INTERNAL_MESHOPTIMIZER)
++ add_library(meshoptimizer STATIC
++ ${CMAKE_SOURCE_DIR}/external/meshOptimizer/simplifier.cpp
++ )
++ add_library(meshoptimizer::meshoptimizer ALIAS meshoptimizer)
++ target_include_directories(meshoptimizer PUBLIC
++ ${CMAKE_SOURCE_DIR}/external/meshOptimizer
++ )
++else()
++ find_package(meshoptimizer CONFIG REQUIRED)
++endif()
++
+ set(QGIS_CORE_SRCS
+ ${CMAKE_SOURCE_DIR}/external/kdbush/include/kdbush.hpp
+
+@@ -30,8 +42,6 @@ set(QGIS_CORE_SRCS
+ ${CMAKE_SOURCE_DIR}/external/nmea/time.c
+ ${CMAKE_SOURCE_DIR}/external/nmea/tok.c
+
+- ${CMAKE_SOURCE_DIR}/external/meshOptimizer/simplifier.cpp
+-
+ ${FLEX_QgsExpressionLexer_OUTPUTS}
+ ${BISON_QgsExpressionParser_OUTPUTS}
+ ${FLEX_QgsSqlStatementLexer_OUTPUTS}
+@@ -2375,7 +2385,6 @@ target_include_directories(qgis_core PUBLIC
+ ${CMAKE_SOURCE_DIR}/external/kdbush/include
+ ${CMAKE_SOURCE_DIR}/external/nmea
+ ${CMAKE_SOURCE_DIR}/external/rtree/include
+- ${CMAKE_SOURCE_DIR}/external/meshOptimizer
+ ${CMAKE_SOURCE_DIR}/external/tinygltf
+ )
+
+@@ -2485,6 +2494,7 @@ target_link_libraries(qgis_core
+ ${ZLIB_LIBRARIES}
+ ${EXIV2_LIBRARY}
+ PROJ::proj
++ meshoptimizer::meshoptimizer
+ )
+
+ if(BUILD_WITH_QT6)
diff --git a/vcpkg/ports/qgis/portfile.cmake b/vcpkg/ports/qgis/portfile.cmake
index 523f53e77e..af4cba1d26 100644
--- a/vcpkg/ports/qgis/portfile.cmake
+++ b/vcpkg/ports/qgis/portfile.cmake
@@ -18,6 +18,7 @@ vcpkg_from_github(
include-qthread.patch
processing.patch # Needed to avoid link issue with tinygltf (ATM embedded into QGIS) and _GEOSQueryCallback defined multiple times
font_download.patch
+ meshoptimizer.patch # Unvendor meshoptimizer
)
file(REMOVE ${SOURCE_PATH}/cmake/FindGDAL.cmake)
@@ -38,6 +39,7 @@ list(APPEND QGIS_OPTIONS "-DWITH_QSPATIALITE:BOOL=OFF")
list(APPEND QGIS_OPTIONS "-DWITH_PDAL:BOOL=OFF")
list(APPEND QGIS_OPTIONS "-DWITH_DRACO:BOOL=ON")
list(APPEND QGIS_OPTIONS "-DWITH_INTERNAL_POLY2TRI:BOOL=OFF")
+list(APPEND QGIS_OPTIONS "-DWITH_INTERNAL_MESHOPTIMIZER:BOOL=OFF")
list(APPEND QGIS_OPTIONS "-DBISON_EXECUTABLE=${BISON}")
list(APPEND QGIS_OPTIONS "-DFLEX_EXECUTABLE=${FLEX}")
diff --git a/vcpkg/ports/qgis/vcpkg.json b/vcpkg/ports/qgis/vcpkg.json
index 3d71b5ddac..c597b938f8 100644
--- a/vcpkg/ports/qgis/vcpkg.json
+++ b/vcpkg/ports/qgis/vcpkg.json
@@ -30,6 +30,7 @@
},
"libxml2",
"libzip",
+ "meshoptimizer",
"proj",
"protobuf",
{
diff --git a/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch b/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch
new file mode 100644
index 0000000000..6cba201d21
--- /dev/null
+++ b/vcpkg/ports/qtquick3d/0001-devendor-meshoptimizer.patch
@@ -0,0 +1,8977 @@
+From b35dbb67688edf360342ff238fbc1b8e3a3f67d4 Mon Sep 17 00:00:00 2001
+From: Matthias Kuhn
+Date: Mon, 21 Oct 2024 23:02:47 +0200
+Subject: [PATCH] devendor meshoptimizer
+
+---
+ src/3rdparty/meshoptimizer/LICENSE.md | 21 -
+ .../meshoptimizer/qt_attribution.json | 14 -
+ src/3rdparty/meshoptimizer/src/allocator.cpp | 8 -
+ .../meshoptimizer/src/clusterizer.cpp | 856 ---------
+ src/3rdparty/meshoptimizer/src/indexcodec.cpp | 674 -------
+ .../meshoptimizer/src/indexgenerator.cpp | 551 ------
+ .../meshoptimizer/src/meshoptimizer.h | 1069 -----------
+ .../meshoptimizer/src/overdrawanalyzer.cpp | 230 ---
+ .../meshoptimizer/src/overdrawoptimizer.cpp | 333 ----
+ src/3rdparty/meshoptimizer/src/simplifier.cpp | 1677 -----------------
+ .../meshoptimizer/src/spatialorder.cpp | 194 --
+ src/3rdparty/meshoptimizer/src/stripifier.cpp | 295 ---
+ .../meshoptimizer/src/vcacheanalyzer.cpp | 73 -
+ .../meshoptimizer/src/vcacheoptimizer.cpp | 473 -----
+ .../meshoptimizer/src/vertexcodec.cpp | 1195 ------------
+ .../meshoptimizer/src/vertexfilter.cpp | 962 ----------
+ .../meshoptimizer/src/vfetchanalyzer.cpp | 58 -
+ .../meshoptimizer/src/vfetchoptimizer.cpp | 74 -
+ src/utils/CMakeLists.txt | 36 +-
+ 19 files changed, 3 insertions(+), 8790 deletions(-)
+ delete mode 100644 src/3rdparty/meshoptimizer/LICENSE.md
+ delete mode 100644 src/3rdparty/meshoptimizer/qt_attribution.json
+ delete mode 100644 src/3rdparty/meshoptimizer/src/allocator.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/clusterizer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/indexcodec.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/indexgenerator.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/meshoptimizer.h
+ delete mode 100644 src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/simplifier.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/spatialorder.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/stripifier.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vertexcodec.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vertexfilter.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vfetchanalyzer.cpp
+ delete mode 100644 src/3rdparty/meshoptimizer/src/vfetchoptimizer.cpp
+
+diff --git a/src/3rdparty/meshoptimizer/LICENSE.md b/src/3rdparty/meshoptimizer/LICENSE.md
+deleted file mode 100644
+index b673c24..0000000
+--- a/src/3rdparty/meshoptimizer/LICENSE.md
++++ /dev/null
+@@ -1,21 +0,0 @@
+-MIT License
+-
+-Copyright (c) 2016-2022 Arseny Kapoulkine
+-
+-Permission is hereby granted, free of charge, to any person obtaining a copy
+-of this software and associated documentation files (the "Software"), to deal
+-in the Software without restriction, including without limitation the rights
+-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+-copies of the Software, and to permit persons to whom the Software is
+-furnished to do so, subject to the following conditions:
+-
+-The above copyright notice and this permission notice shall be included in all
+-copies or substantial portions of the Software.
+-
+-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+-SOFTWARE.
+diff --git a/src/3rdparty/meshoptimizer/qt_attribution.json b/src/3rdparty/meshoptimizer/qt_attribution.json
+deleted file mode 100644
+index c7ff110..0000000
+--- a/src/3rdparty/meshoptimizer/qt_attribution.json
++++ /dev/null
+@@ -1,14 +0,0 @@
+-{
+- "Id": "meshoptimizer",
+- "Name": "meshoptimizer",
+- "QDocModule": "qtquick3d",
+- "Description": "Provides algorithms to help optimize meshes for GPU render stages, reduce the mesh complexity and storage overhead.",
+- "QtUsage": "Used to generate Level of Detail meshes for Models in Qt Quick 3D",
+-
+- "Homepage": "https://github.com/zeux/meshoptimizer",
+- "Version": "v0.18",
+- "License": "MIT License",
+- "LicenseId": "MIT",
+- "LicenseFile": "LICENSE.md",
+- "Copyright": "Copyright (c) 2016-2022 Arseny Kapoulkine"
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/allocator.cpp b/src/3rdparty/meshoptimizer/src/allocator.cpp
+deleted file mode 100644
+index 072e8e5..0000000
+--- a/src/3rdparty/meshoptimizer/src/allocator.cpp
++++ /dev/null
+@@ -1,8 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*))
+-{
+- meshopt_Allocator::Storage::allocate = allocate;
+- meshopt_Allocator::Storage::deallocate = deallocate;
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/clusterizer.cpp b/src/3rdparty/meshoptimizer/src/clusterizer.cpp
+deleted file mode 100644
+index b1f7b35..0000000
+--- a/src/3rdparty/meshoptimizer/src/clusterizer.cpp
++++ /dev/null
+@@ -1,856 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <float.h>
+-#include <math.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016
+-// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016
+-// Jack Ritter. An Efficient Bounding Sphere. 1990
+-namespace meshopt
+-{
+-
+-// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet
+-const size_t kMeshletMaxVertices = 255;
+-
+-// A reasonable limit is around 2*max_vertices or less
+-const size_t kMeshletMaxTriangles = 512;
+-
+-struct TriangleAdjacency2
+-{
+- unsigned int* counts;
+- unsigned int* offsets;
+- unsigned int* data;
+-};
+-
+-static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+-{
+- size_t face_count = index_count / 3;
+-
+- // allocate arrays
+- adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.data = allocator.allocate<unsigned int>(index_count);
+-
+- // fill triangle counts
+- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- assert(indices[i] < vertex_count);
+-
+- adjacency.counts[indices[i]]++;
+- }
+-
+- // fill offset table
+- unsigned int offset = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- adjacency.offsets[i] = offset;
+- offset += adjacency.counts[i];
+- }
+-
+- assert(offset == index_count);
+-
+- // fill triangle data
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+-
+- adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+- adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+- adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+- }
+-
+- // fix offsets that have been disturbed by the previous pass
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- assert(adjacency.offsets[i] >= adjacency.counts[i]);
+-
+- adjacency.offsets[i] -= adjacency.counts[i];
+- }
+-}
+-
+-static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
+-{
+- assert(count > 0);
+-
+- // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates
+- size_t pmin[3] = {0, 0, 0};
+- size_t pmax[3] = {0, 0, 0};
+-
+- for (size_t i = 0; i < count; ++i)
+- {
+- const float* p = points[i];
+-
+- for (int axis = 0; axis < 3; ++axis)
+- {
+- pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis];
+- pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis];
+- }
+- }
+-
+- // find the pair of points with largest distance
+- float paxisd2 = 0;
+- int paxis = 0;
+-
+- for (int axis = 0; axis < 3; ++axis)
+- {
+- const float* p1 = points[pmin[axis]];
+- const float* p2 = points[pmax[axis]];
+-
+- float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]);
+-
+- if (d2 > paxisd2)
+- {
+- paxisd2 = d2;
+- paxis = axis;
+- }
+- }
+-
+- // use the longest segment as the initial sphere diameter
+- const float* p1 = points[pmin[paxis]];
+- const float* p2 = points[pmax[paxis]];
+-
+- float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2};
+- float radius = sqrtf(paxisd2) / 2;
+-
+- // iteratively adjust the sphere up until all points fit
+- for (size_t i = 0; i < count; ++i)
+- {
+- const float* p = points[i];
+- float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]);
+-
+- if (d2 > radius * radius)
+- {
+- float d = sqrtf(d2);
+- assert(d > 0);
+-
+- float k = 0.5f + (radius / d) / 2;
+-
+- center[0] = center[0] * k + p[0] * (1 - k);
+- center[1] = center[1] * k + p[1] * (1 - k);
+- center[2] = center[2] * k + p[2] * (1 - k);
+- radius = (radius + d) / 2;
+- }
+- }
+-
+- result[0] = center[0];
+- result[1] = center[1];
+- result[2] = center[2];
+- result[3] = radius;
+-}
+-
+-struct Cone
+-{
+- float px, py, pz;
+- float nx, ny, nz;
+-};
+-
+-static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
+-{
+- float cone = 1.f - spread * cone_weight;
+- float cone_clamped = cone < 1e-3f ? 1e-3f : cone;
+-
+- return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped;
+-}
+-
+-static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
+-{
+- Cone result = acc;
+-
+- float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);
+-
+- result.px *= center_scale;
+- result.py *= center_scale;
+- result.pz *= center_scale;
+-
+- float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz;
+- float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length);
+-
+- result.nx *= axis_scale;
+- result.ny *= axis_scale;
+- result.nz *= axis_scale;
+-
+- return result;
+-}
+-
+-static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- (void)vertex_count;
+-
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+- size_t face_count = index_count / 3;
+-
+- float mesh_area = 0;
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- const float* p0 = vertex_positions + vertex_stride_float * a;
+- const float* p1 = vertex_positions + vertex_stride_float * b;
+- const float* p2 = vertex_positions + vertex_stride_float * c;
+-
+- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+-
+- float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+- float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+- float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+-
+- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+- float invarea = (area == 0.f) ? 0.f : 1.f / area;
+-
+- triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f;
+- triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f;
+- triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f;
+-
+- triangles[i].nx = normalx * invarea;
+- triangles[i].ny = normaly * invarea;
+- triangles[i].nz = normalz * invarea;
+-
+- mesh_area += area;
+- }
+-
+- return mesh_area;
+-}
+-
+-static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
+-{
+- size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3;
+-
+- // fill 4b padding with 0
+- while (offset & 3)
+- meshlet_triangles[offset++] = 0;
+-}
+-
+-static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
+-{
+- unsigned char& av = used[a];
+- unsigned char& bv = used[b];
+- unsigned char& cv = used[c];
+-
+- bool result = false;
+-
+- unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+-
+- if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+- {
+- meshlets[meshlet_offset] = meshlet;
+-
+- for (size_t j = 0; j < meshlet.vertex_count; ++j)
+- used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
+-
+- finishMeshlet(meshlet, meshlet_triangles);
+-
+- meshlet.vertex_offset += meshlet.vertex_count;
+- meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
+- meshlet.vertex_count = 0;
+- meshlet.triangle_count = 0;
+-
+- result = true;
+- }
+-
+- if (av == 0xff)
+- {
+- av = (unsigned char)meshlet.vertex_count;
+- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
+- }
+-
+- if (bv == 0xff)
+- {
+- bv = (unsigned char)meshlet.vertex_count;
+- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
+- }
+-
+- if (cv == 0xff)
+- {
+- cv = (unsigned char)meshlet.vertex_count;
+- meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
+- }
+-
+- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
+- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
+- meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
+- meshlet.triangle_count++;
+-
+- return result;
+-}
+-
+-struct KDNode
+-{
+- union
+- {
+- float split;
+- unsigned int index;
+- };
+-
+- // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
+- // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
+- unsigned int axis : 2;
+- unsigned int children : 30;
+-};
+-
+-static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
+-{
+- size_t m = 0;
+-
+- // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
+- for (size_t i = 0; i < count; ++i)
+- {
+- float v = points[indices[i] * stride + axis];
+-
+- // swap(m, i) unconditionally
+- unsigned int t = indices[m];
+- indices[m] = indices[i];
+- indices[i] = t;
+-
+- // when v >= pivot, we swap i with m without advancing it, preserving invariants
+- m += v < pivot;
+- }
+-
+- return m;
+-}
+-
+-static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
+-{
+- assert(offset + count <= node_count);
+- (void)node_count;
+-
+- KDNode& result = nodes[offset];
+-
+- result.index = indices[0];
+- result.axis = 3;
+- result.children = unsigned(count - 1);
+-
+- // all remaining points are stored in nodes immediately following the leaf
+- for (size_t i = 1; i < count; ++i)
+- {
+- KDNode& tail = nodes[offset + i];
+-
+- tail.index = indices[i];
+- tail.axis = 3;
+- tail.children = ~0u >> 2; // bogus value to prevent misuse
+- }
+-
+- return offset + count;
+-}
+-
+-static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
+-{
+- assert(count > 0);
+- assert(offset < node_count);
+-
+- if (count <= leaf_size)
+- return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+-
+- float mean[3] = {};
+- float vars[3] = {};
+- float runc = 1, runs = 1;
+-
+- // gather statistics on the points in the subtree using Welford's algorithm
+- for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
+- {
+- const float* point = points + indices[i] * stride;
+-
+- for (int k = 0; k < 3; ++k)
+- {
+- float delta = point[k] - mean[k];
+- mean[k] += delta * runs;
+- vars[k] += delta * (point[k] - mean[k]);
+- }
+- }
+-
+- // split axis is one where the variance is largest
+- unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2;
+-
+- float split = mean[axis];
+- size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
+-
+- // when the partition is degenerate simply consolidate the points into a single node
+- if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
+- return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+-
+- KDNode& result = nodes[offset];
+-
+- result.split = split;
+- result.axis = axis;
+-
+- // left subtree is right after our node
+- size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
+-
+- // distance to the right subtree is represented explicitly
+- result.children = unsigned(next_offset - offset - 1);
+-
+- return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
+-}
+-
+-static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
+-{
+- const KDNode& node = nodes[root];
+-
+- if (node.axis == 3)
+- {
+- // leaf
+- for (unsigned int i = 0; i <= node.children; ++i)
+- {
+- unsigned int index = nodes[root + i].index;
+-
+- if (emitted_flags[index])
+- continue;
+-
+- const float* point = points + index * stride;
+-
+- float distance2 =
+- (point[0] - position[0]) * (point[0] - position[0]) +
+- (point[1] - position[1]) * (point[1] - position[1]) +
+- (point[2] - position[2]) * (point[2] - position[2]);
+- float distance = sqrtf(distance2);
+-
+- if (distance < limit)
+- {
+- result = index;
+- limit = distance;
+- }
+- }
+- }
+- else
+- {
+- // branch; we order recursion to process the node that search position is in first
+- float delta = position[node.axis] - node.split;
+- unsigned int first = (delta <= 0) ? 0 : node.children;
+- unsigned int second = first ^ node.children;
+-
+- kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);
+-
+- // only process the other node if it can have a match based on closest distance so far
+- if (fabsf(delta) <= limit)
+- kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
+- }
+-}
+-
+-} // namespace meshopt
+-
+-size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+-
+- (void)kMeshletMaxVertices;
+- (void)kMeshletMaxTriangles;
+-
+- // meshlet construction is limited by max vertices and max triangles per meshlet
+- // the worst case is that the input is an unindexed stream since this equally stresses both limits
+- // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle
+- size_t max_vertices_conservative = max_vertices - 2;
+- size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative;
+- size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles;
+-
+- return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
+-}
+-
+-size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+-
+- meshopt_Allocator allocator;
+-
+- TriangleAdjacency2 adjacency = {};
+- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+-
+- unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+-
+- size_t face_count = index_count / 3;
+-
+- unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+- memset(emitted_flags, 0, face_count);
+-
+- // for each triangle, precompute centroid & normal to use for scoring
+- Cone* triangles = allocator.allocate<Cone>(face_count);
+- float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-
+- // assuming each meshlet is a square patch, expected radius is sqrt(expected area)
+- float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
+- float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
+-
+- // build a kd-tree for nearest neighbor lookup
+- unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
+- for (size_t i = 0; i < face_count; ++i)
+- kdindices[i] = unsigned(i);
+-
+- KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
+- kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
+-
+- // index of the vertex in the meshlet, 0xff if the vertex isn't used
+- unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+- memset(used, -1, vertex_count);
+-
+- meshopt_Meshlet meshlet = {};
+- size_t meshlet_offset = 0;
+-
+- Cone meshlet_cone_acc = {};
+-
+- for (;;)
+- {
+- unsigned int best_triangle = ~0u;
+- unsigned int best_extra = 5;
+- float best_score = FLT_MAX;
+-
+- Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
+-
+- for (size_t i = 0; i < meshlet.vertex_count; ++i)
+- {
+- unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
+-
+- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+- size_t neighbours_size = adjacency.counts[index];
+-
+- for (size_t j = 0; j < neighbours_size; ++j)
+- {
+- unsigned int triangle = neighbours[j];
+- assert(!emitted_flags[triangle]);
+-
+- unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
+-
+- // triangles that don't add new vertices to meshlets are max. priority
+- if (extra != 0)
+- {
+- // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
+- if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
+- extra = 0;
+-
+- extra++;
+- }
+-
+- // since topology-based priority is always more important than the score, we can skip scoring in some cases
+- if (extra > best_extra)
+- continue;
+-
+- const Cone& tri_cone = triangles[triangle];
+-
+- float distance2 =
+- (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) +
+- (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) +
+- (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz);
+-
+- float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz;
+-
+- float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
+-
+- // note that topology-based priority is always more important than the score
+- // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
+- if (extra < best_extra || score < best_score)
+- {
+- best_triangle = triangle;
+- best_extra = extra;
+- best_score = score;
+- }
+- }
+- }
+-
+- if (best_triangle == ~0u)
+- {
+- float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
+- unsigned int index = ~0u;
+- float limit = FLT_MAX;
+-
+- kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
+-
+- best_triangle = index;
+- }
+-
+- if (best_triangle == ~0u)
+- break;
+-
+- unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
+- if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
+- {
+- meshlet_offset++;
+- memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
+- }
+-
+- live_triangles[a]--;
+- live_triangles[b]--;
+- live_triangles[c]--;
+-
+- // remove emitted triangle from adjacency data
+- // this makes sure that we spend less time traversing these lists on subsequent iterations
+- for (size_t k = 0; k < 3; ++k)
+- {
+- unsigned int index = indices[best_triangle * 3 + k];
+-
+- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+- size_t neighbours_size = adjacency.counts[index];
+-
+- for (size_t i = 0; i < neighbours_size; ++i)
+- {
+- unsigned int tri = neighbours[i];
+-
+- if (tri == best_triangle)
+- {
+- neighbours[i] = neighbours[neighbours_size - 1];
+- adjacency.counts[index]--;
+- break;
+- }
+- }
+- }
+-
+- // update aggregated meshlet cone data for scoring subsequent triangles
+- meshlet_cone_acc.px += triangles[best_triangle].px;
+- meshlet_cone_acc.py += triangles[best_triangle].py;
+- meshlet_cone_acc.pz += triangles[best_triangle].pz;
+- meshlet_cone_acc.nx += triangles[best_triangle].nx;
+- meshlet_cone_acc.ny += triangles[best_triangle].ny;
+- meshlet_cone_acc.nz += triangles[best_triangle].nz;
+-
+- emitted_flags[best_triangle] = 1;
+- }
+-
+- if (meshlet.triangle_count)
+- {
+- finishMeshlet(meshlet, meshlet_triangles);
+-
+- meshlets[meshlet_offset++] = meshlet;
+- }
+-
+- assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+- return meshlet_offset;
+-}
+-
+-size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+-
+- assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+- assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+- assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+-
+- meshopt_Allocator allocator;
+-
+- // index of the vertex in the meshlet, 0xff if the vertex isn't used
+- unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+- memset(used, -1, vertex_count);
+-
+- meshopt_Meshlet meshlet = {};
+- size_t meshlet_offset = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- // appends triangle to the meshlet and writes previous meshlet to the output if full
+- meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
+- }
+-
+- if (meshlet.triangle_count)
+- {
+- finishMeshlet(meshlet, meshlet_triangles);
+-
+- meshlets[meshlet_offset++] = meshlet;
+- }
+-
+- assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+- return meshlet_offset;
+-}
+-
+-meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(index_count / 3 <= kMeshletMaxTriangles);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- (void)vertex_count;
+-
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- // compute triangle normals and gather triangle corners
+- float normals[kMeshletMaxTriangles][3];
+- float corners[kMeshletMaxTriangles][3][3];
+- size_t triangles = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- const float* p0 = vertex_positions + vertex_stride_float * a;
+- const float* p1 = vertex_positions + vertex_stride_float * b;
+- const float* p2 = vertex_positions + vertex_stride_float * c;
+-
+- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+-
+- float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+- float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+- float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+-
+- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+-
+- // no need to include degenerate triangles - they will be invisible anyway
+- if (area == 0.f)
+- continue;
+-
+- // record triangle normals & corners for future use; normal and corner 0 define a plane equation
+- normals[triangles][0] = normalx / area;
+- normals[triangles][1] = normaly / area;
+- normals[triangles][2] = normalz / area;
+- memcpy(corners[triangles][0], p0, 3 * sizeof(float));
+- memcpy(corners[triangles][1], p1, 3 * sizeof(float));
+- memcpy(corners[triangles][2], p2, 3 * sizeof(float));
+- triangles++;
+- }
+-
+- meshopt_Bounds bounds = {};
+-
+- // degenerate cluster, no valid triangles => trivial reject (cone data is 0)
+- if (triangles == 0)
+- return bounds;
+-
+- // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well
+- float psphere[4] = {};
+- computeBoundingSphere(psphere, corners[0], triangles * 3);
+-
+- float center[3] = {psphere[0], psphere[1], psphere[2]};
+-
+- // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis
+- float nsphere[4] = {};
+- computeBoundingSphere(nsphere, normals, triangles);
+-
+- float axis[3] = {nsphere[0], nsphere[1], nsphere[2]};
+- float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]);
+- float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength;
+-
+- axis[0] *= invaxislength;
+- axis[1] *= invaxislength;
+- axis[2] *= invaxislength;
+-
+- // compute a tight cone around all normals, mindp = cos(angle/2)
+- float mindp = 1.f;
+-
+- for (size_t i = 0; i < triangles; ++i)
+- {
+- float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2];
+-
+- mindp = (dp < mindp) ? dp : mindp;
+- }
+-
+- // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones
+- bounds.center[0] = center[0];
+- bounds.center[1] = center[1];
+- bounds.center[2] = center[2];
+- bounds.radius = psphere[3];
+-
+- // degenerate cluster, normal cone is larger than a hemisphere => trivial accept
+- // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable
+- // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful
+- if (mindp <= 0.1f)
+- {
+- bounds.cone_cutoff = 1;
+- bounds.cone_cutoff_s8 = 127;
+- return bounds;
+- }
+-
+- float maxt = 0;
+-
+- // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles
+- for (size_t i = 0; i < triangles; ++i)
+- {
+- // dot(center-t*axis-corner, trinormal) = 0
+- // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0
+- float cx = center[0] - corners[i][0][0];
+- float cy = center[1] - corners[i][0][1];
+- float cz = center[2] - corners[i][0][2];
+-
+- float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2];
+- float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2];
+-
+- // dn should be larger than mindp cutoff above
+- assert(dn > 0.f);
+- float t = dc / dn;
+-
+- maxt = (t > maxt) ? t : maxt;
+- }
+-
+- // cone apex should be in the negative half-space of all cluster triangles by construction
+- bounds.cone_apex[0] = center[0] - axis[0] * maxt;
+- bounds.cone_apex[1] = center[1] - axis[1] * maxt;
+- bounds.cone_apex[2] = center[2] - axis[2] * maxt;
+-
+- // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis
+- bounds.cone_axis[0] = axis[0];
+- bounds.cone_axis[1] = axis[1];
+- bounds.cone_axis[2] = axis[2];
+-
+- // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone
+- // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a))
+- bounds.cone_cutoff = sqrtf(1 - mindp * mindp);
+-
+- // quantize axis & cutoff to 8-bit SNORM format
+- bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8));
+- bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8));
+- bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8));
+-
+- // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error
+- float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]);
+- float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]);
+- float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]);
+-
+- // note that we need to round this up instead of rounding to nearest, hence +1
+- int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1);
+-
+- bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8);
+-
+- return bounds;
+-}
+-
+-meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(triangle_count <= kMeshletMaxTriangles);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- unsigned int indices[kMeshletMaxTriangles * 3];
+-
+- for (size_t i = 0; i < triangle_count * 3; ++i)
+- {
+- unsigned int index = meshlet_vertices[meshlet_triangles[i]];
+- assert(index < vertex_count);
+-
+- indices[i] = index;
+- }
+-
+- return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/indexcodec.cpp b/src/3rdparty/meshoptimizer/src/indexcodec.cpp
+deleted file mode 100644
+index e4495b8..0000000
+--- a/src/3rdparty/meshoptimizer/src/indexcodec.cpp
++++ /dev/null
+@@ -1,674 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013
+-// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014
+-namespace meshopt
+-{
+-
+-const unsigned char kIndexHeader = 0xe0;
+-const unsigned char kSequenceHeader = 0xd0;
+-
+-static int gEncodeIndexVersion = 0;
+-
+-typedef unsigned int VertexFifo[16];
+-typedef unsigned int EdgeFifo[16][2];
+-
+-static const unsigned int kTriangleIndexOrder[3][3] = {
+- {0, 1, 2},
+- {1, 2, 0},
+- {2, 0, 1},
+-};
+-
+-static const unsigned char kCodeAuxEncodingTable[16] = {
+- 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
+- 0, 0, // last two entries aren't used for encoding
+-};
+-
+-static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
+-{
+- (void)a;
+-
+- return (b == next) ? 1 : (c == next) ? 2 : 0;
+-}
+-
+-static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
+-{
+- for (int i = 0; i < 16; ++i)
+- {
+- size_t index = (offset - 1 - i) & 15;
+-
+- unsigned int e0 = fifo[index][0];
+- unsigned int e1 = fifo[index][1];
+-
+- if (e0 == a && e1 == b)
+- return (i << 2) | 0;
+- if (e0 == b && e1 == c)
+- return (i << 2) | 1;
+- if (e0 == c && e1 == a)
+- return (i << 2) | 2;
+- }
+-
+- return -1;
+-}
+-
+-static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
+-{
+- fifo[offset][0] = a;
+- fifo[offset][1] = b;
+- offset = (offset + 1) & 15;
+-}
+-
+-static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
+-{
+- for (int i = 0; i < 16; ++i)
+- {
+- size_t index = (offset - 1 - i) & 15;
+-
+- if (fifo[index] == v)
+- return i;
+- }
+-
+- return -1;
+-}
+-
+-static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
+-{
+- fifo[offset] = v;
+- offset = (offset + cond) & 15;
+-}
+-
+-static void encodeVByte(unsigned char*& data, unsigned int v)
+-{
+- // encode 32-bit value in up to 5 7-bit groups
+- do
+- {
+- *data++ = (v & 127) | (v > 127 ? 128 : 0);
+- v >>= 7;
+- } while (v);
+-}
+-
+-static unsigned int decodeVByte(const unsigned char*& data)
+-{
+- unsigned char lead = *data++;
+-
+- // fast path: single byte
+- if (lead < 128)
+- return lead;
+-
+- // slow path: up to 4 extra bytes
+- // note that this loop always terminates, which is important for malformed data
+- unsigned int result = lead & 127;
+- unsigned int shift = 7;
+-
+- for (int i = 0; i < 4; ++i)
+- {
+- unsigned char group = *data++;
+- result |= unsigned(group & 127) << shift;
+- shift += 7;
+-
+- if (group < 128)
+- break;
+- }
+-
+- return result;
+-}
+-
+-static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last)
+-{
+- unsigned int d = index - last;
+- unsigned int v = (d << 1) ^ (int(d) >> 31);
+-
+- encodeVByte(data, v);
+-}
+-
+-static unsigned int decodeIndex(const unsigned char*& data, unsigned int last)
+-{
+- unsigned int v = decodeVByte(data);
+- unsigned int d = (v >> 1) ^ -int(v & 1);
+-
+- return last + d;
+-}
+-
+-static int getCodeAuxIndex(unsigned char v, const unsigned char* table)
+-{
+- for (int i = 0; i < 16; ++i)
+- if (table[i] == v)
+- return i;
+-
+- return -1;
+-}
+-
+-static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c)
+-{
+- if (index_size == 2)
+- {
+- static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a);
+- static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b);
+- static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c);
+- }
+- else
+- {
+- static_cast<unsigned int*>(destination)[offset + 0] = a;
+- static_cast<unsigned int*>(destination)[offset + 1] = b;
+- static_cast<unsigned int*>(destination)[offset + 2] = c;
+- }
+-}
+-
+-} // namespace meshopt
+-
+-size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+-
+- // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
+- if (buffer_size < 1 + index_count / 3 + 16)
+- return 0;
+-
+- int version = gEncodeIndexVersion;
+-
+- buffer[0] = (unsigned char)(kIndexHeader | version);
+-
+- EdgeFifo edgefifo;
+- memset(edgefifo, -1, sizeof(edgefifo));
+-
+- VertexFifo vertexfifo;
+- memset(vertexfifo, -1, sizeof(vertexfifo));
+-
+- size_t edgefifooffset = 0;
+- size_t vertexfifooffset = 0;
+-
+- unsigned int next = 0;
+- unsigned int last = 0;
+-
+- unsigned char* code = buffer + 1;
+- unsigned char* data = code + index_count / 3;
+- unsigned char* data_safe_end = buffer + buffer_size - 16;
+-
+- int fecmax = version >= 1 ? 13 : 15;
+-
+- // use static encoding table; it's possible to pack the result and then build an optimal table and repack
+- // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set
+- const unsigned char* codeaux_table = kCodeAuxEncodingTable;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- // make sure we have enough space to write a triangle
+- // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index
+- // after this we can be sure we can write without extra bounds checks
+- if (data > data_safe_end)
+- return 0;
+-
+- int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset);
+-
+- if (fer >= 0 && (fer >> 2) < 15)
+- {
+- const unsigned int* order = kTriangleIndexOrder[fer & 3];
+-
+- unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
+-
+- // encode edge index and vertex fifo index, next or free index
+- int fe = fer >> 2;
+- int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
+-
+- int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15;
+-
+- if (fec == 15 && version >= 1)
+- {
+- // encode last-1 and last+1 to optimize strip-like sequences
+- if (c + 1 == last)
+- fec = 13, last = c;
+- if (c == last + 1)
+- fec = 14, last = c;
+- }
+-
+- *code++ = (unsigned char)((fe << 4) | fec);
+-
+- // note that we need to update the last index since free indices are delta-encoded
+- if (fec == 15)
+- encodeIndex(data, c, last), last = c;
+-
+- // we only need to push third vertex since first two are likely already in the vertex fifo
+- if (fec == 0 || fec >= fecmax)
+- pushVertexFifo(vertexfifo, c, vertexfifooffset);
+-
+- // we only need to push two new edges to edge fifo since the third one is already there
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- else
+- {
+- int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next);
+- const unsigned int* order = kTriangleIndexOrder[rotation];
+-
+- unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]];
+-
+- // if a/b/c are 0/1/2, we emit a reset code
+- bool reset = false;
+-
+- if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1)
+- {
+- reset = true;
+- next = 0;
+-
+- // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future
+- // this makes sure next continues to get incremented instead of being stuck
+- memset(vertexfifo, -1, sizeof(vertexfifo));
+- }
+-
+- int fb = getVertexFifo(vertexfifo, b, vertexfifooffset);
+- int fc = getVertexFifo(vertexfifo, c, vertexfifooffset);
+-
+- // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a
+- int fea = (a == next) ? (next++, 0) : 15;
+- int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15;
+- int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15;
+-
+- // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise
+- unsigned char codeaux = (unsigned char)((feb << 4) | fec);
+- int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table);
+-
+- // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15
+- if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset)
+- {
+- *code++ = (unsigned char)((15 << 4) | codeauxindex);
+- }
+- else
+- {
+- *code++ = (unsigned char)((15 << 4) | 14 | fea);
+- *data++ = codeaux;
+- }
+-
+- // note that we need to update the last index since free indices are delta-encoded
+- if (fea == 15)
+- encodeIndex(data, a, last), last = a;
+-
+- if (feb == 15)
+- encodeIndex(data, b, last), last = b;
+-
+- if (fec == 15)
+- encodeIndex(data, c, last), last = c;
+-
+- // only push vertices that weren't already in fifo
+- if (fea == 0 || fea == 15)
+- pushVertexFifo(vertexfifo, a, vertexfifooffset);
+-
+- if (feb == 0 || feb == 15)
+- pushVertexFifo(vertexfifo, b, vertexfifooffset);
+-
+- if (fec == 0 || fec == 15)
+- pushVertexFifo(vertexfifo, c, vertexfifooffset);
+-
+- // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles
+- pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- }
+-
+- // make sure we have enough space to write codeaux table
+- if (data > data_safe_end)
+- return 0;
+-
+- // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding
+- // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data
+- // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input
+- for (size_t i = 0; i < 16; ++i)
+- {
+- // decoder assumes that table entries never refer to separately encoded indices
+- assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf);
+-
+- *data++ = codeaux_table[i];
+- }
+-
+- // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference
+- assert(codeaux_table[0] == 0);
+-
+- assert(data >= buffer + index_count / 3 + 16);
+- assert(data <= buffer + buffer_size);
+-
+- return data - buffer;
+-}
+-
+-size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
+-{
+- assert(index_count % 3 == 0);
+-
+- // compute number of bits required for each index
+- unsigned int vertex_bits = 1;
+-
+- while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
+- vertex_bits++;
+-
+- // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas
+- unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;
+-
+- return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16;
+-}
+-
+-void meshopt_encodeIndexVersion(int version)
+-{
+- assert(unsigned(version) <= 1);
+-
+- meshopt::gEncodeIndexVersion = version;
+-}
+-
+-int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(index_size == 2 || index_size == 4);
+-
+- // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table
+- if (buffer_size < 1 + index_count / 3 + 16)
+- return -2;
+-
+- if ((buffer[0] & 0xf0) != kIndexHeader)
+- return -1;
+-
+- int version = buffer[0] & 0x0f;
+- if (version > 1)
+- return -1;
+-
+- EdgeFifo edgefifo;
+- memset(edgefifo, -1, sizeof(edgefifo));
+-
+- VertexFifo vertexfifo;
+- memset(vertexfifo, -1, sizeof(vertexfifo));
+-
+- size_t edgefifooffset = 0;
+- size_t vertexfifooffset = 0;
+-
+- unsigned int next = 0;
+- unsigned int last = 0;
+-
+- int fecmax = version >= 1 ? 13 : 15;
+-
+- // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end
+- const unsigned char* code = buffer + 1;
+- const unsigned char* data = code + index_count / 3;
+- const unsigned char* data_safe_end = buffer + buffer_size - 16;
+-
+- const unsigned char* codeaux_table = data_safe_end;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- // make sure we have enough data to read for a triangle
+- // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index
+- // after this we can be sure we can read without extra bounds checks
+- if (data > data_safe_end)
+- return -2;
+-
+- unsigned char codetri = *code++;
+-
+- if (codetri < 0xf0)
+- {
+- int fe = codetri >> 4;
+-
+- // fifo reads are wrapped around 16 entry buffer
+- unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0];
+- unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1];
+-
+- int fec = codetri & 15;
+-
+- // note: this is the most common path in the entire decoder
+- // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable
+- if (fec < fecmax)
+- {
+- // fifo reads are wrapped around 16 entry buffer
+- unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15];
+- unsigned int c = (fec == 0) ? next : cf;
+-
+- int fec0 = fec == 0;
+- next += fec0;
+-
+- // output triangle
+- writeTriangle(destination, i, index_size, a, b, c);
+-
+- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+- pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
+-
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- else
+- {
+- unsigned int c = 0;
+-
+- // fec - (fec ^ 3) decodes 13, 14 into -1, 1
+- // note that we need to update the last index since free indices are delta-encoded
+- last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last);
+-
+- // output triangle
+- writeTriangle(destination, i, index_size, a, b, c);
+-
+- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+- pushVertexFifo(vertexfifo, c, vertexfifooffset);
+-
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- }
+- else
+- {
+- // fast path: read codeaux from the table
+- if (codetri < 0xfe)
+- {
+- unsigned char codeaux = codeaux_table[codetri & 15];
+-
+- // note: table can't contain feb/fec=15
+- int feb = codeaux >> 4;
+- int fec = codeaux & 15;
+-
+- // fifo reads are wrapped around 16 entry buffer
+- // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
+- unsigned int a = next++;
+-
+- unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15];
+- unsigned int b = (feb == 0) ? next : bf;
+-
+- int feb0 = feb == 0;
+- next += feb0;
+-
+- unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15];
+- unsigned int c = (fec == 0) ? next : cf;
+-
+- int fec0 = fec == 0;
+- next += fec0;
+-
+- // output triangle
+- writeTriangle(destination, i, index_size, a, b, c);
+-
+- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+- pushVertexFifo(vertexfifo, a, vertexfifooffset);
+- pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0);
+- pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0);
+-
+- pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- else
+- {
+- // slow path: read a full byte for codeaux instead of using a table lookup
+- unsigned char codeaux = *data++;
+-
+- int fea = codetri == 0xfe ? 0 : 15;
+- int feb = codeaux >> 4;
+- int fec = codeaux & 15;
+-
+- // reset: codeaux is 0 but encoded as not-a-table
+- if (codeaux == 0)
+- next = 0;
+-
+- // fifo reads are wrapped around 16 entry buffer
+- // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior
+- unsigned int a = (fea == 0) ? next++ : 0;
+- unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15];
+- unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15];
+-
+- // note that we need to update the last index since free indices are delta-encoded
+- if (fea == 15)
+- last = a = decodeIndex(data, last);
+-
+- if (feb == 15)
+- last = b = decodeIndex(data, last);
+-
+- if (fec == 15)
+- last = c = decodeIndex(data, last);
+-
+- // output triangle
+- writeTriangle(destination, i, index_size, a, b, c);
+-
+- // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly
+- pushVertexFifo(vertexfifo, a, vertexfifooffset);
+- pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15));
+- pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15));
+-
+- pushEdgeFifo(edgefifo, b, a, edgefifooffset);
+- pushEdgeFifo(edgefifo, c, b, edgefifooffset);
+- pushEdgeFifo(edgefifo, a, c, edgefifooffset);
+- }
+- }
+- }
+-
+- // we should've read all data bytes and stopped at the boundary between data and codeaux table
+- if (data != data_safe_end)
+- return -3;
+-
+- return 0;
+-}
+-
+-size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count)
+-{
+- using namespace meshopt;
+-
+- // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
+- if (buffer_size < 1 + index_count + 4)
+- return 0;
+-
+- int version = gEncodeIndexVersion;
+-
+- buffer[0] = (unsigned char)(kSequenceHeader | version);
+-
+- unsigned int last[2] = {};
+- unsigned int current = 0;
+-
+- unsigned char* data = buffer + 1;
+- unsigned char* data_safe_end = buffer + buffer_size - 4;
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- // make sure we have enough data to write
+- // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end
+- // after this we can be sure we can write without extra bounds checks
+- if (data >= data_safe_end)
+- return 0;
+-
+- unsigned int index = indices[i];
+-
+- // this is a heuristic that switches between baselines when the delta grows too large
+- // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index
+- // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily
+- int cd = int(index - last[current]);
+- current ^= ((cd < 0 ? -cd : cd) >= 30);
+-
+- // encode delta from the last index
+- unsigned int d = index - last[current];
+- unsigned int v = (d << 1) ^ (int(d) >> 31);
+-
+- // note: low bit encodes the index of the last baseline which will be used for reconstruction
+- encodeVByte(data, (v << 1) | current);
+-
+- // update last for the next iteration that uses it
+- last[current] = index;
+- }
+-
+- // make sure we have enough space to write tail
+- if (data > data_safe_end)
+- return 0;
+-
+- for (int k = 0; k < 4; ++k)
+- *data++ = 0;
+-
+- return data - buffer;
+-}
+-
+-size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
+-{
+- // compute number of bits required for each index
+- unsigned int vertex_bits = 1;
+-
+- while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits)
+- vertex_bits++;
+-
+- // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit
+- unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;
+-
+- return 1 + index_count * vertex_groups + 4;
+-}
+-
+-int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size)
+-{
+- using namespace meshopt;
+-
+- // the minimum valid encoding is header, 1 byte per index and a 4-byte tail
+- if (buffer_size < 1 + index_count + 4)
+- return -2;
+-
+- if ((buffer[0] & 0xf0) != kSequenceHeader)
+- return -1;
+-
+- int version = buffer[0] & 0x0f;
+- if (version > 1)
+- return -1;
+-
+- const unsigned char* data = buffer + 1;
+- const unsigned char* data_safe_end = buffer + buffer_size - 4;
+-
+- unsigned int last[2] = {};
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- // make sure we have enough data to read
+- // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end
+- // after this we can be sure we can read without extra bounds checks
+- if (data >= data_safe_end)
+- return -2;
+-
+- unsigned int v = decodeVByte(data);
+-
+- // decode the index of the last baseline
+- unsigned int current = v & 1;
+- v >>= 1;
+-
+- // reconstruct index as a delta
+- unsigned int d = (v >> 1) ^ -int(v & 1);
+- unsigned int index = last[current] + d;
+-
+- // update last for the next iteration that uses it
+- last[current] = index;
+-
+- if (index_size == 2)
+- {
+- static_cast<unsigned short*>(destination)[i] = (unsigned short)(index);
+- }
+- else
+- {
+- static_cast<unsigned int*>(destination)[i] = index;
+- }
+- }
+-
+- // we should've read all data bytes and stopped at the boundary between data and tail
+- if (data != data_safe_end)
+- return -3;
+-
+- return 0;
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/indexgenerator.cpp b/src/3rdparty/meshoptimizer/src/indexgenerator.cpp
+deleted file mode 100644
+index 9a25c21..0000000
+--- a/src/3rdparty/meshoptimizer/src/indexgenerator.cpp
++++ /dev/null
+@@ -1,551 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
+-namespace meshopt
+-{
+-
+-static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
+-{
+- // MurmurHash2
+- const unsigned int m = 0x5bd1e995;
+- const int r = 24;
+-
+- while (len >= 4)
+- {
+- unsigned int k = *reinterpret_cast<const unsigned int*>(key);
+-
+- k *= m;
+- k ^= k >> r;
+- k *= m;
+-
+- h *= m;
+- h ^= k;
+-
+- key += 4;
+- len -= 4;
+- }
+-
+- return h;
+-}
+-
+-struct VertexHasher
+-{
+- const unsigned char* vertices;
+- size_t vertex_size;
+- size_t vertex_stride;
+-
+- size_t hash(unsigned int index) const
+- {
+- return hashUpdate4(0, vertices + index * vertex_stride, vertex_size);
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0;
+- }
+-};
+-
+-struct VertexStreamHasher
+-{
+- const meshopt_Stream* streams;
+- size_t stream_count;
+-
+- size_t hash(unsigned int index) const
+- {
+- unsigned int h = 0;
+-
+- for (size_t i = 0; i < stream_count; ++i)
+- {
+- const meshopt_Stream& s = streams[i];
+- const unsigned char* data = static_cast<const unsigned char*>(s.data);
+-
+- h = hashUpdate4(h, data + index * s.stride, s.size);
+- }
+-
+- return h;
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- for (size_t i = 0; i < stream_count; ++i)
+- {
+- const meshopt_Stream& s = streams[i];
+- const unsigned char* data = static_cast<const unsigned char*>(s.data);
+-
+- if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0)
+- return false;
+- }
+-
+- return true;
+- }
+-};
+-
+-struct EdgeHasher
+-{
+- const unsigned int* remap;
+-
+- size_t hash(unsigned long long edge) const
+- {
+- unsigned int e0 = unsigned(edge >> 32);
+- unsigned int e1 = unsigned(edge);
+-
+- unsigned int h1 = remap[e0];
+- unsigned int h2 = remap[e1];
+-
+- const unsigned int m = 0x5bd1e995;
+-
+- // MurmurHash64B finalizer
+- h1 ^= h2 >> 18;
+- h1 *= m;
+- h2 ^= h1 >> 22;
+- h2 *= m;
+- h1 ^= h2 >> 17;
+- h1 *= m;
+- h2 ^= h1 >> 19;
+- h2 *= m;
+-
+- return h2;
+- }
+-
+- bool equal(unsigned long long lhs, unsigned long long rhs) const
+- {
+- unsigned int l0 = unsigned(lhs >> 32);
+- unsigned int l1 = unsigned(lhs);
+-
+- unsigned int r0 = unsigned(rhs >> 32);
+- unsigned int r1 = unsigned(rhs);
+-
+- return remap[l0] == remap[r0] && remap[l1] == remap[r1];
+- }
+-};
+-
+-static size_t hashBuckets(size_t count)
+-{
+- size_t buckets = 1;
+- while (buckets < count + count / 4)
+- buckets *= 2;
+-
+- return buckets;
+-}
+-
+-template <typename T, typename Hash>
+-static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
+-{
+- assert(buckets > 0);
+- assert((buckets & (buckets - 1)) == 0);
+-
+- size_t hashmod = buckets - 1;
+- size_t bucket = hash.hash(key) & hashmod;
+-
+- for (size_t probe = 0; probe <= hashmod; ++probe)
+- {
+- T& item = table[bucket];
+-
+- if (item == empty)
+- return &item;
+-
+- if (hash.equal(item, key))
+- return &item;
+-
+- // hash collision, quadratic probing
+- bucket = (bucket + probe + 1) & hashmod;
+- }
+-
+- assert(false && "Hash table is full"); // unreachable
+- return 0;
+-}
+-
+-static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
+-{
+- VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
+-
+- size_t vertex_table_size = hashBuckets(vertex_count);
+- unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
+- memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int index = unsigned(i);
+- unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- *entry = index;
+-
+- remap[index] = *entry;
+- }
+-}
+-
+-} // namespace meshopt
+-
+-size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+-{
+- using namespace meshopt;
+-
+- assert(indices || index_count == vertex_count);
+- assert(!indices || index_count % 3 == 0);
+- assert(vertex_size > 0 && vertex_size <= 256);
+-
+- meshopt_Allocator allocator;
+-
+- memset(destination, -1, vertex_count * sizeof(unsigned int));
+-
+- VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size};
+-
+- size_t table_size = hashBuckets(vertex_count);
+- unsigned int* table = allocator.allocate<unsigned int>(table_size);
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- unsigned int next_vertex = 0;
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices ? indices[i] : unsigned(i);
+- assert(index < vertex_count);
+-
+- if (destination[index] == ~0u)
+- {
+- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- {
+- *entry = index;
+-
+- destination[index] = next_vertex++;
+- }
+- else
+- {
+- assert(destination[*entry] != ~0u);
+-
+- destination[index] = destination[*entry];
+- }
+- }
+- }
+-
+- assert(next_vertex <= vertex_count);
+-
+- return next_vertex;
+-}
+-
+-size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
+-{
+- using namespace meshopt;
+-
+- assert(indices || index_count == vertex_count);
+- assert(index_count % 3 == 0);
+- assert(stream_count > 0 && stream_count <= 16);
+-
+- for (size_t i = 0; i < stream_count; ++i)
+- {
+- assert(streams[i].size > 0 && streams[i].size <= 256);
+- assert(streams[i].size <= streams[i].stride);
+- }
+-
+- meshopt_Allocator allocator;
+-
+- memset(destination, -1, vertex_count * sizeof(unsigned int));
+-
+- VertexStreamHasher hasher = {streams, stream_count};
+-
+- size_t table_size = hashBuckets(vertex_count);
+- unsigned int* table = allocator.allocate<unsigned int>(table_size);
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- unsigned int next_vertex = 0;
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices ? indices[i] : unsigned(i);
+- assert(index < vertex_count);
+-
+- if (destination[index] == ~0u)
+- {
+- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- {
+- *entry = index;
+-
+- destination[index] = next_vertex++;
+- }
+- else
+- {
+- assert(destination[*entry] != ~0u);
+-
+- destination[index] = destination[*entry];
+- }
+- }
+- }
+-
+- assert(next_vertex <= vertex_count);
+-
+- return next_vertex;
+-}
+-
+-void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap)
+-{
+- assert(vertex_size > 0 && vertex_size <= 256);
+-
+- meshopt_Allocator allocator;
+-
+- // support in-place remap
+- if (destination == vertices)
+- {
+- unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size);
+- memcpy(vertices_copy, vertices, vertex_count * vertex_size);
+- vertices = vertices_copy;
+- }
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- if (remap[i] != ~0u)
+- {
+- assert(remap[i] < vertex_count);
+-
+- memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size);
+- }
+- }
+-}
+-
+-void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap)
+-{
+- assert(index_count % 3 == 0);
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices ? indices[i] : unsigned(i);
+- assert(remap[index] != ~0u);
+-
+- destination[i] = remap[index];
+- }
+-}
+-
+-void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
+-{
+- using namespace meshopt;
+-
+- assert(indices);
+- assert(index_count % 3 == 0);
+- assert(vertex_size > 0 && vertex_size <= 256);
+- assert(vertex_size <= vertex_stride);
+-
+- meshopt_Allocator allocator;
+-
+- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+- memset(remap, -1, vertex_count * sizeof(unsigned int));
+-
+- VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride};
+-
+- size_t table_size = hashBuckets(vertex_count);
+- unsigned int* table = allocator.allocate<unsigned int>(table_size);
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices[i];
+- assert(index < vertex_count);
+-
+- if (remap[index] == ~0u)
+- {
+- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- *entry = index;
+-
+- remap[index] = *entry;
+- }
+-
+- destination[i] = remap[index];
+- }
+-}
+-
+-void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count)
+-{
+- using namespace meshopt;
+-
+- assert(indices);
+- assert(index_count % 3 == 0);
+- assert(stream_count > 0 && stream_count <= 16);
+-
+- for (size_t i = 0; i < stream_count; ++i)
+- {
+- assert(streams[i].size > 0 && streams[i].size <= 256);
+- assert(streams[i].size <= streams[i].stride);
+- }
+-
+- meshopt_Allocator allocator;
+-
+- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+- memset(remap, -1, vertex_count * sizeof(unsigned int));
+-
+- VertexStreamHasher hasher = {streams, stream_count};
+-
+- size_t table_size = hashBuckets(vertex_count);
+- unsigned int* table = allocator.allocate<unsigned int>(table_size);
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices[i];
+- assert(index < vertex_count);
+-
+- if (remap[index] == ~0u)
+- {
+- unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- *entry = index;
+-
+- remap[index] = *entry;
+- }
+-
+- destination[i] = remap[index];
+- }
+-}
+-
+-void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- static const int next[4] = {1, 2, 0, 1};
+-
+- // build position remap: for each vertex, which other (canonical) vertex does it map to?
+- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+- buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+-
+- // build edge set; this stores all triangle edges but we can look these up by any other wedge
+- EdgeHasher edge_hasher = {remap};
+-
+- size_t edge_table_size = hashBuckets(index_count);
+- unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+- unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);
+-
+- memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+- memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+- unsigned int i2 = indices[i + next[e + 1]];
+- assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
+-
+- unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+- unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+-
+- if (*entry == ~0ull)
+- {
+- *entry = edge;
+-
+- // store vertex opposite to the edge
+- edge_vertex_table[entry - edge_table] = i2;
+- }
+- }
+- }
+-
+- // build resulting index buffer: 6 indices for each input triangle
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int patch[6];
+-
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+- assert(i0 < vertex_count && i1 < vertex_count);
+-
+- // note: this refers to the opposite edge!
+- unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+- unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+-
+- patch[e * 2 + 0] = i0;
+- patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
+- }
+-
+- memcpy(destination + i * 2, patch, sizeof(patch));
+- }
+-}
+-
+-void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- static const int next[3] = {1, 2, 0};
+-
+- // build position remap: for each vertex, which other (canonical) vertex does it map to?
+- unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+- buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+-
+- // build edge set; this stores all triangle edges but we can look these up by any other wedge
+- EdgeHasher edge_hasher = {remap};
+-
+- size_t edge_table_size = hashBuckets(index_count);
+- unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+- memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+- assert(i0 < vertex_count && i1 < vertex_count);
+-
+- unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+- unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+-
+- if (*entry == ~0ull)
+- *entry = edge;
+- }
+- }
+-
+- // build resulting index buffer: 12 indices for each input triangle
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int patch[12];
+-
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+- assert(i0 < vertex_count && i1 < vertex_count);
+-
+- // note: this refers to the opposite edge!
+- unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+- unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+-
+- // use the same edge if opposite edge doesn't exist (border)
+- oppe = (oppe == ~0ull) ? edge : oppe;
+-
+- // triangle index (0, 1, 2)
+- patch[e] = i0;
+-
+- // opposite edge (3, 4; 5, 6; 7, 8)
+- patch[3 + e * 2 + 0] = unsigned(oppe);
+- patch[3 + e * 2 + 1] = unsigned(oppe >> 32);
+-
+- // dominant vertex (9, 10, 11)
+- patch[9 + e] = remap[i0];
+- }
+-
+- memcpy(destination + i * 4, patch, sizeof(patch));
+- }
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/meshoptimizer.h b/src/3rdparty/meshoptimizer/src/meshoptimizer.h
+deleted file mode 100644
+index f94dbaf..0000000
+--- a/src/3rdparty/meshoptimizer/src/meshoptimizer.h
++++ /dev/null
+@@ -1,1069 +0,0 @@
+-/**
+- * meshoptimizer - version 0.18
+- *
+- * Copyright (C) 2016-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+- * Report bugs and download new versions at https://github.com/zeux/meshoptimizer
+- *
+- * This library is distributed under the MIT License. See notice at the end of this file.
+- */
+-#pragma once
+-
+-#include <assert.h>
+-#include <stddef.h>
+-
+-/* Version macro; major * 1000 + minor * 10 + patch */
+-#define MESHOPTIMIZER_VERSION 180 /* 0.18 */
+-
+-/* If no API is defined, assume default */
+-#ifndef MESHOPTIMIZER_API
+-#define MESHOPTIMIZER_API
+-#endif
+-
+-/* Set the calling-convention for alloc/dealloc function pointers */
+-#ifndef MESHOPTIMIZER_ALLOC_CALLCONV
+-#ifdef _MSC_VER
+-#define MESHOPTIMIZER_ALLOC_CALLCONV __cdecl
+-#else
+-#define MESHOPTIMIZER_ALLOC_CALLCONV
+-#endif
+-#endif
+-
+-/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */
+-#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API
+-
+-/* C interface */
+-#ifdef __cplusplus
+-extern "C" {
+-#endif
+-
+-/**
+- * Vertex attribute stream, similar to glVertexPointer
+- * Each element takes size bytes, with stride controlling the spacing between successive elements.
+- */
+-struct meshopt_Stream
+-{
+- const void* data;
+- size_t size;
+- size_t stride;
+-};
+-
+-/**
+- * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
+- * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
+- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+- * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
+- *
+- * destination must contain enough space for the resulting remap table (vertex_count elements)
+- * indices can be NULL if the input is unindexed
+- */
+-MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+-
+-/**
+- * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices
+- * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence.
+- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
+- * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream.
+- * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
+- *
+- * destination must contain enough space for the resulting remap table (vertex_count elements)
+- * indices can be NULL if the input is unindexed
+- */
+-MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
+-
+-/**
+- * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap
+- *
+- * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap)
+- * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap
+- */
+-MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap);
+-
+-/**
+- * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- * indices can be NULL if the input is unindexed
+- */
+-MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap);
+-
+-/**
+- * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
+- * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer.
+- * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+- * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- */
+-MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
+-
+-/**
+- * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary
+- * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer.
+- * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering.
+- * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- */
+-MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
+-
+-/**
+- * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
+- * Each triangle is converted into a 6-vertex patch with the following layout:
+- * - 0, 2, 4: original triangle vertices
+- * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40
+- * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY.
+- * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count*2 elements)
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- */
+-MESHOPTIMIZER_API void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-/**
+- * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement
+- * Each triangle is converted into a 12-vertex patch with the following layout:
+- * - 0, 1, 2: original triangle vertices
+- * - 3, 4: opposing edge for edge 0, 1
+- * - 5, 6: opposing edge for edge 1, 2
+- * - 7, 8: opposing edge for edge 2, 0
+- * - 9, 10, 11: dominant vertices for corners 0, 1, 2
+- * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping.
+- * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count*4 elements)
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- */
+-MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-/**
+- * Vertex transform cache optimizer
+- * Reorders indices to reduce the number of GPU vertex shader invocations
+- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- */
+-MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+-
+-/**
+- * Vertex transform cache optimizer for strip-like caches
+- * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective
+- * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- */
+-MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+-
+-/**
+- * Vertex transform cache optimizer for FIFO caches
+- * Reorders indices to reduce the number of GPU vertex shader invocations
+- * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache
+- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- * cache_size should be less than the actual GPU cache size to avoid cache thrashing
+- */
+-MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
+-
+-/**
+- * Overdraw optimizer
+- * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw
+- * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!)
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently
+- */
+-MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
+-
+-/**
+- * Vertex fetch cache optimizer
+- * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing
+- * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
+- * This functions works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream.
+- *
+- * destination must contain enough space for the resulting vertex buffer (vertex_count elements)
+- * indices is used both as an input and as an output index buffer
+- */
+-MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+-
+-/**
+- * Vertex fetch cache optimizer
+- * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing
+- * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused
+- * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer
+- *
+- * destination must contain enough space for the resulting remap table (vertex_count elements)
+- */
+-MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count);
+-
+-/**
+- * Index buffer encoder
+- * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original.
+- * Input index buffer must represent a triangle list.
+- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+- * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first.
+- *
+- * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size)
+- */
+-MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
+-MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count);
+-
+-/**
+- * Set index encoder format version
+- * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+)
+- */
+-MESHOPTIMIZER_API void meshopt_encodeIndexVersion(int version);
+-
+-/**
+- * Index buffer decoder
+- * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer
+- * Returns 0 if decoding was successful, and an error code otherwise
+- * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- */
+-MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
+-
+-/**
+- * Index sequence encoder
+- * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original.
+- * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better.
+- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+- *
+- * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size)
+- */
+-MESHOPTIMIZER_API size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count);
+-MESHOPTIMIZER_API size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count);
+-
+-/**
+- * Index sequence decoder
+- * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence
+- * Returns 0 if decoding was successful, and an error code otherwise
+- * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices).
+- *
+- * destination must contain enough space for the resulting index sequence (index_count elements)
+- */
+-MESHOPTIMIZER_API int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size);
+-
+-/**
+- * Vertex buffer encoder
+- * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original.
+- * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space
+- * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream.
+- * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized.
+- *
+- * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size)
+- */
+-MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size);
+-MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size);
+-
+-/**
+- * Set vertex encoder format version
+- * version must specify the data format version to encode; valid values are 0 (decodable by all library versions)
+- */
+-MESHOPTIMIZER_API void meshopt_encodeVertexVersion(int version);
+-
+-/**
+- * Vertex buffer decoder
+- * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer
+- * Returns 0 if decoding was successful, and an error code otherwise
+- * The decoder is safe to use for untrusted input, but it may produce garbage data.
+- *
+- * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes)
+- */
+-MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size);
+-
+-/**
+- * Vertex buffer filters
+- * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place.
+- *
+- * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f.
+- * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
+- *
+- * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct.
+- * Each component is stored as a 16-bit integer; stride must be equal to 8.
+- *
+- * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
+- * Each 32-bit component is decoded in isolation; stride must be divisible by 4.
+- */
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride);
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride);
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride);
+-
+-/**
+- * Vertex buffer filter encoders
+- * These functions can be used to encode data in a format that meshopt_decodeFilter can decode
+- *
+- * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output.
+- * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
+- * Input data must contain 4 floats for every vector (count*4 total).
+- *
+- * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding.
+- * Each component is stored as a 16-bit integer; stride must be equal to 8.
+- * Input data must contain 4 floats for every quaternion (count*4 total).
+- *
+- * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24).
+- * Mantissa is shared between all components of a given vector as defined by stride; stride must be divisible by 4.
+- * Input data must contain stride/4 floats for every vector (count*stride/4 total).
+- * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly.
+- */
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data);
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data);
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data);
+-
+-/**
+- * Simplification options
+- */
+-enum
+-{
+- /* Do not move vertices that are located on the topological border (vertices on triangle edges that don't have a paired triangle). Useful for simplifying portions of the larger mesh. */
+- meshopt_SimplifyLockBorder = 1 << 0,
+-};
+-
+-/**
+- * Mesh simplifier
+- * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
+- * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
+- * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification.
+- * Returns the number of indices after simplification, with destination containing new index data
+- * The resulting index buffer references vertices from the original vertex buffer.
+- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+- *
+- * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation
+- * options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default
+- * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
+- */
+-MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error);
+-
+-/**
+- * Experimental: Mesh simplifier (sloppy)
+- * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
+- * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error.
+- * Returns the number of indices after simplification, with destination containing new index data
+- * The resulting index buffer references vertices from the original vertex buffer.
+- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+- *
+- * destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- * target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation
+- * result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
+- */
+-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
+-
+-/**
+- * Experimental: Point cloud simplifier
+- * Reduces the number of points in the cloud to reach the given target
+- * Returns the number of points after simplification, with destination containing new index data
+- * The resulting index buffer references vertices from the original vertex buffer.
+- * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
+- *
+- * destination must contain enough space for the target index buffer (target_vertex_count elements)
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- */
+-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count);
+-
+-/**
+- * Returns the error scaling factor used by the simplifier to convert between absolute and relative extents
+- *
+- * Absolute error must be *divided* by the scaling factor before passing it to meshopt_simplify as target_error
+- * Relative error returned by meshopt_simplify via result_error must be *multiplied* by the scaling factor to get absolute error.
+- */
+-MESHOPTIMIZER_API float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-/**
+- * Mesh stripifier
+- * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles
+- * Returns the number of indices in the resulting strip, with destination containing new index data
+- * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+- * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance.
+- *
+- * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound
+- * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles
+- */
+-MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index);
+-MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count);
+-
+-/**
+- * Mesh unstripifier
+- * Converts a triangle strip to a triangle list
+- * Returns the number of indices in the resulting list, with destination containing new index data
+- *
+- * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound
+- */
+-MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index);
+-MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count);
+-
+-struct meshopt_VertexCacheStatistics
+-{
+- unsigned int vertices_transformed;
+- unsigned int warps_executed;
+- float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */
+- float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */
+-};
+-
+-/**
+- * Vertex transform cache analyzer
+- * Returns cache hit statistics using a simplified FIFO model
+- * Results may not match actual GPU performance
+- */
+-MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size);
+-
+-struct meshopt_OverdrawStatistics
+-{
+- unsigned int pixels_covered;
+- unsigned int pixels_shaded;
+- float overdraw; /* shaded pixels / covered pixels; best case 1.0 */
+-};
+-
+-/**
+- * Overdraw analyzer
+- * Returns overdraw statistics using a software rasterizer
+- * Results may not match actual GPU performance
+- *
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- */
+-MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-struct meshopt_VertexFetchStatistics
+-{
+- unsigned int bytes_fetched;
+- float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */
+-};
+-
+-/**
+- * Vertex fetch cache analyzer
+- * Returns cache hit statistics using a simplified direct mapped model
+- * Results may not match actual GPU performance
+- */
+-MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+-
+-struct meshopt_Meshlet
+-{
+- /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
+- unsigned int vertex_offset;
+- unsigned int triangle_offset;
+-
+- /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */
+- unsigned int vertex_count;
+- unsigned int triangle_count;
+-};
+-
+-/**
+- * Meshlet builder
+- * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
+- * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
+- * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
+- * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+- *
+- * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
+- * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
+- * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512)
+- * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency
+- */
+-MESHOPTIMIZER_API size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+-MESHOPTIMIZER_API size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+-MESHOPTIMIZER_API size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
+-
+-struct meshopt_Bounds
+-{
+- /* bounding sphere, useful for frustum and occlusion culling */
+- float center[3];
+- float radius;
+-
+- /* normal cone, useful for backface culling */
+- float cone_apex[3];
+- float cone_axis[3];
+- float cone_cutoff; /* = cos(angle/2) */
+-
+- /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */
+- signed char cone_axis_s8[3];
+- signed char cone_cutoff_s8;
+-};
+-
+-/**
+- * Cluster bounds generator
+- * Creates bounding volumes that can be used for frustum, backface and occlusion culling.
+- *
+- * For backface culling with orthographic projection, use the following formula to reject backfacing clusters:
+- * dot(view, cone_axis) >= cone_cutoff
+- *
+- * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff:
+- * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff
+- *
+- * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead:
+- * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position)
+- * or an equivalent formula that doesn't have a singularity at center = camera_position:
+- * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius
+- *
+- * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere
+- * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
+- *
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
+- */
+-MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-/**
+- * Experimental: Spatial sorter
+- * Generates a remap table that can be used to reorder points for spatial locality.
+- * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer.
+- *
+- * destination must contain enough space for the resulting remap table (vertex_count elements)
+- */
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
+-/**
+- * Experimental: Spatial sorter
+- * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
+- *
+- * destination must contain enough space for the resulting index buffer (index_count elements)
+- * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+- */
+-MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-
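A short sketch of how the spatial sorter above is typically combined with the vertex cache optimizer, as its comment suggests (illustrative only; `indices` and `positions` are assumed caller-provided):

#include <vector>
#include "meshoptimizer.h"

// Reorder triangles for spatial locality, then restore vertex cache efficiency.
static void reorderTrianglesSpatially(std::vector<unsigned int>& indices, const float* positions, size_t vertex_count)
{
    std::vector<unsigned int> sorted(indices.size());
    meshopt_spatialSortTriangles(sorted.data(), indices.data(), indices.size(), positions, vertex_count, sizeof(float) * 3);
    meshopt_optimizeVertexCache(indices.data(), sorted.data(), sorted.size(), vertex_count);
}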
+-/**
+- * Set allocation callbacks
+- * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library.
+- * Note that all algorithms only allocate memory for temporary use.
+- * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first.
+- */
+-MESHOPTIMIZER_API void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*));
+-
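For illustration only (not part of the vendored header), routing the allocation callbacks described above through malloc/free, e.g. to plug in a tracking allocator:

#include <stdlib.h>
#include "meshoptimizer.h"

static void* countingAlloc(size_t size)
{
    // a real hook could record `size` here; allocations are stack-ordered per the note above
    return malloc(size);
}

static void countingFree(void* ptr)
{
    free(ptr);
}

static void installMeshoptAllocator()
{
    meshopt_setAllocator(countingAlloc, countingFree);
}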
+-#ifdef __cplusplus
+-} /* extern "C" */
+-#endif
+-
+-/* Quantization into commonly supported data formats */
+-#ifdef __cplusplus
+-/**
+- * Quantize a float in [0..1] range into an N-bit fixed point unorm value
+- * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion
+- * Maximum reconstruction error: 1/2^(N+1)
+- */
+-inline int meshopt_quantizeUnorm(float v, int N);
+-
+-/**
+- * Quantize a float in [-1..1] range into an N-bit fixed point snorm value
+- * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions)
+- * Maximum reconstruction error: 1/2^N
+- */
+-inline int meshopt_quantizeSnorm(float v, int N);
+-
+-/**
+- * Quantize a float into half-precision floating point value
+- * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
+- * Representable magnitude range: [6e-5; 65504]
+- * Maximum relative reconstruction error: 5e-4
+- */
+-inline unsigned short meshopt_quantizeHalf(float v);
+-
+-/**
+- * Quantize a float into a floating point value with a limited number of significant mantissa bits
+- * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
+- * Assumes N is in a valid mantissa precision range, which is 1..23
+- */
+-inline float meshopt_quantizeFloat(float v, int N);
+-#endif
+-
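A compact sketch of how the quantization helpers above are commonly used to build GPU-friendly vertex data (illustrative only; the PackedVertex layout is an assumption, not something the library prescribes):

#include "meshoptimizer.h"

struct PackedVertex
{
    unsigned short px, py, pz, pw; // half-float position (pw is padding)
    unsigned char nx, ny, nz, nw;  // 8-bit unorm normal
};

static PackedVertex packVertex(const float position[3], const float normal[3])
{
    PackedVertex v;
    v.px = meshopt_quantizeHalf(position[0]);
    v.py = meshopt_quantizeHalf(position[1]);
    v.pz = meshopt_quantizeHalf(position[2]);
    v.pw = 0;

    // remap normal components from [-1..1] to [0..1] before unorm quantization
    v.nx = (unsigned char)meshopt_quantizeUnorm(normal[0] * 0.5f + 0.5f, 8);
    v.ny = (unsigned char)meshopt_quantizeUnorm(normal[1] * 0.5f + 0.5f, 8);
    v.nz = (unsigned char)meshopt_quantizeUnorm(normal[2] * 0.5f + 0.5f, 8);
    v.nw = 0;
    return v;
}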
+-/**
+- * C++ template interface
+- *
+- * These functions mirror the C interface the library provides, providing template-based overloads so that
+- * the caller can use an arbitrary type for the index data, both for input and output.
+- * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not,
+- * the wrappers end up allocating memory and copying index data to convert from one type to another.
+- */
+-#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
+-template <typename T>
+-inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+-template <typename T>
+-inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
+-template <typename T>
+-inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap);
+-template <typename T>
+-inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride);
+-template <typename T>
+-inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
+-template <typename T>
+-inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-template <typename T>
+-inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-template <typename T>
+-inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
+-template <typename T>
+-inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
+-template <typename T>
+-inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size);
+-template <typename T>
+-inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold);
+-template <typename T>
+-inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count);
+-template <typename T>
+-inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size);
+-template <typename T>
+-inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
+-template <typename T>
+-inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
+-template <typename T>
+-inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count);
+-template <typename T>
+-inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size);
+-template <typename T>
+-inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = 0);
+-template <typename T>
+-inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = 0);
+-template <typename T>
+-inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index);
+-template <typename T>
+-inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index);
+-template <typename T>
+-inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size);
+-template <typename T>
+-inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-template <typename T>
+-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
+-template <typename T>
+-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+-template <typename T>
+-inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+-template <typename T>
+-inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-template <typename T>
+-inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+-#endif
+-
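As an illustration of the wrapper behaviour described above (not part of the vendored header): a 16-bit index buffer can be passed directly, and the adapter transparently converts to and from unsigned int because sizeof(unsigned short) != sizeof(unsigned int):

#include <vector>
#include "meshoptimizer.h"

static void optimize16BitIndexBuffer(std::vector<unsigned short>& indices, size_t vertex_count)
{
    // in-place is fine: the input is copied into a temporary unsigned int buffer first
    meshopt_optimizeVertexCache(indices.data(), indices.data(), indices.size(), vertex_count);
}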
+-/* Inline implementation */
+-#ifdef __cplusplus
+-inline int meshopt_quantizeUnorm(float v, int N)
+-{
+- const float scale = float((1 << N) - 1);
+-
+- v = (v >= 0) ? v : 0;
+- v = (v <= 1) ? v : 1;
+-
+- return int(v * scale + 0.5f);
+-}
+-
+-inline int meshopt_quantizeSnorm(float v, int N)
+-{
+- const float scale = float((1 << (N - 1)) - 1);
+-
+- float round = (v >= 0 ? 0.5f : -0.5f);
+-
+- v = (v >= -1) ? v : -1;
+- v = (v <= +1) ? v : +1;
+-
+- return int(v * scale + round);
+-}
+-
+-inline unsigned short meshopt_quantizeHalf(float v)
+-{
+- union { float f; unsigned int ui; } u = {v};
+- unsigned int ui = u.ui;
+-
+- int s = (ui >> 16) & 0x8000;
+- int em = ui & 0x7fffffff;
+-
+- /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */
+- int h = (em - (112 << 23) + (1 << 12)) >> 13;
+-
+- /* underflow: flush to zero; 113 encodes exponent -14 */
+- h = (em < (113 << 23)) ? 0 : h;
+-
+- /* overflow: infinity; 143 encodes exponent 16 */
+- h = (em >= (143 << 23)) ? 0x7c00 : h;
+-
+- /* NaN; note that we convert all types of NaN to qNaN */
+- h = (em > (255 << 23)) ? 0x7e00 : h;
+-
+- return (unsigned short)(s | h);
+-}
+-
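A few worked values for the conversion above (easy to verify by hand; illustrative only, assuming meshoptimizer.h is included):

#include <assert.h>
#include "meshoptimizer.h"

static void quantizeHalfExamples()
{
    assert(meshopt_quantizeHalf(1.0f) == 0x3C00);     // exponent 127 - 112 = 15, zero mantissa
    assert(meshopt_quantizeHalf(65536.0f) == 0x7C00); // above 65504, clamps to +infinity
    assert(meshopt_quantizeHalf(-0.0f) == 0x8000);    // sign preserved, magnitude flushed to zero
}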
+-inline float meshopt_quantizeFloat(float v, int N)
+-{
+- union { float f; unsigned int ui; } u = {v};
+- unsigned int ui = u.ui;
+-
+- const int mask = (1 << (23 - N)) - 1;
+- const int round = (1 << (23 - N)) >> 1;
+-
+- int e = ui & 0x7f800000;
+- unsigned int rui = (ui + round) & ~mask;
+-
+- /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */
+- ui = e == 0x7f800000 ? ui : rui;
+-
+- /* flush denormals to zero */
+- ui = e == 0 ? 0 : ui;
+-
+- u.ui = ui;
+- return u.f;
+-}
+-#endif
+-
+-/* Internal implementation helpers */
+-#ifdef __cplusplus
+-class meshopt_Allocator
+-{
+-public:
+- template <typename T>
+- struct StorageT
+- {
+- static void* (MESHOPTIMIZER_ALLOC_CALLCONV *allocate)(size_t);
+- static void (MESHOPTIMIZER_ALLOC_CALLCONV *deallocate)(void*);
+- };
+-
+- typedef StorageT<void> Storage;
+-
+- meshopt_Allocator()
+- : blocks()
+- , count(0)
+- {
+- }
+-
+- ~meshopt_Allocator()
+- {
+- for (size_t i = count; i > 0; --i)
+- Storage::deallocate(blocks[i - 1]);
+- }
+-
+- template <typename T> T* allocate(size_t size)
+- {
+- assert(count < sizeof(blocks) / sizeof(blocks[0]));
+- T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
+- blocks[count++] = result;
+- return result;
+- }
+-
+-private:
+- void* blocks[24];
+- size_t count;
+-};
+-
+-// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
+-template <typename T> void* (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
+-template <typename T> void (MESHOPTIMIZER_ALLOC_CALLCONV *meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
+-#endif
+-
+-/* Inline implementation for C++ templated wrappers */
+-#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS)
+-template <typename T, bool ZeroCopy = sizeof(T) == sizeof(unsigned int)>
+-struct meshopt_IndexAdapter;
+-
+-template <typename T>
+-struct meshopt_IndexAdapter<T, false>
+-{
+- T* result;
+- unsigned int* data;
+- size_t count;
+-
+- meshopt_IndexAdapter(T* result_, const T* input, size_t count_)
+- : result(result_)
+- , data(0)
+- , count(count_)
+- {
+- size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int);
+-
+- data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size));
+-
+- if (input)
+- {
+- for (size_t i = 0; i < count; ++i)
+- data[i] = input[i];
+- }
+- }
+-
+- ~meshopt_IndexAdapter()
+- {
+- if (result)
+- {
+- for (size_t i = 0; i < count; ++i)
+- result[i] = T(data[i]);
+- }
+-
+- meshopt_Allocator::Storage::deallocate(data);
+- }
+-};
+-
+-template <typename T>
+-struct meshopt_IndexAdapter<T, true>
+-{
+- unsigned int* data;
+-
+- meshopt_IndexAdapter(T* result, const T* input, size_t)
+- : data(reinterpret_cast<unsigned int*>(result ? result : const_cast<T*>(input)))
+- {
+- }
+-};
+-
+-template <typename T>
+-inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+-
+- return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+-
+- return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count);
+-}
+-
+-template <typename T>
+-inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap);
+-}
+-
+-template <typename T>
+-inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride);
+-}
+-
+-template <typename T>
+-inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count);
+-}
+-
+-template <typename T>
+-inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count * 2);
+-
+- meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+-
+-template <typename T>
+-inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count * 4);
+-
+- meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+-
+-template <typename T>
+-inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count);
+-}
+-
+-template <typename T>
+-inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count);
+-}
+-
+-template <typename T>
+-inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size);
+-}
+-
+-template <typename T>
+-inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
+-{
+- meshopt_IndexAdapter<T> inout(indices, indices, index_count);
+-
+- return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count);
+-}
+-
+-template <typename T>
+-inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
+-{
+- char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
+- (void)index_size_valid;
+-
+- return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count);
+-}
+-
+-template <typename T>
+-inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size)
+-{
+- char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1];
+- (void)index_size_valid;
+-
+- return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* result_error)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, options, result_error);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5);
+-
+- return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index));
+-}
+-
+-template <typename T>
+-inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, (index_count - 2) * 3);
+-
+- return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index));
+-}
+-
+-template <typename T>
+-inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size);
+-}
+-
+-template <typename T>
+-inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+-
+-template <typename T>
+-inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight);
+-}
+-
+-template <typename T>
+-inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles);
+-}
+-
+-template <typename T>
+-inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+-
+- return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+-
+-template <typename T>
+-inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- meshopt_IndexAdapter<T> in(0, indices, index_count);
+- meshopt_IndexAdapter<T> out(destination, 0, index_count);
+-
+- meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+-}
+-#endif
+-
+-/**
+- * Copyright (c) 2016-2022 Arseny Kapoulkine
+- *
+- * Permission is hereby granted, free of charge, to any person
+- * obtaining a copy of this software and associated documentation
+- * files (the "Software"), to deal in the Software without
+- * restriction, including without limitation the rights to use,
+- * copy, modify, merge, publish, distribute, sublicense, and/or sell
+- * copies of the Software, and to permit persons to whom the
+- * Software is furnished to do so, subject to the following
+- * conditions:
+- *
+- * The above copyright notice and this permission notice shall be
+- * included in all copies or substantial portions of the Software.
+- *
+- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+- * OTHER DEALINGS IN THE SOFTWARE.
+- */
+diff --git a/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp b/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp
+deleted file mode 100644
+index 8d5859b..0000000
+--- a/src/3rdparty/meshoptimizer/src/overdrawanalyzer.cpp
++++ /dev/null
+@@ -1,230 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <float.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Nicolas Capens. Advanced Rasterization. 2004
+-namespace meshopt
+-{
+-
+-const int kViewport = 256;
+-
+-struct OverdrawBuffer
+-{
+- float z[kViewport][kViewport][2];
+- unsigned int overdraw[kViewport][kViewport][2];
+-};
+-
+-#ifndef min
+-#define min(a, b) ((a) < (b) ? (a) : (b))
+-#endif
+-
+-#ifndef max
+-#define max(a, b) ((a) > (b) ? (a) : (b))
+-#endif
+-
+-static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
+-{
+- // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
+- // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
+- // (x2-x1 y2-y1)(dzdx) = (z2-z1)
+- // (x3-x1 y3-y1)(dzdy) (z3-z1)
+- // we'll solve it with Cramer's rule
+- float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
+- float invdet = (det == 0) ? 0 : 1 / det;
+-
+- dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
+- dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;
+-
+- return det;
+-}
+-
+-// half-space fixed point triangle rasterizer
+-static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
+-{
+- // compute depth gradients
+- float DZx, DZy;
+- float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
+- int sign = det > 0;
+-
+- // flip backfacing triangles to simplify rasterization logic
+- if (sign)
+- {
+- // flipping v2 & v3 preserves depth gradients since they're based on v1
+- float t;
+- t = v2x, v2x = v3x, v3x = t;
+- t = v2y, v2y = v3y, v3y = t;
+- t = v2z, v2z = v3z, v3z = t;
+-
+- // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
+- v1z = kViewport - v1z;
+- DZx = -DZx;
+- DZy = -DZy;
+- }
+-
+- // coordinates, 28.4 fixed point
+- int X1 = int(16.0f * v1x + 0.5f);
+- int X2 = int(16.0f * v2x + 0.5f);
+- int X3 = int(16.0f * v3x + 0.5f);
+-
+- int Y1 = int(16.0f * v1y + 0.5f);
+- int Y2 = int(16.0f * v2y + 0.5f);
+- int Y3 = int(16.0f * v3y + 0.5f);
+-
+- // bounding rectangle, clipped against viewport
+- // since we rasterize pixels with covered centers, min >0.5 should round up
+- // as for max, due to top-left filling convention we will never rasterize right/bottom edges
+- // so max >= 0.5 should round down
+- int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
+- int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
+- int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
+- int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);
+-
+- // deltas, 28.4 fixed point
+- int DX12 = X1 - X2;
+- int DX23 = X2 - X3;
+- int DX31 = X3 - X1;
+-
+- int DY12 = Y1 - Y2;
+- int DY23 = Y2 - Y3;
+- int DY31 = Y3 - Y1;
+-
+- // fill convention correction
+- int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
+- int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
+- int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);
+-
+- // half edge equations, 24.8 fixed point
+- // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
+- int FX = (minx << 4) + 8;
+- int FY = (miny << 4) + 8;
+- int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
+- int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
+- int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
+- float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);
+-
+- for (int y = miny; y < maxy; y++)
+- {
+- int CX1 = CY1;
+- int CX2 = CY2;
+- int CX3 = CY3;
+- float ZX = ZY;
+-
+- for (int x = minx; x < maxx; x++)
+- {
+- // check if all CXn are non-negative
+- if ((CX1 | CX2 | CX3) >= 0)
+- {
+- if (ZX >= buffer->z[y][x][sign])
+- {
+- buffer->z[y][x][sign] = ZX;
+- buffer->overdraw[y][x][sign]++;
+- }
+- }
+-
+- // signed left shift is UB for negative numbers so use unsigned-signed casts
+- CX1 -= int(unsigned(DY12) << 4);
+- CX2 -= int(unsigned(DY23) << 4);
+- CX3 -= int(unsigned(DY31) << 4);
+- ZX += DZx;
+- }
+-
+- // signed left shift is UB for negative numbers so use unsigned-signed casts
+- CY1 += int(unsigned(DX12) << 4);
+- CY2 += int(unsigned(DX23) << 4);
+- CY3 += int(unsigned(DX31) << 4);
+- ZY += DZy;
+- }
+-}
+-
+-} // namespace meshopt
+-
+-meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- meshopt_OverdrawStatistics result = {};
+-
+- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- const float* v = vertex_positions + i * vertex_stride_float;
+-
+- for (int j = 0; j < 3; ++j)
+- {
+- minv[j] = min(minv[j], v[j]);
+- maxv[j] = max(maxv[j], v[j]);
+- }
+- }
+-
+- float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2]));
+- float scale = kViewport / extent;
+-
+- float* triangles = allocator.allocate<float>(index_count * 3);
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices[i];
+- assert(index < vertex_count);
+-
+- const float* v = vertex_positions + index * vertex_stride_float;
+-
+- triangles[i * 3 + 0] = (v[0] - minv[0]) * scale;
+- triangles[i * 3 + 1] = (v[1] - minv[1]) * scale;
+- triangles[i * 3 + 2] = (v[2] - minv[2]) * scale;
+- }
+-
+- OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1);
+-
+- for (int axis = 0; axis < 3; ++axis)
+- {
+- memset(buffer, 0, sizeof(OverdrawBuffer));
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- const float* vn0 = &triangles[3 * (i + 0)];
+- const float* vn1 = &triangles[3 * (i + 1)];
+- const float* vn2 = &triangles[3 * (i + 2)];
+-
+- switch (axis)
+- {
+- case 0:
+- rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]);
+- break;
+- case 1:
+- rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]);
+- break;
+- case 2:
+- rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]);
+- break;
+- }
+- }
+-
+- for (int y = 0; y < kViewport; ++y)
+- for (int x = 0; x < kViewport; ++x)
+- for (int s = 0; s < 2; ++s)
+- {
+- unsigned int overdraw = buffer->overdraw[y][x][s];
+-
+- result.pixels_covered += overdraw > 0;
+- result.pixels_shaded += overdraw;
+- }
+- }
+-
+- result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f;
+-
+- return result;
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp b/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp
+deleted file mode 100644
+index 143656e..0000000
+--- a/src/3rdparty/meshoptimizer/src/overdrawoptimizer.cpp
++++ /dev/null
+@@ -1,333 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <math.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
+-namespace meshopt
+-{
+-
+-static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
+-{
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- float mesh_centroid[3] = {};
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- const float* p = vertex_positions + vertex_stride_float * indices[i];
+-
+- mesh_centroid[0] += p[0];
+- mesh_centroid[1] += p[1];
+- mesh_centroid[2] += p[2];
+- }
+-
+- mesh_centroid[0] /= index_count;
+- mesh_centroid[1] /= index_count;
+- mesh_centroid[2] /= index_count;
+-
+- for (size_t cluster = 0; cluster < cluster_count; ++cluster)
+- {
+- size_t cluster_begin = clusters[cluster] * 3;
+- size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
+- assert(cluster_begin < cluster_end);
+-
+- float cluster_area = 0;
+- float cluster_centroid[3] = {};
+- float cluster_normal[3] = {};
+-
+- for (size_t i = cluster_begin; i < cluster_end; i += 3)
+- {
+- const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0];
+- const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1];
+- const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2];
+-
+- float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+- float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+-
+- float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+- float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+- float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+-
+- float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+-
+- cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3);
+- cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3);
+- cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3);
+- cluster_normal[0] += normalx;
+- cluster_normal[1] += normaly;
+- cluster_normal[2] += normalz;
+- cluster_area += area;
+- }
+-
+- float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area;
+-
+- cluster_centroid[0] *= inv_cluster_area;
+- cluster_centroid[1] *= inv_cluster_area;
+- cluster_centroid[2] *= inv_cluster_area;
+-
+- float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]);
+- float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length;
+-
+- cluster_normal[0] *= inv_cluster_normal_length;
+- cluster_normal[1] *= inv_cluster_normal_length;
+- cluster_normal[2] *= inv_cluster_normal_length;
+-
+- float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]};
+-
+- sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2];
+- }
+-}
+-
+-static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count)
+-{
+- // compute sort data bounds and renormalize, using fixed point snorm
+- float sort_data_max = 1e-3f;
+-
+- for (size_t i = 0; i < cluster_count; ++i)
+- {
+- float dpa = fabsf(sort_data[i]);
+-
+- sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max;
+- }
+-
+- const int sort_bits = 11;
+-
+- for (size_t i = 0; i < cluster_count; ++i)
+- {
+- // note that we flip distribution since high dot product should come first
+- float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max);
+-
+- sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1);
+- }
+-
+- // fill histogram for counting sort
+- unsigned int histogram[1 << sort_bits];
+- memset(histogram, 0, sizeof(histogram));
+-
+- for (size_t i = 0; i < cluster_count; ++i)
+- {
+- histogram[sort_keys[i]]++;
+- }
+-
+- // compute offsets based on histogram data
+- size_t histogram_sum = 0;
+-
+- for (size_t i = 0; i < 1 << sort_bits; ++i)
+- {
+- size_t count = histogram[i];
+- histogram[i] = unsigned(histogram_sum);
+- histogram_sum += count;
+- }
+-
+- assert(histogram_sum == cluster_count);
+-
+- // compute sort order based on offsets
+- for (size_t i = 0; i < cluster_count; ++i)
+- {
+- sort_order[histogram[sort_keys[i]]++] = unsigned(i);
+- }
+-}
+-
+-static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp)
+-{
+- unsigned int cache_misses = 0;
+-
+- // if vertex is not in cache, put it in cache
+- if (timestamp - cache_timestamps[a] > cache_size)
+- {
+- cache_timestamps[a] = timestamp++;
+- cache_misses++;
+- }
+-
+- if (timestamp - cache_timestamps[b] > cache_size)
+- {
+- cache_timestamps[b] = timestamp++;
+- cache_misses++;
+- }
+-
+- if (timestamp - cache_timestamps[c] > cache_size)
+- {
+- cache_timestamps[c] = timestamp++;
+- cache_misses++;
+- }
+-
+- return cache_misses;
+-}
+-
+-static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps)
+-{
+- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+-
+- unsigned int timestamp = cache_size + 1;
+-
+- size_t face_count = index_count / 3;
+-
+- size_t result = 0;
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+-
+- // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh
+- // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently
+- // suggests an inefficiency in the vertex cache optimization algorithm
+- // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0
+- if (i == 0 || m == 3)
+- {
+- destination[result++] = unsigned(i);
+- }
+- }
+-
+- assert(result <= index_count / 3);
+-
+- return result;
+-}
+-
+-static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps)
+-{
+- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+-
+- unsigned int timestamp = 0;
+-
+- size_t result = 0;
+-
+- for (size_t it = 0; it < cluster_count; ++it)
+- {
+- size_t start = clusters[it];
+- size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3;
+- assert(start < end);
+-
+- // reset cache
+- timestamp += cache_size + 1;
+-
+- // measure cluster ACMR
+- unsigned int cluster_misses = 0;
+-
+- for (size_t i = start; i < end; ++i)
+- {
+- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+-
+- cluster_misses += m;
+- }
+-
+- float cluster_threshold = threshold * (float(cluster_misses) / float(end - start));
+-
+- // first cluster always starts from the hard cluster boundary
+- destination[result++] = unsigned(start);
+-
+- // reset cache
+- timestamp += cache_size + 1;
+-
+- unsigned int running_misses = 0;
+- unsigned int running_faces = 0;
+-
+- for (size_t i = start; i < end; ++i)
+- {
+- unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp);
+-
+- running_misses += m;
+- running_faces += 1;
+-
+- if (float(running_misses) / float(running_faces) <= cluster_threshold)
+- {
+- // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one
+- // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last
+- // cluster is empty; however, the 'pop_back' after the loop will clean it up
+- destination[result++] = unsigned(i + 1);
+-
+- // reset cache
+- timestamp += cache_size + 1;
+-
+- running_misses = 0;
+- running_faces = 0;
+- }
+- }
+-
+- // each time we reach the target ACMR we flush the cluster
+- // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles
+- // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results
+- // thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one
+- // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end'
+- // to the cluster boundary array which we need to remove anyway - this code will do that automatically
+- if (destination[result - 1] != start)
+- {
+- result--;
+- }
+- }
+-
+- assert(result >= cluster_count);
+- assert(result <= index_count / 3);
+-
+- return result;
+-}
+-
+-} // namespace meshopt
+-
+-void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- // guard for empty meshes
+- if (index_count == 0 || vertex_count == 0)
+- return;
+-
+- // support in-place optimization
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- unsigned int cache_size = 16;
+-
+- unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+-
+- // generate hard boundaries from full-triangle cache misses
+- unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3);
+- size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps);
+-
+- // generate soft boundaries
+- unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1);
+- size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps);
+-
+- const unsigned int* clusters = soft_clusters;
+- size_t cluster_count = soft_cluster_count;
+-
+- // fill sort data
+- float* sort_data = allocator.allocate<float>(cluster_count);
+- calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);
+-
+- // sort clusters using sort data
+- unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
+- unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count);
+- calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count);
+-
+- // fill output buffer
+- size_t offset = 0;
+-
+- for (size_t it = 0; it < cluster_count; ++it)
+- {
+- unsigned int cluster = sort_order[it];
+- assert(cluster < cluster_count);
+-
+- size_t cluster_begin = clusters[cluster] * 3;
+- size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count;
+- assert(cluster_begin < cluster_end);
+-
+- memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int));
+- offset += cluster_end - cluster_begin;
+- }
+-
+- assert(offset == index_count);
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/simplifier.cpp b/src/3rdparty/meshoptimizer/src/simplifier.cpp
+deleted file mode 100644
+index 72704c1..0000000
+--- a/src/3rdparty/meshoptimizer/src/simplifier.cpp
++++ /dev/null
+@@ -1,1677 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <float.h>
+-#include <math.h>
+-#include <string.h>
+-
+-#ifndef TRACE
+-#define TRACE 0
+-#endif
+-
+-#if TRACE
+-#include <stdio.h>
+-#endif
+-
+-#if TRACE
+-#define TRACESTATS(i) stats[i]++;
+-#else
+-#define TRACESTATS(i) (void)0
+-#endif
+-
+-// This work is based on:
+-// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
+-// Michael Garland. Quadric-based polygonal surface simplification. 1999
+-// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000
+-// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
+-// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019
+-namespace meshopt
+-{
+-
+-struct EdgeAdjacency
+-{
+- struct Edge
+- {
+- unsigned int next;
+- unsigned int prev;
+- };
+-
+- unsigned int* counts;
+- unsigned int* offsets;
+- Edge* data;
+-};
+-
+-static void prepareEdgeAdjacency(EdgeAdjacency& adjacency, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+-{
+- adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.data = allocator.allocate<EdgeAdjacency::Edge>(index_count);
+-}
+-
+-static void updateEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* remap)
+-{
+- size_t face_count = index_count / 3;
+-
+- // fill edge counts
+- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int v = remap ? remap[indices[i]] : indices[i];
+- assert(v < vertex_count);
+-
+- adjacency.counts[v]++;
+- }
+-
+- // fill offset table
+- unsigned int offset = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- adjacency.offsets[i] = offset;
+- offset += adjacency.counts[i];
+- }
+-
+- assert(offset == index_count);
+-
+- // fill edge data
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+-
+- if (remap)
+- {
+- a = remap[a];
+- b = remap[b];
+- c = remap[c];
+- }
+-
+- adjacency.data[adjacency.offsets[a]].next = b;
+- adjacency.data[adjacency.offsets[a]].prev = c;
+- adjacency.offsets[a]++;
+-
+- adjacency.data[adjacency.offsets[b]].next = c;
+- adjacency.data[adjacency.offsets[b]].prev = a;
+- adjacency.offsets[b]++;
+-
+- adjacency.data[adjacency.offsets[c]].next = a;
+- adjacency.data[adjacency.offsets[c]].prev = b;
+- adjacency.offsets[c]++;
+- }
+-
+- // fix offsets that have been disturbed by the previous pass
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- assert(adjacency.offsets[i] >= adjacency.counts[i]);
+-
+- adjacency.offsets[i] -= adjacency.counts[i];
+- }
+-}
+-
+-struct PositionHasher
+-{
+- const float* vertex_positions;
+- size_t vertex_stride_float;
+-
+- size_t hash(unsigned int index) const
+- {
+- const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float);
+-
+- // scramble bits to make sure that integer coordinates have entropy in lower bits
+- unsigned int x = key[0] ^ (key[0] >> 17);
+- unsigned int y = key[1] ^ (key[1] >> 17);
+- unsigned int z = key[2] ^ (key[2] >> 17);
+-
+- // Optimized Spatial Hashing for Collision Detection of Deformable Objects
+- return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- return memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0;
+- }
+-};
+-
+-static size_t hashBuckets2(size_t count)
+-{
+- size_t buckets = 1;
+- while (buckets < count + count / 4)
+- buckets *= 2;
+-
+- return buckets;
+-}
+-
+-template <typename T, typename Hash>
+-static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
+-{
+- assert(buckets > 0);
+- assert((buckets & (buckets - 1)) == 0);
+-
+- size_t hashmod = buckets - 1;
+- size_t bucket = hash.hash(key) & hashmod;
+-
+- for (size_t probe = 0; probe <= hashmod; ++probe)
+- {
+- T& item = table[bucket];
+-
+- if (item == empty)
+- return &item;
+-
+- if (hash.equal(item, key))
+- return &item;
+-
+- // hash collision, quadratic probing
+- bucket = (bucket + probe + 1) & hashmod;
+- }
+-
+- assert(false && "Hash table is full"); // unreachable
+- return 0;
+-}
+-
+-static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
+-{
+- PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)};
+-
+- size_t table_size = hashBuckets2(vertex_count);
+- unsigned int* table = allocator.allocate<unsigned int>(table_size);
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- // build forward remap: for each vertex, which other (canonical) vertex does it map to?
+- // we use position equivalence for this, and remap vertices to other existing vertices
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int index = unsigned(i);
+- unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u);
+-
+- if (*entry == ~0u)
+- *entry = index;
+-
+- remap[index] = *entry;
+- }
+-
+- // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex?
+- // entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i
+- for (size_t i = 0; i < vertex_count; ++i)
+- wedge[i] = unsigned(i);
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- if (remap[i] != i)
+- {
+- unsigned int r = remap[i];
+-
+- wedge[i] = wedge[r];
+- wedge[r] = unsigned(i);
+- }
+-}
+-
+-enum VertexKind
+-{
+- Kind_Manifold, // not on an attribute seam, not on any boundary
+- Kind_Border, // not on an attribute seam, has exactly two open edges
+- Kind_Seam, // on an attribute seam with exactly two attribute seam edges
+- Kind_Complex, // none of the above; these vertices can move as long as all wedges move to the target vertex
+- Kind_Locked, // none of the above; these vertices can't move
+-
+- Kind_Count
+-};
+-
+-// manifold vertices can collapse onto anything
+-// border/seam vertices can only be collapsed onto border/seam respectively
+-// complex vertices can collapse onto complex/locked
+-// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex
+-// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore
+-const unsigned char kCanCollapse[Kind_Count][Kind_Count] = {
+- {1, 1, 1, 1, 1},
+- {0, 1, 0, 0, 0},
+- {0, 0, 1, 0, 0},
+- {0, 0, 0, 1, 1},
+- {0, 0, 0, 0, 0},
+-};
+-
+-// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge
+-// note that for seam edges, the opposite edge isn't present in the attribute-based topology
+-// but is present if you consider a position-only mesh variant
+-const unsigned char kHasOpposite[Kind_Count][Kind_Count] = {
+- {1, 1, 1, 0, 1},
+- {1, 0, 1, 0, 0},
+- {1, 1, 1, 0, 1},
+- {0, 0, 0, 0, 0},
+- {1, 0, 1, 0, 0},
+-};
+-
+-static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b)
+-{
+- unsigned int count = adjacency.counts[a];
+- const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[a];
+-
+- for (size_t i = 0; i < count; ++i)
+- if (edges[i].next == b)
+- return true;
+-
+- return false;
+-}
+-
+-static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, unsigned int options)
+-{
+- memset(loop, -1, vertex_count * sizeof(unsigned int));
+- memset(loopback, -1, vertex_count * sizeof(unsigned int));
+-
+- // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1
+- // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam
+- // but here it's okay to fill the data out for other types of vertices as well
+- unsigned int* openinc = loopback;
+- unsigned int* openout = loop;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int vertex = unsigned(i);
+-
+- unsigned int count = adjacency.counts[vertex];
+- const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[vertex];
+-
+- for (size_t j = 0; j < count; ++j)
+- {
+- unsigned int target = edges[j].next;
+-
+- if (target == vertex)
+- {
+- // degenerate triangles have two distinct edges instead of three, and the self edge
+- // is bi-directional by definition; this can break border/seam classification by "closing"
+- // the open edge from another triangle and falsely marking the vertex as manifold
+- // instead we mark the vertex as having >1 open edges which turns it into locked/complex
+- openinc[vertex] = openout[vertex] = vertex;
+- }
+- else if (!hasEdge(adjacency, target, vertex))
+- {
+- openinc[target] = (openinc[target] == ~0u) ? vertex : target;
+- openout[vertex] = (openout[vertex] == ~0u) ? target : vertex;
+- }
+- }
+- }
+-
+-#if TRACE
+- size_t stats[4] = {};
+-#endif
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- if (remap[i] == i)
+- {
+- if (wedge[i] == i)
+- {
+- // no attribute seam, need to check if it's manifold
+- unsigned int openi = openinc[i], openo = openout[i];
+-
+- // note: we classify any vertices with no open edges as manifold
+- // this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold
+- // it's unclear if this is a problem in practice
+- if (openi == ~0u && openo == ~0u)
+- {
+- result[i] = Kind_Manifold;
+- }
+- else if (openi != i && openo != i)
+- {
+- result[i] = Kind_Border;
+- }
+- else
+- {
+- result[i] = Kind_Locked;
+- TRACESTATS(0);
+- }
+- }
+- else if (wedge[wedge[i]] == i)
+- {
+- // attribute seam; need to distinguish between Seam and Locked
+- unsigned int w = wedge[i];
+- unsigned int openiv = openinc[i], openov = openout[i];
+- unsigned int openiw = openinc[w], openow = openout[w];
+-
+- // seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap
+- if (openiv != ~0u && openiv != i && openov != ~0u && openov != i &&
+- openiw != ~0u && openiw != w && openow != ~0u && openow != w)
+- {
+- if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw])
+- {
+- result[i] = Kind_Seam;
+- }
+- else
+- {
+- result[i] = Kind_Locked;
+- TRACESTATS(1);
+- }
+- }
+- else
+- {
+- result[i] = Kind_Locked;
+- TRACESTATS(2);
+- }
+- }
+- else
+- {
+- // more than one vertex maps to this one; we don't have classification available
+- result[i] = Kind_Locked;
+- TRACESTATS(3);
+- }
+- }
+- else
+- {
+- assert(remap[i] < i);
+-
+- result[i] = result[remap[i]];
+- }
+- }
+-
+- if (options & meshopt_SimplifyLockBorder)
+- for (size_t i = 0; i < vertex_count; ++i)
+- if (result[i] == Kind_Border)
+- result[i] = Kind_Locked;
+-
+-#if TRACE
+- printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n",
+- int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3]));
+-#endif
+-}
+-
+-struct Vector3
+-{
+- float x, y, z;
+-};
+-
+-static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- const float* v = vertex_positions_data + i * vertex_stride_float;
+-
+- if (result)
+- {
+- result[i].x = v[0];
+- result[i].y = v[1];
+- result[i].z = v[2];
+- }
+-
+- for (int j = 0; j < 3; ++j)
+- {
+- float vj = v[j];
+-
+- minv[j] = minv[j] > vj ? vj : minv[j];
+- maxv[j] = maxv[j] < vj ? vj : maxv[j];
+- }
+- }
+-
+- float extent = 0.f;
+-
+- extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
+- extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
+- extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
+-
+- if (result)
+- {
+- float scale = extent == 0 ? 0.f : 1.f / extent;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- result[i].x = (result[i].x - minv[0]) * scale;
+- result[i].y = (result[i].y - minv[1]) * scale;
+- result[i].z = (result[i].z - minv[2]) * scale;
+- }
+- }
+-
+- return extent;
+-}
+-
+-struct Quadric
+-{
+- float a00, a11, a22;
+- float a10, a20, a21;
+- float b0, b1, b2, c;
+- float w;
+-};
+-
+-struct Collapse
+-{
+- unsigned int v0;
+- unsigned int v1;
+-
+- union
+- {
+- unsigned int bidi;
+- float error;
+- unsigned int errorui;
+- };
+-};
+-
+-static float normalize(Vector3& v)
+-{
+- float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
+-
+- if (length > 0)
+- {
+- v.x /= length;
+- v.y /= length;
+- v.z /= length;
+- }
+-
+- return length;
+-}
+-
+-static void quadricAdd(Quadric& Q, const Quadric& R)
+-{
+- Q.a00 += R.a00;
+- Q.a11 += R.a11;
+- Q.a22 += R.a22;
+- Q.a10 += R.a10;
+- Q.a20 += R.a20;
+- Q.a21 += R.a21;
+- Q.b0 += R.b0;
+- Q.b1 += R.b1;
+- Q.b2 += R.b2;
+- Q.c += R.c;
+- Q.w += R.w;
+-}
+-
+-static float quadricError(const Quadric& Q, const Vector3& v)
+-{
+- float rx = Q.b0;
+- float ry = Q.b1;
+- float rz = Q.b2;
+-
+- rx += Q.a10 * v.y;
+- ry += Q.a21 * v.z;
+- rz += Q.a20 * v.x;
+-
+- rx *= 2;
+- ry *= 2;
+- rz *= 2;
+-
+- rx += Q.a00 * v.x;
+- ry += Q.a11 * v.y;
+- rz += Q.a22 * v.z;
+-
+- float r = Q.c;
+- r += rx * v.x;
+- r += ry * v.y;
+- r += rz * v.z;
+-
+- float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
+-
+- return fabsf(r) * s;
+-}
+-
+-static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
+-{
+- float aw = a * w;
+- float bw = b * w;
+- float cw = c * w;
+- float dw = d * w;
+-
+- Q.a00 = a * aw;
+- Q.a11 = b * bw;
+- Q.a22 = c * cw;
+- Q.a10 = a * bw;
+- Q.a20 = a * cw;
+- Q.a21 = b * cw;
+- Q.b0 = a * dw;
+- Q.b1 = b * dw;
+- Q.b2 = c * dw;
+- Q.c = d * dw;
+- Q.w = w;
+-}
+-
+-static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
+-{
+- // we need to encode (x - X)^2 + (y - Y)^2 + (z - Z)^2 into the quadric
+- Q.a00 = w;
+- Q.a11 = w;
+- Q.a22 = w;
+- Q.a10 = 0.f;
+- Q.a20 = 0.f;
+- Q.a21 = 0.f;
+- Q.b0 = -2.f * x * w;
+- Q.b1 = -2.f * y * w;
+- Q.b2 = -2.f * z * w;
+- Q.c = (x * x + y * y + z * z) * w;
+- Q.w = w;
+-}
+-
+-static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
+-{
+- Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+- Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+-
+- // normal = cross(p1 - p0, p2 - p0)
+- Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
+- float area = normalize(normal);
+-
+- float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z;
+-
+- // we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes
+- quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight);
+-}
+-
+-static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
+-{
+- Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+- float length = normalize(p10);
+-
+- // p20p = length of projection of p2-p0 onto normalize(p1 - p0)
+- Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+- float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z;
+-
+- // normal = altitude of triangle from point p2 onto edge p1-p0
+- Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p};
+- normalize(normal);
+-
+- float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z;
+-
+- // note: the weight is scaled linearly with edge length; this has to match the triangle weight
+- quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
+-}
+-
+-static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+-{
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int i0 = indices[i + 0];
+- unsigned int i1 = indices[i + 1];
+- unsigned int i2 = indices[i + 2];
+-
+- Quadric Q;
+- quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+-
+- quadricAdd(vertex_quadrics[remap[i0]], Q);
+- quadricAdd(vertex_quadrics[remap[i1]], Q);
+- quadricAdd(vertex_quadrics[remap[i2]], Q);
+- }
+-}
+-
+-static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
+-{
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- static const int next[3] = {1, 2, 0};
+-
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+-
+- unsigned char k0 = vertex_kind[i0];
+- unsigned char k1 = vertex_kind[i1];
+-
+- // check that either i0 or i1 are border/seam and are on the same edge loop
+-   // note that we need to add the error even for edges that connect e.g. border & locked
+- // if we don't do that, the adjacent border->border edge won't have correct errors for corners
+- if (k0 != Kind_Border && k0 != Kind_Seam && k1 != Kind_Border && k1 != Kind_Seam)
+- continue;
+-
+- if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
+- continue;
+-
+- if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0)
+- continue;
+-
+- // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
+- if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
+- continue;
+-
+- unsigned int i2 = indices[i + next[next[e]]];
+-
+- // we try hard to maintain border edge geometry; seam edges can move more freely
+-   // due to topological restrictions on collapses, seam quadrics slightly improve collapse structure but aren't critical
+- const float kEdgeWeightSeam = 1.f;
+- const float kEdgeWeightBorder = 10.f;
+-
+- float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam;
+-
+- Quadric Q;
+- quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight);
+-
+- quadricAdd(vertex_quadrics[remap[i0]], Q);
+- quadricAdd(vertex_quadrics[remap[i1]], Q);
+- }
+- }
+-}
+-
+-// does triangle ABC flip when C is replaced with D?
+-static bool hasTriangleFlip(const Vector3& a, const Vector3& b, const Vector3& c, const Vector3& d)
+-{
+- Vector3 eb = {b.x - a.x, b.y - a.y, b.z - a.z};
+- Vector3 ec = {c.x - a.x, c.y - a.y, c.z - a.z};
+- Vector3 ed = {d.x - a.x, d.y - a.y, d.z - a.z};
+-
+- Vector3 nbc = {eb.y * ec.z - eb.z * ec.y, eb.z * ec.x - eb.x * ec.z, eb.x * ec.y - eb.y * ec.x};
+- Vector3 nbd = {eb.y * ed.z - eb.z * ed.y, eb.z * ed.x - eb.x * ed.z, eb.x * ed.y - eb.y * ed.x};
+-
+- return nbc.x * nbd.x + nbc.y * nbd.y + nbc.z * nbd.z < 0;
+-}
+-
+-static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, const unsigned int* collapse_remap, unsigned int i0, unsigned int i1)
+-{
+- assert(collapse_remap[i0] == i0);
+- assert(collapse_remap[i1] == i1);
+-
+- const Vector3& v0 = vertex_positions[i0];
+- const Vector3& v1 = vertex_positions[i1];
+-
+- const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]];
+- size_t count = adjacency.counts[i0];
+-
+- for (size_t i = 0; i < count; ++i)
+- {
+- unsigned int a = collapse_remap[edges[i].next];
+- unsigned int b = collapse_remap[edges[i].prev];
+-
+- // skip triangles that get collapsed
+- // note: this is mathematically redundant as if either of these is true, the dot product in hasTriangleFlip should be 0
+- if (a == i1 || b == i1)
+- continue;
+-
+- // early-out when at least one triangle flips due to a collapse
+- if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1))
+- return true;
+- }
+-
+- return false;
+-}
+-
+-static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop)
+-{
+- size_t collapse_count = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- static const int next[3] = {1, 2, 0};
+-
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+-
+- // this can happen either when input has a zero-length edge, or when we perform collapses for complex
+- // topology w/seams and collapse a manifold vertex that connects to both wedges onto one of them
+- // we leave edges like this alone since they may be important for preserving mesh integrity
+- if (remap[i0] == remap[i1])
+- continue;
+-
+- unsigned char k0 = vertex_kind[i0];
+- unsigned char k1 = vertex_kind[i1];
+-
+- // the edge has to be collapsible in at least one direction
+- if (!(kCanCollapse[k0][k1] | kCanCollapse[k1][k0]))
+- continue;
+-
+- // manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
+- if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
+- continue;
+-
+- // two vertices are on a border or a seam, but there's no direct edge between them
+- // this indicates that they belong to two different edge loops and we should not collapse this edge
+- // loop[] tracks half edges so we only need to check i0->i1
+- if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
+- continue;
+-
+- // edge can be collapsed in either direction - we will pick the one with minimum error
+- // note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional
+- if (kCanCollapse[k0][k1] & kCanCollapse[k1][k0])
+- {
+- Collapse c = {i0, i1, {/* bidi= */ 1}};
+- collapses[collapse_count++] = c;
+- }
+- else
+- {
+- // edge can only be collapsed in one direction
+- unsigned int e0 = kCanCollapse[k0][k1] ? i0 : i1;
+- unsigned int e1 = kCanCollapse[k0][k1] ? i1 : i0;
+-
+- Collapse c = {e0, e1, {/* bidi= */ 0}};
+- collapses[collapse_count++] = c;
+- }
+- }
+- }
+-
+- return collapse_count;
+-}
+-
+-static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap)
+-{
+- for (size_t i = 0; i < collapse_count; ++i)
+- {
+- Collapse& c = collapses[i];
+-
+- unsigned int i0 = c.v0;
+- unsigned int i1 = c.v1;
+-
+- // most edges are bidirectional which means we need to evaluate errors for two collapses
+- // to keep this code branchless we just use the same edge for unidirectional edges
+- unsigned int j0 = c.bidi ? i1 : i0;
+- unsigned int j1 = c.bidi ? i0 : i1;
+-
+- const Quadric& qi = vertex_quadrics[remap[i0]];
+- const Quadric& qj = vertex_quadrics[remap[j0]];
+-
+- float ei = quadricError(qi, vertex_positions[i1]);
+- float ej = quadricError(qj, vertex_positions[j1]);
+-
+- // pick edge direction with minimal error
+- c.v0 = ei <= ej ? i0 : j0;
+- c.v1 = ei <= ej ? i1 : j1;
+- c.error = ei <= ej ? ei : ej;
+- }
+-}
+-
+-#if TRACE > 1
+-static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind)
+-{
+- size_t ckinds[Kind_Count][Kind_Count] = {};
+- float cerrors[Kind_Count][Kind_Count] = {};
+-
+- for (int k0 = 0; k0 < Kind_Count; ++k0)
+- for (int k1 = 0; k1 < Kind_Count; ++k1)
+- cerrors[k0][k1] = FLT_MAX;
+-
+- for (size_t i = 0; i < collapse_count; ++i)
+- {
+- unsigned int i0 = collapses[i].v0;
+- unsigned int i1 = collapses[i].v1;
+-
+- unsigned char k0 = vertex_kind[i0];
+- unsigned char k1 = vertex_kind[i1];
+-
+- ckinds[k0][k1]++;
+- cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? collapses[i].error : cerrors[k0][k1];
+- }
+-
+- for (int k0 = 0; k0 < Kind_Count; ++k0)
+- for (int k1 = 0; k1 < Kind_Count; ++k1)
+- if (ckinds[k0][k1])
+- printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), ckinds[k0][k1] ? sqrtf(cerrors[k0][k1]) : 0.f);
+-}
+-
+-static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind)
+-{
+- size_t locked_collapses[Kind_Count][Kind_Count] = {};
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- static const int next[3] = {1, 2, 0};
+-
+- for (int e = 0; e < 3; ++e)
+- {
+- unsigned int i0 = indices[i + e];
+- unsigned int i1 = indices[i + next[e]];
+-
+- unsigned char k0 = vertex_kind[i0];
+- unsigned char k1 = vertex_kind[i1];
+-
+- locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0];
+- }
+- }
+-
+- for (int k0 = 0; k0 < Kind_Count; ++k0)
+- for (int k1 = 0; k1 < Kind_Count; ++k1)
+- if (locked_collapses[k0][k1])
+- printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1]));
+-}
+-#endif
+-
+-static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count)
+-{
+- const int sort_bits = 11;
+-
+- // fill histogram for counting sort
+- unsigned int histogram[1 << sort_bits];
+- memset(histogram, 0, sizeof(histogram));
+-
+- for (size_t i = 0; i < collapse_count; ++i)
+- {
+- // skip sign bit since error is non-negative
+- unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits);
+-
+- histogram[key]++;
+- }
+-
+- // compute offsets based on histogram data
+- size_t histogram_sum = 0;
+-
+- for (size_t i = 0; i < 1 << sort_bits; ++i)
+- {
+- size_t count = histogram[i];
+- histogram[i] = unsigned(histogram_sum);
+- histogram_sum += count;
+- }
+-
+- assert(histogram_sum == collapse_count);
+-
+- // compute sort order based on offsets
+- for (size_t i = 0; i < collapse_count; ++i)
+- {
+- // skip sign bit since error is non-negative
+- unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits);
+-
+- sort_order[histogram[key]++] = unsigned(i);
+- }
+-}
+-
+-static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
+-{
+- size_t edge_collapses = 0;
+- size_t triangle_collapses = 0;
+-
+- // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit
+- // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses
+- size_t edge_collapse_goal = triangle_collapse_goal / 2;
+-
+-#if TRACE
+- size_t stats[4] = {};
+-#endif
+-
+- for (size_t i = 0; i < collapse_count; ++i)
+- {
+- const Collapse& c = collapses[collapse_order[i]];
+-
+- TRACESTATS(0);
+-
+- if (c.error > error_limit)
+- break;
+-
+- if (triangle_collapses >= triangle_collapse_goal)
+- break;
+-
+-  // we limit the error in each pass based on the error of the optimal last collapse; since many collapses will be locked
+-  // as they will share vertices with other successful collapses, we need to increase the acceptable error by some factor
+- float error_goal = edge_collapse_goal < collapse_count ? 1.5f * collapses[collapse_order[edge_collapse_goal]].error : FLT_MAX;
+-
+- // on average, each collapse is expected to lock 6 other collapses; to avoid degenerate passes on meshes with odd
+-  // topology, we only abort the pass early once we have collapsed more than 1/6 of the triangle collapse goal.
+- if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 6)
+- break;
+-
+- unsigned int i0 = c.v0;
+- unsigned int i1 = c.v1;
+-
+- unsigned int r0 = remap[i0];
+- unsigned int r1 = remap[i1];
+-
+- // we don't collapse vertices that had source or target vertex involved in a collapse
+- // it's important to not move the vertices twice since it complicates the tracking/remapping logic
+- // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass
+- if (collapse_locked[r0] | collapse_locked[r1])
+- {
+- TRACESTATS(1);
+- continue;
+- }
+-
+- if (hasTriangleFlips(adjacency, vertex_positions, collapse_remap, r0, r1))
+- {
+- // adjust collapse goal since this collapse is invalid and shouldn't factor into error goal
+- edge_collapse_goal++;
+-
+- TRACESTATS(2);
+- continue;
+- }
+-
+- assert(collapse_remap[r0] == r0);
+- assert(collapse_remap[r1] == r1);
+-
+- quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
+-
+- if (vertex_kind[i0] == Kind_Complex)
+- {
+- unsigned int v = i0;
+-
+- do
+- {
+- collapse_remap[v] = r1;
+- v = wedge[v];
+- } while (v != i0);
+- }
+- else if (vertex_kind[i0] == Kind_Seam)
+- {
+- // remap v0 to v1 and seam pair of v0 to seam pair of v1
+- unsigned int s0 = wedge[i0];
+- unsigned int s1 = wedge[i1];
+-
+- assert(s0 != i0 && s1 != i1);
+- assert(wedge[s0] == i0 && wedge[s1] == i1);
+-
+- collapse_remap[i0] = i1;
+- collapse_remap[s0] = s1;
+- }
+- else
+- {
+- assert(wedge[i0] == i0);
+-
+- collapse_remap[i0] = i1;
+- }
+-
+- collapse_locked[r0] = 1;
+- collapse_locked[r1] = 1;
+-
+- // border edges collapse 1 triangle, other edges collapse 2 or more
+- triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2;
+- edge_collapses++;
+-
+- result_error = result_error < c.error ? c.error : result_error;
+- }
+-
+-#if TRACE
+- float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f;
+-
+- printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n",
+- int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect),
+- int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]));
+-#endif
+-
+- return edge_collapses;
+-}
+-
+-static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap)
+-{
+- size_t write = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int v0 = collapse_remap[indices[i + 0]];
+- unsigned int v1 = collapse_remap[indices[i + 1]];
+- unsigned int v2 = collapse_remap[indices[i + 2]];
+-
+- // we never move the vertex twice during a single pass
+- assert(collapse_remap[v0] == v0);
+- assert(collapse_remap[v1] == v1);
+- assert(collapse_remap[v2] == v2);
+-
+- if (v0 != v1 && v0 != v2 && v1 != v2)
+- {
+- indices[write + 0] = v0;
+- indices[write + 1] = v1;
+- indices[write + 2] = v2;
+- write += 3;
+- }
+- }
+-
+- return write;
+-}
+-
+-static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap)
+-{
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- if (loop[i] != ~0u)
+- {
+- unsigned int l = loop[i];
+- unsigned int r = collapse_remap[l];
+-
+- // i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes
+- loop[i] = (i == r) ? loop[l] : r;
+- }
+- }
+-}
+-
+-struct CellHasher
+-{
+- const unsigned int* vertex_ids;
+-
+- size_t hash(unsigned int i) const
+- {
+- unsigned int h = vertex_ids[i];
+-
+- // MurmurHash2 finalizer
+- h ^= h >> 13;
+- h *= 0x5bd1e995;
+- h ^= h >> 15;
+- return h;
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- return vertex_ids[lhs] == vertex_ids[rhs];
+- }
+-};
+-
+-struct IdHasher
+-{
+- size_t hash(unsigned int id) const
+- {
+- unsigned int h = id;
+-
+- // MurmurHash2 finalizer
+- h ^= h >> 13;
+- h *= 0x5bd1e995;
+- h ^= h >> 15;
+- return h;
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- return lhs == rhs;
+- }
+-};
+-
+-struct TriangleHasher
+-{
+- const unsigned int* indices;
+-
+- size_t hash(unsigned int i) const
+- {
+- const unsigned int* tri = indices + i * 3;
+-
+- // Optimized Spatial Hashing for Collision Detection of Deformable Objects
+- return (tri[0] * 73856093) ^ (tri[1] * 19349663) ^ (tri[2] * 83492791);
+- }
+-
+- bool equal(unsigned int lhs, unsigned int rhs) const
+- {
+- const unsigned int* lt = indices + lhs * 3;
+- const unsigned int* rt = indices + rhs * 3;
+-
+- return lt[0] == rt[0] && lt[1] == rt[1] && lt[2] == rt[2];
+- }
+-};
+-
+-static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size)
+-{
+- assert(grid_size >= 1 && grid_size <= 1024);
+- float cell_scale = float(grid_size - 1);
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- const Vector3& v = vertex_positions[i];
+-
+- int xi = int(v.x * cell_scale + 0.5f);
+- int yi = int(v.y * cell_scale + 0.5f);
+- int zi = int(v.z * cell_scale + 0.5f);
+-
+- vertex_ids[i] = (xi << 20) | (yi << 10) | zi;
+- }
+-}
+-
+-static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count)
+-{
+- size_t result = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int id0 = vertex_ids[indices[i + 0]];
+- unsigned int id1 = vertex_ids[indices[i + 1]];
+- unsigned int id2 = vertex_ids[indices[i + 2]];
+-
+- result += (id0 != id1) & (id0 != id2) & (id1 != id2);
+- }
+-
+- return result;
+-}
+-
+-static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count)
+-{
+- CellHasher hasher = {vertex_ids};
+-
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- size_t result = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int* entry = hashLookup2(table, table_size, hasher, unsigned(i), ~0u);
+-
+- if (*entry == ~0u)
+- {
+- *entry = unsigned(i);
+- vertex_cells[i] = unsigned(result++);
+- }
+- else
+- {
+- vertex_cells[i] = vertex_cells[*entry];
+- }
+- }
+-
+- return result;
+-}
+-
+-static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count)
+-{
+- IdHasher hasher;
+-
+- memset(table, -1, table_size * sizeof(unsigned int));
+-
+- size_t result = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int id = vertex_ids[i];
+- unsigned int* entry = hashLookup2(table, table_size, hasher, id, ~0u);
+-
+- result += (*entry == ~0u);
+- *entry = id;
+- }
+-
+- return result;
+-}
+-
+-static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells)
+-{
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int i0 = indices[i + 0];
+- unsigned int i1 = indices[i + 1];
+- unsigned int i2 = indices[i + 2];
+-
+- unsigned int c0 = vertex_cells[i0];
+- unsigned int c1 = vertex_cells[i1];
+- unsigned int c2 = vertex_cells[i2];
+-
+- bool single_cell = (c0 == c1) & (c0 == c2);
+-
+- Quadric Q;
+- quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 3.f : 1.f);
+-
+- if (single_cell)
+- {
+- quadricAdd(cell_quadrics[c0], Q);
+- }
+- else
+- {
+- quadricAdd(cell_quadrics[c0], Q);
+- quadricAdd(cell_quadrics[c1], Q);
+- quadricAdd(cell_quadrics[c2], Q);
+- }
+- }
+-}
+-
+-static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells)
+-{
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int c = vertex_cells[i];
+- const Vector3& v = vertex_positions[i];
+-
+- Quadric Q;
+- quadricFromPoint(Q, v.x, v.y, v.z, 1.f);
+-
+- quadricAdd(cell_quadrics[c], Q);
+- }
+-}
+-
+-static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count)
+-{
+- memset(cell_remap, -1, cell_count * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned int cell = vertex_cells[i];
+- float error = quadricError(cell_quadrics[cell], vertex_positions[i]);
+-
+- if (cell_remap[cell] == ~0u || cell_errors[cell] > error)
+- {
+- cell_remap[cell] = unsigned(i);
+- cell_errors[cell] = error;
+- }
+- }
+-}
+-
+-static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap)
+-{
+- TriangleHasher hasher = {destination};
+-
+- memset(tritable, -1, tritable_size * sizeof(unsigned int));
+-
+- size_t result = 0;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int c0 = vertex_cells[indices[i + 0]];
+- unsigned int c1 = vertex_cells[indices[i + 1]];
+- unsigned int c2 = vertex_cells[indices[i + 2]];
+-
+- if (c0 != c1 && c0 != c2 && c1 != c2)
+- {
+- unsigned int a = cell_remap[c0];
+- unsigned int b = cell_remap[c1];
+- unsigned int c = cell_remap[c2];
+-
+- if (b < a && b < c)
+- {
+- unsigned int t = a;
+- a = b, b = c, c = t;
+- }
+- else if (c < a && c < b)
+- {
+- unsigned int t = c;
+- c = b, b = a, a = t;
+- }
+-
+- destination[result * 3 + 0] = a;
+- destination[result * 3 + 1] = b;
+- destination[result * 3 + 2] = c;
+-
+- unsigned int* entry = hashLookup2(tritable, tritable_size, hasher, unsigned(result), ~0u);
+-
+- if (*entry == ~0u)
+- *entry = unsigned(result++);
+- }
+- }
+-
+- return result * 3;
+-}
+-
+-static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2)
+-{
+- // three point interpolation from "revenge of interpolation search" paper
+- float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0);
+- float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2);
+- return x1 + num / den;
+-}
+-
+-} // namespace meshopt
+-
+-#ifndef NDEBUG
+-// Note: these are only exposed for debug visualization purposes; they are compiled out in release (NDEBUG) builds
+-MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0;
+-MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0;
+-MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0;
+-#endif
+-
+-size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+- assert(target_index_count <= index_count);
+- assert((options & ~(meshopt_SimplifyLockBorder)) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- unsigned int* result = destination;
+-
+- // build adjacency information
+- EdgeAdjacency adjacency = {};
+- prepareEdgeAdjacency(adjacency, index_count, vertex_count, allocator);
+- updateEdgeAdjacency(adjacency, indices, index_count, vertex_count, NULL);
+-
+- // build position remap that maps each vertex to the one with identical position
+- unsigned int* remap = allocator.allocate(vertex_count);
+- unsigned int* wedge = allocator.allocate(vertex_count);
+- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
+-
+- // classify vertices; vertex kind determines collapse rules, see kCanCollapse
+- unsigned char* vertex_kind = allocator.allocate(vertex_count);
+- unsigned int* loop = allocator.allocate(vertex_count);
+- unsigned int* loopback = allocator.allocate(vertex_count);
+- classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge, options);
+-
+-#if TRACE
+- size_t unique_positions = 0;
+- for (size_t i = 0; i < vertex_count; ++i)
+- unique_positions += remap[i] == i;
+-
+- printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions));
+-
+- size_t kinds[Kind_Count] = {};
+- for (size_t i = 0; i < vertex_count; ++i)
+- kinds[vertex_kind[i]] += remap[i] == i;
+-
+- printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n",
+- int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked]));
+-#endif
+-
+- Vector3* vertex_positions = allocator.allocate(vertex_count);
+- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+-
+- Quadric* vertex_quadrics = allocator.allocate(vertex_count);
+- memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
+-
+- fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap);
+- fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
+-
+- if (result != indices)
+- memcpy(result, indices, index_count * sizeof(unsigned int));
+-
+-#if TRACE
+- size_t pass_count = 0;
+-#endif
+-
+- Collapse* edge_collapses = allocator.allocate(index_count);
+- unsigned int* collapse_order = allocator.allocate(index_count);
+- unsigned int* collapse_remap = allocator.allocate(vertex_count);
+- unsigned char* collapse_locked = allocator.allocate(vertex_count);
+-
+- size_t result_count = index_count;
+- float result_error = 0;
+-
+- // target_error input is linear; we need to adjust it to match quadricError units
+- float error_limit = target_error * target_error;
+-
+- while (result_count > target_index_count)
+- {
+- // note: throughout the simplification process adjacency structure reflects welded topology for result-in-progress
+- updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap);
+-
+- size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop);
+-
+- // no edges can be collapsed any more due to topology restrictions
+- if (edge_collapse_count == 0)
+- break;
+-
+- rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap);
+-
+-#if TRACE > 1
+- dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
+-#endif
+-
+- sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count);
+-
+- size_t triangle_collapse_goal = (result_count - target_index_count) / 3;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- collapse_remap[i] = unsigned(i);
+-
+- memset(collapse_locked, 0, vertex_count);
+-
+-#if TRACE
+- printf("pass %d: ", int(pass_count++));
+-#endif
+-
+- size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
+-
+- // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
+- if (collapses == 0)
+- break;
+-
+- remapEdgeLoops(loop, vertex_count, collapse_remap);
+- remapEdgeLoops(loopback, vertex_count, collapse_remap);
+-
+- size_t new_count = remapIndexBuffer(result, result_count, collapse_remap);
+- assert(new_count < result_count);
+-
+- result_count = new_count;
+- }
+-
+-#if TRACE
+- printf("result: %d triangles, error: %e; total %d passes\n", int(result_count), sqrtf(result_error), int(pass_count));
+-#endif
+-
+-#if TRACE > 1
+- dumpLockedCollapses(result, result_count, vertex_kind);
+-#endif
+-
+-#ifndef NDEBUG
+- if (meshopt_simplifyDebugKind)
+- memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count);
+-
+- if (meshopt_simplifyDebugLoop)
+- memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int));
+-
+- if (meshopt_simplifyDebugLoopBack)
+- memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int));
+-#endif
+-
+- // result_error is quadratic; we need to remap it back to linear
+- if (out_result_error)
+- *out_result_error = sqrtf(result_error);
+-
+- return result_count;
+-}
+-
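The meshopt_simplify entry point deleted above is the simplifier's public interface; a minimal calling sketch, assuming std::vector buffers and a tightly packed float3 position layout (the helper name and buffers are hypothetical, not part of the library), might look like this:

#include <vector>
#include "meshoptimizer.h"

// Hypothetical helper: simplify an indexed triangle mesh to roughly half its triangles.
// 'positions' holds 3 floats (x, y, z) per vertex.
std::vector<unsigned int> simplifyToHalf(const std::vector<unsigned int>& indices,
                                         const std::vector<float>& positions)
{
    std::vector<unsigned int> lod(indices.size());
    float relative_error = 0.f;

    size_t lod_index_count = meshopt_simplify(
        lod.data(), indices.data(), indices.size(),
        positions.data(), positions.size() / 3, 3 * sizeof(float),
        /* target_index_count= */ indices.size() / 2, /* target_error= */ 1e-2f,
        /* options= */ 0, &relative_error);

    lod.resize(lod_index_count);
    return lod;
}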
+-size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+- assert(target_index_count <= index_count);
+-
+- // we expect to get ~2 triangles/vertex in the output
+- size_t target_cell_count = target_index_count / 6;
+-
+- meshopt_Allocator allocator;
+-
+- Vector3* vertex_positions = allocator.allocate(vertex_count);
+- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+-
+- // find the optimal grid size using guided binary search
+-#if TRACE
+- printf("source: %d vertices, %d triangles\n", int(vertex_count), int(index_count / 3));
+- printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3));
+-#endif
+-
+- unsigned int* vertex_ids = allocator.allocate(vertex_count);
+-
+- const int kInterpolationPasses = 5;
+-
+- // invariant: # of triangles in min_grid <= target_count
+- int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : target_error));
+- int max_grid = 1025;
+- size_t min_triangles = 0;
+- size_t max_triangles = index_count / 3;
+-
+- // when we're error-limited, we compute the triangle count for the min. size; this accelerates convergence and provides the correct answer when we can't use a larger grid
+- if (min_grid > 1)
+- {
+- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
+- min_triangles = countTriangles(vertex_ids, indices, index_count);
+- }
+-
+- // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size...
+- int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
+-
+- for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
+- {
+- if (min_triangles >= target_index_count / 3 || max_grid - min_grid <= 1)
+- break;
+-
+- // we clamp the prediction of the grid size to make sure that the search converges
+- int grid_size = next_grid_size;
+- grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size;
+-
+- computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
+- size_t triangles = countTriangles(vertex_ids, indices, index_count);
+-
+-#if TRACE
+- printf("pass %d (%s): grid size %d, triangles %d, %s\n",
+- pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+- grid_size, int(triangles),
+- (triangles <= target_index_count / 3) ? "under" : "over");
+-#endif
+-
+- float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles));
+-
+- if (triangles <= target_index_count / 3)
+- {
+- min_grid = grid_size;
+- min_triangles = triangles;
+- }
+- else
+- {
+- max_grid = grid_size;
+- max_triangles = triangles;
+- }
+-
+- // we start by using interpolation search - it usually converges faster
+- // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
+- next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
+- }
+-
+- if (min_triangles == 0)
+- {
+- if (out_result_error)
+- *out_result_error = 1.f;
+-
+- return 0;
+- }
+-
+- // build vertex->cell association by mapping all vertices with the same quantized position to the same cell
+- size_t table_size = hashBuckets2(vertex_count);
+- unsigned int* table = allocator.allocate(table_size);
+-
+- unsigned int* vertex_cells = allocator.allocate(vertex_count);
+-
+- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
+- size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
+-
+- // build a quadric for each target cell
+- Quadric* cell_quadrics = allocator.allocate(cell_count);
+- memset(cell_quadrics, 0, cell_count * sizeof(Quadric));
+-
+- fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells);
+-
+- // for each target cell, find the vertex with the minimal error
+- unsigned int* cell_remap = allocator.allocate(cell_count);
+- float* cell_errors = allocator.allocate(cell_count);
+-
+- fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count);
+-
+- // compute error
+- float result_error = 0.f;
+-
+- for (size_t i = 0; i < cell_count; ++i)
+- result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error;
+-
+- // collapse triangles!
+- // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :(
+- size_t tritable_size = hashBuckets2(min_triangles);
+- unsigned int* tritable = allocator.allocate(tritable_size);
+-
+- size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap);
+-
+-#if TRACE
+- printf("result: %d cells, %d triangles (%d unfiltered), error %e\n", int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error));
+-#endif
+-
+- if (out_result_error)
+- *out_result_error = sqrtf(result_error);
+-
+- return write;
+-}
+-
+-size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+- assert(target_vertex_count <= vertex_count);
+-
+- size_t target_cell_count = target_vertex_count;
+-
+- if (target_cell_count == 0)
+- return 0;
+-
+- meshopt_Allocator allocator;
+-
+- Vector3* vertex_positions = allocator.allocate(vertex_count);
+- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+-
+- // find the optimal grid size using guided binary search
+-#if TRACE
+- printf("source: %d vertices\n", int(vertex_count));
+- printf("target: %d cells\n", int(target_cell_count));
+-#endif
+-
+- unsigned int* vertex_ids = allocator.allocate(vertex_count);
+-
+- size_t table_size = hashBuckets2(vertex_count);
+- unsigned int* table = allocator.allocate(table_size);
+-
+- const int kInterpolationPasses = 5;
+-
+- // invariant: # of vertices in min_grid <= target_count
+- int min_grid = 0;
+- int max_grid = 1025;
+- size_t min_vertices = 0;
+- size_t max_vertices = vertex_count;
+-
+- // instead of starting in the middle, let's guess as to what the answer might be! vertex count usually grows as a square of grid size...
+- int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f);
+-
+- for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass)
+- {
+- assert(min_vertices < target_vertex_count);
+- assert(max_grid - min_grid > 1);
+-
+- // we clamp the prediction of the grid size to make sure that the search converges
+- int grid_size = next_grid_size;
+- grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size;
+-
+- computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
+- size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count);
+-
+-#if TRACE
+- printf("pass %d (%s): grid size %d, vertices %d, %s\n",
+- pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+- grid_size, int(vertices),
+- (vertices <= target_vertex_count) ? "under" : "over");
+-#endif
+-
+- float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices));
+-
+- if (vertices <= target_vertex_count)
+- {
+- min_grid = grid_size;
+- min_vertices = vertices;
+- }
+- else
+- {
+- max_grid = grid_size;
+- max_vertices = vertices;
+- }
+-
+- if (vertices == target_vertex_count || max_grid - min_grid <= 1)
+- break;
+-
+- // we start by using interpolation search - it usually converges faster
+- // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN)
+- next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2;
+- }
+-
+- if (min_vertices == 0)
+- return 0;
+-
+- // build vertex->cell association by mapping all vertices with the same quantized position to the same cell
+- unsigned int* vertex_cells = allocator.allocate(vertex_count);
+-
+- computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
+- size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
+-
+- // build a quadric for each target cell
+- Quadric* cell_quadrics = allocator.allocate(cell_count);
+- memset(cell_quadrics, 0, cell_count * sizeof(Quadric));
+-
+- fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells);
+-
+- // for each target cell, find the vertex with the minimal error
+- unsigned int* cell_remap = allocator.allocate(cell_count);
+- float* cell_errors = allocator.allocate(cell_count);
+-
+- fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count);
+-
+- // copy results to the output
+- assert(cell_count <= target_vertex_count);
+- memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count);
+-
+-#if TRACE
+- printf("result: %d cells\n", int(cell_count));
+-#endif
+-
+- return cell_count;
+-}
+-
+-float meshopt_simplifyScale(const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- float extent = rescalePositions(NULL, vertex_positions, vertex_count, vertex_positions_stride);
+-
+- return extent;
+-}
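As the code above shows, positions are rescaled into a unit cube before quadrics are built, so the error reported by the simplifiers is relative to the mesh extent; meshopt_simplifyScale returns that extent, which lets a caller convert the error back into mesh units. A short sketch with hypothetical names:

#include <vector>
#include "meshoptimizer.h"

// Hypothetical helper: convert the relative error reported by meshopt_simplify
// into the units of the source mesh.
float absoluteError(float relative_error, const std::vector<float>& positions)
{
    // meshopt_simplifyScale returns the axis-aligned extent used internally for rescaling.
    float extent = meshopt_simplifyScale(positions.data(), positions.size() / 3,
                                         3 * sizeof(float));
    return relative_error * extent;
}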
+diff --git a/src/3rdparty/meshoptimizer/src/spatialorder.cpp b/src/3rdparty/meshoptimizer/src/spatialorder.cpp
+deleted file mode 100644
+index b09f80a..0000000
+--- a/src/3rdparty/meshoptimizer/src/spatialorder.cpp
++++ /dev/null
+@@ -1,194 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <float.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Fabian Giesen. Decoding Morton codes. 2009
+-namespace meshopt
+-{
+-
+-// "Insert" two 0 bits after each of the 10 low bits of x
+-inline unsigned int part1By2(unsigned int x)
+-{
+- x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
+- x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
+- x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
+- x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
+- x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+- return x;
+-}
+-
+-static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+- float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- const float* v = vertex_positions_data + i * vertex_stride_float;
+-
+- for (int j = 0; j < 3; ++j)
+- {
+- float vj = v[j];
+-
+- minv[j] = minv[j] > vj ? vj : minv[j];
+- maxv[j] = maxv[j] < vj ? vj : maxv[j];
+- }
+- }
+-
+- float extent = 0.f;
+-
+- extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]);
+- extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]);
+- extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]);
+-
+- float scale = extent == 0 ? 0.f : 1.f / extent;
+-
+- // generate Morton order based on the position inside a unit cube
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- const float* v = vertex_positions_data + i * vertex_stride_float;
+-
+- int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f);
+- int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f);
+- int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f);
+-
+- result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2);
+- }
+-}
+-
+-static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
+-{
+- memset(hist, 0, sizeof(hist));
+-
+- // compute 3 10-bit histograms in parallel
+- for (size_t i = 0; i < count; ++i)
+- {
+- unsigned int id = data[i];
+-
+- hist[(id >> 0) & 1023][0]++;
+- hist[(id >> 10) & 1023][1]++;
+- hist[(id >> 20) & 1023][2]++;
+- }
+-
+- unsigned int sumx = 0, sumy = 0, sumz = 0;
+-
+- // replace histogram data with prefix histogram sums in-place
+- for (int i = 0; i < 1024; ++i)
+- {
+- unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2];
+-
+- hist[i][0] = sumx;
+- hist[i][1] = sumy;
+- hist[i][2] = sumz;
+-
+- sumx += hx;
+- sumy += hy;
+- sumz += hz;
+- }
+-
+- assert(sumx == count && sumy == count && sumz == count);
+-}
+-
+-static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
+-{
+- int bitoff = pass * 10;
+-
+- for (size_t i = 0; i < count; ++i)
+- {
+- unsigned int id = (keys[source[i]] >> bitoff) & 1023;
+-
+- destination[hist[id][pass]++] = source[i];
+- }
+-}
+-
+-} // namespace meshopt
+-
+-void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- meshopt_Allocator allocator;
+-
+- unsigned int* keys = allocator.allocate(vertex_count);
+- computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);
+-
+- unsigned int hist[1024][3];
+- computeHistogram(hist, keys, vertex_count);
+-
+- unsigned int* scratch = allocator.allocate(vertex_count);
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- destination[i] = unsigned(i);
+-
+- // 3-pass radix sort computes the resulting order into scratch
+- radixPass(scratch, destination, keys, vertex_count, hist, 0);
+- radixPass(destination, scratch, keys, vertex_count, hist, 1);
+- radixPass(scratch, destination, keys, vertex_count, hist, 2);
+-
+- // since our remap table is mapping old=>new, we need to reverse it
+- for (size_t i = 0; i < vertex_count; ++i)
+- destination[scratch[i]] = unsigned(i);
+-}
+-
+-void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
+-
+- (void)vertex_count;
+-
+- size_t face_count = index_count / 3;
+- size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+-
+- meshopt_Allocator allocator;
+-
+- float* centroids = allocator.allocate(face_count * 3);
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- const float* va = vertex_positions + a * vertex_stride_float;
+- const float* vb = vertex_positions + b * vertex_stride_float;
+- const float* vc = vertex_positions + c * vertex_stride_float;
+-
+- centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
+- centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
+- centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
+- }
+-
+- unsigned int* remap = allocator.allocate(face_count);
+-
+- meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);
+-
+- // support in-place remap (destination may alias indices)
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+- unsigned int r = remap[i];
+-
+- destination[r * 3 + 0] = a;
+- destination[r * 3 + 1] = b;
+- destination[r * 3 + 2] = c;
+- }
+-}
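The spatial sorter deleted above derives a Morton code per vertex and radix-sorts by it; for index buffers, meshopt_spatialSortTriangles is the usual entry point. A small sketch, again assuming packed float3 positions (names hypothetical):

#include <vector>
#include "meshoptimizer.h"

// Hypothetical helper: reorder triangles along a Morton curve to improve spatial locality.
std::vector<unsigned int> sortTrianglesSpatially(const std::vector<unsigned int>& indices,
                                                 const std::vector<float>& positions)
{
    std::vector<unsigned int> reordered(indices.size());

    meshopt_spatialSortTriangles(reordered.data(), indices.data(), indices.size(),
                                 positions.data(), positions.size() / 3, 3 * sizeof(float));

    return reordered;
}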
+diff --git a/src/3rdparty/meshoptimizer/src/stripifier.cpp b/src/3rdparty/meshoptimizer/src/stripifier.cpp
+deleted file mode 100644
+index 8ce17ef..0000000
+--- a/src/3rdparty/meshoptimizer/src/stripifier.cpp
++++ /dev/null
+@@ -1,295 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <limits.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996
+-namespace meshopt
+-{
+-
+-static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence)
+-{
+- unsigned int index = 0;
+- unsigned int iv = ~0u;
+-
+- for (size_t i = 0; i < buffer_size; ++i)
+- {
+- unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]];
+- unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc;
+-
+- if (v < iv)
+- {
+- index = unsigned(i);
+- iv = v;
+- }
+- }
+-
+- return index;
+-}
+-
+-static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1)
+-{
+- for (size_t i = 0; i < buffer_size; ++i)
+- {
+- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+-
+- if (e0 == a && e1 == b)
+- return (int(i) << 2) | 2;
+- else if (e0 == b && e1 == c)
+- return (int(i) << 2) | 0;
+- else if (e0 == c && e1 == a)
+- return (int(i) << 2) | 1;
+- }
+-
+- return -1;
+-}
+-
+-} // namespace meshopt
+-
+-size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index)
+-{
+- assert(destination != indices);
+- assert(index_count % 3 == 0);
+-
+- using namespace meshopt;
+-
+- meshopt_Allocator allocator;
+-
+- const size_t buffer_capacity = 8;
+-
+- unsigned int buffer[buffer_capacity][3] = {};
+- unsigned int buffer_size = 0;
+-
+- size_t index_offset = 0;
+-
+- unsigned int strip[2] = {};
+- unsigned int parity = 0;
+-
+- size_t strip_size = 0;
+-
+- // compute vertex valence; this is used to prioritize starting triangle for strips
+- unsigned int* valence = allocator.allocate(vertex_count);
+- memset(valence, 0, vertex_count * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- unsigned int index = indices[i];
+- assert(index < vertex_count);
+-
+- valence[index]++;
+- }
+-
+- int next = -1;
+-
+- while (buffer_size > 0 || index_offset < index_count)
+- {
+- assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3));
+-
+- // fill triangle buffer
+- while (buffer_size < buffer_capacity && index_offset < index_count)
+- {
+- buffer[buffer_size][0] = indices[index_offset + 0];
+- buffer[buffer_size][1] = indices[index_offset + 1];
+- buffer[buffer_size][2] = indices[index_offset + 2];
+-
+- buffer_size++;
+- index_offset += 3;
+- }
+-
+- assert(buffer_size > 0);
+-
+- if (next >= 0)
+- {
+- unsigned int i = next >> 2;
+- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+- unsigned int v = buffer[i][next & 3];
+-
+- // ordered removal from the buffer
+- memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
+- buffer_size--;
+-
+- // update vertex valences for strip start heuristic
+- valence[a]--;
+- valence[b]--;
+- valence[c]--;
+-
+- // find next triangle (note that edge order flips on every iteration)
+- // in some cases we need to perform a swap to pick a different outgoing triangle edge
+- // for [a b c], the default strip edge is [b c], but we might want to use [a c]
+- int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]);
+- int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1;
+-
+- if (cont < 0 && swap >= 0)
+- {
+- // [a b c] => [a b a c]
+- destination[strip_size++] = strip[0];
+- destination[strip_size++] = v;
+-
+- // next strip has same winding
+- // ? a b => b a v
+- strip[1] = v;
+-
+- next = swap;
+- }
+- else
+- {
+- // emit the next vertex in the strip
+- destination[strip_size++] = v;
+-
+- // next strip has flipped winding
+- strip[0] = strip[1];
+- strip[1] = v;
+- parity ^= 1;
+-
+- next = cont;
+- }
+- }
+- else
+- {
+- // if we didn't find anything, we need to find the next new triangle
+- // we use a heuristic to maximize the strip length
+- unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]);
+- unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2];
+-
+- // ordered removal from the buffer
+- memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0]));
+- buffer_size--;
+-
+- // update vertex valences for strip start heuristic
+- valence[a]--;
+- valence[b]--;
+- valence[c]--;
+-
+- // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration
+- int ea = findStripNext(buffer, buffer_size, c, b);
+- int eb = findStripNext(buffer, buffer_size, a, c);
+- int ec = findStripNext(buffer, buffer_size, b, a);
+-
+- // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest
+- // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear
+- // reasons - slightly improves the stripification efficiency
+- int mine = INT_MAX;
+- mine = (ea >= 0 && mine > ea) ? ea : mine;
+- mine = (eb >= 0 && mine > eb) ? eb : mine;
+- mine = (ec >= 0 && mine > ec) ? ec : mine;
+-
+- if (ea == mine)
+- {
+- // keep abc
+- next = ea;
+- }
+- else if (eb == mine)
+- {
+- // abc -> bca
+- unsigned int t = a;
+- a = b, b = c, c = t;
+-
+- next = eb;
+- }
+- else if (ec == mine)
+- {
+- // abc -> cab
+- unsigned int t = c;
+- c = b, b = a, a = t;
+-
+- next = ec;
+- }
+-
+- if (restart_index)
+- {
+- if (strip_size)
+- destination[strip_size++] = restart_index;
+-
+- destination[strip_size++] = a;
+- destination[strip_size++] = b;
+- destination[strip_size++] = c;
+-
+- // new strip always starts with the same edge winding
+- strip[0] = b;
+- strip[1] = c;
+- parity = 1;
+- }
+- else
+- {
+- if (strip_size)
+- {
+- // connect last strip using degenerate triangles
+- destination[strip_size++] = strip[1];
+- destination[strip_size++] = a;
+- }
+-
+- // note that we may need to flip the emitted triangle based on parity
+- // we always end up with outgoing edge "cb" in the end
+- unsigned int e0 = parity ? c : b;
+- unsigned int e1 = parity ? b : c;
+-
+- destination[strip_size++] = a;
+- destination[strip_size++] = e0;
+- destination[strip_size++] = e1;
+-
+- strip[0] = e0;
+- strip[1] = e1;
+- parity ^= 1;
+- }
+- }
+- }
+-
+- return strip_size;
+-}
+-
+-size_t meshopt_stripifyBound(size_t index_count)
+-{
+- assert(index_count % 3 == 0);
+-
+- // worst case without restarts is 2 degenerate indices and 3 indices per triangle
+- // worst case with restarts is 1 restart index and 3 indices per triangle
+- return (index_count / 3) * 5;
+-}
+-
+-size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index)
+-{
+- assert(destination != indices);
+-
+- size_t offset = 0;
+- size_t start = 0;
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- if (restart_index && indices[i] == restart_index)
+- {
+- start = i + 1;
+- }
+- else if (i - start >= 2)
+- {
+- unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i];
+-
+- // flip winding for odd triangles
+- if ((i - start) & 1)
+- {
+- unsigned int t = a;
+- a = b, b = t;
+- }
+-
+- // although we use restart indices, strip swaps still produce degenerate triangles, so skip them
+- if (a != b && a != c && b != c)
+- {
+- destination[offset + 0] = a;
+- destination[offset + 1] = b;
+- destination[offset + 2] = c;
+- offset += 3;
+- }
+- }
+- }
+-
+- return offset;
+-}
+-
+-size_t meshopt_unstripifyBound(size_t index_count)
+-{
+- assert(index_count == 0 || index_count >= 3);
+-
+- return (index_count == 0) ? 0 : (index_count - 2) * 3;
+-}
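The stripifier above can join strips either with restart indices or with degenerate triangles; a minimal sketch of producing a single restart-index strip from a triangle list (buffer names hypothetical):

#include <vector>
#include "meshoptimizer.h"

// Hypothetical helper: convert a triangle list into one strip joined by a restart index.
std::vector<unsigned int> toTriangleStrip(const std::vector<unsigned int>& indices,
                                          size_t vertex_count)
{
    const unsigned int restart_index = ~0u;

    std::vector<unsigned int> strip(meshopt_stripifyBound(indices.size()));
    size_t strip_size = meshopt_stripify(strip.data(), indices.data(), indices.size(),
                                         vertex_count, restart_index);

    strip.resize(strip_size);
    return strip;
}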
+diff --git a/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp b/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp
+deleted file mode 100644
+index 3682743..0000000
+--- a/src/3rdparty/meshoptimizer/src/vcacheanalyzer.cpp
++++ /dev/null
+@@ -1,73 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size)
+-{
+- assert(index_count % 3 == 0);
+- assert(cache_size >= 3);
+- assert(warp_size == 0 || warp_size >= 3);
+-
+- meshopt_Allocator allocator;
+-
+- meshopt_VertexCacheStatistics result = {};
+-
+- unsigned int warp_offset = 0;
+- unsigned int primgroup_offset = 0;
+-
+- unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+-
+- unsigned int timestamp = cache_size + 1;
+-
+- for (size_t i = 0; i < index_count; i += 3)
+- {
+- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+-
+- bool ac = (timestamp - cache_timestamps[a]) > cache_size;
+- bool bc = (timestamp - cache_timestamps[b]) > cache_size;
+- bool cc = (timestamp - cache_timestamps[c]) > cache_size;
+-
+- // flush cache if triangle doesn't fit into warp or into the primitive buffer
+- if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size))
+- {
+- result.warps_executed += warp_offset > 0;
+-
+- warp_offset = 0;
+- primgroup_offset = 0;
+-
+- // reset cache
+- timestamp += cache_size + 1;
+- }
+-
+- // update cache and add vertices to warp
+- for (int j = 0; j < 3; ++j)
+- {
+- unsigned int index = indices[i + j];
+-
+- if (timestamp - cache_timestamps[index] > cache_size)
+- {
+- cache_timestamps[index] = timestamp++;
+- result.vertices_transformed++;
+- warp_offset++;
+- }
+- }
+-
+- primgroup_offset++;
+- }
+-
+- size_t unique_vertex_count = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- unique_vertex_count += cache_timestamps[i] > 0;
+-
+- result.warps_executed += warp_offset > 0;
+-
+- result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3);
+- result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count);
+-
+- return result;
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp b/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp
+deleted file mode 100644
+index fb8ade4..0000000
+--- a/src/3rdparty/meshoptimizer/src/vcacheoptimizer.cpp
++++ /dev/null
+@@ -1,473 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-// This work is based on:
+-// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
+-// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
+-namespace meshopt
+-{
+-
+-const size_t kCacheSizeMax = 16;
+-const size_t kValenceMax = 8;
+-
+-struct VertexScoreTable
+-{
+- float cache[1 + kCacheSizeMax];
+- float live[1 + kValenceMax];
+-};
+-
+-// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
+-static const VertexScoreTable kVertexScoreTable = {
+- {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
+- {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
+-};
+-
+-// Tuned to minimize the encoded index buffer size
+-static const VertexScoreTable kVertexScoreTableStrip = {
+- {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
+- {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
+-};
+-
+-struct TriangleAdjacency
+-{
+- unsigned int* counts;
+- unsigned int* offsets;
+- unsigned int* data;
+-};
+-
+-static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+-{
+- size_t face_count = index_count / 3;
+-
+- // allocate arrays
+- adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+- adjacency.data = allocator.allocate<unsigned int>(index_count);
+-
+- // fill triangle counts
+- memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+-
+- for (size_t i = 0; i < index_count; ++i)
+- {
+- assert(indices[i] < vertex_count);
+-
+- adjacency.counts[indices[i]]++;
+- }
+-
+- // fill offset table
+- unsigned int offset = 0;
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- adjacency.offsets[i] = offset;
+- offset += adjacency.counts[i];
+- }
+-
+- assert(offset == index_count);
+-
+- // fill triangle data
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+-
+- adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+- adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+- adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+- }
+-
+- // fix offsets that have been disturbed by the previous pass
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- assert(adjacency.offsets[i] >= adjacency.counts[i]);
+-
+- adjacency.offsets[i] -= adjacency.counts[i];
+- }
+-}
+-
+-static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
+-{
+- // check dead-end stack
+- while (dead_end_top)
+- {
+- unsigned int vertex = dead_end[--dead_end_top];
+-
+- if (live_triangles[vertex] > 0)
+- return vertex;
+- }
+-
+- // input order
+- while (input_cursor < vertex_count)
+- {
+- if (live_triangles[input_cursor] > 0)
+- return input_cursor;
+-
+- ++input_cursor;
+- }
+-
+- return ~0u;
+-}
+-
+-static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
+-{
+- unsigned int best_candidate = ~0u;
+- int best_priority = -1;
+-
+- for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
+- {
+- unsigned int vertex = *next_candidate;
+-
+- // otherwise we don't need to process it
+- if (live_triangles[vertex] > 0)
+- {
+- int priority = 0;
+-
+- // will it be in cache after fanning?
+- if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
+- {
+- priority = timestamp - cache_timestamps[vertex]; // position in cache
+- }
+-
+- if (priority > best_priority)
+- {
+- best_candidate = vertex;
+- best_priority = priority;
+- }
+- }
+- }
+-
+- return best_candidate;
+-}
+-
+-static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
+-{
+- assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
+-
+- unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
+-
+- return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
+-}
+-
+-static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
+-{
+- // input order
+- while (input_cursor < face_count)
+- {
+- if (!emitted_flags[input_cursor])
+- return input_cursor;
+-
+- ++input_cursor;
+- }
+-
+- return ~0u;
+-}
+-
+-} // namespace meshopt
+-
+-void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+-
+- meshopt_Allocator allocator;
+-
+- // guard for empty meshes
+- if (index_count == 0 || vertex_count == 0)
+- return;
+-
+- // support in-place optimization
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- unsigned int cache_size = 16;
+- assert(cache_size <= kCacheSizeMax);
+-
+- size_t face_count = index_count / 3;
+-
+- // build adjacency information
+- TriangleAdjacency adjacency = {};
+- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+-
+- // live triangle counts
+- unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+-
+- // emitted flags
+- unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+- memset(emitted_flags, 0, face_count);
+-
+- // compute initial vertex scores
+- float* vertex_scores = allocator.allocate<float>(vertex_count);
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
+-
+- // compute triangle scores
+- float* triangle_scores = allocator.allocate<float>(face_count);
+-
+- for (size_t i = 0; i < face_count; ++i)
+- {
+- unsigned int a = indices[i * 3 + 0];
+- unsigned int b = indices[i * 3 + 1];
+- unsigned int c = indices[i * 3 + 2];
+-
+- triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
+- }
+-
+- unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
+- unsigned int* cache = cache_holder;
+- unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
+- size_t cache_count = 0;
+-
+- unsigned int current_triangle = 0;
+- unsigned int input_cursor = 1;
+-
+- unsigned int output_triangle = 0;
+-
+- while (current_triangle != ~0u)
+- {
+- assert(output_triangle < face_count);
+-
+- unsigned int a = indices[current_triangle * 3 + 0];
+- unsigned int b = indices[current_triangle * 3 + 1];
+- unsigned int c = indices[current_triangle * 3 + 2];
+-
+- // output indices
+- destination[output_triangle * 3 + 0] = a;
+- destination[output_triangle * 3 + 1] = b;
+- destination[output_triangle * 3 + 2] = c;
+- output_triangle++;
+-
+- // update emitted flags
+- emitted_flags[current_triangle] = true;
+- triangle_scores[current_triangle] = 0;
+-
+- // new triangle
+- size_t cache_write = 0;
+- cache_new[cache_write++] = a;
+- cache_new[cache_write++] = b;
+- cache_new[cache_write++] = c;
+-
+- // old triangles
+- for (size_t i = 0; i < cache_count; ++i)
+- {
+- unsigned int index = cache[i];
+-
+- if (index != a && index != b && index != c)
+- {
+- cache_new[cache_write++] = index;
+- }
+- }
+-
+- unsigned int* cache_temp = cache;
+- cache = cache_new, cache_new = cache_temp;
+- cache_count = cache_write > cache_size ? cache_size : cache_write;
+-
+- // update live triangle counts
+- live_triangles[a]--;
+- live_triangles[b]--;
+- live_triangles[c]--;
+-
+- // remove emitted triangle from adjacency data
+- // this makes sure that we spend less time traversing these lists on subsequent iterations
+- for (size_t k = 0; k < 3; ++k)
+- {
+- unsigned int index = indices[current_triangle * 3 + k];
+-
+- unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+- size_t neighbours_size = adjacency.counts[index];
+-
+- for (size_t i = 0; i < neighbours_size; ++i)
+- {
+- unsigned int tri = neighbours[i];
+-
+- if (tri == current_triangle)
+- {
+- neighbours[i] = neighbours[neighbours_size - 1];
+- adjacency.counts[index]--;
+- break;
+- }
+- }
+- }
+-
+- unsigned int best_triangle = ~0u;
+- float best_score = 0;
+-
+- // update cache positions, vertex scores and triangle scores, and find next best triangle
+- for (size_t i = 0; i < cache_write; ++i)
+- {
+- unsigned int index = cache[i];
+-
+- int cache_position = i >= cache_size ? -1 : int(i);
+-
+- // update vertex score
+- float score = vertexScore(table, cache_position, live_triangles[index]);
+- float score_diff = score - vertex_scores[index];
+-
+- vertex_scores[index] = score;
+-
+- // update scores of vertex triangles
+- const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
+- const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
+-
+- for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+- {
+- unsigned int tri = *it;
+- assert(!emitted_flags[tri]);
+-
+- float tri_score = triangle_scores[tri] + score_diff;
+- assert(tri_score > 0);
+-
+- if (best_score < tri_score)
+- {
+- best_triangle = tri;
+- best_score = tri_score;
+- }
+-
+- triangle_scores[tri] = tri_score;
+- }
+- }
+-
+- // step through input triangles in order if we hit a dead-end
+- current_triangle = best_triangle;
+-
+- if (current_triangle == ~0u)
+- {
+- current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
+- }
+- }
+-
+- assert(input_cursor == face_count);
+- assert(output_triangle == face_count);
+-}
+-
+-void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
+-}
+-
+-void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
+-{
+- meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
+-}
+-
+-void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
+-{
+- using namespace meshopt;
+-
+- assert(index_count % 3 == 0);
+- assert(cache_size >= 3);
+-
+- meshopt_Allocator allocator;
+-
+- // guard for empty meshes
+- if (index_count == 0 || vertex_count == 0)
+- return;
+-
+- // support in-place optimization
+- if (destination == indices)
+- {
+- unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+- memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+- indices = indices_copy;
+- }
+-
+- size_t face_count = index_count / 3;
+-
+- // build adjacency information
+- TriangleAdjacency adjacency = {};
+- buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+-
+- // live triangle counts
+- unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+- memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+-
+- // cache time stamps
+- unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
+- memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
+-
+- // dead-end stack
+- unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
+- unsigned int dead_end_top = 0;
+-
+- // emitted flags
+- unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+- memset(emitted_flags, 0, face_count);
+-
+- unsigned int current_vertex = 0;
+-
+- unsigned int timestamp = cache_size + 1;
+- unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
+-
+- unsigned int output_triangle = 0;
+-
+- while (current_vertex != ~0u)
+- {
+- const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
+-
+- // emit all vertex neighbours
+- const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
+- const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
+-
+- for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
+- {
+- unsigned int triangle = *it;
+-
+- if (!emitted_flags[triangle])
+- {
+- unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+-
+- // output indices
+- destination[output_triangle * 3 + 0] = a;
+- destination[output_triangle * 3 + 1] = b;
+- destination[output_triangle * 3 + 2] = c;
+- output_triangle++;
+-
+- // update dead-end stack
+- dead_end[dead_end_top + 0] = a;
+- dead_end[dead_end_top + 1] = b;
+- dead_end[dead_end_top + 2] = c;
+- dead_end_top += 3;
+-
+- // update live triangle counts
+- live_triangles[a]--;
+- live_triangles[b]--;
+- live_triangles[c]--;
+-
+- // update cache info
+- // if vertex is not in cache, put it in cache
+- if (timestamp - cache_timestamps[a] > cache_size)
+- cache_timestamps[a] = timestamp++;
+-
+- if (timestamp - cache_timestamps[b] > cache_size)
+- cache_timestamps[b] = timestamp++;
+-
+- if (timestamp - cache_timestamps[c] > cache_size)
+- cache_timestamps[c] = timestamp++;
+-
+- // update emitted flags
+- emitted_flags[triangle] = true;
+- }
+- }
+-
+- // next candidates are the ones we pushed to dead-end stack just now
+- const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
+-
+- // get next vertex
+- current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
+-
+- if (current_vertex == ~0u)
+- {
+- current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
+- }
+- }
+-
+- assert(output_triangle == face_count);
+-}
+diff --git a/src/3rdparty/meshoptimizer/src/vertexcodec.cpp b/src/3rdparty/meshoptimizer/src/vertexcodec.cpp
+deleted file mode 100644
+index 7925ea8..0000000
+--- a/src/3rdparty/meshoptimizer/src/vertexcodec.cpp
++++ /dev/null
+@@ -1,1195 +0,0 @@
+-// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+-#include "meshoptimizer.h"
+-
+-#include <assert.h>
+-#include <string.h>
+-
+-// The block below auto-detects SIMD ISA that can be used on the target platform
+-#ifndef MESHOPTIMIZER_NO_SIMD
+-
+-// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings
+-#if defined(__AVX__) || defined(__SSSE3__)
+-#define SIMD_SSE
+-#endif
+-
+-// An experimental implementation using AVX512 instructions; it's only enabled when AVX512 is enabled through compiler settings
+-#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__)
+-#undef SIMD_SSE
+-#define SIMD_AVX
+-#endif
+-
+-// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback
+-#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
+-#define SIMD_SSE
+-#define SIMD_FALLBACK
+-#endif
+-
+-// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback
+-#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__))
+-#define SIMD_SSE
+-#define SIMD_FALLBACK
+-#define SIMD_TARGET __attribute__((target("ssse3")))
+-#endif
+-
+-// GCC/clang define these when NEON support is available
+-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
+-#define SIMD_NEON
+-#endif
+-
+-// On MSVC, we assume that ARM builds always target NEON-capable devices
+-#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+-#define SIMD_NEON
+-#endif
+-
+-// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD
+-#if defined(__wasm_simd128__)
+-#define SIMD_WASM
+-#endif
+-
+-#ifndef SIMD_TARGET
+-#define SIMD_TARGET
+-#endif
+-
+-#endif // !MESHOPTIMIZER_NO_SIMD
+-
+-#ifdef SIMD_SSE
+-#include <tmmintrin.h>
+-#endif
+-
+-#if defined(SIMD_SSE) && defined(SIMD_FALLBACK)
+-#ifdef _MSC_VER
+-#include <intrin.h> // __cpuid
+-#else
+-#include <cpuid.h> // __cpuid
+-#endif
+-#endif
+-
+-#ifdef SIMD_AVX
+-#include <immintrin.h>
+-#endif
+-
+-#ifdef SIMD_NEON
+-#if defined(_MSC_VER) && defined(_M_ARM64)
+-#include <arm64_neon.h>
+-#else
+-#include <arm_neon.h>
+-#endif
+-#endif
+-
+-#ifdef SIMD_WASM
+-#undef __DEPRECATED
+-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+-#include <wasm_simd128.h>
+-#endif
+-
+-#ifdef SIMD_WASM
+-#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i)
+-#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23)
+-#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31)
+-#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11)
+-#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15)
+-#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2)
+-#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3)
+-#endif
+-
+-namespace meshopt
+-{
+-
+-const unsigned char kVertexHeader = 0xa0;
+-
+-static int gEncodeVertexVersion = 0;
+-
+-const size_t kVertexBlockSizeBytes = 8192;
+-const size_t kVertexBlockMaxSize = 256;
+-const size_t kByteGroupSize = 16;
+-const size_t kByteGroupDecodeLimit = 24;
+-const size_t kTailMaxSize = 32;
+-
+-static size_t getVertexBlockSize(size_t vertex_size)
+-{
+- // make sure the entire block fits into the scratch buffer
+- size_t result = kVertexBlockSizeBytes / vertex_size;
+-
+- // align to byte group size; we encode each byte as a byte group
+- // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size
+- result &= ~(kByteGroupSize - 1);
+-
+- return (result < kVertexBlockMaxSize) ? result : kVertexBlockMaxSize;
+-}
+-
+-inline unsigned char zigzag8(unsigned char v)
+-{
+- return ((signed char)(v) >> 7) ^ (v << 1);
+-}
+-
+-inline unsigned char unzigzag8(unsigned char v)
+-{
+- return -(v & 1) ^ (v >> 1);
+-}
+-
+-static bool encodeBytesGroupZero(const unsigned char* buffer)
+-{
+- for (size_t i = 0; i < kByteGroupSize; ++i)
+- if (buffer[i])
+- return false;
+-
+- return true;
+-}
+-
+-static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits)
+-{
+- assert(bits >= 1 && bits <= 8);
+-
+- if (bits == 1)
+- return encodeBytesGroupZero(buffer) ? 0 : size_t(-1);
+-
+- if (bits == 8)
+- return kByteGroupSize;
+-
+- size_t result = kByteGroupSize * bits / 8;
+-
+- unsigned char sentinel = (1 << bits) - 1;
+-
+- for (size_t i = 0; i < kByteGroupSize; ++i)
+- result += buffer[i] >= sentinel;
+-
+- return result;
+-}
+-
+-static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits)
+-{
+- assert(bits >= 1 && bits <= 8);
+-
+- if (bits == 1)
+- return data;
+-
+- if (bits == 8)
+- {
+- memcpy(data, buffer, kByteGroupSize);
+- return data + kByteGroupSize;
+- }
+-
+- size_t byte_size = 8 / bits;
+- assert(kByteGroupSize % byte_size == 0);
+-
+- // fixed portion: bits bits for each value
+- // variable portion: full byte for each out-of-range value (using 1...1 as sentinel)
+- unsigned char sentinel = (1 << bits) - 1;
+-
+- for (size_t i = 0; i < kByteGroupSize; i += byte_size)
+- {
+- unsigned char byte = 0;
+-
+- for (size_t k = 0; k < byte_size; ++k)
+- {
+- unsigned char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k];
+-
+- byte <<= bits;
+- byte |= enc;
+- }
+-
+- *data++ = byte;
+- }
+-
+- for (size_t i = 0; i < kByteGroupSize; ++i)
+- {
+- if (buffer[i] >= sentinel)
+- {
+- *data++ = buffer[i];
+- }
+- }
+-
+- return data;
+-}
+-
+-static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size)
+-{
+- assert(buffer_size % kByteGroupSize == 0);
+-
+- unsigned char* header = data;
+-
+- // round number of groups to 4 to get number of header bytes
+- size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
+-
+- if (size_t(data_end - data) < header_size)
+- return 0;
+-
+- data += header_size;
+-
+- memset(header, 0, header_size);
+-
+- for (size_t i = 0; i < buffer_size; i += kByteGroupSize)
+- {
+- if (size_t(data_end - data) < kByteGroupDecodeLimit)
+- return 0;
+-
+- int best_bits = 8;
+- size_t best_size = encodeBytesGroupMeasure(buffer + i, 8);
+-
+- for (int bits = 1; bits < 8; bits *= 2)
+- {
+- size_t size = encodeBytesGroupMeasure(buffer + i, bits);
+-
+- if (size < best_size)
+- {
+- best_bits = bits;
+- best_size = size;
+- }
+- }
+-
+- int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 2 : 3;
+- assert((1 << bitslog2) == best_bits);
+-
+- size_t header_offset = i / kByteGroupSize;
+-
+- header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2);
+-
+- unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits);
+-
+- assert(data + best_size == next);
+- data = next;
+- }
+-
+- return data;
+-}
+-
+-static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+-{
+- assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+-
+- unsigned char buffer[kVertexBlockMaxSize];
+- assert(sizeof(buffer) % kByteGroupSize == 0);
+-
+- // we sometimes encode elements we didn't fill when rounding to kByteGroupSize
+- memset(buffer, 0, sizeof(buffer));
+-
+- for (size_t k = 0; k < vertex_size; ++k)
+- {
+- size_t vertex_offset = k;
+-
+- unsigned char p = last_vertex[k];
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- buffer[i] = zigzag8(vertex_data[vertex_offset] - p);
+-
+- p = vertex_data[vertex_offset];
+-
+- vertex_offset += vertex_size;
+- }
+-
+- data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1));
+- if (!data)
+- return 0;
+- }
+-
+- memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size);
+-
+- return data;
+-}
+-
+-#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX))
+-static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2)
+-{
+-#define READ() byte = *data++
+-#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1)
+-
+- unsigned char byte, enc, encv;
+- const unsigned char* data_var;
+-
+- switch (bitslog2)
+- {
+- case 0:
+- memset(buffer, 0, kByteGroupSize);
+- return data;
+- case 1:
+- data_var = data + 4;
+-
+- // 4 groups with 4 2-bit values in each byte
+- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+- READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2);
+-
+- return data_var;
+- case 2:
+- data_var = data + 8;
+-
+- // 8 groups with 2 4-bit values in each byte
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+- READ(), NEXT(4), NEXT(4);
+-
+- return data_var;
+- case 3:
+- memcpy(buffer, data, kByteGroupSize);
+- return data + kByteGroupSize;
+- default:
+- assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
+- return data;
+- }
+-
+-#undef READ
+-#undef NEXT
+-}
+-
+-static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size)
+-{
+- assert(buffer_size % kByteGroupSize == 0);
+-
+- const unsigned char* header = data;
+-
+- // round number of groups to 4 to get number of header bytes
+- size_t header_size = (buffer_size / kByteGroupSize + 3) / 4;
+-
+- if (size_t(data_end - data) < header_size)
+- return 0;
+-
+- data += header_size;
+-
+- for (size_t i = 0; i < buffer_size; i += kByteGroupSize)
+- {
+- if (size_t(data_end - data) < kByteGroupDecodeLimit)
+- return 0;
+-
+- size_t header_offset = i / kByteGroupSize;
+-
+- int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3;
+-
+- data = decodeBytesGroup(data, buffer + i, bitslog2);
+- }
+-
+- return data;
+-}
+-
+-static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256])
+-{
+- assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize);
+-
+- unsigned char buffer[kVertexBlockMaxSize];
+- unsigned char transposed[kVertexBlockSizeBytes];
+-
+- size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1);
+-
+- for (size_t k = 0; k < vertex_size; ++k)
+- {
+- data = decodeBytes(data, data_end, buffer, vertex_count_aligned);
+- if (!data)
+- return 0;
+-
+- size_t vertex_offset = k;
+-
+- unsigned char p = last_vertex[k];
+-
+- for (size_t i = 0; i < vertex_count; ++i)
+- {
+- unsigned char v = unzigzag8(buffer[i]) + p;
+-
+- transposed[vertex_offset] = v;
+- p = v;
+-
+- vertex_offset += vertex_size;
+- }
+- }
+-
+- memcpy(vertex_data, transposed, vertex_count * vertex_size);
+-
+- memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size);
+-
+- return data;
+-}
+-#endif
+-
+-#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
+-static unsigned char kDecodeBytesGroupShuffle[256][8];
+-static unsigned char kDecodeBytesGroupCount[256];
+-
+-#ifdef __wasm__
+-__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop!
+-#endif
+-static bool
+-decodeBytesGroupBuildTables()
+-{
+- for (int mask = 0; mask < 256; ++mask)
+- {
+- unsigned char shuffle[8];
+- unsigned char count = 0;
+-
+- for (int i = 0; i < 8; ++i)
+- {
+- int maski = (mask >> i) & 1;
+- shuffle[i] = maski ? count : 0x80;
+- count += (unsigned char)(maski);
+- }
+-
+- memcpy(kDecodeBytesGroupShuffle[mask], shuffle, 8);
+- kDecodeBytesGroupCount[mask] = count;
+- }
+-
+- return true;
+-}
+-
+-static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables();
+-#endif
+-
+-#ifdef SIMD_SSE
+-SIMD_TARGET
+-static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1)
+-{
+- __m128i sm0 = _mm_loadl_epi64(reinterpret_cast