From 47372de6a80688354696c4c7d791135076a6336d Mon Sep 17 00:00:00 2001
From: Rye Mutt <rye@alchemyviewer.org>
Date: Fri, 19 Apr 2024 07:34:04 -0400
Subject: [PATCH] Update mikktspace code to use the blender forks improvements

---
 autobuild.xml                                 |    4 +-
 .../mikktspace/mikk_atomic_hash_set.hh        |  184 ++
 mikktspace/include/mikktspace/mikk_float3.hh  |  120 ++
 mikktspace/include/mikktspace/mikk_util.hh    |  158 ++
 mikktspace/include/mikktspace/mikktspace.c    | 1899 -----------------
 mikktspace/include/mikktspace/mikktspace.h    |  145 --
 mikktspace/include/mikktspace/mikktspace.hh   |  839 ++++++++
 mikktspace/mikktspace.txt                     |  220 +-
 8 files changed, 1508 insertions(+), 2061 deletions(-)
 create mode 100644 mikktspace/include/mikktspace/mikk_atomic_hash_set.hh
 create mode 100644 mikktspace/include/mikktspace/mikk_float3.hh
 create mode 100644 mikktspace/include/mikktspace/mikk_util.hh
 delete mode 100644 mikktspace/include/mikktspace/mikktspace.c
 delete mode 100644 mikktspace/include/mikktspace/mikktspace.h
 create mode 100644 mikktspace/include/mikktspace/mikktspace.hh
diff --git a/autobuild.xml b/autobuild.xml
index 4624a68..1519121 100644
--- a/autobuild.xml
+++ b/autobuild.xml
@@ -6,11 +6,11 @@
       <key>canonical_repo</key>
       <string>https://github.com/secondlife/3p-mikktspace</string>
       <key>copyright</key>
-      <string>Copyright (C) 2011 by Morten S. Mikkelsen</string>
+      <string>Copyright (C) 2011 by Morten S. Mikkelsen, Copyright (C) 2022 Blender Authors</string>
       <key>description</key>
       <string>Mikktspace Tangent Generator</string>
       <key>license</key>
-      <string>Copyright (C) 2011 by Morten S. Mikkelsen</string>
+      <string>Apache 2.0</string>
       <key>license_file</key>
       <string>mikktspace.txt</string>
       <key>name</key>
diff --git a/mikktspace/include/mikktspace/mikk_atomic_hash_set.hh b/mikktspace/include/mikktspace/mikk_atomic_hash_set.hh
new file mode 100644
index 0000000..4703159
--- /dev/null
+++ b/mikktspace/include/mikktspace/mikk_atomic_hash_set.hh
@@ -0,0 +1,184 @@
+/* SPDX-FileCopyrightText: 2012-2021 Meta Platforms, Inc. and affiliates.
+ * SPDX-FileCopyrightText: 2022 Blender Authors
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+/* Simplified version of Folly's AtomicHashArray
+ * (https://github.com/facebook/folly/blob/main/folly/AtomicHashArray.h).
+ *
+ * Notable changes:
+ * - Standalone and header-only.
+ * - Behaves like a set, not like a map: There's no value type anymore, only keys.
+ * - Capacity check logic have been removed, the code assumes you know the required size in
+ * advance.
+ * - Custom allocator support has been removed.
+ * - Erase has been removed.
+ * - Find has been removed.
+ */
+
+/** \file
+ * \ingroup mikktspace
+ */
+
+#pragma once
+
+#ifdef _MSC_VER
+#  include <intrin.h>
+#endif
+
+#include <atomic>
+#include <type_traits>
+
+namespace mikk {
+
+struct AtomicHashSetLinearProbeFcn {
+  inline size_t operator()(size_t idx, size_t /* numProbes */, size_t capacity) const
+  {
+    idx += 1;  // linear probing
+
+    // Avoid modulus because it's slow
+    return (idx < capacity) ? idx : (idx - capacity);
+  }
+};
+
+struct AtomicHashSetQuadraticProbeFcn {
+  inline size_t operator()(size_t idx, size_t numProbes, size_t capacity) const
+  {
+    idx += numProbes;  // quadratic probing
+
+    // Avoid modulus because it's slow
+    return (idx < capacity) ? idx : (idx - capacity);
+  }
+};
+
+template<class KeyT,
+         bool isAtomic,
+         class KeyHash = std::hash<KeyT>,
+         class KeyEqual = std::equal_to<KeyT>,
+         class ProbeFcn = AtomicHashSetLinearProbeFcn>
+class AtomicHashSet {
+  static_assert((std::is_convertible<KeyT, int32_t>::value ||
+                 std::is_convertible<KeyT, int64_t>::value ||
+                 std::is_convertible<KeyT, const void *>::value),
+                "You are trying to use AtomicHashSet with disallowed key "
+                "types.  You must use atomically compare-and-swappable integer "
+                "keys, or a different container class.");
+
+ public:
+  const size_t capacity_;
+  const KeyT kEmptyKey_;
+
+  KeyHash hasher_;
+  KeyEqual equalityChecker_;
+
+ private:
+  size_t kAnchorMask_;
+  /* When using a single thread, we can avoid overhead by not bothering with atomic cells. */
+  typedef typename std::conditional<isAtomic, std::atomic<KeyT>, KeyT>::type cell_type;
+  std::vector<cell_type> cells_;
+
+ public:
+  struct Config {
+    KeyT emptyKey;
+    double maxLoadFactor;
+    double growthFactor;
+    size_t capacity;  // if positive, overrides maxLoadFactor
+
+    //  Cannot have constexpr ctor because some compilers rightly complain.
+    Config() : emptyKey((KeyT)-1), maxLoadFactor(0.8), growthFactor(-1), capacity(0) {}
+  };
+
+  /* Instead of a mess of arguments, we take a max size and a Config struct to
+   * simulate named ctor parameters.  The Config struct has sensible defaults
+   * for everything, but is overloaded - if you specify a positive capacity,
+   * that will be used directly instead of computing it based on maxLoadFactor.
+   */
+  AtomicHashSet(size_t maxSize,
+                KeyHash hasher = KeyHash(),
+                KeyEqual equalityChecker = KeyEqual(),
+                const Config &c = Config())
+      : capacity_(size_t(double(maxSize) / c.maxLoadFactor) + 1),
+        kEmptyKey_(c.emptyKey),
+        hasher_(hasher),
+        equalityChecker_(equalityChecker),
+        cells_(capacity_)
+  {
+    /* Get next power of two. Could be done more effiently with builtin_clz, but this is not
+     * performance-critical. */
+    kAnchorMask_ = 1;
+    while (kAnchorMask_ < capacity_) {
+      kAnchorMask_ *= 2;
+    }
+    /* Get mask for lower bits. */
+    kAnchorMask_ -= 1;
+
+    /* Not great, but the best we can do to support both atomic and non-atomic cells
+     * since std::atomic doesn't have a copy constructor so cells_(capacity_, kEmptyKey_)
+     * in the initializer list won't work. */
+    std::fill((KeyT *)cells_.data(), (KeyT *)cells_.data() + capacity_, kEmptyKey_);
+  }
+
+  AtomicHashSet(const AtomicHashSet &) = delete;
+  AtomicHashSet &operator=(const AtomicHashSet &) = delete;
+
+  ~AtomicHashSet() = default;
+
+  /* Sequential specialization. */
+  bool tryUpdateCell(KeyT *cell, KeyT &existingKey, KeyT newKey)
+  {
+    if (*cell == existingKey) {
+      *cell = newKey;
+      return true;
+    }
+    existingKey = *cell;
+    return false;
+  }
+
+  /* Atomic specialization. */
+  bool tryUpdateCell(std::atomic<KeyT> *cell, KeyT &existingKey, KeyT newKey)
+  {
+    return cell->compare_exchange_strong(existingKey, newKey, std::memory_order_acq_rel);
+  }
+
+  std::pair<KeyT, bool> emplace(KeyT key)
+  {
+    size_t idx = keyToAnchorIdx(key);
+    size_t numProbes = 0;
+    for (;;) {
+      cell_type *cell = &cells_[idx];
+      KeyT existingKey = kEmptyKey_;
+      /* Try to replace empty cell with our key. */
+      if (tryUpdateCell(cell, existingKey, key)) {
+        /* Cell was empty, we're done. */
+        return std::make_pair(key, true);
+      }
+
+      /* Cell was not empty, check if the existing key is equal. */
+      if (equalityChecker_(existingKey, key)) {
+        /* Found equal element, we're done. */
+        return std::make_pair(existingKey, false);
+      }
+
+      /* Continue to next cell according to probe strategy. */
+      ++numProbes;
+      if ((numProbes >= capacity_)) {
+        // probed every cell...fail
+        assert(false);
+        return std::make_pair(kEmptyKey_, false);
+      }
+
+      idx = ProbeFcn()(idx, numProbes, capacity_);
+    }
+  }
+
+ private:
+  inline size_t keyToAnchorIdx(const KeyT k) const
+  {
+    const size_t hashVal = hasher_(k);
+    const size_t probe = hashVal & kAnchorMask_;
+    return (probe < capacity_) ? probe : hashVal % capacity_;
+  }
+
+};  // AtomicHashSet
+
+}  // namespace mikk
diff --git a/mikktspace/include/mikktspace/mikk_float3.hh b/mikktspace/include/mikktspace/mikk_float3.hh
new file mode 100644
index 0000000..b0b34e9
--- /dev/null
+++ b/mikktspace/include/mikktspace/mikk_float3.hh
@@ -0,0 +1,120 @@
+/* SPDX-FileCopyrightText: 2020-2023 Blender Authors
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+/** \file
+ * \ingroup mikktspace
+ */
+
+#pragma once
+
+#include <cmath>
+
+namespace mikk {
+
+struct float3 {
+  float x, y, z;
+
+  float3() = default;
+
+  float3(const float *ptr) : x{ptr[0]}, y{ptr[1]}, z{ptr[2]} {}
+
+  float3(const float (*ptr)[3]) : float3((const float *)ptr) {}
+
+  explicit float3(float value) : x(value), y(value), z(value) {}
+
+  explicit float3(int value) : x((float)value), y((float)value), z((float)value) {}
+
+  float3(float x_, float y_, float z_) : x{x_}, y{y_}, z{z_} {}
+
+  static float3 zero()
+  {
+    return {0.0f, 0.0f, 0.0f};
+  }
+
+  friend float3 operator*(const float3 &a, float b)
+  {
+    return {a.x * b, a.y * b, a.z * b};
+  }
+
+  friend float3 operator*(float b, const float3 &a)
+  {
+    return {a.x * b, a.y * b, a.z * b};
+  }
+
+  friend float3 operator-(const float3 &a, const float3 &b)
+  {
+    return {a.x - b.x, a.y - b.y, a.z - b.z};
+  }
+
+  friend float3 operator-(const float3 &a)
+  {
+    return {-a.x, -a.y, -a.z};
+  }
+
+  friend bool operator==(const float3 &a, const float3 &b)
+  {
+    return a.x == b.x && a.y == b.y && a.z == b.z;
+  }
+
+  float length_squared() const
+  {
+    return x * x + y * y + z * z;
+  }
+
+  float length() const
+  {
+    return sqrt(length_squared());
+  }
+
+  static float distance(const float3 &a, const float3 &b)
+  {
+    return (a - b).length();
+  }
+
+  friend float3 operator+(const float3 &a, const float3 &b)
+  {
+    return {a.x + b.x, a.y + b.y, a.z + b.z};
+  }
+
+  void operator+=(const float3 &b)
+  {
+    this->x += b.x;
+    this->y += b.y;
+    this->z += b.z;
+  }
+
+  friend float3 operator*(const float3 &a, const float3 &b)
+  {
+    return {a.x * b.x, a.y * b.y, a.z * b.z};
+  }
+
+  float3 normalize() const
+  {
+    const float len = length();
+    return (len != 0.0f) ? *this * (1.0f / len) : *this;
+  }
+
+  float reduce_add() const
+  {
+    return x + y + z;
+  }
+};
+
+inline float dot(const float3 &a, const float3 &b)
+{
+  return a.x * b.x + a.y * b.y + a.z * b.z;
+}
+
+inline float distance(const float3 &a, const float3 &b)
+{
+  return float3::distance(a, b);
+}
+
+/* Projects v onto the surface with normal n. */
+inline float3 project(const float3 &n, const float3 &v)
+{
+  return (v - n * dot(n, v)).normalize();
+}
+
+}  // namespace mikk
diff --git a/mikktspace/include/mikktspace/mikk_util.hh b/mikktspace/include/mikktspace/mikk_util.hh
new file mode 100644
index 0000000..bff35a3
--- /dev/null
+++ b/mikktspace/include/mikktspace/mikk_util.hh
@@ -0,0 +1,158 @@
+/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+/** \file
+ * \ingroup mikktspace
+ */
+
+#pragma once
+
+#include <cassert>
+#include <cmath>
+
+#ifndef M_PI_F
+#  define M_PI_F (3.1415926535897932f) /* pi */
+#endif
+
+namespace mikk {
+
+inline bool not_zero(const float fX)
+{
+  return fabsf(fX) > FLT_MIN;
+}
+
+/* Helpers for (un)packing a 2-bit vertex index and a 30-bit face index to one integer. */
+static uint pack_index(const uint face, const uint vert)
+{
+  assert((vert & 0x3) == vert);
+  return (face << 2) | (vert & 0x3);
+}
+
+static void unpack_index(uint &face, uint &vert, const uint indexIn)
+{
+  vert = indexIn & 0x3;
+  face = indexIn >> 2;
+}
+
+/* From intern/cycles/util/math_fast.h */
+inline float fast_acosf(float x)
+{
+  const float f = fabsf(x);
+  /* clamp and crush denormals. */
+  const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
+  /* Based on http://www.pouet.net/topic.php?which=9132&page=2
+   * 85% accurate (ULP 0)
+   * Examined 2130706434 values of acos:
+   *   15.2000597 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // without "denormal crush"
+   * Examined 2130706434 values of acos:
+   *   15.2007108 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // with "denormal crush"
+   */
+  const float a = sqrtf(1.0f - m) *
+                  (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
+  return x < 0 ? M_PI_F - a : a;
+}
+
+static uint rotl(uint x, uint k)
+{
+  return (x << k) | (x >> (32 - k));
+}
+
+static uint hash_uint3(uint kx, uint ky, uint kz)
+{
+  uint a, b, c;
+  a = b = c = 0xdeadbeef + (2 << 2) + 13;
+
+  c += kz;
+  b += ky;
+  a += kx;
+
+  c = (c ^ b) - rotl(b, 14);
+  a = (a ^ c) - rotl(c, 11);
+  b = (b ^ a) - rotl(a, 25);
+  c = (c ^ b) - rotl(b, 16);
+
+  return c;
+}
+
+static uint hash_uint3_fast(const uint x, const uint y, const uint z)
+{
+  return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
+}
+
+static uint float_as_uint(const float v)
+{
+  return *((uint *)(&v));
+}
+
+static float uint_as_float(const uint v)
+{
+  return *((float *)(&v));
+}
+
+static uint hash_float3_fast(const float x, const float y, const float z)
+{
+  return hash_uint3_fast(float_as_uint(x), float_as_uint(y), float_as_uint(z));
+}
+
+static uint hash_float3x3(const float3 &x, const float3 &y, const float3 &z)
+{
+  return hash_uint3(hash_float3_fast(x.x, x.y, x.z),
+                    hash_float3_fast(y.x, y.y, y.z),
+                    hash_float3_fast(z.x, z.y, z.z));
+}
+
+template<typename T, typename KeyGetter>
+void radixsort(std::vector<T> &data, std::vector<T> &data2, KeyGetter getKey)
+{
+  typedef decltype(getKey(data[0])) key_t;
+  constexpr size_t datasize = sizeof(key_t);
+  static_assert(datasize % 2 == 0);
+  static_assert(std::is_integral<key_t>::value);
+
+  uint bins[datasize][257] = {{0}};
+
+  /* Count number of elements per bin. */
+  for (const T &item : data) {
+    key_t key = getKey(item);
+    for (uint pass = 0; pass < datasize; pass++)
+      bins[pass][((key >> (8 * pass)) & 0xff) + 1]++;
+  }
+
+  /* Compute prefix sum to find position of each bin in the sorted array. */
+  for (uint pass = 0; pass < datasize; pass++) {
+    for (uint i = 2; i < 256; i++) {
+      bins[pass][i] += bins[pass][i - 1];
+    }
+  }
+
+  int shift = 0;
+  for (uint pass = 0; pass < datasize; pass++, shift += 8) {
+    /* Insert the elements in their correct location based on their bin. */
+    for (const T &item : data) {
+      uint pos = bins[pass][(getKey(item) >> shift) & 0xff]++;
+      data2[pos] = item;
+    }
+
+    /* Swap arrays. */
+    std::swap(data, data2);
+  }
+}
+
+static void float_add_atomic(float *val, float add)
+{
+  /* Hacky, but atomic floats are only supported from C++20 onward.
+   * This works in practice since `std::atomic<uint32_t>` is really just an `uint32_t` in memory,
+   * so this cast lets us do a 32-bit CAS operation (which is used to build the atomic float
+   * operation) without needing any external libraries or compiler-specific builtins. */
+  std::atomic<uint32_t> *atomic_val = reinterpret_cast<std::atomic<uint32_t> *>(val);
+  for (;;) {
+    uint32_t old_v = atomic_val->load();
+    uint32_t new_v = float_as_uint(uint_as_float(old_v) + add);
+    if (atomic_val->compare_exchange_weak(old_v, new_v)) {
+      return;
+    }
+  }
+}
+
+}  // namespace mikk
diff --git a/mikktspace/include/mikktspace/mikktspace.c b/mikktspace/include/mikktspace/mikktspace.c
deleted file mode 100644
index 0342ae0..0000000
--- a/mikktspace/include/mikktspace/mikktspace.c
+++ /dev/null
@@ -1,1899 +0,0 @@
-/** \file mikktspace/mikktspace.c
- *  \ingroup mikktspace
- */
-/**
- *  Copyright (C) 2011 by Morten S. Mikkelsen
- *
- *  This software is provided 'as-is', without any express or implied
- *  warranty.  In no event will the authors be held liable for any damages
- *  arising from the use of this software.
- *
- *  Permission is granted to anyone to use this software for any purpose,
- *  including commercial applications, and to alter it and redistribute it
- *  freely, subject to the following restrictions:
- *
- *  1. The origin of this software must not be misrepresented; you must not
- *     claim that you wrote the original software. If you use this software
- *     in a product, an acknowledgment in the product documentation would be
- *     appreciated but is not required.
- *  2. Altered source versions must be plainly marked as such, and must not be
- *     misrepresented as being the original software.
- *  3. This notice may not be removed or altered from any source distribution.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-#include <float.h>
-#include <stdlib.h>
-
-#include "mikktspace.h"
-
-#define TFALSE		0
-#define TTRUE		1
-
-#ifndef M_PI
-#define M_PI	3.1415926535897932384626433832795
-#endif
-
-#define INTERNAL_RND_SORT_SEED		39871946
-
-// internal structure
-typedef struct {
-	float x, y, z;
-} SVec3;
-
-static tbool			veq( const SVec3 v1, const SVec3 v2 )
-{
-	return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z);
-}
-
-static SVec3		vadd( const SVec3 v1, const SVec3 v2 )
-{
-	SVec3 vRes;
-
-	vRes.x = v1.x + v2.x;
-	vRes.y = v1.y + v2.y;
-	vRes.z = v1.z + v2.z;
-
-	return vRes;
-}
-
-
-static SVec3		vsub( const SVec3 v1, const SVec3 v2 )
-{
-	SVec3 vRes;
-
-	vRes.x = v1.x - v2.x;
-	vRes.y = v1.y - v2.y;
-	vRes.z = v1.z - v2.z;
-
-	return vRes;
-}
-
-static SVec3		vscale(const float fS, const SVec3 v)
-{
-	SVec3 vRes;
-
-	vRes.x = fS * v.x;
-	vRes.y = fS * v.y;
-	vRes.z = fS * v.z;
-
-	return vRes;
-}
-
-static float			LengthSquared( const SVec3 v )
-{
-	return v.x*v.x + v.y*v.y + v.z*v.z;
-}
-
-static float			Length( const SVec3 v )
-{
-	return sqrtf(LengthSquared(v));
-}
-
-static SVec3		Normalize( const SVec3 v )
-{
-	return vscale(1 / Length(v), v);
-}
-
-static float		vdot( const SVec3 v1, const SVec3 v2)
-{
-	return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
-}
-
-
-static tbool NotZero(const float fX)
-{
-	// could possibly use FLT_EPSILON instead
-	return fabsf(fX) > FLT_MIN;
-}
-
-static tbool VNotZero(const SVec3 v)
-{
-	// might change this to an epsilon based test
-	return NotZero(v.x) || NotZero(v.y) || NotZero(v.z);
-}
-
-
-
-typedef struct {
-	int iNrFaces;
-	int * pTriMembers;
-} SSubGroup;
-
-typedef struct {
-	int iNrFaces;
-	int * pFaceIndices;
-	int iVertexRepresentitive;
-	tbool bOrientPreservering;
-} SGroup;
-
-// 
-#define MARK_DEGENERATE				1
-#define QUAD_ONE_DEGEN_TRI			2
-#define GROUP_WITH_ANY				4
-#define ORIENT_PRESERVING			8
-
-
-
-typedef struct {
-	int FaceNeighbors[3];
-	SGroup * AssignedGroup[3];
-	
-	// normalized first order face derivatives
-	SVec3 vOs, vOt;
-	float fMagS, fMagT;	// original magnitudes
-
-	// determines if the current and the next triangle are a quad.
-	int iOrgFaceNumber;
-	int iFlag, iTSpacesOffs;
-	unsigned char vert_num[4];
-} STriInfo;
-
-typedef struct {
-	SVec3 vOs;
-	float fMagS;
-	SVec3 vOt;
-	float fMagT;
-	int iCounter;	// this is to average back into quads.
-	tbool bOrient;
-} STSpace;
-
-static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
-static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
-static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
-static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn);
-static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[],
-                             const int iNrActiveGroups, const int piTriListIn[], const float fThresCos,
-                             const SMikkTSpaceContext * pContext);
-
-static int MakeIndex(const int iFace, const int iVert)
-{
-	assert(iVert>=0 && iVert<4 && iFace>=0);
-	return (iFace<<2) | (iVert&0x3);
-}
-
-static void IndexToData(int * piFace, int * piVert, const int iIndexIn)
-{
-	piVert[0] = iIndexIn&0x3;
-	piFace[0] = iIndexIn>>2;
-}
-
-static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1)
-{
-	STSpace ts_res;
-
-	// this if is important. Due to floating point precision
-	// averaging when ts0==ts1 will cause a slight difference
-	// which results in tangent space splits later on
-	if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT &&
-	   veq(pTS0->vOs,pTS1->vOs)	&& veq(pTS0->vOt, pTS1->vOt))
-	{
-		ts_res.fMagS = pTS0->fMagS;
-		ts_res.fMagT = pTS0->fMagT;
-		ts_res.vOs = pTS0->vOs;
-		ts_res.vOt = pTS0->vOt;
-	}
-	else
-	{
-		ts_res.fMagS = 0.5f*(pTS0->fMagS+pTS1->fMagS);
-		ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT);
-		ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs);
-		ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt);
-		if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs);
-		if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt);
-	}
-
-	return ts_res;
-}
-
-
-
-static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index);
-static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index);
-static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index);
-
-
-// degen triangles
-static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris);
-static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris);
-
-
-tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext)
-{
-	return genTangSpace(pContext, 180.0f);
-}
-
-tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold)
-{
-	// count nr_triangles
-	int * piTriListIn = NULL, * piGroupTrianglesBuffer = NULL;
-	STriInfo * pTriInfos = NULL;
-	SGroup * pGroups = NULL;
-	STSpace * psTspace = NULL;
-	int iNrTrianglesIn = 0, f=0, t=0, i=0;
-	int iNrTSPaces = 0, iTotTris = 0, iDegenTriangles = 0, iNrMaxGroups = 0;
-	int iNrActiveGroups = 0, index = 0;
-	const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext);
-	tbool bRes = TFALSE;
-	const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f);
-
-	// verify all call-backs have been set
-	if ( pContext->m_pInterface->m_getNumFaces==NULL ||
-		pContext->m_pInterface->m_getNumVerticesOfFace==NULL ||
-		pContext->m_pInterface->m_getPosition==NULL ||
-		pContext->m_pInterface->m_getNormal==NULL ||
-		pContext->m_pInterface->m_getTexCoord==NULL )
-		return TFALSE;
-
-	// count triangles on supported faces
-	for (f=0; f<iNrFaces; f++)
-	{
-		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
-		if (verts==3) ++iNrTrianglesIn;
-		else if (verts==4) iNrTrianglesIn += 2;
-	}
-	if (iNrTrianglesIn<=0) return TFALSE;
-
-	// allocate memory for an index list
-	piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn);
-	pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn);
-	if (piTriListIn==NULL || pTriInfos==NULL)
-	{
-		if (piTriListIn!=NULL) free(piTriListIn);
-		if (pTriInfos!=NULL) free(pTriInfos);
-		return TFALSE;
-	}
-
-	// make an initial triangle --> face index list
-	iNrTSPaces = GenerateInitialVerticesIndexList(pTriInfos, piTriListIn, pContext, iNrTrianglesIn);
-
-	// make a welded index list of identical positions and attributes (pos, norm, texc)
-	//printf("gen welded index list begin\n");
-	GenerateSharedVerticesIndexList(piTriListIn, pContext, iNrTrianglesIn);
-	//printf("gen welded index list end\n");
-
-	// Mark all degenerate triangles
-	iTotTris = iNrTrianglesIn;
-	iDegenTriangles = 0;
-	for (t=0; t<iTotTris; t++)
-	{
-		const int i0 = piTriListIn[t*3+0];
-		const int i1 = piTriListIn[t*3+1];
-		const int i2 = piTriListIn[t*3+2];
-		const SVec3 p0 = GetPosition(pContext, i0);
-		const SVec3 p1 = GetPosition(pContext, i1);
-		const SVec3 p2 = GetPosition(pContext, i2);
-		if (veq(p0,p1) || veq(p0,p2) || veq(p1,p2))	// degenerate
-		{
-			pTriInfos[t].iFlag |= MARK_DEGENERATE;
-			++iDegenTriangles;
-		}
-	}
-	iNrTrianglesIn = iTotTris - iDegenTriangles;
-
-	// mark all triangle pairs that belong to a quad with only one
-	// good triangle. These need special treatment in DegenEpilogue().
-	// Additionally, move all good triangles to the start of
-	// pTriInfos[] and piTriListIn[] without changing order and
-	// put the degenerate triangles last.
-	DegenPrologue(pTriInfos, piTriListIn, iNrTrianglesIn, iTotTris);
-
-	
-	// evaluate triangle level attributes and neighbor list
-	//printf("gen neighbors list begin\n");
-	InitTriInfo(pTriInfos, piTriListIn, pContext, iNrTrianglesIn);
-	//printf("gen neighbors list end\n");
-
-	
-	// based on the 4 rules, identify groups based on connectivity
-	iNrMaxGroups = iNrTrianglesIn*3;
-	pGroups = (SGroup *) malloc(sizeof(SGroup)*iNrMaxGroups);
-	piGroupTrianglesBuffer = (int *) malloc(sizeof(int)*iNrTrianglesIn*3);
-	if (pGroups==NULL || piGroupTrianglesBuffer==NULL)
-	{
-		if (pGroups!=NULL) free(pGroups);
-		if (piGroupTrianglesBuffer!=NULL) free(piGroupTrianglesBuffer);
-		free(piTriListIn);
-		free(pTriInfos);
-		return TFALSE;
-	}
-	//printf("gen 4rule groups begin\n");
-	iNrActiveGroups =
-		Build4RuleGroups(pTriInfos, pGroups, piGroupTrianglesBuffer, piTriListIn, iNrTrianglesIn);
-	//printf("gen 4rule groups end\n");
-
-	//
-
-	psTspace = (STSpace *) malloc(sizeof(STSpace)*iNrTSPaces);
-	if (psTspace==NULL)
-	{
-		free(piTriListIn);
-		free(pTriInfos);
-		free(pGroups);
-		free(piGroupTrianglesBuffer);
-		return TFALSE;
-	}
-	memset(psTspace, 0, sizeof(STSpace)*iNrTSPaces);
-	for (t=0; t<iNrTSPaces; t++)
-	{
-		psTspace[t].vOs.x=1.0f; psTspace[t].vOs.y=0.0f; psTspace[t].vOs.z=0.0f; psTspace[t].fMagS = 1.0f;
-		psTspace[t].vOt.x=0.0f; psTspace[t].vOt.y=1.0f; psTspace[t].vOt.z=0.0f; psTspace[t].fMagT = 1.0f;
-	}
-
-	// make tspaces, each group is split up into subgroups if necessary
-	// based on fAngularThreshold. Finally a tangent space is made for
-	// every resulting subgroup
-	//printf("gen tspaces begin\n");
-	bRes = GenerateTSpaces(psTspace, pTriInfos, pGroups, iNrActiveGroups, piTriListIn, fThresCos, pContext);
-	//printf("gen tspaces end\n");
-	
-	// clean up
-	free(pGroups);
-	free(piGroupTrianglesBuffer);
-
-	if (!bRes)	// if an allocation in GenerateTSpaces() failed
-	{
-		// clean up and return false
-		free(pTriInfos); free(piTriListIn); free(psTspace);
-		return TFALSE;
-	}
-
-
-	// degenerate quads with one good triangle will be fixed by copying a space from
-	// the good triangle to the coinciding vertex.
-	// all other degenerate triangles will just copy a space from any good triangle
-	// with the same welded index in piTriListIn[].
-	DegenEpilogue(psTspace, pTriInfos, piTriListIn, pContext, iNrTrianglesIn, iTotTris);
-
-	free(pTriInfos); free(piTriListIn);
-
-	index = 0;
-	for (f=0; f<iNrFaces; f++)
-	{
-		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
-		if (verts!=3 && verts!=4) continue;
-		
-
-		// I've decided to let degenerate triangles and group-with-anythings
-		// vary between left/right hand coordinate systems at the vertices.
-		// All healthy triangles on the other hand are built to always be either or.
-
-		/*// force the coordinate system orientation to be uniform for every face.
-		// (this is already the case for good triangles but not for
-		// degenerate ones and those with bGroupWithAnything==true)
-		bool bOrient = psTspace[index].bOrient;
-		if (psTspace[index].iCounter == 0)	// tspace was not derived from a group
-		{
-			// look for a space created in GenerateTSpaces() by iCounter>0
-			bool bNotFound = true;
-			int i=1;
-			while (i<verts && bNotFound)
-			{
-				if (psTspace[index+i].iCounter > 0) bNotFound=false;
-				else ++i;
-			}
-			if (!bNotFound) bOrient = psTspace[index+i].bOrient;
-		}*/
-
-		// set data
-		for (i=0; i<verts; i++)
-		{
-			const STSpace * pTSpace = &psTspace[index];
-			float tang[] = {pTSpace->vOs.x, pTSpace->vOs.y, pTSpace->vOs.z};
-			float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z};
-			if (pContext->m_pInterface->m_setTSpace!=NULL)
-				pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i);
-			if (pContext->m_pInterface->m_setTSpaceBasic!=NULL)
-				pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i);
-
-			++index;
-		}
-	}
-
-	free(psTspace);
-
-	
-	return TTRUE;
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-typedef struct {
-	float vert[3];
-	int index;
-} STmpVert;
-
-static const int g_iCells = 2048;
-
-#ifdef _MSC_VER
-#  define NOINLINE __declspec(noinline)
-#else
-#  define NOINLINE __attribute__ ((noinline))
-#endif
-
-// it is IMPORTANT that this function is called to evaluate the hash since
-// inlining could potentially reorder instructions and generate different
-// results for the same effective input value fVal.
-static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal)
-{
-	const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin));
-	const int iIndex = (int)fIndex;
-	return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1);
-}
-
-static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in);
-static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries);
-static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn);
-
-static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
-{
-
-	// Generate bounding box
-	int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL;
-	STmpVert * pTmpVert = NULL;
-	int i=0, iChannel=0, k=0, e=0;
-	int iMaxCount=0;
-	SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim;
-	float fMin, fMax;
-	for (i=1; i<(iNrTrianglesIn*3); i++)
-	{
-		const int index = piTriList_in_and_out[i];
-
-		const SVec3 vP = GetPosition(pContext, index);
-		if (vMin.x > vP.x) vMin.x = vP.x;
-		else if (vMax.x < vP.x) vMax.x = vP.x;
-		if (vMin.y > vP.y) vMin.y = vP.y;
-		else if (vMax.y < vP.y) vMax.y = vP.y;
-		if (vMin.z > vP.z) vMin.z = vP.z;
-		else if (vMax.z < vP.z) vMax.z = vP.z;
-	}
-
-	vDim = vsub(vMax,vMin);
-	iChannel = 0;
-	fMin = vMin.x; fMax=vMax.x;
-	if (vDim.y>vDim.x && vDim.y>vDim.z)
-	{
-		iChannel=1;
-		fMin = vMin.y;
-		fMax = vMax.y;
-	}
-	else if (vDim.z>vDim.x)
-	{
-		iChannel=2;
-		fMin = vMin.z;
-		fMax = vMax.z;
-	}
-
-	// make allocations
-	piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3);
-	piHashCount = (int *) malloc(sizeof(int)*g_iCells);
-	piHashOffsets = (int *) malloc(sizeof(int)*g_iCells);
-	piHashCount2 = (int *) malloc(sizeof(int)*g_iCells);
-
-	if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL)
-	{
-		if (piHashTable!=NULL) free(piHashTable);
-		if (piHashCount!=NULL) free(piHashCount);
-		if (piHashOffsets!=NULL) free(piHashOffsets);
-		if (piHashCount2!=NULL) free(piHashCount2);
-		GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn);
-		return;
-	}
-	memset(piHashCount, 0, sizeof(int)*g_iCells);
-	memset(piHashCount2, 0, sizeof(int)*g_iCells);
-
-	// count amount of elements in each cell unit
-	for (i=0; i<(iNrTrianglesIn*3); i++)
-	{
-		const int index = piTriList_in_and_out[i];
-		const SVec3 vP = GetPosition(pContext, index);
-		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
-		const int iCell = FindGridCell(fMin, fMax, fVal);
-		++piHashCount[iCell];
-	}
-
-	// evaluate start index of each cell.
-	piHashOffsets[0]=0;
-	for (k=1; k<g_iCells; k++)
-		piHashOffsets[k]=piHashOffsets[k-1]+piHashCount[k-1];
-
-	// insert vertices
-	for (i=0; i<(iNrTrianglesIn*3); i++)
-	{
-		const int index = piTriList_in_and_out[i];
-		const SVec3 vP = GetPosition(pContext, index);
-		const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z);
-		const int iCell = FindGridCell(fMin, fMax, fVal);
-		int * pTable = NULL;
-
-		assert(piHashCount2[iCell]<piHashCount[iCell]);
-		pTable = &piHashTable[piHashOffsets[iCell]];
-		pTable[piHashCount2[iCell]] = i;	// vertex i has been inserted.
-		++piHashCount2[iCell];
-	}
-	for (k=0; k<g_iCells; k++)
-		assert(piHashCount2[k] == piHashCount[k]);	// verify the count
-	free(piHashCount2);
-
-	// find maximum amount of entries in any hash entry
-	iMaxCount = piHashCount[0];
-	for (k=1; k<g_iCells; k++)
-		if (iMaxCount<piHashCount[k])
-			iMaxCount=piHashCount[k];
-	pTmpVert = (STmpVert *) malloc(sizeof(STmpVert)*iMaxCount);
-	
-
-	// complete the merge
-	for (k=0; k<g_iCells; k++)
-	{
-		// extract table of cell k and amount of entries in it
-		int * pTable = &piHashTable[piHashOffsets[k]];
-		const int iEntries = piHashCount[k];
-		if (iEntries < 2) continue;
-
-		if (pTmpVert!=NULL)
-		{
-			for (e=0; e<iEntries; e++)
-			{
-				int i = pTable[e];
-				const SVec3 vP = GetPosition(pContext, piTriList_in_and_out[i]);
-				pTmpVert[e].vert[0] = vP.x; pTmpVert[e].vert[1] = vP.y;
-				pTmpVert[e].vert[2] = vP.z; pTmpVert[e].index = i;
-			}
-			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, 0, iEntries-1);
-		}
-		else
-			MergeVertsSlow(piTriList_in_and_out, pContext, pTable, iEntries);
-	}
-
-	if (pTmpVert!=NULL) { free(pTmpVert); }
-	free(piHashTable);
-	free(piHashCount);
-	free(piHashOffsets);
-}
-
-static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in)
-{
-	// make bbox
-	int c=0, l=0, channel=0;
-	float fvMin[3], fvMax[3];
-	float dx=0, dy=0, dz=0, fSep=0;
-	for (c=0; c<3; c++)
-	{	fvMin[c]=pTmpVert[iL_in].vert[c]; fvMax[c]=fvMin[c];	}
-	for (l=(iL_in+1); l<=iR_in; l++) {
-		for (c=0; c<3; c++) {
-			if (fvMin[c]>pTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c];
-			if (fvMax[c]<pTmpVert[l].vert[c]) fvMax[c]=pTmpVert[l].vert[c];
-		}
-	}
-
-	dx = fvMax[0]-fvMin[0];
-	dy = fvMax[1]-fvMin[1];
-	dz = fvMax[2]-fvMin[2];
-
-	channel = 0;
-	if (dy>dx && dy>dz) channel=1;
-	else if (dz>dx) channel=2;
-
-	fSep = 0.5f*(fvMax[channel]+fvMin[channel]);
-
-	// stop if all vertices are NaNs
-	if (!isfinite(fSep))
-		return;
-
-	// terminate recursion when the separation/average value
-	// is no longer strictly between fMin and fMax values.
-	if (fSep>=fvMax[channel] || fSep<=fvMin[channel])
-	{
-		// complete the weld
-		for (l=iL_in; l<=iR_in; l++)
-		{
-			int i = pTmpVert[l].index;
-			const int index = piTriList_in_and_out[i];
-			const SVec3 vP = GetPosition(pContext, index);
-			const SVec3 vN = GetNormal(pContext, index);
-			const SVec3 vT = GetTexCoord(pContext, index);
-
-			tbool bNotFound = TTRUE;
-			int l2=iL_in, i2rec=-1;
-			while (l2<l && bNotFound)
-			{
-				const int i2 = pTmpVert[l2].index;
-				const int index2 = piTriList_in_and_out[i2];
-				const SVec3 vP2 = GetPosition(pContext, index2);
-				const SVec3 vN2 = GetNormal(pContext, index2);
-				const SVec3 vT2 = GetTexCoord(pContext, index2);
-				i2rec=i2;
-
-				//if (vP==vP2 && vN==vN2 && vT==vT2)
-				if (vP.x==vP2.x && vP.y==vP2.y && vP.z==vP2.z &&
-					vN.x==vN2.x && vN.y==vN2.y && vN.z==vN2.z &&
-					vT.x==vT2.x && vT.y==vT2.y && vT.z==vT2.z)
-					bNotFound = TFALSE;
-				else
-					++l2;
-			}
-			
-			// merge if previously found
-			if (!bNotFound)
-				piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
-		}
-	}
-	else
-	{
-		int iL=iL_in, iR=iR_in;
-		assert((iR_in-iL_in)>0);	// at least 2 entries
-
-		// separate (by fSep) all points between iL_in and iR_in in pTmpVert[]
-		while (iL < iR)
-		{
-			tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE;
-			while ((!bReadyLeftSwap) && iL<iR)
-			{
-				assert(iL>=iL_in && iL<=iR_in);
-				bReadyLeftSwap = !(pTmpVert[iL].vert[channel]<fSep);
-				if (!bReadyLeftSwap) ++iL;
-			}
-			while ((!bReadyRightSwap) && iL<iR)
-			{
-				assert(iR>=iL_in && iR<=iR_in);
-				bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
-				if (!bReadyRightSwap) --iR;
-			}
-			assert( (iL<iR) || !(bReadyLeftSwap && bReadyRightSwap) );
-
-			if (bReadyLeftSwap && bReadyRightSwap)
-			{
-				const STmpVert sTmp = pTmpVert[iL];
-				assert(iL<iR);
-				pTmpVert[iL] = pTmpVert[iR];
-				pTmpVert[iR] = sTmp;
-				++iL; --iR;
-			}
-		}
-
-		assert(iL==(iR+1) || (iL==iR));
-		if (iL==iR)
-		{
-			const tbool bReadyRightSwap = pTmpVert[iR].vert[channel]<fSep;
-			if (bReadyRightSwap) ++iL;
-			else --iR;
-		}
-
-		// only need to weld when there is more than 1 instance of the (x,y,z)
-		if (iL_in < iR)
-			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL_in, iR);	// weld all left of fSep
-		if (iL < iR_in)
-			MergeVertsFast(piTriList_in_and_out, pTmpVert, pContext, iL, iR_in);	// weld all right of (or equal to) fSep
-	}
-}
-
-static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries)
-{
-	// this can be optimized further using a tree structure or more hashing.
-	int e=0;
-	for (e=0; e<iEntries; e++)
-	{
-		int i = pTable[e];
-		const int index = piTriList_in_and_out[i];
-		const SVec3 vP = GetPosition(pContext, index);
-		const SVec3 vN = GetNormal(pContext, index);
-		const SVec3 vT = GetTexCoord(pContext, index);
-
-		tbool bNotFound = TTRUE;
-		int e2=0, i2rec=-1;
-		while (e2<e && bNotFound)
-		{
-			const int i2 = pTable[e2];
-			const int index2 = piTriList_in_and_out[i2];
-			const SVec3 vP2 = GetPosition(pContext, index2);
-			const SVec3 vN2 = GetNormal(pContext, index2);
-			const SVec3 vT2 = GetTexCoord(pContext, index2);
-			i2rec = i2;
-
-			if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
-				bNotFound = TFALSE;
-			else
-				++e2;
-		}
-		
-		// merge if previously found
-		if (!bNotFound)
-			piTriList_in_and_out[i] = piTriList_in_and_out[i2rec];
-	}
-}
-
-static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
-{
-	int iNumUniqueVerts = 0, t=0, i=0;
-	for (t=0; t<iNrTrianglesIn; t++)
-	{
-		for (i=0; i<3; i++)
-		{
-			const int offs = t*3 + i;
-			const int index = piTriList_in_and_out[offs];
-
-			const SVec3 vP = GetPosition(pContext, index);
-			const SVec3 vN = GetNormal(pContext, index);
-			const SVec3 vT = GetTexCoord(pContext, index);
-
-			tbool bFound = TFALSE;
-			int t2=0, index2rec=-1;
-			while (!bFound && t2<=t)
-			{
-				int j=0;
-				while (!bFound && j<3)
-				{
-					const int index2 = piTriList_in_and_out[t2*3 + j];
-					const SVec3 vP2 = GetPosition(pContext, index2);
-					const SVec3 vN2 = GetNormal(pContext, index2);
-					const SVec3 vT2 = GetTexCoord(pContext, index2);
-					
-					if (veq(vP,vP2) && veq(vN,vN2) && veq(vT,vT2))
-						bFound = TTRUE;
-					else
-						++j;
-				}
-				if (!bFound) ++t2;
-			}
-
-			assert(bFound);
-			// if we found our own
-			if (index2rec == index) { ++iNumUniqueVerts; }
-
-			piTriList_in_and_out[offs] = index2rec;
-		}
-	}
-}
-
-static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
-{
-	int iTSpacesOffs = 0, f=0, t=0;
-	int iDstTriIndex = 0;
-	for (f=0; f<pContext->m_pInterface->m_getNumFaces(pContext); f++)
-	{
-		const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f);
-		if (verts!=3 && verts!=4) continue;
-
-		pTriInfos[iDstTriIndex].iOrgFaceNumber = f;
-		pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs;
-
-		if (verts==3)
-		{
-			unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num;
-			pVerts[0]=0; pVerts[1]=1; pVerts[2]=2;
-			piTriList_out[iDstTriIndex*3+0] = MakeIndex(f, 0);
-			piTriList_out[iDstTriIndex*3+1] = MakeIndex(f, 1);
-			piTriList_out[iDstTriIndex*3+2] = MakeIndex(f, 2);
-			++iDstTriIndex;	// next
-		}
-		else
-		{
-			{
-				pTriInfos[iDstTriIndex+1].iOrgFaceNumber = f;
-				pTriInfos[iDstTriIndex+1].iTSpacesOffs = iTSpacesOffs;
-			}
-
-			{
-				// need an order independent way to evaluate
-				// tspace on quads. This is done by splitting
-				// along the shortest diagonal.
-				const int i0 = MakeIndex(f, 0);
-				const int i1 = MakeIndex(f, 1);
-				const int i2 = MakeIndex(f, 2);
-				const int i3 = MakeIndex(f, 3);
-				const SVec3 T0 = GetTexCoord(pContext, i0);
-				const SVec3 T1 = GetTexCoord(pContext, i1);
-				const SVec3 T2 = GetTexCoord(pContext, i2);
-				const SVec3 T3 = GetTexCoord(pContext, i3);
-				const float distSQ_02 = LengthSquared(vsub(T2,T0));
-				const float distSQ_13 = LengthSquared(vsub(T3,T1));
-				tbool bQuadDiagIs_02;
-				if (distSQ_02<distSQ_13)
-					bQuadDiagIs_02 = TTRUE;
-				else if (distSQ_13<distSQ_02)
-					bQuadDiagIs_02 = TFALSE;
-				else
-				{
-					const SVec3 P0 = GetPosition(pContext, i0);
-					const SVec3 P1 = GetPosition(pContext, i1);
-					const SVec3 P2 = GetPosition(pContext, i2);
-					const SVec3 P3 = GetPosition(pContext, i3);
-					const float distSQ_02 = LengthSquared(vsub(P2,P0));
-					const float distSQ_13 = LengthSquared(vsub(P3,P1));
-
-					bQuadDiagIs_02 = distSQ_13<distSQ_02 ? TFALSE : TTRUE;
-				}
-
-				if (bQuadDiagIs_02)
-				{
-					{
-						unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num;
-						pVerts_A[0]=0; pVerts_A[1]=1; pVerts_A[2]=2;
-					}
-					piTriList_out[iDstTriIndex*3+0] = i0;
-					piTriList_out[iDstTriIndex*3+1] = i1;
-					piTriList_out[iDstTriIndex*3+2] = i2;
-					++iDstTriIndex;	// next
-					{
-						unsigned char * pVerts_B = pTriInfos[iDstTriIndex].vert_num;
-						pVerts_B[0]=0; pVerts_B[1]=2; pVerts_B[2]=3;
-					}
-					piTriList_out[iDstTriIndex*3+0] = i0;
-					piTriList_out[iDstTriIndex*3+1] = i2;
-					piTriList_out[iDstTriIndex*3+2] = i3;
-					++iDstTriIndex;	// next
-				}
-				else
-				{
-					{
-						unsigned char * pVerts_A = pTriInfos[iDstTriIndex].vert_num;
-						pVerts_A[0]=0; pVerts_A[1]=1; pVerts_A[2]=3;
-					}
-					piTriList_out[iDstTriIndex*3+0] = i0;
-					piTriList_out[iDstTriIndex*3+1] = i1;
-					piTriList_out[iDstTriIndex*3+2] = i3;
-					++iDstTriIndex;	// next
-					{
-						unsigned char * pVerts_B = pTriInfos[iDstTriIndex].vert_num;
-						pVerts_B[0]=1; pVerts_B[1]=2; pVerts_B[2]=3;
-					}
-					piTriList_out[iDstTriIndex*3+0] = i1;
-					piTriList_out[iDstTriIndex*3+1] = i2;
-					piTriList_out[iDstTriIndex*3+2] = i3;
-					++iDstTriIndex;	// next
-				}
-			}
-		}
-
-		iTSpacesOffs += verts;
-		assert(iDstTriIndex<=iNrTrianglesIn);
-	}
-
-	for (t=0; t<iNrTrianglesIn; t++)
-		pTriInfos[t].iFlag = 0;
-
-	// return total amount of tspaces
-	return iTSpacesOffs;
-}
-
-static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index)
-{
-	int iF, iI;
-	SVec3 res; float pos[3];
-	IndexToData(&iF, &iI, index);
-	pContext->m_pInterface->m_getPosition(pContext, pos, iF, iI);
-	res.x=pos[0]; res.y=pos[1]; res.z=pos[2];
-	return res;
-}
-
-static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index)
-{
-	int iF, iI;
-	SVec3 res; float norm[3];
-	IndexToData(&iF, &iI, index);
-	pContext->m_pInterface->m_getNormal(pContext, norm, iF, iI);
-	res.x=norm[0]; res.y=norm[1]; res.z=norm[2];
-	return res;
-}
-
-static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index)
-{
-	int iF, iI;
-	SVec3 res; float texc[2];
-	IndexToData(&iF, &iI, index);
-	pContext->m_pInterface->m_getTexCoord(pContext, texc, iF, iI);
-	res.x=texc[0]; res.y=texc[1]; res.z=1.0f;
-	return res;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-
-typedef union {
-	struct
-	{
-		int i0, i1, f;
-	};
-	int array[3];
-} SEdge;
-
-static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn);
-static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn);
-
-// returns the texture area times 2
-static float CalcTexArea(const SMikkTSpaceContext * pContext, const int indices[])
-{
-	const SVec3 t1 = GetTexCoord(pContext, indices[0]);
-	const SVec3 t2 = GetTexCoord(pContext, indices[1]);
-	const SVec3 t3 = GetTexCoord(pContext, indices[2]);
-
-	const float t21x = t2.x-t1.x;
-	const float t21y = t2.y-t1.y;
-	const float t31x = t3.x-t1.x;
-	const float t31y = t3.y-t1.y;
-
-	const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x;
-
-	return fSignedAreaSTx2<0 ? (-fSignedAreaSTx2) : fSignedAreaSTx2;
-}
-
-static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn)
-{
-	int f=0, i=0, t=0;
-	// pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function.
-
-	// generate neighbor info list
-	for (f=0; f<iNrTrianglesIn; f++)
-		for (i=0; i<3; i++)
-		{
-			pTriInfos[f].FaceNeighbors[i] = -1;
-			pTriInfos[f].AssignedGroup[i] = NULL;
-
-			pTriInfos[f].vOs.x=0.0f; pTriInfos[f].vOs.y=0.0f; pTriInfos[f].vOs.z=0.0f;
-			pTriInfos[f].vOt.x=0.0f; pTriInfos[f].vOt.y=0.0f; pTriInfos[f].vOt.z=0.0f;
-			pTriInfos[f].fMagS = 0;
-			pTriInfos[f].fMagT = 0;
-
-			// assumed bad
-			pTriInfos[f].iFlag |= GROUP_WITH_ANY;
-		}
-
-	// evaluate first order derivatives
-	for (f=0; f<iNrTrianglesIn; f++)
-	{
-		// initial values
-		const SVec3 v1 = GetPosition(pContext, piTriListIn[f*3+0]);
-		const SVec3 v2 = GetPosition(pContext, piTriListIn[f*3+1]);
-		const SVec3 v3 = GetPosition(pContext, piTriListIn[f*3+2]);
-		const SVec3 t1 = GetTexCoord(pContext, piTriListIn[f*3+0]);
-		const SVec3 t2 = GetTexCoord(pContext, piTriListIn[f*3+1]);
-		const SVec3 t3 = GetTexCoord(pContext, piTriListIn[f*3+2]);
-
-		const float t21x = t2.x-t1.x;
-		const float t21y = t2.y-t1.y;
-		const float t31x = t3.x-t1.x;
-		const float t31y = t3.y-t1.y;
-		const SVec3 d1 = vsub(v2,v1);
-		const SVec3 d2 = vsub(v3,v1);
-
-		const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x;
-		//assert(fSignedAreaSTx2!=0);
-		SVec3 vOs = vsub(vscale(t31y,d1), vscale(t21y,d2));	// eq 18
-		SVec3 vOt = vadd(vscale(-t31x,d1), vscale(t21x,d2)); // eq 19
-
-		pTriInfos[f].iFlag |= (fSignedAreaSTx2>0 ? ORIENT_PRESERVING : 0);
-
-		if ( NotZero(fSignedAreaSTx2) )
-		{
-			const float fAbsArea = fabsf(fSignedAreaSTx2);
-			const float fLenOs = Length(vOs);
-			const float fLenOt = Length(vOt);
-			const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f;
-			if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs);
-			if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt);
-
-			// evaluate magnitudes prior to normalization of vOs and vOt
-			pTriInfos[f].fMagS = fLenOs / fAbsArea;
-			pTriInfos[f].fMagT = fLenOt / fAbsArea;
-
-			// if this is a good triangle
-			if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT))
-				pTriInfos[f].iFlag &= (~GROUP_WITH_ANY);
-		}
-	}
-
-	// force otherwise healthy quads to a fixed orientation
-	while (t<(iNrTrianglesIn-1))
-	{
-		const int iFO_a = pTriInfos[t].iOrgFaceNumber;
-		const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
-		if (iFO_a==iFO_b)	// this is a quad
-		{
-			const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
-			const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
-			
-			// bad triangles should already have been removed by
-			// DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false
-			if ((bIsDeg_a||bIsDeg_b)==TFALSE)
-			{
-				const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-				const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-				// if this happens the quad has extremely bad mapping!!
-				if (bOrientA!=bOrientB)
-				{
-					//printf("found quad with bad mapping\n");
-					tbool bChooseOrientFirstTri = TFALSE;
-					if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE;
-					else if ( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) )
-						bChooseOrientFirstTri = TTRUE;
-
-					// force match
-					{
-						const int t0 = bChooseOrientFirstTri ? t : (t+1);
-						const int t1 = bChooseOrientFirstTri ? (t+1) : t;
-						pTriInfos[t1].iFlag &= (~ORIENT_PRESERVING);	// clear first
-						pTriInfos[t1].iFlag |= (pTriInfos[t0].iFlag&ORIENT_PRESERVING);	// copy bit
-					}
-				}
-			}
-			t += 2;
-		}
-		else
-			++t;
-	}
-	
-	// match up edge pairs
-	{
-		SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3);
-		if (pEdges==NULL)
-			BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn);
-		else
-		{
-			BuildNeighborsFast(pTriInfos, pEdges, piTriListIn, iNrTrianglesIn);
-	
-			free(pEdges);
-		}
-	}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-
-static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup);
-static void AddTriToGroup(SGroup * pGroup, const int iTriIndex);
-
-static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn)
-{
-	const int iNrMaxGroups = iNrTrianglesIn*3;
-	int iNrActiveGroups = 0;
-	int iOffset = 0, f=0, i=0;
-	(void)iNrMaxGroups;  /* quiet warnings in non debug mode */
-	for (f=0; f<iNrTrianglesIn; f++)
-	{
-		for (i=0; i<3; i++)
-		{
-			// if not assigned to a group
-			if ((pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 && pTriInfos[f].AssignedGroup[i]==NULL)
-			{
-				tbool bOrPre;
-				int neigh_indexL, neigh_indexR;
-				const int vert_index = piTriListIn[f*3+i];
-				assert(iNrActiveGroups<iNrMaxGroups);
-				pTriInfos[f].AssignedGroup[i] = &pGroups[iNrActiveGroups];
-				pTriInfos[f].AssignedGroup[i]->iVertexRepresentitive = vert_index;
-				pTriInfos[f].AssignedGroup[i]->bOrientPreservering = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0;
-				pTriInfos[f].AssignedGroup[i]->iNrFaces = 0;
-				pTriInfos[f].AssignedGroup[i]->pFaceIndices = &piGroupTrianglesBuffer[iOffset];
-				++iNrActiveGroups;
-
-				AddTriToGroup(pTriInfos[f].AssignedGroup[i], f);
-				bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-				neigh_indexL = pTriInfos[f].FaceNeighbors[i];
-				neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2];
-				if (neigh_indexL>=0) // neighbor
-				{
-					const tbool bAnswer =
-						AssignRecur(piTriListIn, pTriInfos, neigh_indexL,
-									pTriInfos[f].AssignedGroup[i] );
-					
-					const tbool bOrPre2 = (pTriInfos[neigh_indexL].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-					const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE;
-					assert(bAnswer || bDiff);
-					(void)bAnswer, (void)bDiff;  /* quiet warnings in non debug mode */
-				}
-				if (neigh_indexR>=0) // neighbor
-				{
-					const tbool bAnswer =
-						AssignRecur(piTriListIn, pTriInfos, neigh_indexR,
-									pTriInfos[f].AssignedGroup[i] );
-
-					const tbool bOrPre2 = (pTriInfos[neigh_indexR].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-					const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE;
-					assert(bAnswer || bDiff);
-					(void)bAnswer, (void)bDiff;  /* quiet warnings in non debug mode */
-				}
-
-				// update offset
-				iOffset += pTriInfos[f].AssignedGroup[i]->iNrFaces;
-				// since the groups are disjoint a triangle can never
-				// belong to more than 3 groups. Subsequently something
-				// is completely screwed if this assertion ever hits.
-				assert(iOffset <= iNrMaxGroups);
-			}
-		}
-	}
-
-	return iNrActiveGroups;
-}
-
-static void AddTriToGroup(SGroup * pGroup, const int iTriIndex)
-{
-	pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex;
-	++pGroup->iNrFaces;
-}
-
-static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[],
-				 const int iMyTriIndex, SGroup * pGroup)
-{
-	STriInfo * pMyTriInfo = &psTriInfos[iMyTriIndex];
-
-	// track down vertex
-	const int iVertRep = pGroup->iVertexRepresentitive;
-	const int * pVerts = &piTriListIn[3*iMyTriIndex+0];
-	int i=-1;
-	if (pVerts[0]==iVertRep) i=0;
-	else if (pVerts[1]==iVertRep) i=1;
-	else if (pVerts[2]==iVertRep) i=2;
-	assert(i>=0 && i<3);
-
-	// early out
-	if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE;
-	else if (pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE;
-	if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0)
-	{
-		// first to group with a group-with-anything triangle
-		// determines it's orientation.
-		// This is the only existing order dependency in the code!!
-		if ( pMyTriInfo->AssignedGroup[0] == NULL &&
-			pMyTriInfo->AssignedGroup[1] == NULL &&
-			pMyTriInfo->AssignedGroup[2] == NULL )
-		{
-			pMyTriInfo->iFlag &= (~ORIENT_PRESERVING);
-			pMyTriInfo->iFlag |= (pGroup->bOrientPreservering ? ORIENT_PRESERVING : 0);
-		}
-	}
-	{
-		const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE;
-		if (bOrient != pGroup->bOrientPreservering) return TFALSE;
-	}
-
-	AddTriToGroup(pGroup, iMyTriIndex);
-	pMyTriInfo->AssignedGroup[i] = pGroup;
-
-	{
-		const int neigh_indexL = pMyTriInfo->FaceNeighbors[i];
-		const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2];
-		if (neigh_indexL>=0)
-			AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup);
-		if (neigh_indexR>=0)
-			AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup);
-	}
-
-
-
-	return TTRUE;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////////////
-
-static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2);
-static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed);
-static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[], const SMikkTSpaceContext * pContext, const int iVertexRepresentitive);
-
-static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[],
-                             const int iNrActiveGroups, const int piTriListIn[], const float fThresCos,
-                             const SMikkTSpaceContext * pContext)
-{
-	STSpace * pSubGroupTspace = NULL;
-	SSubGroup * pUniSubGroups = NULL;
-	int * pTmpMembers = NULL;
-	int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0;
-	for (g=0; g<iNrActiveGroups; g++)
-		if (iMaxNrFaces < pGroups[g].iNrFaces)
-			iMaxNrFaces = pGroups[g].iNrFaces;
-
-	if (iMaxNrFaces == 0) return TTRUE;
-
-	// make initial allocations
-	pSubGroupTspace = (STSpace *) malloc(sizeof(STSpace)*iMaxNrFaces);
-	pUniSubGroups = (SSubGroup *) malloc(sizeof(SSubGroup)*iMaxNrFaces);
-	pTmpMembers = (int *) malloc(sizeof(int)*iMaxNrFaces);
-	if (pSubGroupTspace==NULL || pUniSubGroups==NULL || pTmpMembers==NULL)
-	{
-		if (pSubGroupTspace!=NULL) free(pSubGroupTspace);
-		if (pUniSubGroups!=NULL) free(pUniSubGroups);
-		if (pTmpMembers!=NULL) free(pTmpMembers);
-		return TFALSE;
-	}
-
-
-	iUniqueTspaces = 0;
-	for (g=0; g<iNrActiveGroups; g++)
-	{
-		const SGroup * pGroup = &pGroups[g];
-		int iUniqueSubGroups = 0, s=0;
-
-		for (i=0; i<pGroup->iNrFaces; i++)	// triangles
-		{
-			const int f = pGroup->pFaceIndices[i];	// triangle number
-			int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0;
-			SSubGroup tmp_group;
-			tbool bFound;
-			SVec3 n, vOs, vOt;
-			if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0;
-			else if (pTriInfos[f].AssignedGroup[1]==pGroup) index=1;
-			else if (pTriInfos[f].AssignedGroup[2]==pGroup) index=2;
-			assert(index>=0 && index<3);
-
-			iVertIndex = piTriListIn[f*3+index];
-			assert(iVertIndex==pGroup->iVertexRepresentitive);
-
-			// is normalized already
-			n = GetNormal(pContext, iVertIndex);
-			
-			// project
-			vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
-			vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
-			if ( VNotZero(vOs) ) vOs = Normalize(vOs);
-			if ( VNotZero(vOt) ) vOt = Normalize(vOt);
-
-			// original face number
-			iOF_1 = pTriInfos[f].iOrgFaceNumber;
-			
-			iMembers = 0;
-			for (j=0; j<pGroup->iNrFaces; j++)
-			{
-				const int t = pGroup->pFaceIndices[j];	// triangle number
-				const int iOF_2 = pTriInfos[t].iOrgFaceNumber;
-
-				// project
-				SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n));
-				SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n));
-				if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2);
-				if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2);
-
-				{
-					const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE;
-					// make sure triangles which belong to the same quad are joined.
-					const tbool bSameOrgFace = iOF_1==iOF_2 ? TTRUE : TFALSE;
-
-					const float fCosS = vdot(vOs,vOs2);
-					const float fCosT = vdot(vOt,vOt2);
-
-					assert(f!=t || bSameOrgFace);	// sanity check
-					if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos))
-						pTmpMembers[iMembers++] = t;
-				}
-			}
-
-			// sort pTmpMembers
-			tmp_group.iNrFaces = iMembers;
-			tmp_group.pTriMembers = pTmpMembers;
-			if (iMembers>1)
-			{
-				unsigned int uSeed = INTERNAL_RND_SORT_SEED;	// could replace with a random seed?
-				QuickSort(pTmpMembers, 0, iMembers-1, uSeed);
-			}
-
-			// look for an existing match
-			bFound = TFALSE;
-			l=0;
-			while (l<iUniqueSubGroups && !bFound)
-			{
-				bFound = CompareSubGroups(&tmp_group, &pUniSubGroups[l]);
-				if (!bFound) ++l;
-			}
-			
-			// assign tangent space index
-			assert(bFound || l==iUniqueSubGroups);
-			//piTempTangIndices[f*3+index] = iUniqueTspaces+l;
-
-			// if no match was found we allocate a new subgroup
-			if (!bFound)
-			{
-				// insert new subgroup
-				int * pIndices = (int *) malloc(sizeof(int)*iMembers);
-				if (pIndices==NULL)
-				{
-					// clean up and return false
-					int s=0;
-					for (s=0; s<iUniqueSubGroups; s++)
-						free(pUniSubGroups[s].pTriMembers);
-					free(pUniSubGroups);
-					free(pTmpMembers);
-					free(pSubGroupTspace);
-					return TFALSE;
-				}
-				pUniSubGroups[iUniqueSubGroups].iNrFaces = iMembers;
-				pUniSubGroups[iUniqueSubGroups].pTriMembers = pIndices;
-				memcpy(pIndices, tmp_group.pTriMembers, iMembers*sizeof(int));
-				pSubGroupTspace[iUniqueSubGroups] =
-					EvalTspace(tmp_group.pTriMembers, iMembers, piTriListIn, pTriInfos, pContext, pGroup->iVertexRepresentitive);
-				++iUniqueSubGroups;
-			}
-
-			// output tspace
-			{
-				const int iOffs = pTriInfos[f].iTSpacesOffs;
-				const int iVert = pTriInfos[f].vert_num[index];
-				STSpace * pTS_out = &psTspace[iOffs+iVert];
-				assert(pTS_out->iCounter<2);
-				assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering);
-				if (pTS_out->iCounter==1)
-				{
-					*pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]);
-					pTS_out->iCounter = 2;	// update counter
-					pTS_out->bOrient = pGroup->bOrientPreservering;
-				}
-				else
-				{
-					assert(pTS_out->iCounter==0);
-					*pTS_out = pSubGroupTspace[l];
-					pTS_out->iCounter = 1;	// update counter
-					pTS_out->bOrient = pGroup->bOrientPreservering;
-				}
-			}
-		}
-
-		// clean up and offset iUniqueTspaces
-		for (s=0; s<iUniqueSubGroups; s++)
-			free(pUniSubGroups[s].pTriMembers);
-		iUniqueTspaces += iUniqueSubGroups;
-	}
-
-	// clean up
-	free(pUniSubGroups);
-	free(pTmpMembers);
-	free(pSubGroupTspace);
-
-	return TTRUE;
-}
-
-static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[],
-                          const SMikkTSpaceContext * pContext, const int iVertexRepresentitive)
-{
-	STSpace res;
-	float fAngleSum = 0;
-	int face=0;
-	res.vOs.x=0.0f; res.vOs.y=0.0f; res.vOs.z=0.0f;
-	res.vOt.x=0.0f; res.vOt.y=0.0f; res.vOt.z=0.0f;
-	res.fMagS = 0; res.fMagT = 0;
-
-	for (face=0; face<iFaces; face++)
-	{
-		const int f = face_indices[face];
-
-		// only valid triangles get to add their contribution
-		if ( (pTriInfos[f].iFlag&GROUP_WITH_ANY)==0 )
-		{
-			SVec3 n, vOs, vOt, p0, p1, p2, v1, v2;
-			float fCos, fAngle, fMagS, fMagT;
-			int i=-1, index=-1, i0=-1, i1=-1, i2=-1;
-			if (piTriListIn[3*f+0]==iVertexRepresentitive) i=0;
-			else if (piTriListIn[3*f+1]==iVertexRepresentitive) i=1;
-			else if (piTriListIn[3*f+2]==iVertexRepresentitive) i=2;
-			assert(i>=0 && i<3);
-
-			// project
-			index = piTriListIn[3*f+i];
-			n = GetNormal(pContext, index);
-			vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n));
-			vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n));
-			if ( VNotZero(vOs) ) vOs = Normalize(vOs);
-			if ( VNotZero(vOt) ) vOt = Normalize(vOt);
-
-			i2 = piTriListIn[3*f + (i<2?(i+1):0)];
-			i1 = piTriListIn[3*f + i];
-			i0 = piTriListIn[3*f + (i>0?(i-1):2)];
-
-			p0 = GetPosition(pContext, i0);
-			p1 = GetPosition(pContext, i1);
-			p2 = GetPosition(pContext, i2);
-			v1 = vsub(p0,p1);
-			v2 = vsub(p2,p1);
-
-			// project
-			v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1);
-			v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2);
-
-			// weight contribution by the angle
-			// between the two edge vectors
-			fCos = vdot(v1,v2); fCos=fCos>1?1:(fCos<(-1) ? (-1) : fCos);
-			fAngle = (float) acos(fCos);
-			fMagS = pTriInfos[f].fMagS;
-			fMagT = pTriInfos[f].fMagT;
-
-			res.vOs=vadd(res.vOs, vscale(fAngle,vOs));
-			res.vOt=vadd(res.vOt,vscale(fAngle,vOt));
-			res.fMagS+=(fAngle*fMagS);
-			res.fMagT+=(fAngle*fMagT);
-			fAngleSum += fAngle;
-		}
-	}
-
-	// normalize
-	if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs);
-	if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt);
-	if (fAngleSum>0)
-	{
-		res.fMagS /= fAngleSum;
-		res.fMagT /= fAngleSum;
-	}
-
-	return res;
-}
-
-static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2)
-{
-	tbool bStillSame=TTRUE;
-	int i=0;
-	if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE;
-	while (i<pg1->iNrFaces && bStillSame)
-	{
-		bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE;
-		if (bStillSame) ++i;
-	}
-	return bStillSame;
-}
-
-static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed)
-{
-	int iL, iR, n, index, iMid, iTmp;
-
-	// Random
-	unsigned int t=uSeed&31;
-	t=(uSeed<<t)|(uSeed>>(32-t));
-	uSeed=uSeed+t+3;
-	// Random end
-
-	iL=iLeft; iR=iRight;
-	n = (iR-iL)+1;
-	assert(n>=0);
-	index = (int) (uSeed%n);
-
-	iMid=pSortBuffer[index + iL];
-
-
-	do
-	{
-		while (pSortBuffer[iL] < iMid)
-			++iL;
-		while (pSortBuffer[iR] > iMid)
-			--iR;
-
-		if (iL <= iR)
-		{
-			iTmp = pSortBuffer[iL];
-			pSortBuffer[iL] = pSortBuffer[iR];
-			pSortBuffer[iR] = iTmp;
-			++iL; --iR;
-		}
-	}
-	while (iL <= iR);
-
-	if (iLeft < iR)
-		QuickSort(pSortBuffer, iLeft, iR, uSeed);
-	if (iL < iRight)
-		QuickSort(pSortBuffer, iL, iRight, uSeed);
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////////////////////////////////////////////////////////////////
-
-static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed);
-static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in);
-
-static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn)
-{
-	// build array of edges
-	unsigned int uSeed = INTERNAL_RND_SORT_SEED;				// could replace with a random seed?
-	int iEntries=0, iCurStartIndex=-1, f=0, i=0;
-	for (f=0; f<iNrTrianglesIn; f++)
-		for (i=0; i<3; i++)
-		{
-			const int i0 = piTriListIn[f*3+i];
-			const int i1 = piTriListIn[f*3+(i<2?(i+1):0)];
-			pEdges[f*3+i].i0 = i0 < i1 ? i0 : i1;			// put minimum index in i0
-			pEdges[f*3+i].i1 = !(i0 < i1) ? i0 : i1;		// put maximum index in i1
-			pEdges[f*3+i].f = f;							// record face number
-		}
-
-	// sort over all edges by i0, this is the pricy one.
-	QuickSortEdges(pEdges, 0, iNrTrianglesIn*3-1, 0, uSeed);	// sort channel 0 which is i0
-
-	// sub sort over i1, should be fast.
-	// could replace this with a 64 bit int sort over (i0,i1)
-	// with i0 as msb in the quicksort call above.
-	iEntries = iNrTrianglesIn*3;
-	iCurStartIndex = 0;
-	for (i=1; i<iEntries; i++)
-	{
-		if (pEdges[iCurStartIndex].i0 != pEdges[i].i0)
-		{
-			const int iL = iCurStartIndex;
-			const int iR = i-1;
-			//const int iElems = i-iL;
-			iCurStartIndex = i;
-			QuickSortEdges(pEdges, iL, iR, 1, uSeed);	// sort channel 1 which is i1
-		}
-	}
-
-	// sub sort over f, which should be fast.
-	// this step is to remain compliant with BuildNeighborsSlow() when
-	// more than 2 triangles use the same edge (such as a butterfly topology).
-	iCurStartIndex = 0;
-	for (i=1; i<iEntries; i++)
-	{
-		if (pEdges[iCurStartIndex].i0 != pEdges[i].i0 || pEdges[iCurStartIndex].i1 != pEdges[i].i1)
-		{
-			const int iL = iCurStartIndex;
-			const int iR = i-1;
-			//const int iElems = i-iL;
-			iCurStartIndex = i;
-			QuickSortEdges(pEdges, iL, iR, 2, uSeed);	// sort channel 2 which is f
-		}
-	}
-
-	// pair up, adjacent triangles
-	for (i=0; i<iEntries; i++)
-	{
-		const int i0=pEdges[i].i0;
-		const int i1=pEdges[i].i1;
-		const int f = pEdges[i].f;
-		tbool bUnassigned_A;
-
-		int i0_A, i1_A;
-		int edgenum_A, edgenum_B=0;	// 0,1 or 2
-		GetEdge(&i0_A, &i1_A, &edgenum_A, &piTriListIn[f*3], i0, i1);	// resolve index ordering and edge_num
-		bUnassigned_A = pTriInfos[f].FaceNeighbors[edgenum_A] == -1 ? TTRUE : TFALSE;
-
-		if (bUnassigned_A)
-		{
-			// get true index ordering
-			int j=i+1, t;
-			tbool bNotFound = TTRUE;
-			while (j<iEntries && i0==pEdges[j].i0 && i1==pEdges[j].i1 && bNotFound)
-			{
-				tbool bUnassigned_B;
-				int i0_B, i1_B;
-				t = pEdges[j].f;
-				// flip i0_B and i1_B
-				GetEdge(&i1_B, &i0_B, &edgenum_B, &piTriListIn[t*3], pEdges[j].i0, pEdges[j].i1);	// resolve index ordering and edge_num
-				//assert(!(i0_A==i1_B && i1_A==i0_B));
-				bUnassigned_B =  pTriInfos[t].FaceNeighbors[edgenum_B]==-1 ? TTRUE : TFALSE;
-				if (i0_A==i0_B && i1_A==i1_B && bUnassigned_B)
-					bNotFound = TFALSE;
-				else
-					++j;
-			}
-
-			if (!bNotFound)
-			{
-				int t = pEdges[j].f;
-				pTriInfos[f].FaceNeighbors[edgenum_A] = t;
-				//assert(pTriInfos[t].FaceNeighbors[edgenum_B]==-1);
-				pTriInfos[t].FaceNeighbors[edgenum_B] = f;
-			}
-		}
-	}
-}
-
-static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn)
-{
-	int f=0, i=0;
-	for (f=0; f<iNrTrianglesIn; f++)
-	{
-		for (i=0; i<3; i++)
-		{
-			// if unassigned
-			if (pTriInfos[f].FaceNeighbors[i] == -1)
-			{
-				const int i0_A = piTriListIn[f*3+i];
-				const int i1_A = piTriListIn[f*3+(i<2?(i+1):0)];
-
-				// search for a neighbor
-				tbool bFound = TFALSE;
-				int t=0, j=0;
-				while (!bFound && t<iNrTrianglesIn)
-				{
-					if (t!=f)
-					{
-						j=0;
-						while (!bFound && j<3)
-						{
-							// in rev order
-							const int i1_B = piTriListIn[t*3+j];
-							const int i0_B = piTriListIn[t*3+(j<2?(j+1):0)];
-							//assert(!(i0_A==i1_B && i1_A==i0_B));
-							if (i0_A==i0_B && i1_A==i1_B)
-								bFound = TTRUE;
-							else
-								++j;
-						}
-					}
-					
-					if (!bFound) ++t;
-				}
-
-				// assign neighbors
-				if (bFound)
-				{
-					pTriInfos[f].FaceNeighbors[i] = t;
-					//assert(pTriInfos[t].FaceNeighbors[j]==-1);
-					pTriInfos[t].FaceNeighbors[j] = f;
-				}
-			}
-		}
-	}
-}
-
-static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed)
-{
-	unsigned int t;
-	int iL, iR, n, index, iMid;
-
-	// early out
-	SEdge sTmp;
-	const int iElems = iRight-iLeft+1;
-	if (iElems<2) return;
-	else if (iElems==2)
-	{
-		if (pSortBuffer[iLeft].array[channel] > pSortBuffer[iRight].array[channel])
-		{
-			sTmp = pSortBuffer[iLeft];
-			pSortBuffer[iLeft] = pSortBuffer[iRight];
-			pSortBuffer[iRight] = sTmp;
-		}
-		return;
-	}
-
-	// Random
-	t=uSeed&31;
-	t=(uSeed<<t)|(uSeed>>(32-t));
-	uSeed=uSeed+t+3;
-	// Random end
-
-	iL = iLeft;
-	iR = iRight;
-	n = (iR-iL)+1;
-	assert(n>=0);
-	index = (int) (uSeed%n);
-
-	iMid=pSortBuffer[index + iL].array[channel];
-
-	do
-	{
-		while (pSortBuffer[iL].array[channel] < iMid)
-			++iL;
-		while (pSortBuffer[iR].array[channel] > iMid)
-			--iR;
-
-		if (iL <= iR)
-		{
-			sTmp = pSortBuffer[iL];
-			pSortBuffer[iL] = pSortBuffer[iR];
-			pSortBuffer[iR] = sTmp;
-			++iL; --iR;
-		}
-	}
-	while (iL <= iR);
-
-	if (iLeft < iR)
-		QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed);
-	if (iL < iRight)
-		QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed);
-}
-
-// resolve ordering and edge number
-static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in)
-{
-	*edgenum_out = -1;
-	
-	// test if first index is on the edge
-	if (indices[0]==i0_in || indices[0]==i1_in)
-	{
-		// test if second index is on the edge
-		if (indices[1]==i0_in || indices[1]==i1_in)
-		{
-			edgenum_out[0]=0;	// first edge
-			i0_out[0]=indices[0];
-			i1_out[0]=indices[1];
-		}
-		else
-		{
-			edgenum_out[0]=2;	// third edge
-			i0_out[0]=indices[2];
-			i1_out[0]=indices[0];
-		}
-	}
-	else
-	{
-		// only second and third index is on the edge
-		edgenum_out[0]=1;	// second edge
-		i0_out[0]=indices[1];
-		i1_out[0]=indices[2];
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////
-/////////////////////////////////// Degenerate triangles ////////////////////////////////////
-
-static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris)
-{
-	int iNextGoodTriangleSearchIndex=-1;
-	tbool bStillFindingGoodOnes;
-
-	// locate quads with only one good triangle
-	int t=0;
-	while (t<(iTotTris-1))
-	{
-		const int iFO_a = pTriInfos[t].iOrgFaceNumber;
-		const int iFO_b = pTriInfos[t+1].iOrgFaceNumber;
-		if (iFO_a==iFO_b)	// this is a quad
-		{
-			const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
-			const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE;
-			if ((bIsDeg_a^bIsDeg_b)!=0)
-			{
-				pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI;
-				pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI;
-			}
-			t += 2;
-		}
-		else
-			++t;
-	}
-
-	// reorder list so all degen triangles are moved to the back
-	// without reordering the good triangles
-	iNextGoodTriangleSearchIndex = 1;
-	t=0;
-	bStillFindingGoodOnes = TTRUE;
-	while (t<iNrTrianglesIn && bStillFindingGoodOnes)
-	{
-		const tbool bIsGood = (pTriInfos[t].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
-		if (bIsGood)
-		{
-			if (iNextGoodTriangleSearchIndex < (t+2))
-				iNextGoodTriangleSearchIndex = t+2;
-		}
-		else
-		{
-			int t0, t1;
-			// search for the first good triangle.
-			tbool bJustADegenerate = TTRUE;
-			while (bJustADegenerate && iNextGoodTriangleSearchIndex<iTotTris)
-			{
-				const tbool bIsGood = (pTriInfos[iNextGoodTriangleSearchIndex].iFlag&MARK_DEGENERATE)==0 ? TTRUE : TFALSE;
-				if (bIsGood) bJustADegenerate=TFALSE;
-				else ++iNextGoodTriangleSearchIndex;
-			}
-
-			t0 = t;
-			t1 = iNextGoodTriangleSearchIndex;
-			++iNextGoodTriangleSearchIndex;
-			assert(iNextGoodTriangleSearchIndex > (t+1));
-
-			// swap triangle t0 and t1
-			if (!bJustADegenerate)
-			{
-				int i=0;
-				for (i=0; i<3; i++)
-				{
-					const int index = piTriList_out[t0*3+i];
-					piTriList_out[t0*3+i] = piTriList_out[t1*3+i];
-					piTriList_out[t1*3+i] = index;
-				}
-				{
-					const STriInfo tri_info = pTriInfos[t0];
-					pTriInfos[t0] = pTriInfos[t1];
-					pTriInfos[t1] = tri_info;
-				}
-			}
-			else
-				bStillFindingGoodOnes = TFALSE;	// this is not supposed to happen
-		}
-
-		if (bStillFindingGoodOnes) ++t;
-	}
-
-	assert(bStillFindingGoodOnes);	// code will still work.
-	assert(iNrTrianglesIn == t);
-}
-
-static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris)
-{
-	int t=0, i=0;
-	// deal with degenerate triangles
-	// punishment for degenerate triangles is O(N^2)
-	for (t=iNrTrianglesIn; t<iTotTris; t++)
-	{
-		// degenerate triangles on a quad with one good triangle are skipped
-		// here but processed in the next loop
-		const tbool bSkip = (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 ? TTRUE : TFALSE;
-
-		if (!bSkip)
-		{
-			for (i=0; i<3; i++)
-			{
-				const int index1 = piTriListIn[t*3+i];
-				// search through the good triangles
-				tbool bNotFound = TTRUE;
-				int j=0;
-				while (bNotFound && j<(3*iNrTrianglesIn))
-				{
-					const int index2 = piTriListIn[j];
-					if (index1==index2) bNotFound=TFALSE;
-					else ++j;
-				}
-
-				if (!bNotFound)
-				{
-					const int iTri = j/3;
-					const int iVert = j%3;
-					const int iSrcVert=pTriInfos[iTri].vert_num[iVert];
-					const int iSrcOffs=pTriInfos[iTri].iTSpacesOffs;
-					const int iDstVert=pTriInfos[t].vert_num[i];
-					const int iDstOffs=pTriInfos[t].iTSpacesOffs;
-					
-					// copy tspace
-					psTspace[iDstOffs+iDstVert] = psTspace[iSrcOffs+iSrcVert];
-				}
-			}
-		}
-	}
-
-	// deal with degenerate quads with one good triangle
-	for (t=0; t<iNrTrianglesIn; t++)
-	{
-		// this triangle belongs to a quad where the
-		// other triangle is degenerate
-		if ( (pTriInfos[t].iFlag&QUAD_ONE_DEGEN_TRI)!=0 )
-		{
-			SVec3 vDstP;
-			int iOrgF=-1, i=0;
-			tbool bNotFound;
-			unsigned char * pV = pTriInfos[t].vert_num;
-			int iFlag = (1<<pV[0]) | (1<<pV[1]) | (1<<pV[2]);
-			int iMissingIndex = 0;
-			if ((iFlag&2)==0) iMissingIndex=1;
-			else if ((iFlag&4)==0) iMissingIndex=2;
-			else if ((iFlag&8)==0) iMissingIndex=3;
-
-			iOrgF = pTriInfos[t].iOrgFaceNumber;
-			vDstP = GetPosition(pContext, MakeIndex(iOrgF, iMissingIndex));
-			bNotFound = TTRUE;
-			i=0;
-			while (bNotFound && i<3)
-			{
-				const int iVert = pV[i];
-				const SVec3 vSrcP = GetPosition(pContext, MakeIndex(iOrgF, iVert));
-				if (veq(vSrcP, vDstP)==TTRUE)
-				{
-					const int iOffs = pTriInfos[t].iTSpacesOffs;
-					psTspace[iOffs+iMissingIndex] = psTspace[iOffs+iVert];
-					bNotFound=TFALSE;
-				}
-				else
-					++i;
-			}
-			assert(!bNotFound);
-		}
-	}
-}
diff --git a/mikktspace/include/mikktspace/mikktspace.h b/mikktspace/include/mikktspace/mikktspace.h
deleted file mode 100644
index 52c44a7..0000000
--- a/mikktspace/include/mikktspace/mikktspace.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/** \file mikktspace/mikktspace.h
- *  \ingroup mikktspace
- */
-/**
- *  Copyright (C) 2011 by Morten S. Mikkelsen
- *
- *  This software is provided 'as-is', without any express or implied
- *  warranty.  In no event will the authors be held liable for any damages
- *  arising from the use of this software.
- *
- *  Permission is granted to anyone to use this software for any purpose,
- *  including commercial applications, and to alter it and redistribute it
- *  freely, subject to the following restrictions:
- *
- *  1. The origin of this software must not be misrepresented; you must not
- *     claim that you wrote the original software. If you use this software
- *     in a product, an acknowledgment in the product documentation would be
- *     appreciated but is not required.
- *  2. Altered source versions must be plainly marked as such, and must not be
- *     misrepresented as being the original software.
- *  3. This notice may not be removed or altered from any source distribution.
- */
-
-#ifndef __MIKKTSPACE_H__
-#define __MIKKTSPACE_H__
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Author: Morten S. Mikkelsen
- * Version: 1.0
- *
- * The files mikktspace.h and mikktspace.c are designed to be
- * stand-alone files and it is important that they are kept this way.
- * Not having dependencies on structures/classes/libraries specific
- * to the program, in which they are used, allows them to be copied
- * and used as is into any tool, program or plugin.
- * The code is designed to consistently generate the same
- * tangent spaces, for a given mesh, in any tool in which it is used.
- * This is done by performing an internal welding step and subsequently an order-independent evaluation
- * of tangent space for meshes consisting of triangles and quads.
- * This means faces can be received in any order and the same is true for
- * the order of vertices of each face. The generated result will not be affected
- * by such reordering. Additionally, whether degenerate (vertices or texture coordinates)
- * primitives are present or not will not affect the generated results either.
- * Once tangent space calculation is done the vertices of degenerate primitives will simply
- * inherit tangent space from neighboring non degenerate primitives.
- * The analysis behind this implementation can be found in my master's thesis
- * which is available for download --> http://image.diku.dk/projects/media/morten.mikkelsen.08.pdf
- * Note that though the tangent spaces at the vertices are generated in an order-independent way,
- * by this implementation, the interpolated tangent space is still affected by which diagonal is
- * chosen to split each quad. A sensible solution is to have your tools pipeline always
- * split quads by the shortest diagonal. This choice is order-independent and works with mirroring.
- * If these have the same length then compare the diagonals defined by the texture coordinates.
- * XNormal which is a tool for baking normal maps allows you to write your own tangent space plugin
- * and also quad triangulator plugin.
- */
-
-
-typedef int tbool;
-typedef struct SMikkTSpaceContext SMikkTSpaceContext;
-
-typedef struct {
-	// Returns the number of faces (triangles/quads) on the mesh to be processed.
-	int (*m_getNumFaces)(const SMikkTSpaceContext * pContext);
-
-	// Returns the number of vertices on face number iFace
-	// iFace is a number in the range {0, 1, ..., getNumFaces()-1}
-	int (*m_getNumVerticesOfFace)(const SMikkTSpaceContext * pContext, const int iFace);
-
-	// returns the position/normal/texcoord of the referenced face of vertex number iVert.
-	// iVert is in the range {0,1,2} for triangles and {0,1,2,3} for quads.
-	void (*m_getPosition)(const SMikkTSpaceContext * pContext, float fvPosOut[], const int iFace, const int iVert);
-	void (*m_getNormal)(const SMikkTSpaceContext * pContext, float fvNormOut[], const int iFace, const int iVert);
-	void (*m_getTexCoord)(const SMikkTSpaceContext * pContext, float fvTexcOut[], const int iFace, const int iVert);
-
-	// either (or both) of the two setTSpace callbacks can be set.
-	// The call-back m_setTSpaceBasic() is sufficient for basic normal mapping.
-
-	// This function is used to return the tangent and fSign to the application.
-	// fvTangent is a unit length vector.
-	// For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level.
-	// bitangent = fSign * cross(vN, tangent);
-	// Note that the results are returned unindexed. It is possible to generate a new index list
-	// But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results.
-	// DO NOT! use an already existing index list.
-	void (*m_setTSpaceBasic)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert);
-
-	// This function is used to return tangent space results to the application.
-	// fvTangent and fvBiTangent are unit length vectors and fMagS and fMagT are their
-	// true magnitudes which can be used for relief mapping effects.
-	// fvBiTangent is the "real" bitangent and thus may not be perpendicular to fvTangent.
-	// However, both are perpendicular to the vertex normal.
-	// For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level.
-	// fSign = bIsOrientationPreserving ? 1.0f : (-1.0f);
-	// bitangent = fSign * cross(vN, tangent);
-	// Note that the results are returned unindexed. It is possible to generate a new index list
-	// But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results.
-	// DO NOT! use an already existing index list.
-	void (*m_setTSpace)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fvBiTangent[], const float fMagS, const float fMagT,
-						const tbool bIsOrientationPreserving, const int iFace, const int iVert);
-} SMikkTSpaceInterface;
-
-struct SMikkTSpaceContext
-{
-	SMikkTSpaceInterface * m_pInterface;	// initialized with callback functions
-	void * m_pUserData;						// pointer to client side mesh data etc. (passed as the first parameter with every interface call)
-};
-
-// these are both thread safe!
-tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext);	// Default (recommended) fAngularThreshold is 180 degrees (which means threshold disabled)
-tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold);
-
-
-// To avoid visual errors (distortions/unwanted hard edges in lighting), when using sampled normal maps, the
-// normal map sampler must use the exact inverse of the pixel shader transformation.
-// The most efficient transformation we can possibly do in the pixel shader is
-// achieved by using, directly, the "unnormalized" interpolated tangent, bitangent and vertex normal: vT, vB and vN.
-// pixel shader (fast transform out)
-// vNout = normalize( vNt.x * vT + vNt.y * vB + vNt.z * vN );
-// where vNt is the tangent space normal. The normal map sampler must likewise use the
-// interpolated and "unnormalized" tangent, bitangent and vertex normal to be compliant with the pixel shader.
-// sampler does (exact inverse of pixel shader):
-// float3 row0 = cross(vB, vN);
-// float3 row1 = cross(vN, vT);
-// float3 row2 = cross(vT, vB);
-// float fSign = dot(vT, row0)<0 ? -1 : 1;
-// vNt = normalize( fSign * float3(dot(vNout,row0), dot(vNout,row1), dot(vNout,row2)) );
-// where vNout is the sampled normal in some chosen 3D space.
-//
-// Should you choose to reconstruct the bitangent in the pixel shader instead
-// of the vertex shader, as explained earlier, then be sure to do this in the normal map sampler also.
-// Finally, beware of quad triangulations. If the normal map sampler doesn't use the same triangulation of
-// quads as your renderer then problems will occur since the interpolated tangent spaces will differ
-// eventhough the vertex level tangent spaces match. This can be solved either by triangulating before
-// sampling/exporting or by using the order-independent choice of diagonal for splitting quads suggested earlier.
-// However, this must be used both by the sampler and your tools/rendering pipeline.
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/mikktspace/include/mikktspace/mikktspace.hh b/mikktspace/include/mikktspace/mikktspace.hh
new file mode 100644
index 0000000..9f1761c
--- /dev/null
+++ b/mikktspace/include/mikktspace/mikktspace.hh
@@ -0,0 +1,839 @@
+/* SPDX-FileCopyrightText: 2011 Morten S. Mikkelsen
+ * SPDX-FileCopyrightText: 2022 Blender Authors
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+/** \file
+ * \ingroup mikktspace
+ */
+
+#include <array>
+#include <algorithm>
+#include <cassert>
+#include <unordered_map>
+
+#ifdef WITH_TBB
+#  include <tbb/parallel_for.h>
+#endif
+
+typedef uint32_t uint;
+
+#include "mikk_atomic_hash_set.hh"
+#include "mikk_float3.hh"
+#include "mikk_util.hh"
+
+namespace mikk {
+
+static constexpr uint UNSET_ENTRY = 0xffffffffu;
+
+template<typename Mesh> class Mikktspace {
+  struct Triangle {
+    /* Stores neighboring triangle for group assignment. */
+    std::array<uint, 3> neighbor;
+    /* Stores assigned group of each vertex. */
+    std::array<uint, 3> group;
+    /* Stores vertex indices that make up the triangle. */
+    std::array<uint, 3> vertices;
+
+    /* Computed face tangent, will be accumulated into group. */
+    float3 tangent;
+
+    /* Index of the face that this triangle belongs to. */
+    uint faceIdx;
+    /* Index of the first of this triangle's vertices' TSpaces. */
+    uint tSpaceIdx;
+
+    /* Stores mapping from this triangle's vertices to the original
+     * face's vertices (relevant for quads). */
+    std::array<uint8_t, 3> faceVertex;
+
+    // flags
+    bool markDegenerate : 1;
+    bool quadOneDegenTri : 1;
+    bool groupWithAny : 1;
+    bool orientPreserving : 1;
+
+    Triangle(uint faceIdx_, uint tSpaceIdx_)
+        : tangent{0.0f},
+          faceIdx{faceIdx_},
+          tSpaceIdx{tSpaceIdx_},
+          markDegenerate{false},
+          quadOneDegenTri{false},
+          groupWithAny{true},
+          orientPreserving{false}
+    {
+      neighbor.fill(UNSET_ENTRY);
+      group.fill(UNSET_ENTRY);
+    }
+
+    void setVertices(uint8_t i0, uint8_t i1, uint8_t i2)
+    {
+      faceVertex[0] = i0;
+      faceVertex[1] = i1;
+      faceVertex[2] = i2;
+      vertices[0] = pack_index(faceIdx, i0);
+      vertices[1] = pack_index(faceIdx, i1);
+      vertices[2] = pack_index(faceIdx, i2);
+    }
+  };
+
+  struct Group {
+    float3 tangent;
+    uint vertexRepresentative;
+    bool orientPreserving;
+
+    Group(uint vertexRepresentative_, bool orientPreserving_)
+        : tangent{0.0f},
+          vertexRepresentative{vertexRepresentative_},
+          orientPreserving{orientPreserving_}
+    {
+    }
+
+    void normalizeTSpace()
+    {
+      tangent = tangent.normalize();
+    }
+
+    void accumulateTSpaceAtomic(float3 v_tangent)
+    {
+      float_add_atomic(&tangent.x, v_tangent.x);
+      float_add_atomic(&tangent.y, v_tangent.y);
+      float_add_atomic(&tangent.z, v_tangent.z);
+    }
+
+    void accumulateTSpace(float3 v_tangent)
+    {
+      tangent += v_tangent;
+    }
+  };
+
+  struct TSpace {
+    float3 tangent = float3(1.0f, 0.0f, 0.0f);
+    uint counter = 0;
+    bool orientPreserving = false;
+
+    void accumulateGroup(const Group &group)
+    {
+      assert(counter < 2);
+
+      if (counter == 0) {
+        tangent = group.tangent;
+      }
+      else if (tangent == group.tangent) {
+        // this if is important. Due to floating point precision
+        // averaging when ts0==ts1 will cause a slight difference
+        // which results in tangent space splits later on, so do nothing
+      }
+      else {
+        tangent = (tangent + group.tangent).normalize();
+      }
+
+      counter++;
+      orientPreserving = group.orientPreserving;
+    }
+  };
+
+  Mesh &mesh;
+
+  std::vector<Triangle> triangles;
+  std::vector<TSpace> tSpaces;
+  std::vector<Group> groups;
+
+  uint nrTSpaces, nrFaces, nrTriangles, totalTriangles;
+
+  int nrThreads;
+  bool isParallel;
+
+ public:
+  Mikktspace(Mesh &mesh_) : mesh(mesh_) {}
+
+  void genTangSpace()
+  {
+    nrFaces = uint(mesh.GetNumFaces());
+
+#ifdef WITH_TBB
+    nrThreads = tbb::this_task_arena::max_concurrency();
+    isParallel = (nrThreads > 1) && (nrFaces > 10000);
+#else
+    nrThreads = 1;
+    isParallel = false;
+#endif
+
+    // make an initial triangle --> face index list
+    generateInitialVerticesIndexList();
+
+    if (nrTriangles == 0) {
+      return;
+    }
+
+    // make a welded index list of identical positions and attributes (pos, norm, texc)
+    generateSharedVerticesIndexList();
+
+    // mark all triangle pairs that belong to a quad with only one
+    // good triangle. These need special treatment in degenEpilogue().
+    // Additionally, move all good triangles to the start of
+    // triangles[] without changing order and
+    // put the degenerate triangles last.
+    degenPrologue();
+
+    if (nrTriangles == 0) {
+      // No point in building tangents if there are no non-degenerate triangles, so just zero them
+      tSpaces.resize(nrTSpaces);
+    }
+    else {
+      // evaluate triangle level attributes and neighbor list
+      initTriangle();
+
+      // match up edge pairs
+      buildNeighbors();
+
+      // based on the 4 rules, identify groups based on connectivity
+      build4RuleGroups();
+
+      // make tspaces, each group is split up into subgroups.
+      // Finally a tangent space is made for every resulting subgroup
+      generateTSpaces();
+
+      // degenerate quads with one good triangle will be fixed by copying a space from
+      // the good triangle to the coinciding vertex.
+      // all other degenerate triangles will just copy a space from any good triangle
+      // with the same welded index in vertices[].
+      degenEpilogue();
+    }
+
+    uint index = 0;
+    for (uint f = 0; f < nrFaces; f++) {
+      const uint verts = mesh.GetNumVerticesOfFace(f);
+      if (verts != 3 && verts != 4) {
+        continue;
+      }
+
+      // set data
+      for (uint i = 0; i < verts; i++) {
+        const TSpace &tSpace = tSpaces[index++];
+        mesh.SetTangentSpace(f, i, tSpace.tangent, tSpace.orientPreserving);
+      }
+    }
+  }
+
+ protected:
+  template<typename F> void runParallel(uint start, uint end, F func)
+  {
+#ifdef WITH_TBB
+    if (isParallel) {
+      tbb::parallel_for(start, end, func);
+    }
+    else
+#endif
+    {
+      for (uint i = start; i < end; i++) {
+        func(i);
+      }
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  float3 getPosition(uint vertexID)
+  {
+    uint f, v;
+    unpack_index(f, v, vertexID);
+    return mesh.GetPosition(f, v);
+  }
+
+  float3 getNormal(uint vertexID)
+  {
+    uint f, v;
+    unpack_index(f, v, vertexID);
+    return mesh.GetNormal(f, v);
+  }
+
+  float3 getTexCoord(uint vertexID)
+  {
+    uint f, v;
+    unpack_index(f, v, vertexID);
+    return mesh.GetTexCoord(f, v);
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  void generateInitialVerticesIndexList()
+  {
+    nrTriangles = 0;
+    for (uint f = 0; f < nrFaces; f++) {
+      const uint verts = mesh.GetNumVerticesOfFace(f);
+      if (verts == 3) {
+        nrTriangles += 1;
+      }
+      else if (verts == 4) {
+        nrTriangles += 2;
+      }
+    }
+
+    triangles.reserve(nrTriangles);
+
+    nrTSpaces = 0;
+    for (uint f = 0; f < nrFaces; f++) {
+      const uint verts = mesh.GetNumVerticesOfFace(f);
+      if (verts != 3 && verts != 4) {
+        continue;
+      }
+
+      uint tA = uint(triangles.size());
+      triangles.emplace_back(f, nrTSpaces);
+      Triangle &triA = triangles[tA];
+
+      if (verts == 3) {
+        triA.setVertices(0, 1, 2);
+      }
+      else {
+        uint tB = uint(triangles.size());
+        triangles.emplace_back(f, nrTSpaces);
+        Triangle &triB = triangles[tB];
+
+        // need an order independent way to evaluate
+        // tspace on quads. This is done by splitting
+        // along the shortest diagonal.
+        float distSQ_02 = (mesh.GetTexCoord(f, 2) - mesh.GetTexCoord(f, 0)).length_squared();
+        float distSQ_13 = (mesh.GetTexCoord(f, 3) - mesh.GetTexCoord(f, 1)).length_squared();
+        bool quadDiagIs_02;
+        if (distSQ_02 != distSQ_13) {
+          quadDiagIs_02 = (distSQ_02 < distSQ_13);
+        }
+        else {
+          distSQ_02 = (mesh.GetPosition(f, 2) - mesh.GetPosition(f, 0)).length_squared();
+          distSQ_13 = (mesh.GetPosition(f, 3) - mesh.GetPosition(f, 1)).length_squared();
+          quadDiagIs_02 = !(distSQ_13 < distSQ_02);
+        }
+
+        if (quadDiagIs_02) {
+          triA.setVertices(0, 1, 2);
+          triB.setVertices(0, 2, 3);
+        }
+        else {
+          triA.setVertices(0, 1, 3);
+          triB.setVertices(1, 2, 3);
+        }
+      }
+
+      nrTSpaces += verts;
+    }
+  }
+
+  struct VertexHash {
+    Mikktspace<Mesh> *mikk;
+    inline uint operator()(const uint &k) const
+    {
+      return hash_float3x3(mikk->getPosition(k), mikk->getNormal(k), mikk->getTexCoord(k));
+    }
+  };
+
+  struct VertexEqual {
+    Mikktspace<Mesh> *mikk;
+    inline bool operator()(const uint &kA, const uint &kB) const
+    {
+      return mikk->getTexCoord(kA) == mikk->getTexCoord(kB) &&
+             mikk->getNormal(kA) == mikk->getNormal(kB) &&
+             mikk->getPosition(kA) == mikk->getPosition(kB);
+    }
+  };
+
+  /* Merge identical vertices.
+   * To find vertices with identical position, normal and texcoord, we calculate a hash of the 9
+   * values. Then, by sorting based on that hash, identical elements (having identical hashes) will
+   * be moved next to each other. Since there might be hash collisions, the elements of each block
+   * are then compared with each other and duplicates are merged.
+   */
+  template<bool isAtomic> void generateSharedVerticesIndexList_impl()
+  {
+    uint numVertices = nrTriangles * 3;
+    AtomicHashSet<uint, isAtomic, VertexHash, VertexEqual> set(numVertices, {this}, {this});
+    runParallel(0u, nrTriangles, [&](uint t) {
+      for (uint i = 0; i < 3; i++) {
+        auto res = set.emplace(triangles[t].vertices[i]);
+        if (!res.second) {
+          triangles[t].vertices[i] = res.first;
+        }
+      }
+    });
+  }
+  void generateSharedVerticesIndexList()
+  {
+    if (isParallel) {
+      generateSharedVerticesIndexList_impl<true>();
+    }
+    else {
+      generateSharedVerticesIndexList_impl<false>();
+    }
+  }
+
+  /////////////////////////////////////////////////////////////////////////////////////////////
+  /////////////////////////////////// Degenerate triangles ////////////////////////////////////
+
+  void degenPrologue()
+  {
+    // Mark all degenerate triangles
+    totalTriangles = nrTriangles;
+    std::atomic<uint> degenTriangles(0);
+    runParallel(0u, totalTriangles, [&](uint t) {
+      const float3 p0 = getPosition(triangles[t].vertices[0]);
+      const float3 p1 = getPosition(triangles[t].vertices[1]);
+      const float3 p2 = getPosition(triangles[t].vertices[2]);
+      if (p0 == p1 || p0 == p2 || p1 == p2)  // degenerate
+      {
+        triangles[t].markDegenerate = true;
+        degenTriangles.fetch_add(1);
+      }
+    });
+    nrTriangles -= degenTriangles.load();
+
+    if (totalTriangles == nrTriangles) {
+      return;
+    }
+
+    // locate quads with only one good triangle
+    runParallel(0u, totalTriangles - 1, [&](uint t) {
+      Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
+      if (triangleA.faceIdx != triangleB.faceIdx) {
+        /* Individual triangle, skip. */
+        return;
+      }
+      if (triangleA.markDegenerate != triangleB.markDegenerate) {
+        triangleA.quadOneDegenTri = true;
+        triangleB.quadOneDegenTri = true;
+      }
+    });
+
+    std::stable_partition(triangles.begin(), triangles.end(), [](const Triangle &tri) {
+      return !tri.markDegenerate;
+    });
+  }
+
+  void degenEpilogue()
+  {
+    if (nrTriangles == totalTriangles) {
+      return;
+    }
+
+    std::unordered_map<uint, uint> goodTriangleMap;
+    for (uint t = 0; t < nrTriangles; t++) {
+      for (uint i = 0; i < 3; i++) {
+        goodTriangleMap.emplace(triangles[t].vertices[i], pack_index(t, i));
+      }
+    }
+
+    // deal with degenerate triangles
+    // punishment for degenerate triangles is O(nrTriangles) extra memory.
+    for (uint t = nrTriangles; t < totalTriangles; t++) {
+      // degenerate triangles on a quad with one good triangle are skipped
+      // here but processed in the next loop
+      if (triangles[t].quadOneDegenTri) {
+        continue;
+      }
+
+      for (uint i = 0; i < 3; i++) {
+        const auto entry = goodTriangleMap.find(triangles[t].vertices[i]);
+        if (entry == goodTriangleMap.end()) {
+          // Matching vertex from good triangle is not found.
+          continue;
+        }
+
+        uint tSrc, iSrc;
+        unpack_index(tSrc, iSrc, entry->second);
+        const uint iSrcVert = triangles[tSrc].faceVertex[iSrc];
+        const uint iSrcOffs = triangles[tSrc].tSpaceIdx;
+        const uint iDstVert = triangles[t].faceVertex[i];
+        const uint iDstOffs = triangles[t].tSpaceIdx;
+        // copy tspace
+        tSpaces[iDstOffs + iDstVert] = tSpaces[iSrcOffs + iSrcVert];
+      }
+    }
+
+    // deal with degenerate quads with one good triangle
+    for (uint t = 0; t < nrTriangles; t++) {
+      // this triangle belongs to a quad where the
+      // other triangle is degenerate
+      if (!triangles[t].quadOneDegenTri) {
+        continue;
+      }
+      uint vertFlag = (1u << triangles[t].faceVertex[0]) | (1u << triangles[t].faceVertex[1]) |
+                      (1u << triangles[t].faceVertex[2]);
+      uint missingFaceVertex = 0;
+      if ((vertFlag & 2) == 0) {
+        missingFaceVertex = 1;
+      }
+      else if ((vertFlag & 4) == 0) {
+        missingFaceVertex = 2;
+      }
+      else if ((vertFlag & 8) == 0) {
+        missingFaceVertex = 3;
+      }
+
+      uint faceIdx = triangles[t].faceIdx;
+      float3 dstP = mesh.GetPosition(faceIdx, missingFaceVertex);
+      bool found = false;
+      for (uint i = 0; i < 3; i++) {
+        const uint faceVertex = triangles[t].faceVertex[i];
+        const float3 srcP = mesh.GetPosition(faceIdx, faceVertex);
+        if (srcP == dstP) {
+          const uint offset = triangles[t].tSpaceIdx;
+          tSpaces[offset + missingFaceVertex] = tSpaces[offset + faceVertex];
+          found = true;
+          break;
+        }
+      }
+      assert(found);
+      (void)found;
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  // returns the texture area times 2
+  float calcTexArea(uint tri)
+  {
+    const float3 t1 = getTexCoord(triangles[tri].vertices[0]);
+    const float3 t2 = getTexCoord(triangles[tri].vertices[1]);
+    const float3 t3 = getTexCoord(triangles[tri].vertices[2]);
+
+    const float t21x = t2.x - t1.x;
+    const float t21y = t2.y - t1.y;
+    const float t31x = t3.x - t1.x;
+    const float t31y = t3.y - t1.y;
+
+    const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
+    return fabsf(signedAreaSTx2);
+  }
+
+  void initTriangle()
+  {
+    // triangles[f].iFlag is cleared in generateInitialVerticesIndexList()
+    // which is called before this function.
+
+    // evaluate first order derivatives
+    runParallel(0u, nrTriangles, [&](uint t) {
+      Triangle &triangle = triangles[t];
+
+      // initial values
+      const float3 v1 = getPosition(triangle.vertices[0]);
+      const float3 v2 = getPosition(triangle.vertices[1]);
+      const float3 v3 = getPosition(triangle.vertices[2]);
+      const float3 t1 = getTexCoord(triangle.vertices[0]);
+      const float3 t2 = getTexCoord(triangle.vertices[1]);
+      const float3 t3 = getTexCoord(triangle.vertices[2]);
+
+      const float t21x = t2.x - t1.x;
+      const float t21y = t2.y - t1.y;
+      const float t31x = t3.x - t1.x;
+      const float t31y = t3.y - t1.y;
+      const float3 d1 = v2 - v1, d2 = v3 - v1;
+
+      const float signedAreaSTx2 = t21x * t31y - t21y * t31x;
+      const float3 vOs = (t31y * d1) - (t21y * d2);   // eq 18
+      const float3 vOt = (-t31x * d1) + (t21x * d2);  // eq 19
+
+      triangle.orientPreserving = (signedAreaSTx2 > 0);
+
+      if (not_zero(signedAreaSTx2)) {
+        const float lenOs2 = vOs.length_squared();
+        const float lenOt2 = vOt.length_squared();
+        const float fS = triangle.orientPreserving ? 1.0f : (-1.0f);
+        if (not_zero(lenOs2)) {
+          triangle.tangent = vOs * (fS / sqrtf(lenOs2));
+        }
+
+        // if this is a good triangle
+        if (not_zero(lenOs2) && not_zero(lenOt2)) {
+          triangle.groupWithAny = false;
+        }
+      }
+    });
+
+    // force otherwise healthy quads to a fixed orientation
+    runParallel(0u, nrTriangles - 1, [&](uint t) {
+      Triangle &triangleA = triangles[t], &triangleB = triangles[t + 1];
+      if (triangleA.faceIdx != triangleB.faceIdx) {
+        // this is not a quad
+        return;
+      }
+
+      // bad triangles should already have been removed by
+      // degenPrologue(), but just in case check that neither are degenerate
+      if (!(triangleA.markDegenerate || triangleB.markDegenerate)) {
+        // if this happens the quad has extremely bad mapping!!
+        if (triangleA.orientPreserving != triangleB.orientPreserving) {
+          bool chooseOrientFirstTri = false;
+          if (triangleB.groupWithAny) {
+            chooseOrientFirstTri = true;
+          }
+          else if (calcTexArea(t) >= calcTexArea(t + 1)) {
+            chooseOrientFirstTri = true;
+          }
+
+          // force match
+          const uint t0 = chooseOrientFirstTri ? t : (t + 1);
+          const uint t1 = chooseOrientFirstTri ? (t + 1) : t;
+          triangles[t1].orientPreserving = triangles[t0].orientPreserving;
+        }
+      }
+    });
+  }
+
+  /////////////////////////////////////////////////////////////////////////////////////////////
+  /////////////////////////////////////////// Edges ///////////////////////////////////////////
+
+  struct NeighborShard {
+    struct Entry {
+      Entry(uint32_t key_, uint data_) : key(key_), data(data_) {}
+      uint key, data;
+    };
+    std::vector<Entry> entries;
+
+    NeighborShard(size_t capacity)
+    {
+      entries.reserve(capacity);
+    }
+
+    void buildNeighbors(Mikktspace<Mesh> *mikk)
+    {
+      /* Entries are added by iterating over t, so by using a stable sort,
+       * we don't have to compare based on t as well. */
+      {
+        std::vector<Entry> tempEntries(entries.size(), {0, 0});
+        radixsort(entries, tempEntries, [](const Entry &e) { return e.key; });
+      }
+
+      for (uint i = 0; i < entries.size(); i++) {
+        const Entry &a = entries[i];
+        uint tA, iA;
+        unpack_index(tA, iA, a.data);
+        Mikktspace<Mesh>::Triangle &triA = mikk->triangles[tA];
+
+        if (triA.neighbor[iA] != UNSET_ENTRY) {
+          continue;
+        }
+
+        uint i0A = triA.vertices[iA], i1A = triA.vertices[(iA != 2) ? (iA + 1) : 0];
+        for (uint j = i + 1; j < entries.size(); j++) {
+          const Entry &b = entries[j];
+          uint tB, iB;
+          unpack_index(tB, iB, b.data);
+          Mikktspace<Mesh>::Triangle &triB = mikk->triangles[tB];
+
+          if (b.key != a.key)
+            break;
+
+          if (triB.neighbor[iB] != UNSET_ENTRY) {
+            continue;
+          }
+
+          uint i1B = triB.vertices[iB], i0B = triB.vertices[(iB != 2) ? (iB + 1) : 0];
+          if (i0A == i0B && i1A == i1B) {
+            triA.neighbor[iA] = tB;
+            triB.neighbor[iB] = tA;
+            break;
+          }
+        }
+      }
+    }
+  };
+
+  void buildNeighbors()
+  {
+    /* In order to parallelize the processing, we divide the vertices into shards.
+     * Since only vertex pairs with the same key will be checked, we can process
+     * shards independently as long as we ensure that all vertices with the same
+     * key go into the same shard.
+     * This is done by hashing the key to get the shard index of each vertex.
+     */
+    // TODO: Two-step filling that first counts and then fills? Could be parallel then.
+    uint targetNrShards = isParallel ? uint(4 * nrThreads) : 1;
+    uint nrShards = 1, hashShift = 32;
+    while (nrShards < targetNrShards) {
+      nrShards *= 2;
+      hashShift -= 1;
+    }
+
+    /* Reserve 25% extra to account for variation due to hashing. */
+    size_t reserveSize = size_t(double(3 * nrTriangles) * 1.25 / nrShards);
+    std::vector<NeighborShard> shards(nrShards, {reserveSize});
+
+    for (uint t = 0; t < nrTriangles; t++) {
+      Triangle &triangle = triangles[t];
+      for (uint i = 0; i < 3; i++) {
+        const uint i0 = triangle.vertices[i];
+        const uint i1 = triangle.vertices[(i != 2) ? (i + 1) : 0];
+        const uint high = std::max(i0, i1), low = std::min(i0, i1);
+        const uint hash = hash_uint3(high, low, 0);
+        /* TODO: Reusing the hash here means less hash space inside each shard.
+         * Computing a second hash with a different seed it probably not worth it? */
+        const uint shard = isParallel ? (hash >> hashShift) : 0;
+        shards[shard].entries.emplace_back(hash, pack_index(t, i));
+      }
+    }
+
+    runParallel(0u, nrShards, [&](uint s) { shards[s].buildNeighbors(this); });
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  void assignRecur(const uint t, uint groupId)
+  {
+    if (t == UNSET_ENTRY) {
+      return;
+    }
+
+    Triangle &triangle = triangles[t];
+    Group &group = groups[groupId];
+
+    // track down vertex
+    const uint vertRep = group.vertexRepresentative;
+    uint i = 3;
+    if (triangle.vertices[0] == vertRep) {
+      i = 0;
+    }
+    else if (triangle.vertices[1] == vertRep) {
+      i = 1;
+    }
+    else if (triangle.vertices[2] == vertRep) {
+      i = 2;
+    }
+    assert(i < 3);
+
+    // early out
+    if (triangle.group[i] != UNSET_ENTRY) {
+      return;
+    }
+
+    if (triangle.groupWithAny) {
+      // first to group with a group-with-anything triangle
+      // determines its orientation.
+      // This is the only existing order dependency in the code!!
+      if (triangle.group[0] == UNSET_ENTRY && triangle.group[1] == UNSET_ENTRY &&
+          triangle.group[2] == UNSET_ENTRY)
+      {
+        triangle.orientPreserving = group.orientPreserving;
+      }
+    }
+
+    if (triangle.orientPreserving != group.orientPreserving) {
+      return;
+    }
+
+    triangle.group[i] = groupId;
+
+    const uint t_L = triangle.neighbor[i];
+    const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
+    assignRecur(t_L, groupId);
+    assignRecur(t_R, groupId);
+  }
+
+  void build4RuleGroups()
+  {
+    /* NOTE: This could be parallelized by grouping all [t, i] pairs into
+     * shards by hash(triangles[t].vertices[i]). This way, each shard can be processed
+     * independently and in parallel.
+     * However, the `groupWithAny` logic needs special handling (e.g. lock a mutex when
+     * encountering a `groupWithAny` triangle, then sort it out, then unlock and proceed). */
+    for (uint t = 0; t < nrTriangles; t++) {
+      Triangle &triangle = triangles[t];
+      for (uint i = 0; i < 3; i++) {
+        // if not assigned to a group
+        if (triangle.groupWithAny || triangle.group[i] != UNSET_ENTRY) {
+          continue;
+        }
+
+        const uint newGroupId = uint(groups.size());
+        triangle.group[i] = newGroupId;
+
+        groups.emplace_back(triangle.vertices[i], bool(triangle.orientPreserving));
+
+        const uint t_L = triangle.neighbor[i];
+        const uint t_R = triangle.neighbor[i > 0 ? (i - 1) : 2];
+        assignRecur(t_L, newGroupId);
+        assignRecur(t_R, newGroupId);
+      }
+    }
+  }
+
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+  ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+  template<bool atomic> void accumulateTSpaces(uint t)
+  {
+    const Triangle &triangle = triangles[t];
+    // only valid triangles get to add their contribution
+    if (triangle.groupWithAny) {
+      return;
+    }
+
+    /* Todo: Vectorize?
+     * Also: Could add special case for flat shading, when all normals are equal half of the fCos
+     * projections and two of the three tangent projections are unnecessary. */
+    std::array<float3, 3> n, p;
+    for (uint i = 0; i < 3; i++) {
+      n[i] = getNormal(triangle.vertices[i]);
+      p[i] = getPosition(triangle.vertices[i]);
+    }
+
+    std::array<float, 3> fCos = {dot(project(n[0], p[1] - p[0]), project(n[0], p[2] - p[0])),
+                                 dot(project(n[1], p[2] - p[1]), project(n[1], p[0] - p[1])),
+                                 dot(project(n[2], p[0] - p[2]), project(n[2], p[1] - p[2]))};
+
+    for (uint i = 0; i < 3; i++) {
+      uint groupId = triangle.group[i];
+      if (groupId != UNSET_ENTRY) {
+        float3 tangent = project(n[i], triangle.tangent) *
+                         fast_acosf(std::clamp(fCos[i], -1.0f, 1.0f));
+        if constexpr (atomic) {
+          groups[groupId].accumulateTSpaceAtomic(tangent);
+        }
+        else {
+          groups[groupId].accumulateTSpace(tangent);
+        }
+      }
+    }
+  }
+
+  void generateTSpaces()
+  {
+    if (isParallel) {
+      runParallel(0u, nrTriangles, [&](uint t) { accumulateTSpaces<true>(t); });
+    }
+    else {
+      for (uint t = 0; t < nrTriangles; t++) {
+        accumulateTSpaces<false>(t);
+      }
+    }
+
+    /* TODO: Worth parallelizing? Probably not. */
+    for (Group &group : groups) {
+      group.normalizeTSpace();
+    }
+
+    tSpaces.resize(nrTSpaces);
+
+    for (uint t = 0; t < nrTriangles; t++) {
+      Triangle &triangle = triangles[t];
+      for (uint i = 0; i < 3; i++) {
+        uint groupId = triangle.group[i];
+        if (groupId == UNSET_ENTRY) {
+          continue;
+        }
+        const Group group = groups[groupId];
+        assert(triangle.orientPreserving == group.orientPreserving);
+
+        // output tspace
+        const uint offset = triangle.tSpaceIdx;
+        const uint faceVertex = triangle.faceVertex[i];
+        tSpaces[offset + faceVertex].accumulateGroup(group);
+      }
+    }
+  }
+};
+
+}  // namespace mikk
diff --git a/mikktspace/mikktspace.txt b/mikktspace/mikktspace.txt
index b46b528..86b1a0b 100644
--- a/mikktspace/mikktspace.txt
+++ b/mikktspace/mikktspace.txt
@@ -1,18 +1,208 @@
+Copyright (C) 2022 Blender Authors
 Copyright (C) 2011 by Morten S. Mikkelsen
 
-This software is provided 'as-is', without any express or implied
-warranty.  In no event will the authors be held liable for any damages
-arising from the use of this software.
-
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it
-freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not
-   claim that you wrote the original software. If you use this software
-   in a product, an acknowledgment in the product documentation would be
-   appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be
-   misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+