diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index e7b23f2ce..cb08eaeb7 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -730,6 +730,7 @@ Released 2020/12/09 - `VK_EXT_descriptor_indexing` (initial release limited to Metal Tier 1: 96/128 textures, 16 samplers) - `VK_EXT_post_depth_coverage` (macOS) - `VK_EXT_private_data` + - `VK_EXT_shader_demote_to_helper_invocation` - `VK_EXT_subgroup_size_control` - `VK_EXT_texture_compression_astc_hdr` - `VK_AMD_shader_image_load_store` (macOS) diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index 9f5795606..cb37afb63 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -4818f7e7ef7b7078a3a7a5a52c4a338e0dda22f4 +15a8b709ffb9f941ed7fc48013bc7deda5659275 diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index af6c3ffcf..9b0774d67 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -152,6 +152,7 @@ typedef struct { VkBool32 needsSampleDrefLodArrayWorkaround; /**< If true, sampling from arrayed depth images with explicit LoD is broken and needs a workaround. */ VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ VkBool32 dynamicVertexStride; /**< If true, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE is supported. 
*/ + uint32_t maxTransformFeedbackBuffers; /**< The maximum transform feedback Buffers to support */ } MVKPhysicalDeviceMetalFeatures; /** diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h index 2b9696ab4..8c8fd9371 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.h @@ -191,3 +191,105 @@ class MVKCmdDrawIndexedIndirect : public MVKCommand { uint32_t _drawCount; uint32_t _directCmdFirstInstance; }; + + +#pragma mark - +#pragma mark MVKCmdBeginTransformFeedback + +/* + * The active transform feedback buffers will capture primitives emitted from the corresponding XfbBuffer in the bound + * graphics pipeline. Any XfbBuffer emitted that does not output to an active transform feedback buffer will not be + * captured. + */ + +template +class MVKCmdBeginTransformFeedback : public MVKCommand { +public: + VkResult setContent(MVKCommandBuffer* cmdBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* counterBuffers, + const VkDeviceSize* counterBufferOffsets); + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MVKSmallVector _counterBuffers; +}; + +// Concrete template class implementations. +typedef MVKCmdBeginTransformFeedback<1> MVKCmdBeginTransformFeedback1; +typedef MVKCmdBeginTransformFeedback<2> MVKCmdBeginTransformFeedback2; +typedef MVKCmdBeginTransformFeedback<4> MVKCmdBeginTransformFeedbackMulti; + +#pragma mark - +#pragma mark MVKCmdBindTransformFeedbackBuffers + +/* + * The values taken from elements i of pBuffers, pOffsets and pSizes replace the current state for the transform + * feedback binding firstBinding + i, for i in [0, bindingCount). The transform feedback binding is updated to start + * at the offset indicated by pOffsets[i] from the start of the buffer pBuffers[i]. 
+ */ +template +class MVKCmdBindTransformFeedbackBuffers : public MVKCommand { +public: + VkResult setContent(MVKCommandBuffer* cmdBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes); + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + MVKSmallVector _bindings; +}; + +// Concrete template class implementations. +typedef MVKCmdBindTransformFeedbackBuffers<1> MVKCmdBindTransformFeedbackBuffers1; +typedef MVKCmdBindTransformFeedbackBuffers<2> MVKCmdBindTransformFeedbackBuffers2; +typedef MVKCmdBindTransformFeedbackBuffers<4> MVKCmdBindTransformFeedbackBuffersMulti; + +#pragma mark - +#pragma mark MVKCmdDrawIndirectByteCount + +/* + * Draw primitives where the vertex count is derived from the counter byte value in the counter buffer + */ + +class MVKCmdDrawIndirectByteCount : public MVKCommand { +public: + MVKCmdDrawIndirectByteCount() : + instanceCount(0), firstInstance(0), counterBuffer(), deviceSize(), stride() {} + VkResult setContent(MVKCommandBuffer* cmdBuffer, + uint32_t instanceCount, + uint32_t firstInstance, + VkBuffer counterBuffer, + uint32_t deviceSize, + uint32_t stride); + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + + uint32_t instanceCount; + uint32_t firstInstance; + VkBuffer counterBuffer; + uint32_t deviceSize; + uint32_t stride; +}; + +#pragma mark - +#pragma mark MVKCmdEndTransformFeedback + +class MVKCmdEndTransformFeedback : public MVKCommand { +public: + VkResult setContent(MVKCommandBuffer* cmdBuffer); + void encode(MVKCommandEncoder* cmdEncoder) override; + +protected: + MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; +}; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm index 87515ba12..1852e809f 100644 
--- a/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdDraw.mm @@ -19,10 +19,7 @@ #include "MVKCmdDraw.h" #include "MVKCommandBuffer.h" #include "MVKCommandPool.h" -#include "MVKBuffer.h" #include "MVKPipeline.h" -#include "MVKFoundation.h" -#include "mvk_datatypes.hpp" #pragma mark - @@ -41,7 +38,7 @@ _bindings.reserve(bindingCount); MVKMTLBufferBinding b; for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) { - MVKBuffer* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx]; + auto* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx]; b.index = mvkDvc->getMetalBufferIndexForVertexAttributeBinding(firstBinding + bindIdx); b.mtlBuffer = mvkBuffer->getMTLBuffer(); b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx]; @@ -66,11 +63,11 @@ #pragma mark - #pragma mark MVKCmdBindIndexBuffer -VkResult MVKCmdBindIndexBuffer::setContent(MVKCommandBuffer* cmdBuff, +VkResult MVKCmdBindIndexBuffer::setContent(MVKCommandBuffer*, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType) { - MVKBuffer* mvkBuffer = (MVKBuffer*)buffer; + auto* mvkBuffer = (MVKBuffer*)buffer; _binding.mtlBuffer = mvkBuffer->getMTLBuffer(); _binding.offset = mvkBuffer->getMTLBufferOffset() + offset; _binding.mtlIndexType = mvkMTLIndexTypeFromVkIndexType(indexType); @@ -188,6 +185,16 @@ mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { vtxOutBuff = cmdEncoder->getTempMTLBuffer(_vertexCount * _instanceCount * 4 * cmdEncoder->_pDeviceProperties->limits.maxVertexOutputComponents, true); + if (cmdEncoder->getDevice()->_enabledTransformFeedbackFeatures.transformFeedback && + cmdEncoder->_transformFeedbackRunning) { + for (auto& xfbBufferBinding : cmdEncoder->_graphicsResourcesState + ._transformFeedbackBufferBindings) + { + [mtlTessCtlEncoder setBuffer:xfbBufferBinding.mtlBuffer + offset:xfbBufferBinding.offset + atIndex:xfbBufferBinding.index]; + } + } [mtlTessCtlEncoder setBuffer: 
vtxOutBuff->_mtlBuffer offset: vtxOutBuff->_offset atIndex: pipeline->getOutputBufferIndex().stages[kMVKShaderStageVertex]]; @@ -196,7 +203,7 @@ // If there are vertex bindings with a zero vertex divisor, I need to offset them by // _firstInstance * stride, since that is the expected behaviour for a divisor of 0. cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); - id vtxState = pipeline->getTessVertexStageState(); + id vtxState = pipeline->getVertexStageState(); if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_vertexCount, _instanceCount, 1) @@ -426,7 +433,7 @@ // If there are vertex bindings with a zero vertex divisor, I need to offset them by // _firstInstance * stride, since that is the expected behaviour for a divisor of 0. cmdEncoder->_graphicsResourcesState.offsetZeroDivisorVertexBuffers(stage, pipeline, _firstInstance); - id vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State(); + id vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? 
pipeline->getVertexStageIndex16State() : pipeline->getVertexStageIndex32State(); if (cmdEncoder->getDevice()->_pMetalFeatures->nonUniformThreadgroups) { #if MVK_MACOS_OR_IOS [mtlTessCtlEncoder dispatchThreads: MTLSizeMake(_indexCount, _instanceCount, 1) @@ -568,7 +575,7 @@ VkDeviceSize offset, uint32_t drawCount, uint32_t stride) { - MVKBuffer* mvkBuffer = (MVKBuffer*)buffer; + auto* mvkBuffer = (MVKBuffer*)buffer; _mtlIndirectBuffer = mvkBuffer->getMTLBuffer(); _mtlIndirectBufferOffset = mvkBuffer->getMTLBufferOffset() + offset; _mtlIndirectBufferStride = stride; @@ -694,7 +701,7 @@ if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) { indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount; } - paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); + paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; @@ -712,7 +719,7 @@ } tcLevelBuff = cmdEncoder->getTempMTLBuffer(patchCount * sizeof(MTLQuadTessellationFactorsHalf), true); - vtxThreadExecWidth = pipeline->getTessVertexStageState().threadExecutionWidth; + vtxThreadExecWidth = pipeline->getVertexStageState().threadExecutionWidth; NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; tcWorkgroupSize = mvkLeastCommonMultiple(outControlPointCount, sgSize); while (tcWorkgroupSize > cmdEncoder->getDevice()->_pProperties->limits.maxComputeWorkGroupSize[0]) { @@ -836,6 +843,7 @@ if ( !pipeline->hasValidMTLPipelineStates() ) { return; } // Abort if this pipeline stage could not be compiled. 
switch (stage) { + // Look for the tessellation stage case kMVKGraphicsStageVertex: mtlTessCtlEncoder = cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl); if (pipeline->needsVertexOutputBuffer()) { @@ -1043,7 +1051,7 @@ if (cmdEncoder->_pDeviceMetalFeatures->mslVersion >= 20100) { indirectSize += sizeof(MTLStageInRegionIndirectArguments) * _drawCount; } - paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); + paramsIncr = std::max((size_t)cmdEncoder->getDevice()->_pProperties->limits.minUniformBufferOffsetAlignment, sizeof(uint32_t) * 2); VkDeviceSize paramsSize = paramsIncr * _drawCount; tempIndirectBuff = cmdEncoder->getTempMTLBuffer(indirectSize, true); mtlIndBuff = tempIndirectBuff->_mtlBuffer; @@ -1063,7 +1071,7 @@ vtxIndexBuff = cmdEncoder->getTempMTLBuffer(ibb.mtlBuffer.length, true); id vtxState; - vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? pipeline->getTessVertexStageIndex16State() : pipeline->getTessVertexStageIndex32State(); + vtxState = ibb.mtlIndexType == MTLIndexTypeUInt16 ? 
pipeline->getVertexStageIndex16State() : pipeline->getVertexStageIndex32State(); vtxThreadExecWidth = vtxState.threadExecutionWidth; NSUInteger sgSize = pipeline->getTessControlStageState().threadExecutionWidth; @@ -1330,3 +1338,116 @@ } } +#pragma mark - +#pragma mark MVKCmdBeginTransformFeedback + +template +VkResult MVKCmdBeginTransformFeedback::setContent(MVKCommandBuffer* cmdBuff, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) { + MVKDevice* mvkDvc = cmdBuff->getDevice(); + _counterBuffers.clear(); // Clear for reuse + _counterBuffers.reserve(counterBufferCount); + MVKMTLBufferBinding b; + for (uint32_t bindIdx = 0; bindIdx < counterBufferCount; bindIdx++) { + auto* mvkBuffer = (MVKBuffer*)pCounterBuffers[bindIdx]; + b.index = mvkDvc->getMetalBufferIndexForTransformFeedbackCounterBinding(kMVKShaderStageVertex, firstCounterBuffer + bindIdx); + b.mtlBuffer = mvkBuffer->getMTLBuffer(); + b.offset = mvkBuffer->getMTLBufferOffset() + pCounterBufferOffsets[bindIdx]; + _counterBuffers.push_back(b); + } + + return VK_SUCCESS; +} + +template +void MVKCmdBeginTransformFeedback::encode(MVKCommandEncoder* cmdEncoder) { + if (cmdEncoder->_transformFeedbackRunning) { + return; + } + + cmdEncoder->_graphicsResourcesState._transformFeedbackCounterBufferBinding = _counterBuffers[0]; + cmdEncoder->_transformFeedbackRunning = true; +} + +template class MVKCmdBeginTransformFeedback<1>; +template class MVKCmdBeginTransformFeedback<2>; +template class MVKCmdBeginTransformFeedback<4>; + + +#pragma mark - +#pragma mark MVKCmdBindTransformFeedbackBuffers + +template +VkResult MVKCmdBindTransformFeedbackBuffers::setContent(MVKCommandBuffer *cmdBuffer, + uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, + const VkDeviceSize *pSizes) { + MVKDevice* mvkDvc = cmdBuffer->getDevice(); + _bindings.clear(); // Clear for reuse + 
_bindings.reserve(bindingCount); + MVKMTLBufferBinding b; + for (uint32_t bindIdx = 0; bindIdx < bindingCount; bindIdx++) { + auto* mvkBuffer = (MVKBuffer*)pBuffers[bindIdx]; + b.index = mvkDvc->getMetalBufferIndexForTransformFeedbackBinding(kMVKShaderStageVertex, firstBinding + bindIdx); + b.mtlBuffer = mvkBuffer->getMTLBuffer(); + b.offset = mvkBuffer->getMTLBufferOffset() + pOffsets[bindIdx]; + if (pSizes != nullptr) { + if (pSizes[bindIdx] == VK_WHOLE_SIZE) { + b.size = b.mtlBuffer.allocatedSize - b.offset; + } else { + b.size = pSizes[bindIdx]; + } + } else { + b.size = b.mtlBuffer.allocatedSize - b.offset; + } + _bindings.push_back(b); + } + + return VK_SUCCESS; +} + +template +void MVKCmdBindTransformFeedbackBuffers::encode(MVKCommandEncoder *cmdEncoder) { + cmdEncoder->_graphicsResourcesState._transformFeedbackBufferBindings.clear(); + for (auto& b : _bindings) { cmdEncoder->_graphicsResourcesState._transformFeedbackBufferBindings.push_back(b); } +} + +template class MVKCmdBindTransformFeedbackBuffers<1>; +template class MVKCmdBindTransformFeedbackBuffers<2>; +template class MVKCmdBindTransformFeedbackBuffers<4>; + + +#pragma mark - +#pragma mark MVKCmdDrawIndirectByteCount + +VkResult MVKCmdDrawIndirectByteCount::setContent(MVKCommandBuffer *cmdBuffer, + uint32_t _instanceCount, uint32_t _firstInstance, + VkBuffer _counterBuffer, uint32_t _deviceSize, uint32_t _stride) { + /// @TODO SW: this extension allows drawing from the transform feedback buffers + instanceCount = _instanceCount; + firstInstance = _firstInstance; + counterBuffer = _counterBuffer; + deviceSize = _deviceSize; + stride = _stride; + return cmdBuffer->reportError(VK_ERROR_FEATURE_NOT_PRESENT, "DrawIndirectByteCount is not yet implemented"); +} + +void MVKCmdDrawIndirectByteCount::encode(MVKCommandEncoder *cmdEncoder) { + /// @TODO SW: this extension allows drawing from the transform feedback buffers + (void) cmdEncoder; +} + +#pragma mark - +#pragma mark MVKCmdEndTransformFeedback + 
+VkResult MVKCmdEndTransformFeedback::setContent(MVKCommandBuffer *) { + return VK_SUCCESS; +} + +void MVKCmdEndTransformFeedback::encode(MVKCommandEncoder *cmdEncoder) { + cmdEncoder->_transformFeedbackRunning = false; +} diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 92d02e772..15130ae3f 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -466,6 +466,10 @@ class MVKCommandEncoder : public MVKBaseDeviceObject { /** Indicates whether the current draw is an indexed draw. */ bool _isIndexedDraw; + /** If true, transform feedback is currently running. */ + VkBool32 _transformFeedbackRunning; + + #pragma mark Construction MVKCommandEncoder(MVKCommandBuffer* cmdBuffer, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 4ac895d43..6a83dc570 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -507,6 +507,8 @@ class MVKGraphicsResourcesCommandEncoderState : public MVKResourcesCommandEncode /** The type of index that will be used to render primitives. Exposed directly. */ MVKIndexMTLBufferBinding _mtlIndexBufferBinding; + MVKMTLBufferBinding _transformFeedbackCounterBufferBinding; + MVKSmallVector _transformFeedbackBufferBindings; /** Binds the specified index buffer. 
*/ void bindIndexBuffer(const MVKIndexMTLBufferBinding& binding) { @@ -539,6 +541,8 @@ class MVKGraphicsResourcesCommandEncoderState : public MVKResourcesCommandEncode bool needVertexViewBuffer, bool needFragmentViewBuffer); + void bindTransformFeedbackBuffer(MVKMTLBufferBinding binding); + void encodeBindings(MVKShaderStage stage, const char* pStageName, bool fullImageViewSwizzle, diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def index 6703a0bad..09922a9e0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def +++ b/MoltenVK/MoltenVK/Commands/MVKCommandTypePools.def @@ -108,10 +108,12 @@ MVK_CMD_TYPE_POOL(SetPatchControlPoints) MVK_CMD_TYPE_POOL(SetRasterizerDiscardEnable) MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindVertexBuffers, 1, 2) MVK_CMD_TYPE_POOL(BindIndexBuffer) +MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BindTransformFeedbackBuffers, 1, 2) MVK_CMD_TYPE_POOL(Draw) MVK_CMD_TYPE_POOL(DrawIndexed) MVK_CMD_TYPE_POOL(DrawIndirect) MVK_CMD_TYPE_POOL(DrawIndexedIndirect) +MVK_CMD_TYPE_POOL(DrawIndirectByteCount) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(CopyImage, 1) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(BlitImage, 1) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(ResolveImage, 1) @@ -136,6 +138,8 @@ MVK_CMD_TYPE_POOL(PushDescriptorSetWithTemplate) MVK_CMD_TYPE_POOL(DebugMarkerBegin) MVK_CMD_TYPE_POOL(DebugMarkerEnd) MVK_CMD_TYPE_POOL(DebugMarkerInsert) +MVK_CMD_TYPE_POOLS_FROM_2_THRESHOLDS(BeginTransformFeedback, 1, 2) +MVK_CMD_TYPE_POOL(EndTransformFeedback) MVK_CMD_TYPE_POOLS_FROM_THRESHOLD(WaitEvents, 1) MVK_CMD_TYPE_POOL(SetEvent) MVK_CMD_TYPE_POOL_LAST(ResetEvent) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index be9c08fb4..4ef67d570 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -89,6 +89,7 @@ static constexpr float kMVKMinSampleLocationCoordinate = 0.0; static constexpr float 
kMVKMaxSampleLocationCoordinate = (float)(kMVKSampleLocationCoordinateGridSize - 1) / (float)kMVKSampleLocationCoordinateGridSize; static constexpr VkExtent2D kMVKSampleLocationPixelGridSize = { 1, 1 }; static constexpr VkExtent2D kMVKSampleLocationPixelGridSizeNotSupported = { 0, 0 }; +static constexpr uint32_t kMVKMaxTransformFeedbackBufferCount = 1; // TODO: Increase to 4. #if !MVK_XCODE_12 typedef NSUInteger MTLTimestamp; @@ -765,6 +766,12 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { /** Returns the Metal vertex buffer index to use for the specified vertex attribute binding number. */ uint32_t getMetalBufferIndexForVertexAttributeBinding(uint32_t binding); + /** Returns the Metal vertex buffer index to use for the specified transform feedback binding number. */ + uint32_t getMetalBufferIndexForTransformFeedbackBinding(MVKShaderStage stage, uint32_t binding); + + /** Returns the Metal vertex buffer index to use for the specified transform feedback counter binding number. */ + uint32_t getMetalBufferIndexForTransformFeedbackCounterBinding(MVKShaderStage stage, uint32_t binding); + /** Returns the memory alignment required for the format when used in a texel buffer. 
*/ VkDeviceSize getVkFormatTexelBufferAlignment(VkFormat format, MVKBaseObject* mvkObj); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0dd865f23..91bcd11bc 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -482,6 +482,12 @@ texelBuffAlignFeatures->texelBufferAlignment = _metalFeatures.texelBuffers && [_mtlDevice respondsToSelector: @selector(minimumLinearTextureAlignmentForPixelFormat:)]; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: { + auto* xfbFeatures = (VkPhysicalDeviceTransformFeedbackFeaturesEXT*)next; + xfbFeatures->transformFeedback = true; + xfbFeatures->geometryStreams = false; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: { auto* divisorFeatures = (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*)next; divisorFeatures->vertexAttributeInstanceRateDivisor = true; @@ -786,6 +792,20 @@ sampLocnProps->variableSampleLocations = true; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: { + auto* xfbProps = (VkPhysicalDeviceTransformFeedbackPropertiesEXT*)next; + xfbProps->maxTransformFeedbackStreams = 1; // Must be 1 if geometryStreams isn't supported. + xfbProps->maxTransformFeedbackBuffers = kMVKMaxTransformFeedbackBufferCount; + xfbProps->maxTransformFeedbackBufferSize = _metalFeatures.maxMTLBufferSize; + xfbProps->maxTransformFeedbackStreamDataSize = (_properties.limits.maxFragmentInputComponents + 4) * sizeof(float); // +4 more for the position.
+ xfbProps->maxTransformFeedbackBufferDataSize = xfbProps->maxTransformFeedbackStreamDataSize; + xfbProps->maxTransformFeedbackBufferDataStride = _metalFeatures.maxMTLBufferSize - xfbProps->maxTransformFeedbackBufferDataSize; + xfbProps->transformFeedbackQueries = VK_FALSE; + xfbProps->transformFeedbackStreamsLinesTriangles = VK_FALSE; + xfbProps->transformFeedbackRasterizationStreamSelect = VK_FALSE; + xfbProps->transformFeedbackDraw = VK_FALSE; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: { auto* divisorProps = (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT*)next; divisorProps->maxVertexAttribDivisor = kMVKUndefinedLargeUInt32; @@ -2461,7 +2480,7 @@ _properties.limits.maxSamplerAnisotropy = 16; _properties.limits.maxVertexInputAttributes = 31; - _properties.limits.maxVertexInputBindings = 31; + _properties.limits.maxVertexInputBindings = 16; _properties.limits.maxVertexInputBindingStride = supportsMTLGPUFamily(Apple2) ? kMVKUndefinedLargeUInt32 : (4 * KIBI); _properties.limits.maxVertexInputAttributeOffset = _properties.limits.maxVertexInputBindingStride - 1; @@ -4440,6 +4459,22 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope return ((_pMetalFeatures->maxPerStageBufferCount - 1) - binding); } +uint32_t MVKDevice::getMetalBufferIndexForTransformFeedbackBinding(MVKShaderStage stage, uint32_t binding) { + binding = ((_pMetalFeatures->maxPerStageBufferCount - 1) - binding); + switch (stage) { + case kMVKShaderStageVertex: + binding -= _pProperties->limits.maxVertexInputBindings; + break; + default: + break; + } + return binding; +} + +uint32_t MVKDevice::getMetalBufferIndexForTransformFeedbackCounterBinding(MVKShaderStage stage, uint32_t binding) { + return (getMetalBufferIndexForTransformFeedbackBinding(stage, binding) - _pMetalFeatures->maxTransformFeedbackBuffers); +} + VkDeviceSize MVKDevice::getVkFormatTexelBufferAlignment(VkFormat format, MVKBaseObject* mvkObj) { VkDeviceSize 
deviceAlignment = 0; id mtlDev = getMTLDevice(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index d3856e8c3..d77dfeef2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -73,6 +73,7 @@ MVK_DEVICE_FEATURE_EXTN(Robustness2, ROBUSTNESS_2, MVK_DEVICE_FEATURE_EXTN(ShaderAtomicFloat, SHADER_ATOMIC_FLOAT, EXT, 12) MVK_DEVICE_FEATURE_EXTN(SwapchainMaintenance1, SWAPCHAIN_MAINTENANCE_1, EXT, 1) MVK_DEVICE_FEATURE_EXTN(TexelBufferAlignment, TEXEL_BUFFER_ALIGNMENT, EXT, 1) +MVK_DEVICE_FEATURE_EXTN(TransformFeedback, TRANSFORM_FEEDBACK, EXT, 2) MVK_DEVICE_FEATURE_EXTN(VertexAttributeDivisor, VERTEX_ATTRIBUTE_DIVISOR, EXT, 2) MVK_DEVICE_FEATURE_EXTN(ShaderIntegerFunctions2, SHADER_INTEGER_FUNCTIONS_2, INTEL, 1) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index ea8152019..ed09244b8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -696,6 +696,12 @@ ADD_DVC_EXT_ENTRY_POINT(vkGetPhysicalDeviceMultisamplePropertiesEXT, EXT_SAMPLE_LOCATIONS); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetSampleLocationsEXT, EXT_SAMPLE_LOCATIONS); ADD_DVC_EXT_ENTRY_POINT(vkReleaseSwapchainImagesEXT, EXT_SWAPCHAIN_MAINTENANCE_1); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginQueryIndexedEXT, EXT_TRANSFORM_FEEDBACK); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBeginTransformFeedbackEXT, EXT_TRANSFORM_FEEDBACK); + ADD_DVC_EXT_ENTRY_POINT(vkCmdBindTransformFeedbackBuffersEXT, EXT_TRANSFORM_FEEDBACK); + ADD_DVC_EXT_ENTRY_POINT(vkCmdDrawIndirectByteCountEXT, EXT_TRANSFORM_FEEDBACK); + ADD_DVC_EXT_ENTRY_POINT(vkCmdEndQueryIndexedEXT, EXT_TRANSFORM_FEEDBACK); + ADD_DVC_EXT_ENTRY_POINT(vkCmdEndTransformFeedbackEXT, EXT_TRANSFORM_FEEDBACK); ADD_DVC_EXT_ENTRY_POINT(vkGetRefreshCycleDurationGOOGLE, GOOGLE_DISPLAY_TIMING); 
ADD_DVC_EXT_ENTRY_POINT(vkGetPastPresentationTimingGOOGLE, GOOGLE_DISPLAY_TIMING); ADD_DVC_EXT_ENTRY_POINT(vkCmdSetLogicOpEXT, EXT_EXTENDED_DYNAMIC_STATE_2); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h index 6827b5b9b..c1e65cd2d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.h @@ -289,6 +289,9 @@ class MVKGraphicsPipeline : public MVKPipeline { /** Returns whether this pipeline has tessellation shaders. */ bool isTessellationPipeline() { return _tessInfo.patchControlPoints > 0; } + /** Returns whether this pipeline uses transform feedback. */ + bool isTransformFeedbackPipeline() { return _isTransformFeedback; } + /** Returns the number of output tessellation patch control points. */ uint32_t getOutputControlPointCount() { return _outputControlPointCount; } @@ -301,14 +304,14 @@ class MVKGraphicsPipeline : public MVKPipeline { /** Returns the current tessellation level buffer binding for the tess. control shader. */ uint32_t getTessCtlLevelBufferIndex() { return _tessCtlLevelBufferIndex; } - /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with no indices. */ - id getTessVertexStageState() { return _mtlTessVertexStageState; } + /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated or transform feedback draw with no indices. */ + id getVertexStageState() { return _mtlVertexStageState; } - /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with 16-bit indices. */ - id getTessVertexStageIndex16State() { return _mtlTessVertexStageIndex16State; } + /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated or transform feedback draw with 16-bit indices. 
*/ + id getVertexStageIndex16State() { return _mtlVertexStageIndex16State; } - /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated draw with 32-bit indices. */ - id getTessVertexStageIndex32State() { return _mtlTessVertexStageIndex32State; } + /** Returns the MTLComputePipelineState object for the vertex stage of a tessellated or transform feedback draw with 32-bit indices. */ + id getVertexStageIndex32State() { return _mtlVertexStageIndex32State; } /** Returns the MTLComputePipelineState object for the tessellation control stage of a tessellated draw. */ id getTessControlStageState() { return _mtlTessControlStageState; } @@ -363,7 +366,7 @@ class MVKGraphicsPipeline : public MVKPipeline { id getOrCompilePipeline(MTLRenderPipelineDescriptor* plDesc, id& plState); id getOrCompilePipeline(MTLComputePipelineDescriptor* plDesc, id& plState, const char* compilerType); - bool compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB); + bool compileVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, MVKMTLFunction* pVtxFunctions, VkPipelineCreationFeedback* pVertexFB); bool compileTessControlStageState(MTLComputePipelineDescriptor* tcPLDesc, VkPipelineCreationFeedback* pTessCtlFB); void initDynamicState(const VkGraphicsPipelineCreateInfo* pCreateInfo); void initSampleLocations(const VkGraphicsPipelineCreateInfo* pCreateInfo); @@ -374,13 +377,15 @@ class MVKGraphicsPipeline : public MVKPipeline { void addNextStageInputToShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderInputs& inputs); void addPrevStageOutputToShaderConversionConfig(SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderOutputs& outputs); MTLRenderPipelineDescriptor* newMTLRenderPipelineDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, const VkPipelineShaderStageCreateInfo* pVertexSS, 
VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB); - MTLComputePipelineDescriptor* newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS, MVKMTLFunction* pVtxFunctions); + MTLComputePipelineDescriptor* newMTLVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo* pNextSS, MVKShaderStage nextSSType, MVKMTLFunction* pVtxFunctions); MTLComputePipelineDescriptor* newMTLTessControlStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB, const VkPipelineShaderStageCreateInfo* pVertexSS, const VkPipelineShaderStageCreateInfo* pTessEvalSS); MTLRenderPipelineDescriptor* newMTLTessRasterStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB, const VkPipelineShaderStageCreateInfo* pTessCtlSS); + MTLRenderPipelineDescriptor* newMTLXFBRasterStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, const SPIRVTessReflectionData& reflectData, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* 
pFragmentSS, VkPipelineCreationFeedback* pFragmentFB, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB); bool addVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, const VkPipelineShaderStageCreateInfo*& pFragmentSS); bool addVertexShaderToPipeline(MTLComputePipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderInputs& nextInputs, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB, MVKMTLFunction* pVtxFunctions); bool addTessCtlShaderToPipeline(MTLComputePipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderOutputs& prevOutput, SPIRVShaderInputs& nextInputs, const VkPipelineShaderStageCreateInfo* pTessCtlSS, VkPipelineCreationFeedback* pTessCtlFB); bool addTessEvalShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderOutputs& prevOutput, const VkPipelineShaderStageCreateInfo* pTessEvalSS, VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo*& pFragmentSS); + bool addPassThruVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pVertexSS, VkPipelineCreationFeedback* pVertexFB); bool addFragmentShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, const VkGraphicsPipelineCreateInfo* pCreateInfo, SPIRVToMSLConversionConfiguration& shaderConfig, SPIRVShaderOutputs& prevOutput, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB); template bool addVertexInputToPipeline(T* 
inputDesc, const VkPipelineVertexInputStateCreateInfo* pVI, const SPIRVToMSLConversionConfiguration& shaderConfig); @@ -392,10 +397,13 @@ class MVKGraphicsPipeline : public MVKPipeline { bool verifyImplicitBuffer(bool needsBuffer, MVKShaderImplicitRezBinding& index, MVKShaderStage stage, const char* name); uint32_t getTranslatedVertexBinding(uint32_t binding, uint32_t translationOffset, uint32_t maxBinding); uint32_t getImplicitBufferIndex(MVKShaderStage stage, uint32_t bufferIndexOffset); + uint32_t getTransformFeedbackBufferIndex(MVKShaderStage stage); + uint32_t getTransformFeedbackCounterBufferIndex(MVKShaderStage stage); MVKMTLFunction getMTLFunction(SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pShaderStage, VkPipelineCreationFeedback* pStageFB, - const char* pStageName); + const char* pStageName, + bool passThru = false); void markIfUsingPhysicalStorageBufferAddressesCapability(SPIRVToMSLConversionResultInfo& resultsInfo, MVKShaderStage stage); @@ -414,9 +422,9 @@ class MVKGraphicsPipeline : public MVKPipeline { MVKSmallVector _stagesUsingPhysicalStorageBufferAddressesCapability; std::unordered_map> _multiviewMTLPipelineStates; - id _mtlTessVertexStageState = nil; - id _mtlTessVertexStageIndex16State = nil; - id _mtlTessVertexStageIndex32State = nil; + id _mtlVertexStageState = nil; + id _mtlVertexStageIndex16State = nil; + id _mtlVertexStageIndex32State = nil; id _mtlTessControlStageState = nil; id _mtlPipelineState = nil; @@ -452,6 +460,7 @@ class MVKGraphicsPipeline : public MVKPipeline { bool _isRasterizing = false; bool _isRasterizingColor = false; bool _sampleLocationsEnable = false; + bool _isTransformFeedback = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 0b2663400..d4cdb5767 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -20,7 +20,6 @@ #include "MVKRenderPass.h" #include 
"MVKCommandBuffer.h" #include "MVKFoundation.h" -#include "MVKOSExtensions.h" #include "MVKStrings.h" #include "MTLRenderPipelineDescriptor+MoltenVK.h" #include "mvk_datatypes.hpp" @@ -240,6 +239,8 @@ if (isTessellationPipeline()) { stages.push_back(kMVKGraphicsStageVertex); stages.push_back(kMVKGraphicsStageTessControl); + } else if (isTransformFeedbackPipeline()) { + stages.push_back(kMVKGraphicsStageVertex); } stages.push_back(kMVKGraphicsStageRasterization); } @@ -263,11 +264,11 @@ id plState; const MVKIndexMTLBufferBinding& indexBuff = cmdEncoder->_graphicsResourcesState._mtlIndexBufferBinding; if (!cmdEncoder->_isIndexedDraw) { - plState = getTessVertexStageState(); + plState = getVertexStageState(); } else if (indexBuff.mtlIndexType == MTLIndexTypeUInt16) { - plState = getTessVertexStageIndex16State(); + plState = getVertexStageIndex16State(); } else { - plState = getTessVertexStageIndex32State(); + plState = getVertexStageIndex32State(); } if ( !_hasValidMTLPipelineStates ) { return; } @@ -324,17 +325,17 @@ cmdEncoder->_graphicsResourcesState.bindViewRangeBuffer(_viewRangeBufferIndex, _needsVertexViewRangeBuffer, _needsFragmentViewRangeBuffer); } -static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation"; +static const char vtxCompilerType[] = "Vertex stage pipeline for tessellation/transform feedback"; -bool MVKGraphicsPipeline::compileTessVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, - MVKMTLFunction* pVtxFunctions, - VkPipelineCreationFeedback* pVertexFB) { +bool MVKGraphicsPipeline::compileVertexStageState(MTLComputePipelineDescriptor* vtxPLDesc, + MVKMTLFunction* pVtxFunctions, + VkPipelineCreationFeedback* pVertexFB) { uint64_t startTime = 0; if (pVertexFB) { startTime = mvkGetTimestamp(); } vtxPLDesc.computeFunction = pVtxFunctions[0].getMTLFunction(); - bool res = !!getOrCompilePipeline(vtxPLDesc, _mtlTessVertexStageState, vtxCompilerType); + bool res = !!getOrCompilePipeline(vtxPLDesc, _mtlVertexStageState, 
vtxCompilerType); vtxPLDesc.computeFunction = pVtxFunctions[1].getMTLFunction(); vtxPLDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt16; @@ -344,11 +345,11 @@ blDesc.stepFunction = MTLStepFunctionThreadPositionInGridXIndexed; } } - res |= !!getOrCompilePipeline(vtxPLDesc, _mtlTessVertexStageIndex16State, vtxCompilerType); + res |= !!getOrCompilePipeline(vtxPLDesc, _mtlVertexStageIndex16State, vtxCompilerType); vtxPLDesc.computeFunction = pVtxFunctions[2].getMTLFunction(); vtxPLDesc.stageInputDescriptor.indexType = MTLIndexTypeUInt32; - res |= !!getOrCompilePipeline(vtxPLDesc, _mtlTessVertexStageIndex32State, vtxCompilerType); + res |= !!getOrCompilePipeline(vtxPLDesc, _mtlVertexStageIndex32State, vtxCompilerType); if (pVertexFB) { if (!res) { @@ -494,6 +495,14 @@ } } + _isTransformFeedback = getUsesTransformFeedback(((MVKShaderModule*)pVertexSS->module)->getSPIRV(), + spv::ExecutionModelVertex, + pVertexSS->pName, + reflectErrorLog); + if (!reflectErrorLog.empty()) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to reflect vertex shader: %s", reflectErrorLog.c_str())); + } + // Tessellation - must ignore allowed bad pTessellationState pointer if not tess pipeline _outputControlPointCount = reflectData.numControlPoints; mvkSetOrClear(&_tessInfo, (pTessCtlSS && pTessEvalSS) ? 
pCreateInfo->pTessellationState : nullptr); @@ -668,9 +677,9 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { VkPipelineCreationFeedback* pTessEvalFB, const VkPipelineShaderStageCreateInfo* pFragmentSS, VkPipelineCreationFeedback* pFragmentFB) { - _mtlTessVertexStageState = nil; - _mtlTessVertexStageIndex16State = nil; - _mtlTessVertexStageIndex32State = nil; + _mtlVertexStageState = nil; + _mtlVertexStageIndex16State = nil; + _mtlVertexStageIndex32State = nil; _mtlTessControlStageState = nil; _mtlPipelineState = nil; @@ -682,7 +691,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { if (isUsingMetalArgumentBuffers()) { _descriptorBindingUse.resize(_descriptorSetCount); } if (isUsingPipelineStageMetalArgumentBuffers()) { _mtlArgumentEncoders.resize(_descriptorSetCount); } - if (!isTessellationPipeline()) { + if (!isTessellationPipeline() && !isTransformFeedbackPipeline()) { MTLRenderPipelineDescriptor* plDesc = newMTLRenderPipelineDescriptor(pCreateInfo, reflectData, pVertexSS, pVertexFB, pFragmentSS, pFragmentFB); // temp retain if (plDesc) { const VkPipelineRenderingCreateInfo* pRendInfo = getRenderingCreateInfo(pCreateInfo); @@ -717,7 +726,7 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { } else { _hasValidMTLPipelineStates = false; } - } else { + } else if (isTessellationPipeline()) { // In this case, we need to create three render pipelines. But, the way Metal handles // index buffers for compute stage-in means we have to create three pipelines for // stage 1 (five pipelines in total). 
@@ -725,11 +734,11 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { initShaderConversionConfig(shaderConfig, pCreateInfo, reflectData); MVKMTLFunction vtxFunctions[3] = {}; - MTLComputePipelineDescriptor* vtxPLDesc = newMTLTessVertexStageDescriptor(pCreateInfo, reflectData, shaderConfig, pVertexSS, pVertexFB, pTessCtlSS, vtxFunctions); // temp retained + MTLComputePipelineDescriptor* vtxPLDesc = newMTLVertexStageDescriptor(pCreateInfo, reflectData, shaderConfig, pVertexSS, pVertexFB, pTessCtlSS, kMVKShaderStageTessCtl, vtxFunctions); // temp retained MTLComputePipelineDescriptor* tcPLDesc = newMTLTessControlStageDescriptor(pCreateInfo, reflectData, shaderConfig, pTessCtlSS, pTessCtlFB, pVertexSS, pTessEvalSS); // temp retained MTLRenderPipelineDescriptor* rastPLDesc = newMTLTessRasterStageDescriptor(pCreateInfo, reflectData, shaderConfig, pTessEvalSS, pTessEvalFB, pFragmentSS, pFragmentFB, pTessCtlSS); // temp retained if (vtxPLDesc && tcPLDesc && rastPLDesc) { - if (compileTessVertexStageState(vtxPLDesc, vtxFunctions, pVertexFB)) { + if (compileVertexStageState(vtxPLDesc, vtxFunctions, pVertexFB)) { if (compileTessControlStageState(tcPLDesc, pTessCtlFB)) { getOrCompilePipeline(rastPLDesc, _mtlPipelineState); } @@ -740,6 +749,28 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { [vtxPLDesc release]; // temp release [tcPLDesc release]; // temp release [rastPLDesc release]; // temp release + } else { + // In this case, we need to create two render pipelines. But, the way Metal handles + // index buffers for compute stage-in means we have to create three pipelines for + // stage 1 (four pipelines in total). 
+ SPIRVToMSLConversionConfiguration shaderConfig; + initShaderConversionConfig(shaderConfig, pCreateInfo, reflectData); + + MVKMTLFunction vtxFunctions[3] = {}; + MTLComputePipelineDescriptor* vtxPLDesc = newMTLVertexStageDescriptor(pCreateInfo, reflectData, shaderConfig, pVertexSS, pVertexFB, pFragmentSS, kMVKShaderStageFragment, vtxFunctions); // temp retained + MTLRenderPipelineDescriptor* rastPLDesc = nil; + if (_isRasterizing) { + rastPLDesc = newMTLXFBRasterStageDescriptor(pCreateInfo, reflectData, shaderConfig, pFragmentSS, pFragmentFB, pVertexSS, pVertexFB); // temp retained + } + if (vtxPLDesc && (!_isRasterizing || rastPLDesc)) { + if (compileVertexStageState(vtxPLDesc, vtxFunctions, pVertexFB) && _isRasterizing) { + getOrCompilePipeline(rastPLDesc, _mtlPipelineState); + } + } else { + _hasValidMTLPipelineStates = false; + } + [vtxPLDesc release]; // temp release + [rastPLDesc release]; // temp release } if (pPipelineFB) { @@ -791,32 +822,61 @@ static MVKRenderStateType getRenderStateType(VkDynamicState vkDynamicState) { return plDesc; } -// Returns a retained MTLComputePipelineDescriptor for the vertex stage of a tessellated draw constructed from this instance, or nil if an error occurs. -// It is the responsibility of the caller to release the returned descriptor. 
-MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLTessVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, - const SPIRVTessReflectionData& reflectData, - SPIRVToMSLConversionConfiguration& shaderConfig, - const VkPipelineShaderStageCreateInfo* pVertexSS, - VkPipelineCreationFeedback* pVertexFB, - const VkPipelineShaderStageCreateInfo* pTessCtlSS, - MVKMTLFunction* pVtxFunctions) { +static inline spv::ExecutionModel mvkShaderStageToSPVExecutionModel(MVKShaderStage mvkStage) { + switch (mvkStage) { + case kMVKShaderStageVertex: + return spv::ExecutionModelVertex; + case kMVKShaderStageTessCtl: + return spv::ExecutionModelTessellationControl; + case kMVKShaderStageTessEval: + return spv::ExecutionModelTessellationEvaluation; + case kMVKShaderStageFragment: + return spv::ExecutionModelFragment; + case kMVKShaderStageCompute: + return spv::ExecutionModelGLCompute; + default: + return spv::ExecutionModelMax; + } +} + +static inline const char* mvkShaderStageDesc(MVKShaderStage mvkStage) { + static const char *stages[] = { + "vertex", + "tessellation control", + "tessellation evaluation", + "fragment", + "compute", + }; + return stages[mvkStage]; +} + +// Returns a retained MTLComputePipelineDescriptor for the vertex stage of a tessellated or transform feedback draw constructed from this instance, +// or nil if an error occurs. It is the responsibility of the caller to release the returned descriptor. 
+MTLComputePipelineDescriptor* MVKGraphicsPipeline::newMTLVertexStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, + const SPIRVTessReflectionData& reflectData, + SPIRVToMSLConversionConfiguration& shaderConfig, + const VkPipelineShaderStageCreateInfo* pVertexSS, + VkPipelineCreationFeedback* pVertexFB, + const VkPipelineShaderStageCreateInfo* pNextSS, + MVKShaderStage nextSSType, + MVKMTLFunction* pVtxFunctions) { MTLComputePipelineDescriptor* plDesc = [MTLComputePipelineDescriptor new]; // retained - SPIRVShaderInputs tcInputs; + SPIRVShaderInputs nextInputs; std::string errorLog; - if (!getShaderInputs(((MVKShaderModule*)pTessCtlSS->module)->getSPIRV(), spv::ExecutionModelTessellationControl, pTessCtlSS->pName, tcInputs, errorLog) ) { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get tessellation control inputs: %s", errorLog.c_str())); + if (!getShaderInputs(((MVKShaderModule*)pNextSS->module)->getSPIRV(), mvkShaderStageToSPVExecutionModel(nextSSType), pNextSS->pName, nextInputs, errorLog) ) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get %s inputs: %s", mvkShaderStageDesc(nextSSType), errorLog.c_str())); return nil; } // Filter out anything but builtins. We couldn't do this before because we needed to make sure // locations were assigned correctly. - tcInputs.erase(std::remove_if(tcInputs.begin(), tcInputs.end(), [](const SPIRVShaderInterfaceVariable& var) { + nextInputs.erase(std::remove_if(nextInputs.begin(), nextInputs.end(), [](const SPIRVShaderInterfaceVariable& var) { return var.builtin != spv::BuiltInPosition && var.builtin != spv::BuiltInPointSize && var.builtin != spv::BuiltInClipDistance && var.builtin != spv::BuiltInCullDistance; - }), tcInputs.end()); + }), nextInputs.end()); // Add shader stages. 
- if (!addVertexShaderToPipeline(plDesc, pCreateInfo, shaderConfig, tcInputs, pVertexSS, pVertexFB, pVtxFunctions)) { return nil; } + if (!addVertexShaderToPipeline(plDesc, pCreateInfo, shaderConfig, nextInputs, pVertexSS, pVertexFB, pVtxFunctions)) { return nil; } // Vertex input plDesc.stageInputDescriptor = [MTLStageInputOutputDescriptor stageInputOutputDescriptor]; @@ -1020,6 +1080,42 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 return plDesc; } +// Returns a retained MTLRenderPipelineDescriptor for the last stage of a transform feedback draw constructed from this instance, or nil if an error occurs. +// It is the responsibility of the caller to release the returned descriptor. +MTLRenderPipelineDescriptor* MVKGraphicsPipeline::newMTLXFBRasterStageDescriptor(const VkGraphicsPipelineCreateInfo* pCreateInfo, + const SPIRVTessReflectionData& reflectData, + SPIRVToMSLConversionConfiguration& shaderConfig, + const VkPipelineShaderStageCreateInfo* pFragmentSS, + VkPipelineCreationFeedback* pFragmentFB, + const VkPipelineShaderStageCreateInfo* pVertexSS, + VkPipelineCreationFeedback* pVertexFB) { + MTLRenderPipelineDescriptor* plDesc = [MTLRenderPipelineDescriptor new]; // retained + + SPIRVShaderOutputs vtxOutputs; + std::string errorLog; + if (!getShaderOutputs(((MVKShaderModule*)pVertexSS->module)->getSPIRV(), spv::ExecutionModelVertex, pVertexSS->pName, vtxOutputs, errorLog) ) { + setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, "Failed to get vertex outputs: %s", errorLog.c_str())); + return nil; + } + + // Add shader stages. Compile vertex shader before others just in case conversion changes anything...like rasterization disable. 
+ if (!addPassThruVertexShaderToPipeline(plDesc, shaderConfig, pVertexSS, pVertexFB)) { + [plDesc release]; + return nil; + } + + // Fragment shader - only add if rasterization is enabled + if (!addFragmentShaderToPipeline(plDesc, pCreateInfo, shaderConfig, vtxOutputs, pFragmentSS, pFragmentFB)) { + [plDesc release]; + return nil; + } + + // Output + addFragmentOutputToPipeline(plDesc, pCreateInfo); + + return plDesc; +} + bool MVKGraphicsPipeline::verifyImplicitBuffer(bool needsBuffer, MVKShaderImplicitRezBinding& index, MVKShaderStage stage, const char* name) { const char* stageNames[] = { "Vertex", @@ -1049,6 +1145,10 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.buffer_size_buffer_index = _bufferSizeBufferIndex.stages[kMVKShaderStageVertex]; shaderConfig.options.mslOptions.dynamic_offsets_buffer_index = _dynamicOffsetBufferIndex.stages[kMVKShaderStageVertex]; shaderConfig.options.mslOptions.view_mask_buffer_index = _viewRangeBufferIndex.stages[kMVKShaderStageVertex]; + if (isTransformFeedbackPipeline()) { + shaderConfig.options.mslOptions.xfb_counter_buffer_index_base = getTransformFeedbackCounterBufferIndex(kMVKShaderStageVertex); + shaderConfig.options.mslOptions.xfb_output_buffer_index_base = getTransformFeedbackBufferIndex(kMVKShaderStageVertex); + } shaderConfig.options.mslOptions.capture_output_to_buffer = false; shaderConfig.options.mslOptions.disable_rasterization = !_isRasterizing; addVertexInputToShaderConversionConfig(shaderConfig, pCreateInfo); @@ -1340,6 +1440,19 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 return true; } +// Adds a pass-through vertex shader which reads transform feedback buffers to the pipeline description. 
+bool MVKGraphicsPipeline::addPassThruVertexShaderToPipeline(MTLRenderPipelineDescriptor* plDesc, + SPIRVToMSLConversionConfiguration& shaderConfig, + const VkPipelineShaderStageCreateInfo* pVertexSS, + VkPipelineCreationFeedback* pVertexFB) { + shaderConfig.options.entryPointName = pVertexSS->pName; + MVKMTLFunction func = getMTLFunction(shaderConfig, pVertexSS, pVertexFB, "Vertex (pass-through)", true); + if ( !func.getMTLFunction() ) { return false; } + plDesc.vertexFunction = func.getMTLFunction(); + + return true; +} + #if !MVK_XCODE_15 static const NSUInteger MTLBufferLayoutStrideDynamic = NSUIntegerMax; #endif @@ -1752,6 +1865,14 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 return getMetalBufferIndexForVertexAttributeBinding(_reservedVertexAttributeBufferCount.stages[stage] + bufferIndexOffset); } +uint32_t MVKGraphicsPipeline::getTransformFeedbackBufferIndex(MVKShaderStage stage) { + return _device->getMetalBufferIndexForTransformFeedbackBinding(stage, 0); +} + +uint32_t MVKGraphicsPipeline::getTransformFeedbackCounterBufferIndex(MVKShaderStage stage) { + return _device->getMetalBufferIndexForTransformFeedbackCounterBinding(stage, 0); +} + // Set the number of vertex attribute buffers consumed by this pipeline at each stage. // Any implicit buffers needed by this pipeline will be assigned indexes below the range // defined by this count below the max number of Metal buffer bindings per stage. @@ -1783,7 +1904,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 // The number of reserved bindings we need for the vertex stage is determined from the largest vertex // attribute binding number, plus any synthetic buffer bindings created to support translated offsets. 
mvkClear(_reservedVertexAttributeBufferCount.stages, kMVKShaderStageCount); - _reservedVertexAttributeBufferCount.stages[kMVKShaderStageVertex] = (maxBinding + 1) + xltdBuffCnt; + _reservedVertexAttributeBufferCount.stages[kMVKShaderStageVertex] = (maxBinding + 1) + xltdBuffCnt + (isTransformFeedbackPipeline() ? 2*kMVKMaxTransformFeedbackBufferCount : 0); _reservedVertexAttributeBufferCount.stages[kMVKShaderStageTessCtl] = kMVKTessCtlNumReservedBuffers; _reservedVertexAttributeBufferCount.stages[kMVKShaderStageTessEval] = kMVKTessEvalNumReservedBuffers; } @@ -1954,12 +2075,14 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 MVKMTLFunction MVKGraphicsPipeline::getMTLFunction(SPIRVToMSLConversionConfiguration& shaderConfig, const VkPipelineShaderStageCreateInfo* pShaderStage, VkPipelineCreationFeedback* pStageFB, - const char* pStageName) { + const char* pStageName, + bool passThru) { MVKShaderModule* shaderModule = (MVKShaderModule*)pShaderStage->module; MVKMTLFunction func = shaderModule->getMTLFunction(&shaderConfig, pShaderStage->pSpecializationInfo, this, - pStageFB); + pStageFB, + passThru); if ( !func.getMTLFunction() ) { if (shouldFailOnPipelineCompileRequired()) { setConfigurationResult(VK_PIPELINE_COMPILE_REQUIRED); @@ -1983,9 +2106,9 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 MVKGraphicsPipeline::~MVKGraphicsPipeline() { @synchronized (getMTLDevice()) { - [_mtlTessVertexStageState release]; - [_mtlTessVertexStageIndex16State release]; - [_mtlTessVertexStageIndex32State release]; + [_mtlVertexStageState release]; + [_mtlVertexStageIndex16State release]; + [_mtlVertexStageIndex32State release]; [_mtlTessControlStageState release]; [_mtlPipelineState release]; } @@ -2147,7 +2270,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 shaderConfig.options.mslOptions.indirect_params_buffer_index = _indirectParamsIndex.stages[kMVKShaderStageCompute]; 
shaderConfig.options.mslOptions.replace_recursive_inputs = mvkOSVersionIsAtLeast(14.0, 17.0, 1.0); - MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderConfig, pSS->pSpecializationInfo, this, pStageFB); + MVKMTLFunction func = ((MVKShaderModule*)pSS->module)->getMTLFunction(&shaderConfig, pSS->pSpecializationInfo, this, pStageFB, false); if ( !func.getMTLFunction() ) { if (shouldFailOnPipelineCompileRequired()) { setConfigurationResult(VK_PIPELINE_COMPILE_REQUIRED); @@ -2646,7 +2769,8 @@ void serialize(Archive & archive, SPIRVToMSLConversionResultInfo& scr) { scr.needsInputThreadgroupMem, scr.needsDispatchBaseBuffer, scr.needsViewRangeBuffer, - scr.usesPhysicalStorageBufferAddressesCapability); + scr.usesPhysicalStorageBufferAddressesCapability, + scr.needsTransformFeedback); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index a742690d8..94a130349 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -34,7 +34,7 @@ #pragma mark - #pragma mark MVKRenderSubpass -MVKVulkanAPIObject* MVKRenderSubpass::getVulkanAPIObject() { return _renderPass->getVulkanAPIObject(); }; +MVKVulkanAPIObject* MVKRenderSubpass::getVulkanAPIObject() { return _renderPass->getVulkanAPIObject(); } bool MVKRenderSubpass::hasColorAttachments() { for (auto& ca : _colorAttachments) { @@ -633,7 +633,7 @@ #pragma mark - #pragma mark MVKAttachmentDescription -MVKVulkanAPIObject* MVKAttachmentDescription::getVulkanAPIObject() { return _renderPass->getVulkanAPIObject(); }; +MVKVulkanAPIObject* MVKAttachmentDescription::getVulkanAPIObject() { return _renderPass->getVulkanAPIObject(); } VkFormat MVKAttachmentDescription::getFormat() { return _info.format; } @@ -1090,7 +1090,6 @@ return useAlt ? 
pAltAtt : pAtt; } - #pragma mark - #pragma mark Support functions diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index be4f25454..e79abaa30 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -109,7 +109,8 @@ class MVKShaderLibrary : public MVKBaseObject { MVKMTLFunction getMTLFunction(const VkSpecializationInfo* pSpecializationInfo, VkPipelineCreationFeedback* pShaderFeedback, - MVKShaderModule* shaderModule); + MVKShaderModule* shaderModule, + bool passThruFunc); void handleCompilationError(NSError* err, const char* opDesc); MTLFunctionConstant* getFunctionConstant(NSArray* mtlFCs, NSUInteger mtlFCID); void compileLibrary(const std::string& msl); @@ -213,7 +214,8 @@ class MVKShaderModule : public MVKVulkanAPIDeviceObject { MVKMTLFunction getMTLFunction(SPIRVToMSLConversionConfiguration* pShaderConfig, const VkSpecializationInfo* pSpecializationInfo, MVKPipeline* pipeline, - VkPipelineCreationFeedback* pShaderFeedback); + VkPipelineCreationFeedback* pShaderFeedback, + bool passThruFunc); /** Convert the SPIR-V to MSL, using the specified shader conversion configuration. 
*/ bool convert(SPIRVToMSLConversionConfiguration* pShaderConfig, @@ -237,6 +239,7 @@ class MVKShaderModule : public MVKVulkanAPIDeviceObject { void propagateDebugName() override {} MVKGLSLConversionShaderStage getMVKGLSLConversionShaderStage(SPIRVToMSLConversionConfiguration* pShaderConfig); + void generatePassThruVertexShader(const std::string& entryPoint, SPIRVToMSLConversionResult& conversionResult); MVKShaderLibraryCache _shaderLibraryCache; SPIRVToMSLConverter _spvConverter; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 8619a0da9..013b1ed09 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -19,6 +19,8 @@ #include "MVKShaderModule.h" #include "MVKPipeline.h" #include "MVKFoundation.h" +#include +#include using namespace std; @@ -70,13 +72,17 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD MVKMTLFunction MVKShaderLibrary::getMTLFunction(const VkSpecializationInfo* pSpecializationInfo, VkPipelineCreationFeedback* pShaderFeedback, - MVKShaderModule* shaderModule) { + MVKShaderModule* shaderModule, + bool passThruFunc) { if ( !_mtlLibrary ) { return MVKMTLFunctionNull; } @synchronized (_owner->getMTLDevice()) { @autoreleasepool { NSString* mtlFuncName = @(_shaderConversionResultInfo.entryPoint.mtlFunctionName.c_str()); + if (passThruFunc) { + mtlFuncName = [mtlFuncName stringByAppendingString:@"_PassThru"]; + } MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = pShaderFeedback ? 
mvkGetTimestamp() : mvkDev->getPerformanceTimestamp(); @@ -333,7 +339,8 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD MVKMTLFunction MVKShaderModule::getMTLFunction(SPIRVToMSLConversionConfiguration* pShaderConfig, const VkSpecializationInfo* pSpecializationInfo, MVKPipeline* pipeline, - VkPipelineCreationFeedback* pShaderFeedback) { + VkPipelineCreationFeedback* pShaderFeedback, + bool passThruFunc) { MVKShaderLibrary* mvkLib = _directMSLLibrary; if ( !mvkLib ) { uint64_t startTime = pShaderFeedback ? mvkGetTimestamp() : _device->getPerformanceTimestamp(); @@ -349,7 +356,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD pShaderConfig->markAllInterfaceVarsAndResourcesUsed(); } - return mvkLib ? mvkLib->getMTLFunction(pSpecializationInfo, pShaderFeedback, this) : MVKMTLFunctionNull; + return mvkLib ? mvkLib->getMTLFunction(pSpecializationInfo, pShaderFeedback, this, passThruFunc) : MVKMTLFunctionNull; } bool MVKShaderModule::convert(SPIRVToMSLConversionConfiguration* pShaderConfig, @@ -381,6 +388,9 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", conversionResult.resultLog.c_str()); } + if (conversionResult.resultInfo.needsTransformFeedback) { + generatePassThruVertexShader(pShaderConfig->options.entryPointName, conversionResult); + } } else { reportError(VK_ERROR_INVALID_SHADER_NV, "Unable to convert SPIR-V to MSL:\n%s", conversionResult.resultLog.c_str()); } @@ -404,6 +414,255 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD } } +static std::string mvkTypeToMSL(const SPIRVShaderOutput& output) { + std::ostringstream os; + switch (output.baseType) { + case SPIRV_CROSS_NAMESPACE::SPIRType::Boolean: + os << "bool"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::SByte: + os << "char"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::UByte: + os << "uchar"; + break; + 
case SPIRV_CROSS_NAMESPACE::SPIRType::Short: + os << "short"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::UShort: + os << "ushort"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::Int: + os << "int"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::UInt: + os << "uint"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::Int64: + os << "long"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::UInt64: + os << "ulong"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::Half: + os << "half"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::Float: + os << "float"; + break; + case SPIRV_CROSS_NAMESPACE::SPIRType::Double: + os << "double"; + break; + default: + os << "unknown"; + break; + } + if (output.vecWidth > 1) { + os << output.vecWidth; + } + return os.str(); +} + +static std::string mvkBuiltInToName(const SPIRVShaderOutput& output) { + std::ostringstream os; + switch (output.builtin) { + case spv::BuiltInPosition: + return "gl_Position"; + case spv::BuiltInPointSize: + return "gl_PointSize"; + case spv::BuiltInClipDistance: + os << "gl_ClipDistance_" << output.arrayIndex; + return os.str(); + case spv::BuiltInCullDistance: + os << "gl_CullDistance_" << output.arrayIndex; + return os.str(); + default: + // No other builtins should appear as a vertex shader output. + return "unknown"; + } +} + +static std::string mvkBuiltInToAttr(const SPIRVShaderOutput& output) { + std::ostringstream os; + switch (output.builtin) { + case spv::BuiltInPosition: + return "position"; + case spv::BuiltInPointSize: + return "point_size"; + case spv::BuiltInClipDistance: + os << "user(clip" << output.arrayIndex << ")"; + return os.str(); + case spv::BuiltInCullDistance: + os << "user(cull" << output.arrayIndex << ")"; + return os.str(); + default: + // No other builtins should appear as a vertex shader output. 
+ return "unknown"; + } +} + +static std::string mvkVertexAttrToName(const SPIRVShaderOutput& output) { + std::ostringstream os; + os << "m" << output.location; + if (output.component > 0) + os << "_" << output.component; + return os.str(); +} + +static std::string mvkVertexAttrToUserAttr(const SPIRVShaderOutput& output) { + std::ostringstream os; + os << "locn" << output.location; + if (output.component > 0) + os << "_" << output.component; + return os.str(); +} + +void MVKShaderModule::generatePassThruVertexShader(const std::string& entryName, SPIRVToMSLConversionResult& conversionResult) { + MVKSmallVector vtxOutputs; + std::string errorLog; + std::unordered_map> xfbBuffers; + uint32_t clipDistances = 0; + getShaderOutputs(getSPIRV(), spv::ExecutionModelVertex, entryName, vtxOutputs, errorLog); + for (size_t i = 0; i < vtxOutputs.size(); ++i) { + xfbBuffers[vtxOutputs[i].xfbBufferIndex].push_back(i); + } + // Sort XFB buffers by offset; sort the other outputs by index. + for (auto& buffer : xfbBuffers) { + if (buffer.first == (uint32_t)(-1)) { + std::sort(buffer.second.begin(), buffer.second.end(), [&vtxOutputs](uint32_t a, uint32_t b) { + if (vtxOutputs[a].location == vtxOutputs[b].location) + return vtxOutputs[a].component < vtxOutputs[b].component; + return vtxOutputs[a].location < vtxOutputs[b].location; + }); + } else { + std::sort(buffer.second.begin(), buffer.second.end(), [&vtxOutputs](uint32_t a, uint32_t b) { + return vtxOutputs[a].xfbBufferOffset < vtxOutputs[b].xfbBufferOffset; + }); + } + } + // Emit the buffer structures used by the passthrough function. We can't + // reuse the ones from SPIRV-Cross because the reflection gave us the vertex + // outputs broken up (no structs or arrays), but SPIRV-Cross has the + // structs and arrays intact. Mapping between the two is difficult without + // additional information and code. + // FIXME: Do we want to use a "fast string concatenation" type here? 
+ conversionResult.msl += "\n"; + for (const auto& buffer : xfbBuffers) { + if (buffer.first == (uint32_t)(-1)) { + conversionResult.msl += "struct " + entryName + "_pt_misc\n" + "{\n"; + } else { + std::ostringstream os; + os << "struct " << entryName + "_pt_xfb" << buffer.first << "\n"; + conversionResult.msl += os.str() + "{\n"; + } + uint32_t offset = 0; + uint32_t padNo = 0; + uint32_t stride = vtxOutputs[buffer.second[0]].xfbBufferStride; + for (const auto& outputIdx : buffer.second) { + if (offset < vtxOutputs[outputIdx].xfbBufferOffset) { + // Emit padding to put us at the right offset. + std::ostringstream os; + os << " char pad" << padNo++ << "[" << vtxOutputs[outputIdx].xfbBufferOffset - offset << "];\n"; + conversionResult.msl += os.str(); + } else { + offset = vtxOutputs[outputIdx].xfbBufferOffset; + } + // If the offset isn't at the natural alignment of the type, we'll have to use a packed type. + conversionResult.msl += " "; + if (offset % getShaderOutputAlignment(vtxOutputs[outputIdx]) != 0) + conversionResult.msl += "packed_"; + conversionResult.msl += mvkTypeToMSL(vtxOutputs[outputIdx]) + " "; + if (vtxOutputs[outputIdx].builtin != spv::BuiltInMax) { + conversionResult.msl += mvkBuiltInToName(vtxOutputs[outputIdx]); + } else { + conversionResult.msl += mvkVertexAttrToName(vtxOutputs[outputIdx]); + } + conversionResult.msl += ";\n"; + offset = vtxOutputs[outputIdx].xfbBufferOffset + getShaderOutputSize(vtxOutputs[outputIdx]); + } + // Emit additional padding for the buffer stride. + if (stride != offset) { + std::ostringstream os; + os << " char pad" << padNo++ << "[" << stride - offset << "];\n"; + conversionResult.msl += os.str(); + } + conversionResult.msl += "};\n"; + } + // Emit the vertex stage output structure. 
+ conversionResult.msl += "\n" + "struct " + entryName + "_passthru\n" + "{\n"; + for (const auto& output : vtxOutputs) { + conversionResult.msl += " " + mvkTypeToMSL(output) + " "; + if (output.builtin != spv::BuiltInMax) { + conversionResult.msl += mvkBuiltInToName(output) + " [[" + mvkBuiltInToAttr(output) + "]]"; + if (output.builtin == spv::BuiltInClipDistance) { + clipDistances++; + } + } else { + conversionResult.msl += mvkVertexAttrToName(output) + " [[user(" + mvkVertexAttrToUserAttr(output) + ")]]"; + } + conversionResult.msl += ";\n"; + } + if (clipDistances > 0) { + std::ostringstream os; + os << " float gl_ClipDistance [[clip_distance]] [" << clipDistances << "];\n"; + conversionResult.msl += os.str(); + } + conversionResult.msl += "};\n\n"; + conversionResult.msl += "vertex " + entryName + "_passthru " + entryName + "PassThru("; + std::ostringstream os; + // Emit parameters for XFB buffers and the other output buffer. + for (const auto& buffer : xfbBuffers) { + if (!os.str().empty()) + os << ", "; + if (buffer.first == -1) + os << "const device " << entryName << "_pt_misc* misc_in [[buffer(4)]]"; + else + os << "const device " << entryName << "_pt_xfb" << buffer.first << "* xfb" << buffer.first << " [[buffer(" << buffer.first << ")]]"; + } + conversionResult.msl += os.str() + (os.str().empty() ? "" : ", ") + "uint gl_VertexIndex [[vertex_id]]"; + conversionResult.msl += ")\n" + "{\n" + " " + entryName + "_passthru out;\n"; + // Emit loads from the XFB buffers and stores to stage out. 
+ for (const auto& output : vtxOutputs) { + std::ostringstream loadStore; + loadStore << "out."; + if (output.builtin != spv::BuiltInMax) { + loadStore << mvkBuiltInToName(output); + } else { + loadStore << mvkVertexAttrToName(output); + } + loadStore << " = "; + if (output.xfbBufferIndex == -1) { + loadStore << "misc_in[gl_VertexIndex]."; + } else { + loadStore << "xfb" << output.xfbBufferIndex << "[gl_VertexIndex]."; + } + if (output.builtin != spv::BuiltInMax) { + loadStore << mvkBuiltInToName(output); + } else { + loadStore << mvkVertexAttrToName(output); + } + conversionResult.msl += " " + loadStore.str() + ";\n"; + if (output.builtin == spv::BuiltInClipDistance) { + std::ostringstream clipLoadStore; + clipLoadStore << "out.gl_ClipDistance[" << output.arrayIndex << "] = "; + if (output.xfbBufferIndex == -1) { + clipLoadStore << "misc_in[gl_VertexIndex]."; + } else { + clipLoadStore << "xfb" << output.xfbBufferIndex << "[gl_VertexIndex]."; + } + clipLoadStore << mvkBuiltInToName(output); + conversionResult.msl += " " + clipLoadStore.str() + ";\n"; + } + } + conversionResult.msl += " return out;\n" + "}\n"; +} + void MVKShaderModule::setWorkgroupSize(uint32_t x, uint32_t y, uint32_t z) { if(_directMSLLibrary) { _directMSLLibrary->setWorkgroupSize(x, y, z); } } diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index d8c222bdd..8da7cf374 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -136,6 +136,7 @@ MVK_EXTENSION(EXT_swapchain_colorspace, EXT_SWAPCHAIN_COLOR_SPACE, MVK_EXTENSION(EXT_swapchain_maintenance1, EXT_SWAPCHAIN_MAINTENANCE_1, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(EXT_texel_buffer_alignment, EXT_TEXEL_BUFFER_ALIGNMENT, DEVICE, 10.13, 11.0, 1.0) MVK_EXTENSION(EXT_texture_compression_astc_hdr, EXT_TEXTURE_COMPRESSION_ASTC_HDR, DEVICE, 11.0, 13.0, 1.0) +MVK_EXTENSION(EXT_transform_feedback, EXT_TRANSFORM_FEEDBACK, DEVICE, 10.11, 8.0, 1.0) 
MVK_EXTENSION(EXT_vertex_attribute_divisor, EXT_VERTEX_ATTRIBUTE_DIVISOR, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(AMD_draw_indirect_count, AMD_DRAW_INDIRECT_COUNT, DEVICE, MVK_NA, MVK_NA, MVK_NA) MVK_EXTENSION(AMD_gpu_shader_half_float, AMD_GPU_SHADER_HALF_FLOAT, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index d3aa660a0..f77cec258 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -102,7 +102,7 @@ typedef enum : uint8_t { /** Represents a given stage of a graphics pipeline. */ enum MVKGraphicsStage { - kMVKGraphicsStageVertex = 0, /**< The tessellation vertex compute shader stage. */ + kMVKGraphicsStageVertex = 0, /**< The tessellation or transform feedback vertex compute shader stage. */ kMVKGraphicsStageTessControl, /**< The tessellation control compute shader stage. */ kMVKGraphicsStageRasterization /**< The rest of the pipeline. */ }; diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index c08c5b3a5..bdb5dcde3 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -3922,7 +3922,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkExportMetalObjectsEXT( #pragma mark - #pragma mark VK_EXT_sample_locations extension -void vkGetPhysicalDeviceMultisamplePropertiesEXT( +MVK_PUBLIC_VULKAN_SYMBOL void vkGetPhysicalDeviceMultisamplePropertiesEXT( VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT* pMultisampleProperties) { @@ -3933,7 +3933,7 @@ void vkGetPhysicalDeviceMultisamplePropertiesEXT( MVKTraceVulkanCallEnd(); } -void vkCmdSetSampleLocationsEXT( +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetSampleLocationsEXT( VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT* pSampleLocationsInfo) { @@ -3943,6 +3943,50 @@ void vkCmdSetSampleLocationsEXT( } +#pragma mark - +#pragma mark VK_EXT_transform_feedback extension + 
+MVK_PUBLIC_VULKAN_STUB(vkCmdBeginQueryIndexedEXT, void, VkCommandBuffer, VkQueryPool, uint32_t, VkQueryControlFlags, uint32_t) +MVK_PUBLIC_VULKAN_STUB(vkCmdDrawIndirectByteCountEXT, void, VkCommandBuffer, uint32_t, uint32_t, VkBuffer, VkDeviceSize, uint32_t, uint32_t) +MVK_PUBLIC_VULKAN_STUB(vkCmdEndQueryIndexedEXT, void, VkCommandBuffer, VkQueryPool, uint32_t, uint32_t) + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginTransformFeedbackEXT( + VkCommandBuffer commandBuffer, + uint32_t firstCounterBuffer, + uint32_t counterBufferCount, + const VkBuffer* pCounterBuffers, + const VkDeviceSize* pCounterBufferOffsets) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFrom2Thresholds(BeginTransformFeedback, counterBufferCount, 1, 2, commandBuffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, pCounterBufferOffsets); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBindTransformFeedbackBuffersEXT( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer* pBuffers, + const VkDeviceSize* pOffsets, + const VkDeviceSize* pSizes) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFrom2Thresholds(BindTransformFeedbackBuffers, bindingCount, 1, 2, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets, pSizes); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndTransformFeedbackEXT( + VkCommandBuffer commandBuffer, + uint32_t, + uint32_t, + const VkBuffer*, + const VkDeviceSize*) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(EndTransformFeedback, commandBuffer); + MVKTraceVulkanCallEnd(); +} + #pragma mark - #pragma mark VK_GOOGLE_display_timing extension diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h index 105cefff8..ea9090e32 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVReflection.h @@ -74,6 +74,9 @@ namespace 
mvk { /** The component index of the variable. */ uint32_t component; + /** The array index, if this is an array, or -1 otherwise. */ + uint32_t arrayIndex; + /** * If this is the first member of a struct, this will contain the alignment * of the struct containing this variable, otherwise this will be zero. @@ -88,6 +91,24 @@ namespace mvk { /** Whether this variable is actually used (read or written) by the shader. */ bool isUsed; + + /** + * The index of the transform feedback buffer, if this is an output captured + * by transform feedback; otherwise, this will be -1. + */ + uint32_t xfbBufferIndex; + + /** + * The offset within the transform feedback buffer, if this is an output captured + * by transform feedback. + */ + uint32_t xfbBufferOffset; + + /** + * The per-vertex stride of the transform feedback buffer, if this is an output + * captured by transform feedback. + */ + uint32_t xfbBufferStride; }; typedef SPIRVShaderInterfaceVariable SPIRVShaderOutput; @@ -237,8 +258,10 @@ namespace mvk { static inline uint32_t getShaderInterfaceStructMembers(const SPIRV_CROSS_NAMESPACE::CompilerReflection& reflect, Vi& vars, SPIRVShaderInterfaceVariable* pParentFirstMember, const SPIRV_CROSS_NAMESPACE::SPIRType* structType, spv::StorageClass storage, - bool patch, uint32_t loc) { + bool patch, uint32_t loc, uint32_t xfbBuffer, uint32_t xfbOffset, + uint32_t xfbStride) { bool isUsed = true; + bool isBlock = reflect.has_decoration(structType->self, spv::DecorationBlock); auto biType = spv::BuiltInMax; SPIRVShaderInterfaceVariable* pFirstMember = nullptr; size_t mbrCnt = structType->member_types.size(); @@ -250,6 +273,11 @@ namespace mvk { loc = reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationLocation); cmp = reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationComponent); } + uint32_t structOffset = reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationOffset); + if (isBlock && 
reflect.has_member_decoration(structType->self, mbrIdx, spv::DecorationOffset)) { + xfbBuffer = reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationXfbBuffer); + xfbStride = reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationXfbStride); + } patch = patch || reflect.has_member_decoration(structType->self, mbrIdx, spv::DecorationPatch); if (reflect.has_member_decoration(structType->self, mbrIdx, spv::DecorationBuiltIn)) { biType = (spv::BuiltIn)reflect.get_member_decoration(structType->self, mbrIdx, spv::DecorationBuiltIn); @@ -258,12 +286,12 @@ namespace mvk { const SPIRV_CROSS_NAMESPACE::SPIRType* type = &reflect.get_type(structType->member_types[mbrIdx]); uint32_t elemCnt = (type->array.empty() ? 1 : type->array[0]) * type->columns; for (uint32_t elemIdx = 0; elemIdx < elemCnt; elemIdx++) { - if (type->basetype == SPIRV_CROSS_NAMESPACE::SPIRType::Struct) - loc = getShaderInterfaceStructMembers(reflect, vars, pFirstMember, type, storage, patch, loc); - else { + if (type->basetype == SPIRV_CROSS_NAMESPACE::SPIRType::Struct) { + loc = getShaderInterfaceStructMembers(reflect, vars, pFirstMember, type, storage, patch, loc, xfbBuffer, xfbOffset + structOffset, xfbStride); + } else { // The alignment of a structure is the same as the largest member of the structure. // Consequently, the first flattened member of a structure should align with structure itself. 
- vars.push_back({type->basetype, type->vecsize, loc, cmp, 0, biType, patch, isUsed}); + vars.push_back({type->basetype, type->vecsize, loc, cmp, elemIdx, 0, biType, patch, isUsed, xfbBuffer, xfbOffset + structOffset, xfbStride}); auto& currOutput = vars.back(); if ( !pFirstMember ) { pFirstMember = &currOutput; } pFirstMember->firstStructMemberAlignment = std::max(pFirstMember->firstStructMemberAlignment, getShaderOutputSize(currOutput)); @@ -286,7 +314,7 @@ namespace mvk { Vo& outputs, SPIRVShaderOutput* pParentFirstMember, const SPIRV_CROSS_NAMESPACE::SPIRType* structType, spv::StorageClass storage, bool patch, uint32_t loc) { - return getShaderInterfaceStructMembers(reflect, outputs, pParentFirstMember, structType, storage, patch, loc); + return getShaderInterfaceStructMembers(reflect, outputs, pParentFirstMember, structType, storage, patch, loc, (uint32_t)(-1), 0, 0); } /** Given a shader in SPIR-V format, returns interface reflection data. */ @@ -332,6 +360,14 @@ namespace mvk { if (reflect.has_decoration(varID, spv::DecorationComponent)) { cmp = reflect.get_decoration(varID, spv::DecorationComponent); } + uint32_t xfbBuffer = -1; + uint32_t xfbOffset = 0; + uint32_t xfbStride = 0; + if (reflect.has_decoration(varID, spv::DecorationOffset)) { + xfbBuffer = reflect.get_decoration(varID, spv::DecorationXfbBuffer); + xfbOffset = reflect.get_decoration(varID, spv::DecorationOffset); + xfbStride = reflect.get_decoration(varID, spv::DecorationXfbStride); + } // For tessellation shaders, peel away the initial array type. SPIRV-Cross adds the array back automatically. // Only some builtins will be arrayed here. 
if ((model == spv::ExecutionModelTessellationControl || (model == spv::ExecutionModelTessellationEvaluation && storage == spv::StorageClassInput)) && !patch && @@ -343,9 +379,9 @@ namespace mvk { for (uint32_t i = 0; i < elemCnt; i++) { if (type->basetype == SPIRV_CROSS_NAMESPACE::SPIRType::Struct) { SPIRVShaderInterfaceVariable* pFirstMember = nullptr; - loc = getShaderInterfaceStructMembers(reflect, vars, pFirstMember, type, storage, patch, loc); + loc = getShaderInterfaceStructMembers(reflect, vars, pFirstMember, type, storage, patch, loc, xfbBuffer, xfbOffset, xfbStride); } else { - vars.push_back({type->basetype, type->vecsize, loc, cmp, 0, biType, patch, isUsed}); + vars.push_back({type->basetype, type->vecsize, loc, cmp, i, 0, biType, patch, isUsed, xfbBuffer, xfbOffset, xfbStride}); loc = addSat(loc, 1); } } @@ -379,5 +415,31 @@ namespace mvk { return getShaderInterfaceVariables(spirv, spv::StorageClassInput, model, entryName, outputs, errorLog); } + template + static inline bool getUsesTransformFeedback(const Vs& spirv, spv::ExecutionModel model, const std::string& + entryName, std::string& errorLog) { +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS + try { +#endif + SPIRV_CROSS_NAMESPACE::CompilerReflection transFeedbackReflect(spirv); + + if (!entryName.empty()) { + transFeedbackReflect.set_entry_point(entryName, model); + } + + transFeedbackReflect.compile(); + + const SPIRV_CROSS_NAMESPACE::Bitset& txbModes = transFeedbackReflect.get_execution_mode_bitset(); + + return txbModes.get(spv::ExecutionModeXfb); + +#ifndef SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS + } catch (SPIRV_CROSS_NAMESPACE::CompilerError& ex) { + errorLog = ex.what(); + return false; + } +#endif + } + } #endif diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp index ced660aa8..6459e04b3 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp +++ 
b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.cpp @@ -367,6 +367,7 @@ MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverter::convert(SPIRVToMSLConversionConfigur conversionResult.resultInfo.needsDispatchBaseBuffer = pMSLCompiler && pMSLCompiler->needs_dispatch_base_buffer(); conversionResult.resultInfo.needsViewRangeBuffer = pMSLCompiler && pMSLCompiler->needs_view_mask_buffer(); conversionResult.resultInfo.usesPhysicalStorageBufferAddressesCapability = usesPhysicalStorageBufferAddressesCapability(pMSLCompiler); + conversionResult.resultInfo.needsTransformFeedback = pMSLCompiler && pMSLCompiler->needs_transform_feedback(); // When using Metal argument buffers, if the shader is provided with dynamic buffer offsets, // then it needs a buffer to hold these dynamic offsets. diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h index 1789ee93c..8df005eff 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter/SPIRVToMSLConverter.h @@ -245,6 +245,7 @@ namespace mvk { bool needsDispatchBaseBuffer = false; bool needsViewRangeBuffer = false; bool usesPhysicalStorageBufferAddressesCapability = false; + bool needsTransformFeedback = false; } SPIRVToMSLConversionResultInfo; diff --git a/mvkGitRevDerived.h.in b/mvkGitRevDerived.h.in new file mode 100644 index 000000000..efb8b8672 --- /dev/null +++ b/mvkGitRevDerived.h.in @@ -0,0 +1,2 @@ +// Auto-generated by MoltenVK +static const char* mvkRevString = "@MVK_GIT_REV@"; \ No newline at end of file