diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 03f9f2a9e..7a0de21ae 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -645,7 +645,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl if (dsChanged) { auto& usageDirty = _metalUsageDirtyDescriptors[descSetIndex]; usageDirty.resize(descSet->getDescriptorCount()); - usageDirty.setAllBits(); + usageDirty.enableAllBits(); } // Update dynamic buffer offsets @@ -717,7 +717,7 @@ - (void)setDepthBoundsTestAMD:(BOOL)enable minDepth:(float)minDepth maxDepth:(fl MVKCommandEncoderState::markDirty(); if (_cmdEncoder->isUsingMetalArgumentBuffers()) { for (uint32_t dsIdx = 0; dsIdx < kMVKMaxDescriptorSetCount; dsIdx++) { - _metalUsageDirtyDescriptors[dsIdx].setAllBits(); + _metalUsageDirtyDescriptors[dsIdx].enableAllBits(); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h index 96a9cf841..00ed01307 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.h @@ -305,9 +305,9 @@ class MVKDescriptorPool : public MVKVulkanAPIDeviceObject { MVKSmallVector _descriptorSets; MVKBitArray _descriptorSetAvailablility; - id _metalArgumentBuffer; - NSUInteger _nextMetalArgumentBufferOffset; MVKMTLBufferAllocator _mtlBufferAllocator; + id _metalArgumentBuffer = nil; + NSUInteger _nextMetalArgumentBufferOffset = 0; MVKDescriptorTypePool _uniformBufferDescriptors; MVKDescriptorTypePool _storageBufferDescriptors; @@ -322,7 +322,8 @@ class MVKDescriptorPool : public MVKVulkanAPIDeviceObject { MVKDescriptorTypePool _uniformTexelBufferDescriptors; MVKDescriptorTypePool _storageTexelBufferDescriptors; - VkDescriptorPoolCreateFlags _flags; + VkDescriptorPoolCreateFlags _flags = 0; + size_t _maxAllocDescSetCount = 0; }; diff --git 
a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm index f692b08bd..8474559af 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptorSet.mm @@ -292,7 +292,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf for (uint32_t bindIdx = 0; bindIdx < bindCnt; bindIdx++) { auto& dslBind = _bindings[bindIdx]; if (context.isResourceUsed(spvExecModels[stage], descSetIndex, dslBind.getBinding())) { - bindingUse.setBit(bindIdx); + bindingUse.enableBit(bindIdx); descSetIsUsed = true; } } @@ -628,9 +628,9 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf bool& dynamicAllocation, MVKDescriptorPool* pool) { VkResult errRslt = VK_ERROR_OUT_OF_POOL_MEMORY; - size_t availDescIdx = _availability.getIndexOfFirstSetBit(); + size_t availDescIdx = _availability.getIndexOfFirstEnabledBit(); if (availDescIdx < size()) { - _availability.clearBit(availDescIdx); // Mark the descriptor as taken + _availability.disableBit(availDescIdx); // Mark the descriptor as taken *pMVKDesc = &_descriptors[availDescIdx]; (*pMVKDesc)->reset(); // Reset descriptor before reusing. dynamicAllocation = false; @@ -657,7 +657,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf DescriptorClass* pFirstDesc = _descriptors.data(); int64_t descIdx = pDesc >= pFirstDesc ? 
pDesc - pFirstDesc : pFirstDesc - pDesc; if (descIdx >= 0 && descIdx < size()) { - _availability.setBit(descIdx); + _availability.enableBit(descIdx); } else { mvkDesc->destroy(); } @@ -666,13 +666,13 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf // Preallocated descriptors will be reset when they are reused template void MVKDescriptorTypePool::reset() { - _availability.setAllBits(); + _availability.enableAllBits(); } template size_t MVKDescriptorTypePool::getRemainingDescriptorCount() { size_t enabledCount = 0; - _availability.enumerateEnabledBits(false, [&](size_t bitIdx) { enabledCount++; return true; }); + _availability.enumerateEnabledBits([&](size_t bitIdx) { enabledCount++; return true; }); return enabledCount; } @@ -740,7 +740,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf uint64_t mtlArgBuffEncAlignedSize = mvkAlignByteCount(mtlArgBuffEncSize, getMetalFeatures().mtlBufferAlignment); size_t dsCnt = _descriptorSetAvailablility.size(); - _descriptorSetAvailablility.enumerateEnabledBits(true, [&](size_t dsIdx) { + _descriptorSetAvailablility.enumerateEnabledBits([&](size_t dsIdx) { bool isSpaceAvail = true; // If not using Metal arg buffers, space will always be available. 
MVKDescriptorSet* mvkDS = &_descriptorSets[dsIdx]; NSUInteger mtlArgBuffOffset = mvkDS->getMetalArgumentBuffer().getMetalArgumentBufferOffset(); @@ -772,11 +772,12 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf if (rslt) { freeDescriptorSet(mvkDS, false); } else { + _descriptorSetAvailablility.disableBit(dsIdx); + _maxAllocDescSetCount = std::max(_maxAllocDescSetCount, dsIdx + 1); *pVKDS = (VkDescriptorSet)mvkDS; } return false; } else { - _descriptorSetAvailablility.setBit(dsIdx); // We didn't consume this one after all, so it's still available return true; } }); @@ -800,7 +801,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf mvkDS->free(isPoolReset); if ( !isPoolReset ) { size_t dsIdx = mvkDS - _descriptorSets.data(); - _descriptorSetAvailablility.setBit(dsIdx); + _descriptorSetAvailablility.enableBit(dsIdx); } } else { reportError(VK_ERROR_INITIALIZATION_FAILED, "A descriptor set is being returned to a descriptor pool that did not allocate it."); @@ -810,11 +811,10 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf // Free allocated descriptor sets and reset descriptor pools. // Don't waste time freeing desc sets that were never allocated. 
VkResult MVKDescriptorPool::reset(VkDescriptorPoolResetFlags flags) { - size_t dsCnt = _descriptorSetAvailablility.getLowestNeverClearedBitIndex(); - for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { + for (uint32_t dsIdx = 0; dsIdx < _maxAllocDescSetCount; dsIdx++) { freeDescriptorSet(&_descriptorSets[dsIdx], true); } - _descriptorSetAvailablility.setAllBits(); + _descriptorSetAvailablility.enableAllBits(); _uniformBufferDescriptors.reset(); _storageBufferDescriptors.reset(); @@ -830,6 +830,7 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf _storageTexelBufferDescriptors.reset(); _nextMetalArgumentBufferOffset = 0; + _maxAllocDescSetCount = 0; return VK_SUCCESS; } @@ -1003,9 +1004,6 @@ static void populateAuxBuffer(mvk::SPIRVToMSLConversionConfiguration& shaderConf } void MVKDescriptorPool::initMetalArgumentBuffer(const VkDescriptorPoolCreateInfo* pCreateInfo) { - _metalArgumentBuffer = nil; - _nextMetalArgumentBufferOffset = 0; - if ( !isUsingMetalArgumentBuffers() ) { return; } auto& mtlFeats = getMetalFeatures(); diff --git a/MoltenVK/MoltenVK/Utility/MVKBitArray.h b/MoltenVK/MoltenVK/Utility/MVKBitArray.h index 6f9afa3dd..48c967a00 100755 --- a/MoltenVK/MoltenVK/Utility/MVKBitArray.h +++ b/MoltenVK/MoltenVK/Utility/MVKBitArray.h @@ -27,117 +27,84 @@ /** Represents an array of bits, optimized for reduced storage and fast scanning for bits that are set. */ class MVKBitArray { - static constexpr size_t SectionMaskSize = 6; // 64 bits - static constexpr size_t SectionBitCount = (size_t)1U << SectionMaskSize; - static constexpr size_t SectionByteCount = SectionBitCount / 8; - static constexpr uint64_t SectionMask = SectionBitCount - 1; - public: /** - * Returns the value of the bit, and optionally clears that bit if it was set. - * Returns false if the bitIndex is beyond the size of this array, returns false. + * Returns the value of the bit, and optionally disables that bit if it was enabled. 
+ * Returns false if the bitIndex is beyond the size of this array. */ - bool getBit(size_t bitIndex, bool shouldClear = false) { + bool getBit(size_t bitIndex, bool shouldDisable = false) { if (bitIndex >= _bitCount) { return false; } - bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getSectionSetMask(bitIndex)); - if (shouldClear && val) { clearBit(bitIndex); } + bool val = mvkIsAnyFlagEnabled(getSection(getIndexOfSection(bitIndex)), getBitPositionSectionMask(bitIndex)); + if (val && shouldDisable) { disableBit(bitIndex); } return val; } - /** Sets the value of the bit to the val (or to 1 by default). */ - void setBit(size_t bitIndex, bool val = true) { + /** Sets the value of the bit to the val. */ + void setBit(size_t bitIndex, bool val) { if (bitIndex >= _bitCount) { return; } - size_t secIdx = getIndexOfSection(bitIndex); + auto secIdx = getIndexOfSection(bitIndex); + auto& sectionData = getSection(secIdx); if (val) { - mvkEnableFlags(getSection(secIdx), getSectionSetMask(bitIndex)); - if (secIdx < _clearedSectionCount) { _clearedSectionCount = secIdx; } + mvkEnableFlags(getSection(secIdx), getBitPositionSectionMask(bitIndex)); } else { - mvkDisableFlags(getSection(secIdx), getSectionSetMask(bitIndex)); - if (secIdx == _clearedSectionCount && !getSection(secIdx)) { _clearedSectionCount++; } - _lowestNeverClearedBitIndex = std::max(_lowestNeverClearedBitIndex, bitIndex + 1); + mvkDisableFlags(getSection(secIdx), getBitPositionSectionMask(bitIndex)); } - } - - /** Sets the value of the bit to 0. */ - void clearBit(size_t bitIndex) { setBit(bitIndex, false); } - /** Sets all bits in the array to 1. 
*/ - void setAllBits() { - // Nothing to do if no bits have been cleared (also ensure _lowestNeverClearedBitIndex doesn't go negative) - if (_lowestNeverClearedBitIndex) { - size_t endSecIdx = getIndexOfSection(_lowestNeverClearedBitIndex - 1); - for (size_t secIdx = 0; secIdx <= endSecIdx; secIdx++) { - getSection(secIdx) = ~0; + // Adjust fully disabled tracker + if (isFullyDisabled(sectionData)) { + if (secIdx == _fullyDisabledSectionCount) { + auto secCnt = getSectionCount(); + while (++_fullyDisabledSectionCount < secCnt && isFullyDisabled(getSection(_fullyDisabledSectionCount))); } + } else { + _fullyDisabledSectionCount = std::min(_fullyDisabledSectionCount, (uint32_t)secIdx); } - _clearedSectionCount = 0; - _lowestNeverClearedBitIndex = 0; - } - /** Clears all bits in the array to 0. */ - void clearAllBits() { - size_t secCnt = getSectionCount(); - while (_clearedSectionCount < secCnt) { - getSection(_clearedSectionCount++) = 0; + // Adjust partially disabled tracker + if (isFullyEnabled(sectionData)) { + if (secIdx + 1 == _partiallyDisabledSectionCount) { + while (--_partiallyDisabledSectionCount > 0 && isFullyEnabled(getSection(_partiallyDisabledSectionCount - 1))); + } + } else { + _partiallyDisabledSectionCount = std::max(_partiallyDisabledSectionCount, (uint32_t)secIdx + 1); } - _lowestNeverClearedBitIndex = _bitCount; } - /** - * Returns the index of the first bit that is set, at or after the specified index, - * and optionally clears that bit. If no bits are set, returns the size() of this bit array. 
- */ - size_t getIndexOfFirstSetBit(size_t startIndex, bool shouldClear) { - size_t startSecIdx = getIndexOfSection(startIndex); - if (startSecIdx < _clearedSectionCount) { - startSecIdx = _clearedSectionCount; - startIndex = 0; - } - size_t bitIdx = startSecIdx << SectionMaskSize; - size_t secCnt = getSectionCount(); - for (size_t secIdx = startSecIdx; secIdx < secCnt; secIdx++) { - size_t lclBitIdx = getIndexOfFirstSetBitInSection(getSection(secIdx), getBitIndexInSection(startIndex)); - bitIdx += lclBitIdx; - if (lclBitIdx < SectionBitCount) { - if (startSecIdx == _clearedSectionCount && !getSection(startSecIdx)) { _clearedSectionCount = secIdx; } - if (shouldClear) { clearBit(bitIdx); } - return std::min(bitIdx, _bitCount); - } - startIndex = 0; + /** Enables the bit. */ + void enableBit(size_t bitIndex) { setBit(bitIndex, true); } + + /** Enables all bits in the array. */ + void enableAllBits() { + for (size_t secIdx = 0; secIdx < _partiallyDisabledSectionCount; secIdx++) { + getSection(secIdx) = FullyEnabledSectionMask; } - return std::min(bitIdx, _bitCount); + _partiallyDisabledSectionCount = 0; + _fullyDisabledSectionCount = 0; } - /** - * Returns the index of the first bit that is set, at or after the specified index. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit(size_t startIndex) { - return getIndexOfFirstSetBit(startIndex, false); - } + /** Disables the bit. */ + void disableBit(size_t bitIndex) { setBit(bitIndex, false); } - /** - * Returns the index of the first bit that is set and optionally clears that bit. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit(bool shouldClear) { - return getIndexOfFirstSetBit(0, shouldClear); + /** Disables all bits in the array. 
*/ + void disableAllBits() { + size_t secCnt = getSectionCount(); + for (size_t secIdx = _fullyDisabledSectionCount; secIdx < secCnt; secIdx++) { + getSection(secIdx) = 0; + } + _partiallyDisabledSectionCount = (uint32_t)secCnt; + _fullyDisabledSectionCount = (uint32_t)secCnt; } - /** - * Returns the index of the lowest bit that has never been cleared since the last time all the bits were set or cleared. - * In other words, this bit, and all above it, have never been cleared since the last time they were all set or cleared. - */ - size_t getLowestNeverClearedBitIndex() { return _lowestNeverClearedBitIndex; } - - /** - * Returns the index of the first bit that is set. - * If no bits are set, returns the size() of this bit array. - */ - size_t getIndexOfFirstSetBit() { - return getIndexOfFirstSetBit(0, false); + /** + * Returns the index of the first bit that is enabled, at or after the specified index. + * If no bits are enabled, returns the size() of this bit array. + */ + size_t getIndexOfFirstEnabledBit(size_t startIndex = 0) { + size_t secIdx = getIndexOfSection(startIndex); + if (secIdx < _fullyDisabledSectionCount) { + secIdx = _fullyDisabledSectionCount; + startIndex = 0; + } + size_t secCnt = getSectionCount(); + while (secIdx < secCnt) { + uint8_t lclBitIdx = getIndexOfFirstEnabledBitInSection(getSection(secIdx), getBitIndexInSection(startIndex)); + if (lclBitIdx < SectionBitCount) { + return std::min((secIdx * SectionBitCount) + lclBitIdx, _bitCount); + } + startIndex = 0; + secIdx++; + } + return _bitCount; + } /** @@ -149,13 +116,11 @@ class MVKBitArray { * The custom function should return true to continue processing further bits, or false * to stop processing further bits. This function returns false if any of the invocations * of the custom function halted further invocations, and returns true otherwise. - * - * If shouldClear is true, each enabled bit is cleared before the custom function executes.
*/ - bool enumerateEnabledBits(bool shouldClear, std::function func) { - for (size_t bitIdx = getIndexOfFirstSetBit(shouldClear); + bool enumerateEnabledBits(std::function func) { + for (size_t bitIdx = getIndexOfFirstEnabledBit(); bitIdx < _bitCount; - bitIdx = getIndexOfFirstSetBit(++bitIdx, shouldClear)) { + bitIdx = getIndexOfFirstEnabledBit(++bitIdx)) { if ( !func(bitIdx) ) { return false; } } @@ -166,7 +131,7 @@ class MVKBitArray { size_t size() const { return _bitCount; } /** Returns whether this array is empty. */ - bool empty() const { return !_bitCount; } + bool empty() const { return _bitCount == 0; } /** * Resize this array to the specified number of bits. @@ -174,57 +139,61 @@ class MVKBitArray { * The value of existing bits that fit within the new size are retained, and any * new bits that are added to accommodate the new size are set to the given value. * - * If the new size is larger than the existing size, new memory may be allocated. - * If the new size is less than the existing size, consumed memory is retained - * unless the size is set to zero. + * If the new size is larger than the existing size, new memory is allocated. + * If the new size is less than the existing size, consumed memory is retained. */ void resize(size_t size, bool val = false) { + assert(size < SectionBitCount * std::numeric_limits::max()); // Limited by _partially/fullyDisabledSectionCount + if (size == _bitCount) { return; } size_t oldBitCnt = _bitCount; size_t oldSecCnt = getSectionCount(); - size_t oldEndBitCnt = oldSecCnt << SectionMaskSize; + size_t oldEndBitCnt = oldSecCnt * SectionBitCount; - // Some magic here. If we need only one section, _data is used as that section, - // and it will be stomped on if we reallocate, so we cache it here. - uint64_t* oldData = _data; - uint64_t* pOldData = oldSecCnt > 1 ? oldData : (uint64_t*)&oldData; - - _bitCount = size; + _bitCount = size; // After here, functions refer to new data characteristics. 
+ // If the number of data sections is not growing, we retain the existing data memory, + // to avoid having to reallocate if this array is resized larger in the future. + // If the number of data sections is growing, we need to expand memory. size_t newSecCnt = getSectionCount(); - if (newSecCnt == 0) { - // Clear out the existing data - if (oldSecCnt > 1) { free(pOldData); } - _data = 0; - _clearedSectionCount = 0; - _lowestNeverClearedBitIndex = 0; - } else if (newSecCnt == oldSecCnt) { + if (newSecCnt == oldSecCnt) { + // The number of data sections is staying the same. // Keep the existing data, but fill any bits in the last section // that were beyond the old bit count with the new initial value. - for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); } + for (size_t bitIdx = oldBitCnt; bitIdx < _bitCount; bitIdx++) { setBit(bitIdx, val); } } else if (newSecCnt > oldSecCnt) { - size_t oldByteCnt = oldSecCnt * SectionByteCount; - size_t newByteCnt = newSecCnt * SectionByteCount; - - // If needed, allocate new memory. - if (newSecCnt > 1) { _data = (uint64_t*)malloc(newByteCnt); } - - // Fill the new memory with the new initial value, copy the old contents to - // the new memory, fill any bits in the old last section that were beyond - // the old bit count with the new initial value, and remove the old memory. - uint64_t* pNewData = getData(); - memset(pNewData, val ? ~0 : 0, newByteCnt); - memcpy(pNewData, pOldData, oldByteCnt); + // The number of data sections is growing. + // Reallocate new memory to keep the existing contents. + _data = (uint64_t*)realloc(_data, newSecCnt * SectionByteCount); + + // Fill any bits in the last section that were beyond the old bit count with the fill value. 
for (size_t bitIdx = oldBitCnt; bitIdx < oldEndBitCnt; bitIdx++) { setBit(bitIdx, val); } - if (oldSecCnt > 1) { free(pOldData); } - if (!val) { _lowestNeverClearedBitIndex = _bitCount; } // Cover additional sections - // If the entire old array and the new array are cleared, move the uncleared indicator to the new end. - if (_clearedSectionCount == oldSecCnt && !val) { _clearedSectionCount = newSecCnt; } + // Fill the additional sections with the fill value. + uint64_t* pExtraData = &_data[oldSecCnt]; + memset(pExtraData, val ? (uint8_t)FullyEnabledSectionMask : 0, (newSecCnt - oldSecCnt) * SectionByteCount); + + // If the additional sections have been cleared, extend the associated trackers. + if ( !val ) { + if (_partiallyDisabledSectionCount == oldSecCnt) { _partiallyDisabledSectionCount = (uint32_t)newSecCnt; } + if (_fullyDisabledSectionCount == oldSecCnt) { _fullyDisabledSectionCount = (uint32_t)newSecCnt; } + } + } else { + // The number of data sections is shrinking. + // Retain existing allocation, but ensure these values still fit. + _partiallyDisabledSectionCount = std::min(_partiallyDisabledSectionCount, (uint32_t)newSecCnt); + _fullyDisabledSectionCount = std::min(_fullyDisabledSectionCount, (uint32_t)newSecCnt); } - // If we shrank, ensure this value still fits - if (_lowestNeverClearedBitIndex > _bitCount) { _lowestNeverClearedBitIndex = _bitCount; } + } + + /** Resets back to zero size and frees all data. */ + void reset() { + free(_data); + _data = nullptr; + _bitCount = 0; + _partiallyDisabledSectionCount = 0; + _fullyDisabledSectionCount = 0; } /** Constructs an instance for the specified number of bits, and sets the initial value of all the bits. 
*/ @@ -232,68 +201,49 @@ class MVKBitArray { MVKBitArray(const MVKBitArray& other) { resize(other._bitCount); - memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount); - _clearedSectionCount = other._clearedSectionCount; - _lowestNeverClearedBitIndex = other._lowestNeverClearedBitIndex; + memcpy(_data, other._data, getSectionCount() * SectionByteCount); } MVKBitArray& operator=(const MVKBitArray& other) { - resize(0); // Clear out the old memory resize(other._bitCount); - memcpy(getData(), other.getData(), getSectionCount() * SectionByteCount); - _clearedSectionCount = other._clearedSectionCount; - _lowestNeverClearedBitIndex = other._lowestNeverClearedBitIndex; + memcpy(_data, other._data, getSectionCount() * SectionByteCount); return *this; } - ~MVKBitArray() { resize(0); } + ~MVKBitArray() { reset(); } protected: - // Returns a pointer do the data. - // Some magic here. If we need only one section, _data is used as that section. - uint64_t* getData() const { - return getSectionCount() > 1 ? _data : (uint64_t*)&_data; - } - - // Returns a reference to the section. - uint64_t& getSection(size_t secIdx) { - return getData()[secIdx]; - } - - // Returns the number of sections. - size_t getSectionCount() const { - return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0; - } - - // Returns the index of the section that contains the specified bit. - static size_t getIndexOfSection(size_t bitIndex) { - return bitIndex >> SectionMaskSize; - } + uint64_t& getSection(size_t secIdx) { return _data[secIdx]; } + size_t getSectionCount() const { return _bitCount ? getIndexOfSection(_bitCount - 1) + 1 : 0; } - // Converts the bit index to a local bit index within a section, and returns that local bit index. 
- static size_t getBitIndexInSection(size_t bitIndex) { - return bitIndex & SectionMask; - } + static size_t getIndexOfSection(size_t bitIndex) { return bitIndex / SectionBitCount; } + static uint8_t getBitIndexInSection(size_t bitIndex) { return bitIndex & (SectionBitCount - 1); } + static bool isFullyEnabled(uint64_t sectionData) { return sectionData == FullyEnabledSectionMask; } + static bool isFullyDisabled(uint64_t sectionData) { return sectionData == 0; } // Returns a section mask containing a single 1 value in the bit in the section that // corresponds to the specified global bit index, and 0 values in all other bits. - static uint64_t getSectionSetMask(size_t bitIndex) { + static uint64_t getBitPositionSectionMask(size_t bitIndex) { return (uint64_t)1U << ((SectionBitCount - 1) - getBitIndexInSection(bitIndex)); } - // Returns the local index of the first set bit in the section, starting from the highest order bit. - // Clears all bits ahead of the start bit so they will be ignored, then counts the number of zeros - // ahead of the set bit. If there are no set bits, returns the number of bits in a section. - static size_t getIndexOfFirstSetBitInSection(uint64_t section, size_t lclStartBitIndex) { - uint64_t lclStartMask = ~(uint64_t)0; + // Returns the local index of the first enabled bit in the section, starting from the highest order bit. + // Disables all bits ahead of the start bit so they will be ignored, then counts the number of zeros + // ahead of the set bit. If there are no enabled bits, returns the number of bits in a section. + static uint8_t getIndexOfFirstEnabledBitInSection(uint64_t section, uint8_t lclStartBitIndex) { + uint64_t lclStartMask = FullyEnabledSectionMask; lclStartMask >>= lclStartBitIndex; section &= lclStartMask; return section ? 
__builtin_clzll(section) : SectionBitCount; } + static constexpr size_t SectionBitCount = 64; + static constexpr size_t SectionByteCount = SectionBitCount / 8; + static constexpr uint64_t FullyEnabledSectionMask = ~static_cast(0); + uint64_t* _data = nullptr; size_t _bitCount = 0; - size_t _clearedSectionCount = 0; // Tracks where to start looking for bits that are set - size_t _lowestNeverClearedBitIndex = 0; // Tracks the lowest bit that has never been cleared + uint32_t _partiallyDisabledSectionCount = 0; // Tracks where to stop filling when enabling all bits + uint32_t _fullyDisabledSectionCount = 0; // Tracks where to start looking for enabled bits };