Skip to content
This repository has been archived by the owner on Apr 2, 2021. It is now read-only.

Commit

Permalink
Non aligned SSE fix, cellular optimisation
Browse files Browse the repository at this point in the history
  • Loading branch information
Auburn committed Jul 6, 2016
1 parent 718165b commit 24a1822
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 29 deletions.
9 changes: 4 additions & 5 deletions FastNoiseSIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ int FastNoiseSIMD::s_currentSIMDLevel = -1;
// Thin wrapper around the __cpuidex intrinsic: forwards `out` and `x`
// unchanged and always passes 0 as the third argument.
// NOTE(review): presumably `x` is the CPUID leaf, 0 the sub-leaf, and `out`
// receives the four result registers (EAX, EBX, ECX, EDX) — confirm against
// the compiler's intrinsic documentation.
void cpuid(int32_t out[4], int32_t x) {
__cpuidex(out, x, 0);
}
__int64 xgetbv(unsigned int x) {
uint64_t xgetbv(unsigned int x) {
return _xgetbv(x);
}
#else
Expand Down Expand Up @@ -107,7 +107,7 @@ int GetFastestSIMD()

if (osAVXSuport && cpuAVXSuport)
{
__int64 xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK);
if ((xcrFeatureMask & 0x6) != 0x6)
return FN_SSE41;
}
Expand Down Expand Up @@ -167,9 +167,9 @@ int FastNoiseSIMD::GetSIMDLevel()

void FastNoiseSIMD::FreeNoiseSet(float* floatArray)
{
#ifdef FN_ALIGNED_SETS
GetSIMDLevel();

#ifdef FN_ALIGNED_SETS
if (s_currentSIMDLevel > FN_NO_SIMD_FALLBACK)
#ifdef _WIN32
_aligned_free(floatArray);
Expand Down Expand Up @@ -267,7 +267,6 @@ void FastNoiseSIMD::FillSamplingVectorSet(FastNoiseVectorSet* vectorSet, int sam

int sampleSize = 1 << sampleScale;
int sampleMask = sampleSize - 1;
float scaleModifier = float(sampleSize);

int xSizeSample = xSize;
int ySizeSample = ySize;
Expand Down Expand Up @@ -431,5 +430,5 @@ void FastNoiseVectorSet::SetSize(int _size)

xSet = FastNoiseSIMD::GetEmptySet(alignedSize * 3);
ySet = xSet + alignedSize;
zSet = xSet + alignedSize * 2;
zSet = ySet + alignedSize;
}
56 changes: 33 additions & 23 deletions FastNoiseSIMD_internal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,11 @@ static SIMDf SIMDf_NUM(1);

#ifdef FN_ALIGNED_SETS
#define SIMDf_STORE(p,a) _mm256_store_ps(p,a)
#define SIMDf_LOAD(p) _mm256_load_ps(p)
#else
#define SIMDf_STORE(p,a) _mm256_storeu_ps(p,a)
#define SIMDf_LOAD(p) _mm256_loadu_ps(p)
#endif
#define SIMDf_LOAD(p) _mm256_load_ps(p)

#define SIMDf_ADD(a,b) _mm256_add_ps(a,b)
#define SIMDf_SUB(a,b) _mm256_sub_ps(a,b)
Expand Down Expand Up @@ -178,10 +179,11 @@ static SIMDf SIMDf_NUM(1);

#ifdef FN_ALIGNED_SETS
#define SIMDf_STORE(p,a) _mm_store_ps(p,a)
#define SIMDf_LOAD(p) _mm_load_ps(p)
#else
#define SIMDf_STORE(p,a) _mm_storeu_ps(p,a)
#define SIMDf_LOAD(p) _mm_loadu_ps(p)
#endif
#define SIMDf_LOAD(p) _mm_load_ps(p)

#define SIMDf_ADD(a,b) _mm_add_ps(a,b)
#define SIMDf_SUB(a,b) _mm_sub_ps(a,b)
Expand Down Expand Up @@ -1389,8 +1391,6 @@ void SIMD_LEVEL_CLASS::FillCellularSet(float* noiseSet, FastNoiseVectorSet* vect

SIMDi seedV = SIMDi_SET(m_seed);
SIMDf freqV = SIMDf_SET(m_frequency);
SIMDf lacunarityV = SIMDf_SET(m_lacunarity);
SIMDf gainV = SIMDf_SET(m_gain);
SIMDf xOffsetV = SIMDf_SET(xOffset*m_frequency);
SIMDf yOffsetV = SIMDf_SET(yOffset*m_frequency);
SIMDf zOffsetV = SIMDf_SET(zOffset*m_frequency);
Expand Down Expand Up @@ -1443,9 +1443,9 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
int sampleMask = sampleSize - 1;
float scaleModifier = float(sampleSize);

int xOffset = sampleSize - (xStart & sampleMask) & sampleMask;
int yOffset = sampleSize - (yStart & sampleMask) & sampleMask;
int zOffset = sampleSize - (zStart & sampleMask) & sampleMask;
int xOffset = (sampleSize - (xStart & sampleMask)) & sampleMask;
int yOffset = (sampleSize - (yStart & sampleMask)) & sampleMask;
int zOffset = (sampleSize - (zStart & sampleMask)) & sampleMask;

int xSizeSample = xSize + xOffset;
int ySizeSample = ySize + yOffset;
Expand Down Expand Up @@ -1487,16 +1487,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, int xStart, int ySta
for (int y = 0; y < ySizeSample - 1; y++)
{
SIMDi zSIMD = zBase;

SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
for (int z = 0; z < zSizeSample - 1; z++)
{
SIMDf c000 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z)]);
SIMDf c100 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z)]);
SIMDf c010 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z)]);
SIMDf c110 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z)]);
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
SIMDf c000 = c001;
SIMDf c100 = c101;
SIMDf c010 = c011;
SIMDf c110 = c111;
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);

SIMDi localCountSIMD = SIMDi_NUM(incremental);

Expand Down Expand Up @@ -1605,16 +1610,21 @@ void SIMD_LEVEL_CLASS::FillSampledNoiseSet(float* noiseSet, FastNoiseVectorSet*
for (int y = 0; y < ySizeSample - 1; y++)
{
SIMDi zSIMD = SIMDi_SET_ZERO();

SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, 0)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, 0)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, 0)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, 0)]);
for (int z = 0; z < zSizeSample - 1; z++)
{
SIMDf c000 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z)]);
SIMDf c100 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z)]);
SIMDf c010 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z)]);
SIMDf c110 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z)]);
SIMDf c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
SIMDf c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
SIMDf c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
SIMDf c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);
SIMDf c000 = c001;
SIMDf c100 = c101;
SIMDf c010 = c011;
SIMDf c110 = c111;
c001 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y, z + 1)]);
c101 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y, z + 1)]);
c011 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x, y + 1, z + 1)]);
c111 = SIMDf_SET(noiseSetSample[SAMPLE_INDEX(x + 1, y + 1, z + 1)]);

SIMDi localCountSIMD = SIMDi_NUM(incremental);

Expand Down
2 changes: 1 addition & 1 deletion FastNoiseSIMD_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
//

#ifndef SIMD_LEVEL_H
#error Don't include this file without defining SIMD_LEVEL_H
#error Dont include this file without defining SIMD_LEVEL_H
#else
#define FASTNOISE_SIMD_CLASS2(x) FastNoiseSIMD_L##x
#define FASTNOISE_SIMD_CLASS(level) FASTNOISE_SIMD_CLASS2(level)
Expand Down

0 comments on commit 24a1822

Please sign in to comment.