Skip to content

Commit

Permalink
improvements and fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Oct 24, 2023
1 parent 401299c commit 1629010
Show file tree
Hide file tree
Showing 7 changed files with 671 additions and 710 deletions.
2 changes: 1 addition & 1 deletion JxlCoder.podspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'JxlCoder'
s.version = '1.2.5'
s.version = '1.2.6'
s.summary = 'JXL coder for iOS and MacOS'
s.description = 'Provides support for JXL files in iOS and MacOS'
s.homepage = 'https://github.com/awxkee/jxl-coder-swift'
Expand Down
12 changes: 10 additions & 2 deletions Sources/jxlc/JxlWorker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ bool DecodeBasicInfo(const uint8_t *jxl, size_t size, size_t *xsize, size_t *ysi
bool EncodeJxlOneshot(const std::vector<uint8_t> &pixels, const uint32_t xsize,
const uint32_t ysize, std::vector<uint8_t> *compressed,
JxlPixelType colorspace, JxlCompressionOption compression_option,
float compression_distance, int effort) {
float compressionDistance, int effort) {
auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
auto runner = JxlThreadParallelRunnerMake(
/*memory_manager=*/nullptr,
Expand Down Expand Up @@ -311,10 +311,18 @@ bool EncodeJxlOneshot(const std::vector<uint8_t> &pixels, const uint32_t xsize,
}

if (JXL_ENC_SUCCESS !=
JxlEncoderSetFrameDistance(frameSettings, compression_distance)) {
JxlEncoderSetFrameDistance(frameSettings, compressionDistance)) {
return false;
}

if (colorspace == rgba) {
if (JXL_ENC_SUCCESS !=
JxlEncoderSetExtraChannelDistance(frameSettings, 0, compressionDistance)) {
return false;
}
}


if (JxlEncoderFrameSettingsSetOption(frameSettings,
JXL_ENC_FRAME_SETTING_EFFORT, effort) != JXL_ENC_SUCCESS) {
return false;
Expand Down
4 changes: 2 additions & 2 deletions Sources/jxlc/RgbaScaler.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
#import "XScaler.hpp"

typedef NS_ENUM(NSInteger, JxlIPixelFormat) {
kU8 NS_SWIFT_NAME(Uniform8),
kF16 NS_SWIFT_NAME(Float16)
kU8 NS_SWIFT_NAME(uniform8),
kF16 NS_SWIFT_NAME(float16)
};

@interface RgbaScaler : NSObject
Expand Down
134 changes: 0 additions & 134 deletions Sources/jxlc/RgbaScaler.mm
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@

@implementation RgbaScaler

//static bool API_AVAILABLE(macos(13.0), ios(16.0), watchos(9.0), tvos(16.0))
static bool scaleF16iOS16(std::vector<uint8_t> &src, int components, int width, int height, int newWidth, int newHeight, XSampler sampler) {
// if (components != 4) {
std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);

scaleImageFloat16(reinterpret_cast<uint16_t*>(src.data()),
Expand All @@ -44,109 +42,9 @@ static bool scaleF16iOS16(std::vector<uint8_t> &src, int components, int width,

src = dst;
return true;
// }
//
// std::vector<uint8_t> dst(4 * sizeof(uint16_t) * newWidth * newHeight);
//
// vImage_Buffer srcBuffer = {
// .data = (void*)src.data(),
// .width = static_cast<vImagePixelCount>(width),
// .height = static_cast<vImagePixelCount>(height),
// .rowBytes = width * 4 * sizeof(uint16_t)
// };
//
// vImage_Buffer dstBuffer = {
// .data = dst.data(),
// .width = static_cast<vImagePixelCount>(newWidth),
// .height = static_cast<vImagePixelCount>(newHeight),
// .rowBytes = newWidth * 4 * sizeof(uint16_t)
// };
//
// auto result = vImageScale_ARGB16F(&srcBuffer, &dstBuffer, nullptr, kvImageUseFP16Accumulator);
// if (result != kvImageNoError) {
// return false;
// }
// src = dst;
// return true;
}

// Resizes a buffer of 16-bit float samples by round-tripping through 16-bit unsigned
// integers: the data is converted 16F -> 16U in place, resized into a freshly
// allocated buffer, and finally converted 16U -> 16F in place.
//
// src        - pixel storage; replaced by the resized data once the resize step succeeds.
// components - samples per pixel; exactly 4 selects the vImage scaler, anything else
//              goes through scaleImageU16.
// width, height       - dimensions of the data currently held in src.
// newWidth, newHeight - requested output dimensions.
// sampler    - forwarded to scaleImageU16 only; the 4-component vImage path ignores it.
//
// Returns true on success and false as soon as any vImage call reports an error.
// NOTE(review): a failure after the first conversion returns with src already holding
// 16U data — confirm callers discard src when false is returned.
static bool scaleF16iOSPre16(std::vector<uint8_t> &src, int components, int width, int height, int newWidth, int newHeight, XSampler sampler) {

// Step 1: convert half floats to 16-bit unsigned in place. The buffer is described as
// a single plane that is width * components samples wide, and source and destination
// point at the same storage.
vImage_Buffer srcBuffer = {
.data = (void*)src.data(),
.width = static_cast<vImagePixelCount>(width * components),
.height = static_cast<vImagePixelCount>(height),
.rowBytes = width * components * sizeof(uint16_t)
};

vImage_Buffer dstBuffer = {
.data = src.data(),
.width = static_cast<vImagePixelCount>(width * components),
.height = static_cast<vImagePixelCount>(height),
.rowBytes = width * components * sizeof(uint16_t)
};
vImage_Error vEerror = vImageConvert_16Fto16U(&srcBuffer, &dstBuffer, kvImageNoFlags);
if (vEerror != kvImageNoError) {
return false;
}

// Step 2: resize the 16U data into a new vector and swap it into src.
if (components == 4) {

std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);

// These locals shadow the outer srcBuffer/dstBuffer and describe the image in pixels
// (4 samples each) rather than in individual samples.
vImage_Buffer srcBuffer = {
.data = (void*)src.data(),
.width = static_cast<vImagePixelCount>(width),
.height = static_cast<vImagePixelCount>(height),
.rowBytes = width * 4 * sizeof(uint16_t)
};

vImage_Buffer dstBuffer = {
.data = dst.data(),
.width = static_cast<vImagePixelCount>(newWidth),
.height = static_cast<vImagePixelCount>(newHeight),
.rowBytes = newWidth * 4 * sizeof(uint16_t)
};

auto result = vImageScale_ARGB16U(&srcBuffer, &dstBuffer, nullptr, kvImageNoFlags);
if (result != kvImageNoError) {
return false;
}
src = dst;
} else {
// Any other component count uses the project scaler with the requested sampler.
std::vector<uint8_t> dst(components * sizeof(uint16_t) * newWidth * newHeight);

scaleImageU16(reinterpret_cast<uint16_t*>(src.data()),
components * sizeof(uint16_t) * width, width, height, reinterpret_cast<uint16_t*>(dst.data()),
components * sizeof(uint16_t) * newWidth, newWidth, newHeight, components, 16, sampler);
src = dst;
}

// Step 3: convert the resized 16U samples back to half floats, again in place.
{
vImage_Buffer srcBuffer = {
.data = (void*)src.data(),
.width = static_cast<vImagePixelCount>(newWidth * components),
.height = static_cast<vImagePixelCount>(newHeight),
.rowBytes = newWidth * components * sizeof(uint16_t)
};

vImage_Buffer dstBuffer = {
.data = (void*)src.data(),
.width = static_cast<vImagePixelCount>(newWidth * components),
.height = static_cast<vImagePixelCount>(newHeight),
.rowBytes = newWidth * components * sizeof(uint16_t)
};
// NOTE(review): `scale` is computed but never used in this block.
const float scale = 1.0f / float((1 << 16) - 1);
vImage_Error vEerror = vImageConvert_16Uto16F(&srcBuffer, &dstBuffer, kvImageNoFlags);
if (vEerror != kvImageNoError) {
return false;
}
}
return true;
}

+ (bool)scaleRGB8:(std::vector<uint8_t> &)src components:(int)components width:(int)width height:(int)height newWidth:(int)newWidth newHeight:(int)newHeight sampler:(XSampler)sampler {
// if (components != 4) {
std::vector<uint8_t> dst(components * sizeof(uint8_t) * newWidth * newHeight);

scaleImageU8(reinterpret_cast<uint8_t*>(src.data()),
Expand All @@ -155,36 +53,10 @@ + (bool)scaleRGB8:(std::vector<uint8_t> &)src components:(int)components width:(
src = dst;

return true;
// }
//
// std::vector<uint8_t> dst(4 * sizeof(uint8_t) * newWidth * newHeight);
//
// vImage_Buffer srcBuffer = {
// .data = (void*)src.data(),
// .width = static_cast<vImagePixelCount>(width),
// .height = static_cast<vImagePixelCount>(height),
// .rowBytes = width * 4 * sizeof(uint8_t)
// };
//
// vImage_Buffer dstBuffer = {
// .data = dst.data(),
// .width = static_cast<vImagePixelCount>(newWidth),
// .height = static_cast<vImagePixelCount>(newHeight),
// .rowBytes = newWidth * 4 * sizeof(uint8_t)
// };
//
// auto result = vImageScale_ARGB8888(&srcBuffer, &dstBuffer, nullptr, kvImageNoFlags);
// if (result != kvImageNoError) {
// return false;
// }
//
// src = dst;
// return true;
}

+(bool) scaleData:(std::vector<uint8_t>&)src width:(int)width height:(int)height newWidth:(int)newWidth newHeight:(int)newHeight components:(int)components pixelFormat:(JxlIPixelFormat)pixelFormat sampler:(XSampler)sampler {

//Flipping not supported
if (newWidth < 0 || newHeight < 0) {
return false;
}
Expand All @@ -194,14 +66,8 @@ +(bool) scaleData:(std::vector<uint8_t>&)src width:(int)width height:(int)height
return [self scaleRGB8:src components:components width:width height:height newWidth:newWidth newHeight:newHeight sampler:sampler];
} else if (pixelFormat == kF16) {
return scaleF16iOS16(src, components, width, height, newWidth, newHeight, sampler);
// if (@available(iOS 16.0, macOS 13.0, *)) {
// return scaleF16iOS16(src, components, width, height, newWidth, newHeight, sampler);
// } else {
// return scaleF16iOSPre16(src, components, width, height, newWidth, newHeight, sampler);
// }
}
} catch (const std::bad_alloc& e) {
// Memory allocation has failed
return false;
}
return false;
Expand Down
7 changes: 6 additions & 1 deletion Sources/jxlc/ScaleInterpolator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
using namespace half_float;
using namespace std;

#if defined(__clang__)
#pragma clang fp contract(fast) exceptions(ignore) reassociate(on)
#endif

// P Found using maxima
//
// y(x) := 4 * x * (%pi-x) / (%pi^2) ;
Expand Down Expand Up @@ -206,7 +210,8 @@ inline T sinc(T x) {
template <typename T>
inline T LanczosWindow(T x, const T a) {
if (abs(x) < a) {
return sinc(T(M_PI) * x) * sinc(T(M_PI) * x / a);
T rv = T(M_PI) * x;
return sinc(rv) * sinc(rv / a);
}
return T(0.0);
}
Expand Down
88 changes: 67 additions & 21 deletions Sources/jxlc/ScaleInterpolator.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,68 @@ T CubicBSpline(T t);
#if __arm64__
#include <arm_neon.h>

inline float32x4_t Cos(const float32x4_t d) {
__attribute__((always_inline))
static inline float32x4_t Cos(const float32x4_t d) {

constexpr float C0 = 0.99940307;
constexpr float C1 = -0.49558072;
constexpr float C2 = 0.03679168;
const float32x4_t C0 = vdupq_n_f32(0.99940307);
const float32x4_t C1 = vdupq_n_f32(-0.49558072);
const float32x4_t C2 = vdupq_n_f32(0.03679168);
constexpr float C3 = -0.00434102;
float32x4_t x2 = vmulq_f32(d, d);
return vmlaq_f32(vdupq_n_f32(C0), x2, vmlaq_f32(vdupq_n_f32(C1), x2, vmlaq_f32(vdupq_n_f32(C2), x2, vdupq_n_f32(C3))));
return vmlaq_f32(C0, x2, vmlaq_f32(C1, x2, vmlaq_n_f32(C2, x2, C3)));
}

inline float32x4_t CubicInterpolation(const float32x4_t d,
// Lane-wise polynomial sine approximation for four floats.
// A parabola y = (4/pi^2) * v * (pi - v) is evaluated first and then refined as
// y * ((1 - P - Q) + y * (P + y * Q)).
// NOTE(review): the parabola is zero at v = 0 and v = pi, so this presumably targets
// inputs in [0, pi] — confirm how callers reduce their arguments.
__attribute__((always_inline))
static inline float32x4_t FastSin(const float32x4_t v) {
    constexpr float A = 4.0f/(M_PI*M_PI);
    const float32x4_t coeffP = vdupq_n_f32(0.1952403377008734f);
    const float32x4_t coeffQ = vdupq_n_f32(0.01915214119105392f);
    const float32x4_t pi = vdupq_n_f32(M_PI);
    const float32x4_t unit = vdupq_n_f32(1.0f);

    // Parabolic first guess: (A * v) * (pi - v).
    const float32x4_t scaled = vmulq_n_f32(v, A);
    const float32x4_t mirrored = vsubq_f32(pi, v);
    const float32x4_t parabola = vmulq_f32(scaled, mirrored);

    // Correction polynomial, innermost term first (vmlaq_f32(a, b, c) = a + b * c).
    const float32x4_t base = vsubq_f32(vsubq_f32(unit, coeffP), coeffQ);
    const float32x4_t inner = vmlaq_f32(coeffP, parabola, coeffQ);
    const float32x4_t outer = vmlaq_f32(base, parabola, inner);
    return vmulq_f32(parabola, outer);
}

// Lane-wise unnormalised sinc: sin(v) / v, with lanes where v == 0 set to 1.
//
// The previous body built `ones` from 0, discarded the computed ratio and returned
// `v` itself; it also fed the raw input (including zeros) to the reciprocal.
// Here zero lanes are replaced by 1 before the division so the reciprocal stays
// finite, and those lanes are overwritten with 1 afterwards.
//
// The division uses vrecpeq_f32, which is a reciprocal estimate, so the result is
// approximate even where FastSin is accurate.
// NOTE(review): FastSin looks like it is meant for arguments in [0, pi] — confirm
// that callers only pass values in the range it supports.
__attribute__((always_inline))
static inline float32x4_t Sinc(const float32x4_t v) {
    const float32x4_t zeros = vdupq_n_f32(0);
    const float32x4_t ones = vdupq_n_f32(1);
    // All-ones lanes where the input is exactly zero.
    const uint32x4_t isZero = vceqq_f32(v, zeros);
    // Substitute 1 for zero inputs so the reciprocal below never sees 0.
    const float32x4_t safe = vbslq_f32(isZero, ones, v);
    const float32x4_t ratio = vmulq_f32(FastSin(safe), vrecpeq_f32(safe));
    // sinc(0) is defined as 1; every other lane keeps sin(v) / v.
    return vbslq_f32(isZero, ones, ratio);
}

// Lane-wise Lanczos window: sinc(pi * v) * sinc(pi * v / a) where |v| < a, else 0.
// This is the same expression the scalar template LanczosWindow evaluates.
//
// v - four sample offsets.
// a - window size; lanes with |v| >= a produce 0.
//
// Fixes relative to the previous body:
//  * the second sinc now receives pi * v / a (it was v / a, missing the pi factor);
//  * the final select keeps the computed value inside the window and zero outside
//    (the operands were swapped, zeroing the window and keeping everything else);
//  * 1 / a is computed exactly in scalar code instead of with a reciprocal estimate.
__attribute__((always_inline))
static inline float32x4_t LanczosWindow(const float32x4_t v, const float a) {
    const float32x4_t fullLength = vdupq_n_f32(a);
    const float32x4_t invLength = vdupq_n_f32(1.0f / a);
    const float32x4_t zeros = vdupq_n_f32(0);
    // All-ones lanes where the offset lies strictly inside the window.
    const uint32x4_t inside = vcltq_f32(vabsq_f32(v), fullLength);
    const float32x4_t rv = vmulq_n_f32(v, M_PI);
    const float32x4_t x = vmulq_f32(Sinc(rv), Sinc(vmulq_f32(rv, invLength)));
    // vbslq_f32(mask, a, b) yields a where mask is set and b elsewhere.
    return vbslq_f32(inside, x, zeros);
}

// Lane-wise Hann-style window: cos^2(pi * d / length) / length where
// |d| < length / 2, else 0.
//
// d      - four sample offsets.
// length - window length; lanes with |d| >= length / 2 produce 0.
//
// Fixes relative to the previous body:
//  * the final select keeps the computed value inside the window and zero outside
//    (the operands were swapped, so the window itself was zeroed);
//  * the variable holding 1 / length is named for what it contains and is computed
//    exactly in scalar code instead of with a reciprocal estimate.
// The 1 / length scaling of the result is kept as it was.
__attribute__((always_inline))
static inline float32x4_t HannWindow(const float32x4_t d, const float length) {
    const float32x4_t invLength = vdupq_n_f32(1.0f / length);
    const float32x4_t halfLength = vdupq_n_f32(length / 2);
    const float32x4_t zeros = vdupq_n_f32(0);
    // All-ones lanes where the offset lies strictly inside the window.
    const uint32x4_t inside = vcltq_f32(vabsq_f32(d), halfLength);
    float32x4_t cx = Cos(vmulq_f32(vmulq_n_f32(d, M_PI), invLength));
    cx = vmulq_f32(vmulq_f32(cx, cx), invLength);
    // vbslq_f32(mask, a, b) yields a where mask is set and b elsewhere.
    return vbslq_f32(inside, cx, zeros);
}

__attribute__((always_inline))
static inline float32x4_t CubicInterpolation(const float32x4_t d,
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3,
const float C, const float B) {

Expand All @@ -92,17 +143,8 @@ inline float32x4_t CubicInterpolation(const float32x4_t d,
return result;
}

// Lane-wise Hann-style window: cos^2(pi * |d| / length) * (length / 2) where
// |d| < length / 2, else 0.
//
// d      - four sample offsets.
// length - window length; lanes with |d| >= length / 2 produce 0.
//
// The final select previously had its operands swapped, which zeroed the lanes
// inside the window and kept the value outside it; the scaling is unchanged.
inline float32x4_t HannWindow(const float32x4_t d, const float length) {
    float32x4_t x = vabsq_f32(d);
    // All-ones lanes where the offset lies strictly inside the window.
    const uint32x4_t inside = vcltq_f32(x, vdupq_n_f32(length / 2));

    x = Cos(vdivq_f32(vmulq_f32(vdupq_n_f32(M_PI), x), vdupq_n_f32(length)));
    x = vmulq_n_f32(vmulq_f32(x, x), length / 2);
    // vbslq_f32(mask, a, b) yields a where mask is set and b elsewhere.
    return vbslq_f32(inside, x, vdupq_n_f32(0));
}
}

inline float32x4_t CatmullRom(const float32x4_t d,
__attribute__((always_inline))
static inline float32x4_t CatmullRom(const float32x4_t d,
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {

float32x4_t x = vabsq_f32(d);
Expand All @@ -119,7 +161,8 @@ inline float32x4_t CatmullRom(const float32x4_t d,
return result;
}

inline float32x4_t SimpleCubic(const float32x4_t d,
__attribute__((always_inline))
static inline float32x4_t SimpleCubic(const float32x4_t d,
const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {

float32x4_t duplet = vmulq_f32(d, d);
Expand All @@ -139,17 +182,20 @@ inline float32x4_t SimpleCubic(const float32x4_t d,
return result;
}

inline float32x4_t MitchellNetravali(float32x4_t d,
// Mitchell-Netravali variant: forwards to CubicInterpolation with C = 1/3 and B = 1/3.
__attribute__((always_inline))
static inline float32x4_t MitchellNetravali(float32x4_t d,
    float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
    constexpr float coeffC = 1.0f/3.0f;
    constexpr float coeffB = 1.0f/3.0f;
    return CubicInterpolation(d, p0, p1, p2, p3, coeffC, coeffB);
}

inline float32x4_t CubicHermite(const float32x4_t d,
// Cubic Hermite variant: forwards to CubicInterpolation with C = 0 and B = 0.
__attribute__((always_inline))
static inline float32x4_t CubicHermite(const float32x4_t d,
    const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
    constexpr float coeffC = 0.0f;
    constexpr float coeffB = 0.0f;
    return CubicInterpolation(d, p0, p1, p2, p3, coeffC, coeffB);
}

inline float32x4_t CubicBSpline(const float32x4_t d,
// Cubic B-spline variant: forwards to CubicInterpolation with C = 0 and B = 1.
__attribute__((always_inline))
static inline float32x4_t CubicBSpline(const float32x4_t d,
    const float32x4_t p0, const float32x4_t p1, const float32x4_t p2, const float32x4_t p3) {
    constexpr float coeffC = 0.0f;
    constexpr float coeffB = 1.0f;
    return CubicInterpolation(d, p0, p1, p2, p3, coeffC, coeffB);
}
Expand Down
Loading

0 comments on commit 1629010

Please sign in to comment.