diff --git a/jxl/src/color/tf.rs b/jxl/src/color/tf.rs index e158c92..7bc6a9c 100644 --- a/jxl/src/color/tf.rs +++ b/jxl/src/color/tf.rs @@ -46,6 +46,10 @@ const PQ_INV_EOTF_P_SMALL: [f32; 5] = [ const PQ_INV_EOTF_Q_SMALL: [f32; 5] = [3.371868e1, 1.477719e3, 1.608477e4, -4.389884e4, -2.072546e5]; +const HLG_A: f64 = 0.17883277; +const HLG_B: f64 = 1.0 - 4.0 * HLG_A; +const HLG_C: f64 = 0.5599107295; + /// Converts the linear samples with the sRGB transfer curve. // Fast linear to sRGB conversion, ported from libjxl. Max error ~1.7e-4 pub fn linear_to_srgb_fast(samples: &mut [f32]) { @@ -242,6 +246,144 @@ pub fn pq_to_linear(intensity_target: f32, samples: &mut [f32]) { } } +fn hlg_ootf_inner_precise(exp: f64, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) { + if exp.abs() < 0.1 { + return; + } + + let lr = lr as f64; + let lg = lg as f64; + let lb = lb as f64; + for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) { + let dr = *r as f64; + let dg = *g as f64; + let db = *b as f64; + let mixed = dr.mul_add(lr, dg.mul_add(lg, db * lb)); + let mult = mixed.powf(exp); + *r = (dr * mult) as f32; + *g = (dg * mult) as f32; + *b = (db * mult) as f32; + } +} + +fn hlg_ootf_inner(exp: f32, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) { + if exp.abs() < 0.1 { + return; + } + + for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) { + let mixed = r.mul_add(lr, g.mul_add(lg, *b * lb)); + let mult = crate::util::fast_powf(mixed, exp); + *r *= mult; + *g *= mult; + *b *= mult; + } +} + +/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF. +/// +/// This version uses double precision arithmetic internally. +pub fn hlg_scene_to_display_precise( + intensity_display: f32, + luminance_rgb: [f32; 3], + samples_rgb: [&mut [f32]; 3], +) { + let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2()); + let gamma_sub_one = system_gamma - 1.0; + hlg_ootf_inner_precise(gamma_sub_one, luminance_rgb, samples_rgb); +} + +/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse +/// OOTF. +/// +/// This version uses double precision arithmetic internally. +pub fn hlg_display_to_scene_precise( + intensity_display: f32, + luminance_rgb: [f32; 3], + samples_rgb: [&mut [f32]; 3], +) { + let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2()); + let one_sub_gamma = 1.0 - system_gamma; + hlg_ootf_inner_precise(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb); +} + +/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF. +/// +/// This version uses `fast_powf` to compute power function. +pub fn hlg_scene_to_display( + intensity_display: f32, + luminance_rgb: [f32; 3], + samples_rgb: [&mut [f32]; 3], +) { + let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2()); + let gamma_sub_one = system_gamma - 1.0; + hlg_ootf_inner(gamma_sub_one, luminance_rgb, samples_rgb); +} + +/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse +/// OOTF. +/// +/// This version uses `fast_powf` to compute power function. +pub fn hlg_display_to_scene( + intensity_display: f32, + luminance_rgb: [f32; 3], + samples_rgb: [&mut [f32]; 3], +) { + let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2()); + let one_sub_gamma = 1.0 - system_gamma; + hlg_ootf_inner(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb); +} + +/// Converts scene-referred linear sample to HLG signal. +/// +/// This version uses double precision arithmetic internally. +pub fn scene_to_hlg_precise(samples: &mut [f32]) { + for s in samples { + let a = s.abs() as f64; + let y = if a <= 1.0 / 12.0 { + (3.0 * a).sqrt() + } else { + // TODO(tirr-c): maybe use mul_add? + HLG_A * (12.0 * a - HLG_B).ln() + HLG_C + }; + *s = (y as f32).copysign(*s); + } +} + +/// Converts HLG signal to scene-referred linear sample. +/// +/// This version uses double precision arithmetic internally. +pub fn hlg_to_scene_precise(samples: &mut [f32]) { + for s in samples { + let a = s.abs() as f64; + let y = if a <= 0.5 { + a * a * 3.0 + } else { + (((a - HLG_C) / HLG_A).exp() + HLG_B) / 12.0 + }; + *s = (y as f32).copysign(*s); + } +} + +/// Converts scene-referred linear sample to HLG signal. +/// +/// This version uses `fast_log2f` to apply logarithmic function. +// Max error: ~5e-7 +pub fn scene_to_hlg(samples: &mut [f32]) { + for s in samples { + let a = s.abs(); + let y = if a <= 1.0 / 12.0 { + (3.0 * a).sqrt() + } else { + // TODO(tirr-c): maybe use mul_add? + let log = crate::util::fast_log2f(12.0 * a - HLG_B as f32); + // log2 x = ln x / ln 2, therefore ln x = (ln 2)(log2 x) + (HLG_A * std::f64::consts::LN_2) as f32 * log + HLG_C as f32 + }; + *s = y.copysign(*s); + } +} + #[cfg(test)] mod test { use test_log::test; @@ -270,7 +412,7 @@ mod test { #[test] fn srgb_roundtrip_arb() { arbtest::arbtest(|u| { - let samples: Vec = arb_samples(u)?; + let samples = arb_samples(u)?; let mut output = samples.clone(); linear_to_srgb(&mut output); @@ -283,7 +425,7 @@ mod test { #[test] fn bt709_roundtrip_arb() { arbtest::arbtest(|u| { - let samples: Vec = arb_samples(u)?; + let samples = arb_samples(u)?; let mut output = samples.clone(); linear_to_bt709(&mut output); @@ -296,7 +438,7 @@ mod test { #[test] fn linear_to_srgb_fast_arb() { arbtest::arbtest(|u| { - let mut samples: Vec = arb_samples(u)?; + let mut samples = arb_samples(u)?; let mut fast = samples.clone(); linear_to_srgb(&mut samples); @@ -310,7 +452,7 @@ mod test { fn linear_to_pq_arb() { arbtest::arbtest(|u| { let intensity_target = u.int_in_range(9900..=10100)? as f32; - let mut samples: Vec = arb_samples(u)?; + let mut samples = arb_samples(u)?; let mut precise = samples.clone(); linear_to_pq(intensity_target, &mut samples); @@ -325,7 +467,7 @@ mod test { fn pq_to_linear_arb() { arbtest::arbtest(|u| { let intensity_target = u.int_in_range(9900..=10100)? as f32; - let mut samples: Vec = arb_samples(u)?; + let mut samples = arb_samples(u)?; let mut precise = samples.clone(); pq_to_linear(intensity_target, &mut samples); @@ -334,4 +476,87 @@ mod test { Ok(()) }); } + + #[test] + fn hlg_ootf_arb() { + arbtest::arbtest(|u| { + let intensity_target = u.int_in_range(900..=1100)? as f32; + + let lr = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0; + let lb = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0; + let lg = 1.0 - lr - lb; + let luminance_rgb = [lr, lg, lb]; + + let r = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32; + let g = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32; + let b = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32; + + let mut fast_r = r; + let mut fast_g = g; + let mut fast_b = b; + let fast = [ + std::slice::from_mut(&mut fast_r), + std::slice::from_mut(&mut fast_g), + std::slice::from_mut(&mut fast_b), + ]; + hlg_display_to_scene(intensity_target, luminance_rgb, fast); + + let mut precise_r = r; + let mut precise_g = g; + let mut precise_b = b; + let precise = [ + std::slice::from_mut(&mut precise_r), + std::slice::from_mut(&mut precise_g), + std::slice::from_mut(&mut precise_b), + ]; + hlg_display_to_scene(intensity_target, luminance_rgb, precise); + + assert_all_almost_eq!( + &[fast_r, fast_g, fast_b], + &[precise_r, precise_g, precise_b], + 7.2e-7 + ); + + let mut fast_r = r; + let mut fast_g = g; + let mut fast_b = b; + let fast = [ + std::slice::from_mut(&mut fast_r), + std::slice::from_mut(&mut fast_g), + std::slice::from_mut(&mut fast_b), + ]; + hlg_scene_to_display(intensity_target, luminance_rgb, fast); + + let mut precise_r = r; + let mut precise_g = g; + let mut precise_b = b; + let precise = [ + std::slice::from_mut(&mut precise_r), + std::slice::from_mut(&mut precise_g), + std::slice::from_mut(&mut precise_b), + ]; + hlg_scene_to_display(intensity_target, luminance_rgb, precise); + + assert_all_almost_eq!( + &[fast_r, fast_g, fast_b], + &[precise_r, precise_g, precise_b], + 7.2e-7 + ); + + Ok(()) + }); + } + + #[test] + fn scene_to_hlg_arb() { + arbtest::arbtest(|u| { + let mut samples = arb_samples(u)?; + let mut precise = samples.clone(); + + scene_to_hlg(&mut samples); + scene_to_hlg_precise(&mut precise); + assert_all_almost_eq!(&samples, &precise, 5e-7); + Ok(()) + }); + } } diff --git a/jxl/src/util/fast_math.rs b/jxl/src/util/fast_math.rs index f964b96..1ea5751 100644 --- a/jxl/src/util/fast_math.rs +++ b/jxl/src/util/fast_math.rs @@ -11,7 +11,7 @@ const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1]; const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1]; #[inline] -fn fast_pow2f(x: f32) -> f32 { +pub fn fast_pow2f(x: f32) -> f32 { let x_floor = x.floor(); let exp = f32::from_bits(((x_floor as i32 + 127) as u32) << 23); let frac = x - x_floor; @@ -40,11 +40,11 @@ const LOG2F_Q: [f32; 3] = [ ]; #[inline] -fn fast_log2f(x: f32) -> f32 { +pub fn fast_log2f(x: f32) -> f32 { let x_bits = x.to_bits() as i32; - let exp_bits = x_bits - 0x3f2aaaab; + let exp_bits = x_bits.wrapping_sub(0x3f2aaaab); let exp_shifted = exp_bits >> 23; - let mantissa = f32::from_bits((x_bits - (exp_shifted << 23)) as u32); + let mantissa = f32::from_bits((x_bits.wrapping_sub(exp_shifted << 23)) as u32); let exp_val = exp_shifted as f32; let x = mantissa - 1.0;