Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement HLG transfer functions #86

Merged
merged 1 commit into from
Jan 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 263 additions & 38 deletions jxl/src/color/tf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,6 @@ const SRGB_POWTABLE_LOWER: [u8; 16] = [
0x00, 0xb7, 0x04, 0x0d, 0xcb, 0xe7, 0x41, 0x68, 0x51, 0xd1, 0xeb, 0xf2, 0x00, 0xb7, 0x04, 0x0d,
];

const PQ_M1: f64 = 2610.0 / 16384.0;
const PQ_M2: f64 = (2523.0 / 4096.0) * 128.0;
const PQ_C1: f64 = 3424.0 / 4096.0;
const PQ_C2: f64 = (2413.0 / 4096.0) * 32.0;
const PQ_C3: f64 = (2392.0 / 4096.0) * 32.0;

const PQ_EOTF_P: [f32; 5] = [
2.6297566e-4,
-6.235531e-3,
7.386023e-1,
2.6455317,
5.500349e-1,
];
const PQ_EOTF_Q: [f32; 5] = [
4.213501e2,
-4.2873682e2,
1.7436467e2,
-3.3907887e1,
2.6771877,
];

const PQ_INV_EOTF_P: [f32; 5] = [1.351392e-2, -1.095778, 5.522776e1, 1.492516e2, 4.838434e1];
const PQ_INV_EOTF_Q: [f32; 5] = [1.012416, 2.016708e1, 9.26371e1, 1.120607e2, 2.590418e1];
const PQ_INV_EOTF_P_SMALL: [f32; 5] = [
9.863406e-6,
3.881234e-1,
1.352821e2,
6.889862e4,
-2.864824e5,
];
const PQ_INV_EOTF_Q_SMALL: [f32; 5] =
[3.371868e1, 1.477719e3, 1.608477e4, -4.389884e4, -2.072546e5];

/// Converts the linear samples with the sRGB transfer curve.
// Fast linear to sRGB conversion, ported from libjxl. Max error ~1.7e-4
pub fn linear_to_srgb_fast(samples: &mut [f32]) {
Expand Down Expand Up @@ -160,6 +127,12 @@ pub fn bt709_to_linear(samples: &mut [f32]) {
}
}

const PQ_M1: f64 = 2610.0 / 16384.0;
const PQ_M2: f64 = (2523.0 / 4096.0) * 128.0;
const PQ_C1: f64 = 3424.0 / 4096.0;
const PQ_C2: f64 = (2413.0 / 4096.0) * 32.0;
const PQ_C3: f64 = (2392.0 / 4096.0) * 32.0;

/// Converts linear sample to PQ signal using PQ inverse EOTF, where linear sample value of 1.0
/// represents `intensity_target` display nits.
///
Expand Down Expand Up @@ -202,6 +175,33 @@ pub fn pq_to_linear_precise(intensity_target: f32, samples: &mut [f32]) {
}
}

const PQ_EOTF_P: [f32; 5] = [
2.6297566e-4,
-6.235531e-3,
7.386023e-1,
2.6455317,
5.500349e-1,
];
const PQ_EOTF_Q: [f32; 5] = [
4.213501e2,
-4.2873682e2,
1.7436467e2,
-3.3907887e1,
2.6771877,
];

const PQ_INV_EOTF_P: [f32; 5] = [1.351392e-2, -1.095778, 5.522776e1, 1.492516e2, 4.838434e1];
const PQ_INV_EOTF_Q: [f32; 5] = [1.012416, 2.016708e1, 9.26371e1, 1.120607e2, 2.590418e1];
const PQ_INV_EOTF_P_SMALL: [f32; 5] = [
9.863406e-6,
3.881234e-1,
1.352821e2,
6.889862e4,
-2.864824e5,
];
const PQ_INV_EOTF_Q_SMALL: [f32; 5] =
[3.371868e1, 1.477719e3, 1.608477e4, -4.389884e4, -2.072546e5];

/// Converts linear sample to PQ signal using PQ inverse EOTF, where linear sample value of 1.0
/// represents `intensity_target` display nits.
///
Expand Down Expand Up @@ -242,6 +242,148 @@ pub fn pq_to_linear(intensity_target: f32, samples: &mut [f32]) {
}
}

const HLG_A: f64 = 0.17883277;
const HLG_B: f64 = 1.0 - 4.0 * HLG_A;
const HLG_C: f64 = 0.5599107295;

fn hlg_ootf_inner_precise(exp: f64, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) {
if exp.abs() < 0.1 {
return;
}

let lr = lr as f64;
let lg = lg as f64;
let lb = lb as f64;
for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) {
let dr = *r as f64;
let dg = *g as f64;
let db = *b as f64;
let mixed = dr.mul_add(lr, dg.mul_add(lg, db * lb));
let mult = mixed.powf(exp);
*r = (dr * mult) as f32;
*g = (dg * mult) as f32;
*b = (db * mult) as f32;
}
}

fn hlg_ootf_inner(exp: f32, [lr, lg, lb]: [f32; 3], [sr, sg, sb]: [&mut [f32]; 3]) {
if exp.abs() < 0.1 {
return;
}

for ((r, g), b) in std::iter::zip(sr, sg).zip(sb) {
let mixed = r.mul_add(lr, g.mul_add(lg, *b * lb));
let mult = crate::util::fast_powf(mixed, exp);
*r *= mult;
*g *= mult;
*b *= mult;
}
}

/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF.
///
/// This version uses double precision arithmetic internally.
pub fn hlg_scene_to_display_precise(
intensity_display: f32,
luminance_rgb: [f32; 3],
samples_rgb: [&mut [f32]; 3],
) {
let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2());
let gamma_sub_one = system_gamma - 1.0;
hlg_ootf_inner_precise(gamma_sub_one, luminance_rgb, samples_rgb);
}

/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse
/// OOTF.
///
/// This version uses double precision arithmetic internally.
pub fn hlg_display_to_scene_precise(
intensity_display: f32,
luminance_rgb: [f32; 3],
samples_rgb: [&mut [f32]; 3],
) {
let system_gamma = 1.2f64 * 1.111f64.powf((intensity_display as f64 / 1e3).log2());
let one_sub_gamma = 1.0 - system_gamma;
hlg_ootf_inner_precise(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb);
}

/// Converts scene-referred linear samples to display-referred linear samples using HLG OOTF.
///
/// This version uses `fast_powf` to compute power function.
pub fn hlg_scene_to_display(
intensity_display: f32,
luminance_rgb: [f32; 3],
samples_rgb: [&mut [f32]; 3],
) {
let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2());
let gamma_sub_one = system_gamma - 1.0;
hlg_ootf_inner(gamma_sub_one, luminance_rgb, samples_rgb);
}

/// Converts display-referred linear samples to scene-referred linear samples using HLG inverse
/// OOTF.
///
/// This version uses `fast_powf` to compute power function.
pub fn hlg_display_to_scene(
intensity_display: f32,
luminance_rgb: [f32; 3],
samples_rgb: [&mut [f32]; 3],
) {
let system_gamma = 1.2f32 * 1.111f32.powf((intensity_display / 1e3).log2());
let one_sub_gamma = 1.0 - system_gamma;
hlg_ootf_inner(one_sub_gamma / system_gamma, luminance_rgb, samples_rgb);
}

/// Converts scene-referred linear sample to HLG signal.
///
/// This version uses double precision arithmetic internally.
pub fn scene_to_hlg_precise(samples: &mut [f32]) {
for s in samples {
let a = s.abs() as f64;
let y = if a <= 1.0 / 12.0 {
(3.0 * a).sqrt()
} else {
// TODO(tirr-c): maybe use mul_add?
HLG_A * (12.0 * a - HLG_B).ln() + HLG_C
};
*s = (y as f32).copysign(*s);
}
}

/// Converts HLG signal to scene-referred linear sample.
///
/// This version uses double precision arithmetic internally.
pub fn hlg_to_scene_precise(samples: &mut [f32]) {
for s in samples {
let a = s.abs() as f64;
let y = if a <= 0.5 {
a * a * 3.0
} else {
(((a - HLG_C) / HLG_A).exp() + HLG_B) / 12.0
};
*s = (y as f32).copysign(*s);
}
}

/// Converts scene-referred linear sample to HLG signal.
///
/// This version uses `fast_log2f` to apply logarithmic function.
// Max error: ~5e-7
pub fn scene_to_hlg(samples: &mut [f32]) {
for s in samples {
let a = s.abs();
let y = if a <= 1.0 / 12.0 {
(3.0 * a).sqrt()
} else {
// TODO(tirr-c): maybe use mul_add?
let log = crate::util::fast_log2f(12.0 * a - HLG_B as f32);
// log2 x = ln x / ln 2, therefore ln x = (ln 2)(log2 x)
(HLG_A * std::f64::consts::LN_2) as f32 * log + HLG_C as f32
};
*s = y.copysign(*s);
}
}

#[cfg(test)]
mod test {
use test_log::test;
Expand Down Expand Up @@ -270,7 +412,7 @@ mod test {
#[test]
fn srgb_roundtrip_arb() {
arbtest::arbtest(|u| {
let samples: Vec<f32> = arb_samples(u)?;
let samples = arb_samples(u)?;
let mut output = samples.clone();

linear_to_srgb(&mut output);
Expand All @@ -283,7 +425,7 @@ mod test {
#[test]
fn bt709_roundtrip_arb() {
arbtest::arbtest(|u| {
let samples: Vec<f32> = arb_samples(u)?;
let samples = arb_samples(u)?;
let mut output = samples.clone();

linear_to_bt709(&mut output);
Expand All @@ -296,7 +438,7 @@ mod test {
#[test]
fn linear_to_srgb_fast_arb() {
arbtest::arbtest(|u| {
let mut samples: Vec<f32> = arb_samples(u)?;
let mut samples = arb_samples(u)?;
let mut fast = samples.clone();

linear_to_srgb(&mut samples);
Expand All @@ -310,7 +452,7 @@ mod test {
fn linear_to_pq_arb() {
arbtest::arbtest(|u| {
let intensity_target = u.int_in_range(9900..=10100)? as f32;
let mut samples: Vec<f32> = arb_samples(u)?;
let mut samples = arb_samples(u)?;
let mut precise = samples.clone();

linear_to_pq(intensity_target, &mut samples);
Expand All @@ -325,7 +467,7 @@ mod test {
fn pq_to_linear_arb() {
arbtest::arbtest(|u| {
let intensity_target = u.int_in_range(9900..=10100)? as f32;
let mut samples: Vec<f32> = arb_samples(u)?;
let mut samples = arb_samples(u)?;
let mut precise = samples.clone();

pq_to_linear(intensity_target, &mut samples);
Expand All @@ -334,4 +476,87 @@ mod test {
Ok(())
});
}

#[test]
fn hlg_ootf_arb() {
arbtest::arbtest(|u| {
let intensity_target = u.int_in_range(900..=1100)? as f32;

let lr = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0;
let lb = 0.2 + u.int_in_range(0..=255)? as f32 / 255.0;
let lg = 1.0 - lr - lb;
let luminance_rgb = [lr, lg, lb];

let r = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;
let g = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;
let b = u.int_in_range(0u32..=(1 << 24))? as f32 / (1 << 24) as f32;

let mut fast_r = r;
let mut fast_g = g;
let mut fast_b = b;
let fast = [
std::slice::from_mut(&mut fast_r),
std::slice::from_mut(&mut fast_g),
std::slice::from_mut(&mut fast_b),
];
hlg_display_to_scene(intensity_target, luminance_rgb, fast);

let mut precise_r = r;
let mut precise_g = g;
let mut precise_b = b;
let precise = [
std::slice::from_mut(&mut precise_r),
std::slice::from_mut(&mut precise_g),
std::slice::from_mut(&mut precise_b),
];
hlg_display_to_scene(intensity_target, luminance_rgb, precise);

assert_all_almost_eq!(
&[fast_r, fast_g, fast_b],
&[precise_r, precise_g, precise_b],
7.2e-7
);

let mut fast_r = r;
let mut fast_g = g;
let mut fast_b = b;
let fast = [
std::slice::from_mut(&mut fast_r),
std::slice::from_mut(&mut fast_g),
std::slice::from_mut(&mut fast_b),
];
hlg_scene_to_display(intensity_target, luminance_rgb, fast);

let mut precise_r = r;
let mut precise_g = g;
let mut precise_b = b;
let precise = [
std::slice::from_mut(&mut precise_r),
std::slice::from_mut(&mut precise_g),
std::slice::from_mut(&mut precise_b),
];
hlg_scene_to_display(intensity_target, luminance_rgb, precise);

assert_all_almost_eq!(
&[fast_r, fast_g, fast_b],
&[precise_r, precise_g, precise_b],
7.2e-7
);

Ok(())
});
}

#[test]
fn scene_to_hlg_arb() {
arbtest::arbtest(|u| {
let mut samples = arb_samples(u)?;
let mut precise = samples.clone();

scene_to_hlg(&mut samples);
scene_to_hlg_precise(&mut precise);
assert_all_almost_eq!(&samples, &precise, 5e-7);
Ok(())
});
}
}
8 changes: 4 additions & 4 deletions jxl/src/util/fast_math.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1];
const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1];

#[inline]
fn fast_pow2f(x: f32) -> f32 {
pub fn fast_pow2f(x: f32) -> f32 {
let x_floor = x.floor();
let exp = f32::from_bits(((x_floor as i32 + 127) as u32) << 23);
let frac = x - x_floor;
Expand Down Expand Up @@ -40,11 +40,11 @@ const LOG2F_Q: [f32; 3] = [
];

#[inline]
fn fast_log2f(x: f32) -> f32 {
pub fn fast_log2f(x: f32) -> f32 {
let x_bits = x.to_bits() as i32;
let exp_bits = x_bits - 0x3f2aaaab;
let exp_bits = x_bits.wrapping_sub(0x3f2aaaab);
let exp_shifted = exp_bits >> 23;
let mantissa = f32::from_bits((x_bits - (exp_shifted << 23)) as u32);
let mantissa = f32::from_bits((x_bits.wrapping_sub(exp_shifted << 23)) as u32);
let exp_val = exp_shifted as f32;

let x = mantissa - 1.0;
Expand Down
Loading