From 31636d1656778eea5701efa2b55c27149af78294 Mon Sep 17 00:00:00 2001 From: Seppo Ingalsuo Date: Fri, 17 Jan 2025 20:01:49 +0200 Subject: [PATCH] Math: Replace exp() function with more accurate and fast version The function sofm_exp_int32() if replaced with new sofm_exp_approx() function. It's not a direct replacement so the name is changed. The input range changes from +/- 5 to +/- 8. The wrapper function sofm_exp_fixed() with input range -16 .. +7.6 is updated to use the new function with a simpler range reduction for large negative input values. All current exponent function usage in SOF components is for the wrapper function that remains compatible. The function sofm_db2lin_fixed() is updated to handle larger negative input decibels values made possible by the new more accurate exponent function. The accuracy in exponent function for input ranges from -8 to +8 or -16 to 7.6 depending on used exponent function improves from absolute error of 1 to 1000 ppm to constant less than 1 ppm. The cycles count for exponent function calculate decreases from about 369 max to 78 max on MTL platform for HiFi code version with -O2 or -O3 optimization. The generic C version on MTL platform is max 88 cycles. The DRC component is a heavy user of the exponent function. The saving in MTL build of DRC is 10.5 MCPS, it drops from 22.17 to 11.70 with profiler run: "scripts/sof-testbench-helper.sh -x -m drc -p profile.txt" The saving in MTL build of multiband DRC is 30.2 MCPS, it drops from 127.95 to 97.78 with profiler run: "scripts/sof-testbench-helper.sh -x -m drc_multiband -p profile.txt" This patch also updates the cmocka test. The test functions are changed due to changed function name, changed achievable accuracy. Also the random values test points vector is replaced fixed a fixed linearly spaced vector for repeatable test and risk for random failures. Signed-off-by: Shriram Shastry Signed-off-by: Seppo Ingalsuo --- src/include/sof/math/exp_fcn.h | 61 +-- src/math/exp_fcn.c | 363 ++++++-------- src/math/exp_fcn_hifi.c | 469 +++++++----------- test/cmocka/src/math/arithmetic/exponential.c | 262 ++++++---- 4 files changed, 527 insertions(+), 628 deletions(-) diff --git a/src/include/sof/math/exp_fcn.h b/src/include/sof/math/exp_fcn.h index 425f400b23de..0aa3dbead623 100644 --- a/src/include/sof/math/exp_fcn.h +++ b/src/include/sof/math/exp_fcn.h @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: BSD-3-Clause * - * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2022-2025 Intel Corporation. * * Author: Shriram Shastry + * Seppo Ingalsuo * */ #ifndef __SOFM_EXP_FCN_H__ @@ -26,40 +27,40 @@ #endif -/* TODO: Is there a MCPS difference */ -#define USING_QCONVERT 1 +/* Q5.27 int32(round(log((2^31 - 1)/2^20) * 2^27)) */ +#define SOFM_EXP_FIXED_INPUT_MAX 1023359037 -#if USING_QCONVERT +/* Q8.24 int32(round((log((2^31 - 1)/2^20) * 20 / log(10)) * 2^24)) */ +#define SOFM_DB2LIN_INPUT_MAX 1111097957 -#include -#define SOFM_EXP_FIXED_INPUT_MIN Q_CONVERT_FLOAT(-11.5, 27) /* Q5.27 */ -#define SOFM_EXP_FIXED_INPUT_MAX Q_CONVERT_FLOAT(7.6245, 27) /* Q5.27 */ -#define SOFM_EXP_TWO_Q27 Q_CONVERT_FLOAT(2.0, 27) /* Q5.27 */ -#define SOFM_EXP_MINUS_TWO_Q27 Q_CONVERT_FLOAT(-2.0, 27) /* Q5.27 */ -#define SOFM_EXP_ONE_Q20 Q_CONVERT_FLOAT(1.0, 20) /* Q12.20 */ -#define SOFM_EXP_MINUS_100_Q24 Q_CONVERT_FLOAT(-100.0, 24) /* Q8.24 */ -#define SOFM_EXP_LOG10_DIV20_Q27 Q_CONVERT_FLOAT(0.1151292546, 27) /* Q5.27 */ - -#else - -#define SOFM_EXP_FIXED_INPUT_MIN -1543503872 /* Q_CONVERT_FLOAT(-11.5, 27) */ -#define SOFM_EXP_FIXED_INPUT_MAX 1023343067 /* Q_CONVERT_FLOAT(7.6245, 27) */ -#define SOFM_EXP_TWO_Q27 268435456 /* Q_CONVERT_FLOAT(2.0, 27) */ -#define SOFM_EXP_MINUS_TWO_Q27 -268435456 /* Q_CONVERT_FLOAT(-2.0, 27) */ -#define SOFM_EXP_ONE_Q20 1048576 /* Q_CONVERT_FLOAT(1.0, 20) */ -#define SOFM_EXP_MINUS_100_Q24 -1677721600 /* Q_CONVERT_FLOAT(-100.0, 24) */ -#define SOFM_EXP_LOG10_DIV20_Q27 15452387 /* Q_CONVERT_FLOAT(0.1151292546, 27) */ +/** + * Calculates exponent function exp(x) = e^x with accurate and efficient technique that + * includes range reduction operations, approximation with Taylor series, and reconstruction + * operations to compensate the range reductions. + * @param x The input argument as Q4.28 from -8 to +8 + * @return The calculated e^x value as Q13.19 from 3.3546e-04 to 2981.0 + */ +int32_t sofm_exp_approx(int32_t x); -#endif +/** + * Calculated exponent function exp(x) = e^x by using sofm_exp_approx(). The input range for + * large arguments is internally reduced to -8 to +8 with rule exp(x) = exp(x/2) * exp(x/2) + * and reconstructed back. This function is essentially a wrapper for compatibility with + * existing usage of exp() function and Q-format choice. Note that the return value saturates + * to INT32_MAX with input arguments larger than 7.6246. + * @param x The input argument as Q5.27 from -16 to +16 + * @return The calculated e^x value as Q12.20 + */ +int32_t sofm_exp_fixed(int32_t x); -#define SOFM_EXP_BIT_MASK_LOW_Q27P5 0x0000000008000000 -#define SOFM_EXP_BIT_MASK_Q62P2 0x4000000000000000LL -#define SOFM_EXP_QUOTIENT_SCALE 0x40000000 -#define SOFM_EXP_TERMS_Q23P9 0x800000 -#define SOFM_EXP_LSHIFT_BITS 0x2000 -int32_t sofm_exp_int32(int32_t x); -int32_t sofm_exp_fixed(int32_t x); +/** + * Converts a decibels value to liner amplitude lin = 10^(db/20) value with optimized + * equation exp(db * log(10)/20). Note that due to range limitation of sofm_exp_fixed() + * the output saturates to maximum with about +66 dB input. + * @param db Decibels value in Q8.24 format, from -128 to +66.226 + * @return Linear value in Q12.20 format, from 3.9811e-07 to 2048 + */ int32_t sofm_db2lin_fixed(int32_t db); #endif /* __SOFM_EXP_FCN_H__ */ diff --git a/src/math/exp_fcn.c b/src/math/exp_fcn.c index 6ada84202212..9c194b80de86 100644 --- a/src/math/exp_fcn.c +++ b/src/math/exp_fcn.c @@ -1,228 +1,170 @@ // SPDX-License-Identifier: BSD-3-Clause /* - * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2022-2025 Intel Corporation. * * Author: Shriram Shastry + * Seppo Ingalsuo * */ #include #include #include -#include -#include #include -#include +#include #include -#include -#include #if defined(EXPONENTIAL_GENERIC) -#define SOFM_CONVERG_ERROR 28823037607936LL // error smaller than 1e-4,1/2 ^ -44.7122876200884 +#define SOFM_EXP_ONE_OVER_LOG2_Q30 1549082005 /* Q2.30 int32(round(1/log(2) * 2^30)) */ +#define SOFM_EXP_LOG2_Q31 1488522236 /* Q1.31 int32(round(log(2) * 2^31)) */ +#define SOFM_EXP_FIXED_INPUT_MINUS8 -1073741824 /* Q5.27 int32(-8 * 2^27) */ +#define SOFM_EXP_FIXED_INPUT_PLUS8 1073741823 /* Q5.27 int32(8 * 2^27) */ +#define SOFM_EXP_LOG10_DIV20_Q27 15452387 /* Q5.27 int32(round(log(10)/20*2^27)) */ -/* inv multiplication lookup table */ -/* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */ -static const int64_t exp_iv_ilookup[19] = { - 4611686018427387904LL, - 1537228672809129301LL, - 384307168202282325LL, - 76861433640456465LL, - 12810238940076077LL, - 1830034134296582LL, - 228754266787072LL, - 25417140754119LL, - 2541714075411LL, - 231064915946LL, - 19255409662LL, - 1481185358LL, - 105798954LL, - 7053264LL, - 440829LL, - 25931LL, - 1441LL, - 76LL, - 4LL -}; - -/* Function Declarations */ -/* - * Arguments : int64_t in_0 - * int64_t in_1 - * uint64_t *ptroutbitshi - * uint64_t *ptroutbitslo - * Multiplication of two signed int64 numbers - * Return Type : void - * Description:Perform element-wise multiplication on in_0 and in_1 - * while keeping the required product word length and fractional - * length in mind. mul_s64 function divide the 64-bit quantities - * into two 32-bit words, multiply the low words to produce the - * lowest and second-lowest words in the result, then both pairs - * of low and high words from different numbers to produce the - * second and third lowest words in the result, and finally both - * high words to produce the two highest words in the outcome. - * Add them all up, taking carry into consideration. - * - * The 64 x 64 bit multiplication of operands in_0 and in_1 is - * shown in the image below. The 64-bit operand in_0, in_1 is - * represented by the notation in0_H, in1_H for the top 32 bits - * and in0_L, in1_L for the bottom 32 bits. - * - * in0_H : in0_L - * x in1_H : in1_L - * --------------------- - * P0 in0_L x in1_L - * P1 in0_H x in1_L 64 bit inner multiplication - * P2 in0_L x in1_H 64 bit inner multiplication - * P3 in0_H x in1_H - * -------------------- - * [64 x 64 bit multiplication] sum of inner products - * All combinations are multiplied by one another and then added. - * Each inner product is moved into its proper power location, given the names - * of the inner products, redoing the addition where 000 represents 32 zero - * bits. The inner products can be added together in 64 bit addition. The sum - * of two 64-bit numbers yields a 65-bit output. - * (P0H:P0L) - * P1H(P1L:000) - * P2H(P2L:000) - * P3H:P3L(000:000) - * .......(aaa:P0L) - * By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0. - * The partial result is then multiplied by P2L:000. - * We call it CRY1 because it has the potential to carry again. - * (CRY0 + CRY1)P0H:P0L - * ( P1H)P1L:000 - * ( P2H)P2L:000 - * (P3H: P3L)000:000 - * -------------------- - * (ccc:bbb)aaa:P0L - * P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1. This increase will - * not result in an overflow. +/* The table contains exponents of value v, where values of v + * are the 3 bit 2's complement signed values presented by bits + * of index 0..7. * + * v = [(0:3)/8 (-4:-1)/8]; + * uint32(round(exp(v) * 2^31)) */ -static inline void mul_s64(int64_t in_0, int64_t in_1, uint64_t *ptroutbitshi, - uint64_t *ptroutbitslo) -{ - uint64_t absin0 = ABS(in_0); - uint64_t absin1 = ABS(in_1); - uint64_t in0hi = absin0 >> 32; - uint64_t in0lo = absin0 & UINT32_MAX; - uint64_t in1hi = absin1 >> 32; - uint64_t prodhilo; - uint64_t prodlohi; - - absin0 = absin1 & UINT32_MAX; - /* multiplication */ - prodhilo = in0hi * absin0; - prodlohi = in0lo * in1hi; - absin0 *= in0lo; - - in0lo = absin0 + (prodlohi << 32); - absin1 = in0lo < absin0 ? 1 : 0; - absin0 = in0lo; - /* shift and add */ - in0lo += prodhilo << 32; - if (in0lo < absin0) - absin1++; - /* carry */ - absin0 = absin1 + in0hi * in1hi + (prodlohi >> 32) + (prodhilo >> 32); - /* 2's complement */ - if (in_0 && in_1 && (in_0 > 0) != (in_1 > 0)) { - /* bit inversion */ - absin0 = ~absin0; - in0lo = ~in0lo; - /* add to low byte */ - in0lo++; - if (!in0lo) - absin0++; - } - /* pointer- output high and low bytes */ - *ptroutbitshi = absin0; - *ptroutbitslo = in0lo; -} +static const uint32_t sofm_exp_3bit_lookup[8] = { + 2147483648, 2433417774, 2757423586, 3124570271, + 1302514674, 1475942488, 1672461947, 1895147668 +}; -/* - * Arguments : int64_t a - * int64_t b - * Return Type : int64_t +/* Taylor polynomial coefficients for x^3..x^6, calculated + * uint32(round(1 ./ factorial(3:6) * 2^32)) */ -static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b) -{ - uint64_t u64_rhi; - uint64_t u64_rlo; - - mul_s64(a, b, &u64_rhi, &u64_rlo); - const bool roundup = (u64_rlo & SOFM_EXP_BIT_MASK_LOW_Q27P5) != 0; - - u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0); - return u64_rlo; -} +static const uint32_t sofm_exp_taylor_coeffs[4] = { + 715827883, 178956971, 35791394, 5965232 +}; -/* function f(x) = a^x, x is variable and a is base +/* function f(x) = e^x * - * Arguments : int32_t x(Q4.28) - * input range -5 to 5 + * Arguments : int32_t x (Q4.28) + * input range -8 to 8 * - * Return Type : int32_t ts(Q9.23) - * output range 0.0067465305 to 148.4131488800 - *+------------------+-----------------+--------+--------+ - *| x | ts (returntype) | x | ts | - *+----+-----+-------+----+----+-------+--------+--------+ - *|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat| - *+----+-----+-------+----+----+-------+--------+--------+ - *| 32 | 28 | 1 | 32 | 23 | 0 | 4.28 | 9.23 | - *+------------------+-----------------+--------+--------+ + * Return Type : int32_t (Q13.19) + * output range 3.3546e-04 to 2981.0 */ -int32_t sofm_exp_int32(int32_t x) +int32_t sofm_exp_approx(int32_t x) { - uint64_t ou0Hi; - uint64_t ou0Lo; - int64_t qt; - int32_t b_n; - int32_t ts = SOFM_EXP_TERMS_Q23P9; /* Q23.9 */ - int64_t dividend = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */ - static const int32_t i_emin = -1342177280; /* Q4.28 */ - static const int32_t o_emin = 56601; /* Q9.23 */ - static const int32_t i_emax = 1342177280; /* Q4.28 */ - static const int32_t o_emax = 1244979733; /* Q9.23 */ + uint32_t taylor_first_2; + uint32_t exp_a_b_32bit; + uint32_t taylor_extra; + uint32_t rnd_one; + uint32_t b_f32; + uint32_t b_pow; + uint32_t exp_a; + uint32_t exp_b; + uint32_t term; + uint32_t b; + int32_t x_times_one_over_log2; + int32_t e_times_log2; + int32_t x_32bit; + int32_t y_32bit; + int32_t e; + int32_t r; + int shift_value; + int a; + + /* ------------------------------------------------------------------------- + * FIRST RANGE REDUCTION --------------------------------------------------- + * ------------------------------------------------------------------------- + */ + x_times_one_over_log2 = Q_MULTSR_32X32((int64_t)x, SOFM_EXP_ONE_OVER_LOG2_Q30, 28, 30, 26); + e = Q_SHIFT_RND(x_times_one_over_log2, 26, 0); - /* Out of range input(x>5, x<-5), */ - /* return clipped value x > 5= e^5, and x<-5 = e^-5 */ - if (x < i_emin) - return o_emin; /* 0.0067473649978638 in Q9.23 */ + /* Q6.31, but we only keep the bottom 31 bits */ + e_times_log2 = (uint32_t)e * SOFM_EXP_LOG2_Q31; - if (x > i_emax) - return o_emax; /* 148.4131494760513306 in Q9.23 */ + /* ------------------------------------------------------------------------- + * SECOND RANGE REDUCTION -------------------------------------------------- + * y = a + b + * ------------------------------------------------------------------------- + */ + x_32bit = (int32_t)((uint32_t)x << 3); /* S4.31 */ + y_32bit = x_32bit - e_times_log2; /* S0.31, in ~[-0.34, +0.34] */ + a = (y_32bit >> 28) & 7; /* just the 3 top bits of "y" */ + b = y_32bit & 0x0FFFFFFF; /* bottom 31-3 = 28 bits. format U-3.31 */ + exp_a = sofm_exp_3bit_lookup[a]; + b_f32 = (b << 1) | 0x4; /* U0.32, align b on 32-bits of fraction */ + + /* Taylor approximation : base part + iterations + * Base part : 1 + b + b^2/2! + * Iterative part : b^3/3! + b^4/4! + b^5/5! + b^6/6! + * : Term count determined dynamically using e. + * + * Base part: NOTE: delay adding the "1" in "1 + b + b^2/2" until after we + * add the iterative part in. This gives us one more guard bit. + * NOTE: u_int32 x u_int32 => {hi, lo}. We only need {hi} for b_pow. + */ + b_pow = (uint64_t)b_f32 * b_f32 >> 32; + taylor_first_2 = b_f32 + (b_pow >> 1); /* 0.32 */ + taylor_extra = 0; + term = 1; + if (e < -10) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[0] >> 32; + taylor_extra += term; + if (e < -5) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[1] >> 32; + taylor_extra += term; + if (e < 0) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[2] >> 32; + taylor_extra += term; + if (e < 6) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[3] >> 32; + taylor_extra += term; + +ITER_END: + + /* Implement rounding to 31 fractional bits.. */ + taylor_first_2 = taylor_first_2 + taylor_extra + 1; + + /* Add the missing "1" for the Taylor series "1+b+b^2/2+...." */ + exp_b = ((uint32_t)1 << 31) + (taylor_first_2 >> 1); /* U1.31 */ + + /* ------------------------------------------------------------------------- + * FIRST RECONSTRUCTION ---------------------------------------------------- + * ------------------------------------------------------------------------- + */ - /* pre-computation of 1st & 2nd terms */ - mul_s64(dividend, SOFM_EXP_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo); - qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */ - ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) >> 18)); - dividend = lomul_s64_sr_sat_near(dividend, x); - for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) { - mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo); - qt = (ou0Hi << 45) | (ou0Lo >> 19); + /* U1.31 * U1.31 = U2.62 */ + exp_a_b_32bit = (uint64_t)exp_a * exp_b >> 31; - /* sum of the remaining terms */ - ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) ? 1 : 0)); - dividend = lomul_s64_sr_sat_near(dividend, x); + /* ------------------------------------------------------------------------- + * SECOND RECONSTRUCTION --------------------------------------------------- + * ------------------------------------------------------------------------- + */ - qt = ABS(qt); - /* For inputs between -5 and 5, (qt < SOFM_CONVERG_ERROR) is always true */ - if (qt < SOFM_CONVERG_ERROR) - break; - } - return ts; + /* Rounding to nearest */ + shift_value = 12 - e; + rnd_one = (shift_value > 0 ? (1 << (shift_value - 1)) : 0); + exp_a_b_32bit += rnd_one; + r = (int32_t)(exp_a_b_32bit >> shift_value); + return r; } +EXPORT_SYMBOL(sofm_exp_approx); -/* Fixed point exponent function for approximate range -11.5 .. 7.6 - * that corresponds to decibels range -100 .. +66 dB. +/* Fixed point exponent function for approximate range -16 .. 7.6 + * that corresponds to decibels range -120 .. +66 dB. * * The functions uses rule exp(x) = exp(x/2) * exp(x/2) to reduce - * the input argument for private small value exp() function that is - * accurate with input range -2.0 .. +2.0. The number of possible - * divisions by 2 is computed into variable n. The returned value is - * exp()^(2^n). + * the input argument for the exponent function. * * Input is Q5.27, -16.0 .. +16.0, but note the input range limitation * Output is Q12.20, 0.0 .. +2048.0 @@ -230,39 +172,24 @@ int32_t sofm_exp_int32(int32_t x) int32_t sofm_exp_fixed(int32_t x) { - int32_t xs; - int32_t y; - int32_t y0; - int i; - int n = 0; - - if (x < SOFM_EXP_FIXED_INPUT_MIN) - return 0; + int32_t x0, y0, y1; if (x > SOFM_EXP_FIXED_INPUT_MAX) return INT32_MAX; - /* x is Q5.27 */ - xs = x; - while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) { - xs >>= 1; - n++; + if (x < SOFM_EXP_FIXED_INPUT_MINUS8 || x > SOFM_EXP_FIXED_INPUT_PLUS8) { + /* Divide by 2, convert Q27 to Q28 is x as such */ + y0 = sofm_exp_approx(x); + y1 = Q_MULTSR_32X32((int64_t)y0, y0, 19, 19, 20); + return y1; } - /* sofm_exp_int32() input is Q4.28, while x1 is Q5.27 - * sofm_exp_int32() output is Q9.23, while y0 is Q12.20 - */ - y0 = Q_SHIFT_RND(sofm_exp_int32(Q_SHIFT_LEFT(xs, 27, 28)), 23, 20); - y = SOFM_EXP_ONE_Q20; - for (i = 0; i < (1 << n); i++) - y = (int32_t)Q_MULTSR_32X32((int64_t)y, y0, 20, 20, 20); - - return y; + x0 = sat_int32((int64_t)x << 1); + y0 = sofm_exp_approx(x0); + return sat_int32((int64_t)y0 << 1); } EXPORT_SYMBOL(sofm_exp_fixed); -#endif /* EXPONENTIAL_GENERIC */ - /* Decibels to linear conversion: The function uses exp() to calculate * the linear value. The argument is multiplied by log(10)/20 to * calculate equivalent of 10^(db/20). @@ -279,11 +206,13 @@ int32_t sofm_db2lin_fixed(int32_t db) { int32_t arg; - if (db < SOFM_EXP_MINUS_100_Q24) - return 0; + if (db > SOFM_DB2LIN_INPUT_MAX) + return INT32_MAX; /* Q8.24 x Q5.27, result needs to be Q5.27 */ arg = (int32_t)Q_MULTSR_32X32((int64_t)db, SOFM_EXP_LOG10_DIV20_Q27, 24, 27, 27); return sofm_exp_fixed(arg); } EXPORT_SYMBOL(sofm_db2lin_fixed); + +#endif /* EXPONENTIAL_GENERIC */ diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c index d41071335fd9..e1722d602110 100644 --- a/src/math/exp_fcn_hifi.c +++ b/src/math/exp_fcn_hifi.c @@ -1,20 +1,16 @@ // SPDX-License-Identifier: BSD-3-Clause /* - *Copyright(c) 2023 Intel Corporation. All rights reserved. + *Copyright(c) 2023-2025 Intel Corporation. * * Author: Shriram Shastry - * + * Seppo Ingalsuo */ +#include #include #include #include -#include -#include -#include #include -#include -#include #if defined(SOFM_EXPONENTIAL_HIFI3) || defined(SOFM_EXPONENTIAL_HIFI4) || \ defined(SOFM_EXPONENTIAL_HIFI5) @@ -27,330 +23,213 @@ #include #endif -#include -#include - -#define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */ +#define SOFM_EXP_ONE_OVER_LOG2_Q30 1549082005 /* Q2.30 int32(round(1/log(2) * 2^30)) */ +#define SOFM_EXP_LOG2_Q31 1488522236 /* Q1.31 int32(round(log(2) * 2^31)) */ +#define SOFM_EXP_FIXED_INPUT_MINUS8 -1073741824 /* Q5.27 int32(-8 * 2^27) */ +#define SOFM_EXP_FIXED_INPUT_PLUS8 1073741823 /* Q5.27 int32(8 * 2^27) */ +#define SOFM_EXP_LOG10_DIV20_Q27 15452387 /* Q5.27 int32(round(log(10)/20*2^27)) */ -/* - * Arguments : int64_t in_0 - * int64_t in_1 - * uint64_t *ptroutbitshi - * uint64_t *ptroutbitslo - * Return Type : void - * Description:Perform element-wise multiplication on in_0 and in_1 - * while keeping the required product word length and fractional - * length in mind. mul_s64 function divide the 64-bit quantities - * into two 32-bit words,multiply the low words to produce the - * lowest and second-lowest words in the result, then both pairs - * of low and high words from different numbers to produce the - * second and third lowest words in the result, and finally both - * high words to produce the two highest words in the outcome. - * Add them all up, taking carry into consideration. - * - * The 64 x 64 bit multiplication of operands in_0 and in_1 is - * shown in the image below. The 64-bit operand in_0,in_1 is - * represented by the notation in0_H, in1_H for the top 32 bits - * and in0_L, in1_L for the bottom 32 bits. - * - * in0_H : in0_L - * x in1_H : in1_L - * --------------------- - * P0 in0_L x in1_L - * P1 in0_H x in1_L 64 bit inner multiplication - * P2 in0_L x in1_H 64 bit inner multiplication - * P3 in0_H x in1_H - * -------------------- - * [64 x 64 bit multiplication] sum of inner products - * All combinations are multiplied by one another and then added. - * Each inner product is moved into its proper power location.given the names - * of the inner products, redoing the addition where 000 represents 32 zero - * bits.The inner products can be added together in 64 bit addition.The sum - * of two 64-bit numbers yields a 65-bit output. - * (P0H:P0L) - * P1H(P1L:000) - * P2H(P2L:000) - * P3H:P3L(000:000) - * .......(aaa:P0L) - * By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0. - * The partial result is then multiplied by P2L:000. - * We call it CRY1 because it has the potential to carry again. - * (CRY0 + CRY1)P0H:P0L - * ( P1H)P1L:000 - * ( P2H)P2L:000 - * (P3H: P3L)000:000 - * -------------------- - * (ccc:bbb)aaa:P0L - * P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1.This increase will - * not result in an overflow. +/* The table contains exponents of value v, where values of v + * are the 3 bit 2's complement signed values presented by bits + * of index 0..7. * + * v = [(0:3)/8 (-4:-1)/8]; + * uint32(round(exp(v) * 2^31)) */ -static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *__restrict__ ptroutbitshi, - ae_int64 *__restrict__ ptroutbitslo) -{ - ae_int64 producthihi, producthilo, productlolo; - ae_int64 producthi, product_hl_lh_h, product_hl_lh_l, carry; - -#if (SOFM_EXPONENTIAL_HIFI4 == 1 || SOFM_EXPONENTIAL_HIFI5 == 1) - - ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(in_0); - ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(in_1); - - ae_ep ep_lolo = AE_MOVEA(0); - ae_ep ep_hilo = AE_MOVEA(0); - ae_ep ep_HL_LH = AE_MOVEA(0); - - producthihi = AE_MUL32_HH(in0_32, in1_32); - - /* AE_MULZAAD32USEP.HL.LH - Unsigned lower parts and signed higher 32-bit parts dual */ - /* multiply and accumulate operation on two 32x2 operands with 72-bit output */ - /* Input-32x32-bit(in1_32xin0_32)into 72-bit multiplication operations */ - /* Output-lower 64 bits of the result are stored in producthilo */ - /* Output-upper eight bits are stored in ep_hilo */ - AE_MULZAAD32USEP_HL_LH(ep_hilo, producthilo, in1_32, in0_32); - productlolo = AE_MUL32U_LL(in0_32, in1_32); - - product_hl_lh_h = AE_SRAI72(ep_hilo, producthilo, 32); - product_hl_lh_l = AE_SLAI64(producthilo, 32); - - /* The AE_ADD72 procedure adds two 72-bit elements. The first 72-bit value is created */ - /* by concatenating the MSBs and LSBs of operands ep[7:0] and d[63:0]. Similarly, the */ - /* second value is created by concatenating bits from operands ep1[7:0] and d1[63:0]. */ - AE_ADD72(ep_lolo, productlolo, ep_HL_LH, product_hl_lh_l); - - carry = AE_SRAI72(ep_lolo, productlolo, 32); - - carry = AE_SRLI64(carry, 32); - producthi = AE_ADD64(producthihi, carry); - - producthi = AE_ADD64(producthi, product_hl_lh_h); - *ptroutbitslo = productlolo; -#elif SOFM_EXPONENTIAL_HIFI3 == 1 - - ae_int64 producthi_1c; - ae_int64 producthi_2c; - ae_int64 productlo_2c; - ae_int64 productlo; - - ae_int64 s0 = AE_SRLI64(in_0, 63); - ae_int64 s1 = AE_SRLI64(in_1, 63); - bool x_or = (bool)((int)(int64_t)s0 ^ (int)(int64_t)s1); - - ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(AE_ABS64(in_0)); - ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(AE_ABS64(in_1)); - - producthihi = AE_MUL32_HH(in0_32, in1_32); - producthilo = AE_MUL32U_LL(in1_32, AE_MOVINT32X2_FROMINT64 - ((AE_MOVINT64_FROMINT32X2(in0_32) >> 32))); - producthilo += AE_MUL32U_LL(AE_MOVINT32X2_FROMINT64 - ((AE_MOVINT64_FROMINT32X2(in1_32) >> 32)), in0_32); - productlolo = AE_MUL32U_LL(in0_32, in1_32); - - product_hl_lh_h = AE_SRAI64(producthilo, 32); - product_hl_lh_l = AE_SLAI64(producthilo, 32); - - productlo = AE_ADD64(productlolo, product_hl_lh_l); - producthi = AE_ADD64(producthihi, product_hl_lh_h); - - carry = AE_ADD64(AE_SRLI64(productlolo, 1), AE_SRLI64(product_hl_lh_l, 1)); - carry = AE_SRLI64(carry, 63); - producthi = AE_ADD64(producthi, carry); - - producthi_1c = AE_NOT64(producthi); - producthi_2c = AE_NEG64(producthi); - productlo_2c = AE_NEG64(productlo); - - if (x_or) { - if (productlo == (ae_int64)0) { - producthi = producthi_2c; - } else { - producthi = producthi_1c; - productlo = productlo_2c; - } - } - *ptroutbitslo = productlo; -#endif //(XCHAL_HAVE_HIFI4 || XCHAL_HAVE_HIFI5) - - *ptroutbitshi = producthi; -} +static const uint32_t sofm_exp_3bit_lookup[8] = { + 2147483648, 2433417774, 2757423586, 3124570271, + 1302514674, 1475942488, 1672461947, 1895147668 +}; -/* - * Arguments : int64_t a - * int64_t b - * Return Type : int64_t +/* Taylor polynomial coefficients for x^3..x^6, calculated + * uint32(round(1 ./ factorial(3:6) * 2^32)) */ -static int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b) -{ - ae_int64 u64_chi; - ae_int64 u64_clo; - ae_int64 temp; - - mul_s64(a, b, &u64_chi, &u64_clo); - - ae_int64 roundup = AE_AND64(u64_clo, SOFM_EXP_BIT_MASK_LOW_Q27P5); - - roundup = AE_SRLI64(roundup, 27); - temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28)); - - return AE_ADD64(temp, roundup); -} - -static const int64_t onebyfact_Q63[19] = { - 4611686018427387904LL, - 1537228672809129301LL, - 384307168202282325LL, - 76861433640456465LL, - 12810238940076077LL, - 1830034134296582LL, - 228754266787072LL, - 25417140754119LL, - 2541714075411LL, - 231064915946LL, - 19255409662LL, - 1481185358LL, - 105798954LL, - 7053264LL, - 440829LL, - 25931LL, - 1441LL, - 76LL, - 4LL +static const uint32_t sofm_exp_taylor_coeffs[4] = { + 715827883, 178956971, 35791394, 5965232 }; -/* f(x) = a^x, x is variable and a is base +/* function f(x) = e^x * - * Arguments : int32_t x(Q4.28) - * input range -5 to 5 + * Arguments : int32_t x (Q4.28) + * input range -8 to 8 * - * Return Type : int32_t ts(Q9.23) - * output range 0.0067465305 to 148.4131488800 - *+------------------+-----------------+--------+--------+ - *| x | ts (returntype) | x | ts | - *+----+-----+-------+----+----+-------+--------+--------+ - *|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat| - *+----+-----+-------+----+----+-------+--------+--------+ - *| 32 | 28 | 1 | 32 | 23 | 0 | 4.28 | 9.23 | - *+------------------+-----------------+--------+--------+ + * Return Type : int32_t (Q13.19) + * output range 3.3546e-04 to 2981.0 */ -int32_t sofm_exp_int32(int32_t x) +int32_t sofm_exp_approx(int32_t x) { - ae_int64 outhi; - ae_int64 outlo; - ae_int64 qt; - ae_int64 onebyfact; - ae_int64 temp; - - ae_int64 *ponebyfact_Q63 = (ae_int64 *)onebyfact_Q63; - ae_int64 ts = SOFM_EXP_TERMS_Q23P9; - ae_int64 mp = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */; - xtbool flag; - int64_t b_n; - - mul_s64(mp, SOFM_EXP_BIT_MASK_Q62P2, &outhi, &outlo); - qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18)); - - temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35); - - ts = AE_ADD64(ts, temp); - - mp = lomul_s64_sr_sat_near(mp, (int64_t)x); - - for (b_n = 0; b_n < 64;) { - AE_L64_IP(onebyfact, ponebyfact_Q63, 8); + ae_int64 p; + uint32_t taylor_first_2; + uint32_t taylor_extra; + uint32_t b_f32; + uint32_t b_pow; + uint32_t exp_a; + uint32_t exp_b; + uint32_t term; + uint32_t b; + int32_t x_times_one_over_log2; + int32_t e_times_log2; + int32_t x_32bit; + int32_t y_32bit; + int32_t e; + int32_t r; + int a; + + //x = 843314857; + + /* ------------------------------------------------------------------------- + * FIRST RANGE REDUCTION --------------------------------------------------- + * ------------------------------------------------------------------------- + * Multiply gives q28 * q30 -> q58, without shift the rounded value + * would be q42 (58 - 16). For q26 result, shift right by 16 (42 - 26). + */ + p = AE_MUL32_LL(x, SOFM_EXP_ONE_OVER_LOG2_Q30); + x_times_one_over_log2 = AE_ROUND32F48SASYM(AE_SRAI64(p, 16)); - mul_s64(mp, onebyfact, &outhi, &outlo); - qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19)); + /* Shift, round to q0 */ + e = AE_SRAI32R(x_times_one_over_log2, 26); - temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35); - ts = AE_ADD64(ts, temp); - mp = lomul_s64_sr_sat_near(mp, (int64_t)x); + /* Q6.31, but we only keep the bottom 31 bits */ + e_times_log2 = (uint32_t)e * SOFM_EXP_LOG2_Q31; - const ae_int64 sign = AE_NEG64(qt); + /* ------------------------------------------------------------------------- + * SECOND RANGE REDUCTION -------------------------------------------------- + * y = a + b + * ------------------------------------------------------------------------- + */ + x_32bit = AE_SLAI32(x, 3); /* S4.31, overflow to S1.31 */ + y_32bit = x_32bit - e_times_log2; /* S0.31, in ~[-0.34, +0.34] */ + a = (y_32bit >> 28) & 7; /* just the 3 top bits of "y" */ + b = y_32bit & 0x0FFFFFFF; /* bottom 31-3 = 28 bits. format U-3.31 */ + exp_a = sofm_exp_3bit_lookup[a]; + b_f32 = (b << 1) | 0x4; /* U0.32, align b on 32-bits of fraction */ + + /* Taylor approximation : base part + iterations + * Base part : 1 + b + b^2/2! + * Iterative part : b^3/3! + b^4/4! + b^5/5! + b^6/6! + * : Term count determined dynamically using e. + * + * Base part: NOTE: delay adding the "1" in "1 + b + b^2/2" until after we + * add the iterative part in. This gives us one more guard bit. + * NOTE: u_int32 x u_int32 => {hi, lo}. We only need {hi} for b_pow. + */ + b_pow = (uint64_t)b_f32 * b_f32 >> 32; + taylor_first_2 = b_f32 + (b_pow >> 1); /* 0.32 */ + taylor_extra = 0; + term = 1; + if (e < -10) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[0] >> 32; + taylor_extra += term; + if (e < -5) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[1] >> 32; + taylor_extra += term; + if (e < 0) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[2] >> 32; + taylor_extra += term; + if (e < 6) + goto ITER_END; + + b_pow = (uint64_t)b_f32 * b_pow >> 32; + term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[3] >> 32; + taylor_extra += term; + +ITER_END: + + /* Implement rounding to 31 fractional bits.. */ + taylor_first_2 = taylor_first_2 + taylor_extra + 1; + + /* Add the missing "1" for the Taylor series "1+b+b^2/2+...." */ + exp_b = ((uint32_t)1 << 31) + (taylor_first_2 >> 1); /* U1.31 */ + + /* ------------------------------------------------------------------------- + * FIRST RECONSTRUCTION ---------------------------------------------------- + * ------------------------------------------------------------------------- + */ - flag = AE_LT64(qt, 0); - AE_MOVT64(qt, sign, flag); + /* U1.31 * U1.31 = U2.62 */ + p = (ae_int64)((uint64_t)exp_a * exp_b); - if (!(qt < (ae_int64)SOFM_CONVERG_ERROR)) - b_n++; - else - b_n = 64; - } + /* ------------------------------------------------------------------------- + * SECOND RECONSTRUCTION --------------------------------------------------- + * ------------------------------------------------------------------------- + */ - return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts)); + /* Rounding to nearest, + * using f48 round with shift value for right shift is negative + * q62 to q31 shift right is -31, for round instruction shift left is +16, + * compensate shift e by right shift is -12: 16 - 31 - 12 = -27 + */ + p = AE_SLAA64(p, e - 27); + r = AE_ROUND32F48SASYM(p); + return r; } -/* Fixed point exponent function for approximate range -11.5 .. 7.6 - * that corresponds to decibels range -100 .. +66 dB. +/* Fixed point exponent function for approximate range -16 .. 7.6246. * * The functions uses rule exp(x) = exp(x/2) * exp(x/2) to reduce - * the input argument for private small value exp() function that is - * accurate with input range -2.0 .. +2.0. The number of possible - * divisions by 2 is computed into variable n. The returned value is - * exp()^(2^n). + * the input argument for function sofm_exp_approx() with input + * range from -8 to +8. * * Input is Q5.27, -16.0 .. +16.0, but note the input range limitation * Output is Q12.20, 0.0 .. +2048.0 */ - int32_t sofm_exp_fixed(int32_t x) { - ae_f64 p; - ae_int32 y0; - ae_int32 y; - ae_int32 xs; - int32_t n; - int shift; - int i; - - if (x < SOFM_EXP_FIXED_INPUT_MIN) - return 0; + int32_t x0, y0, y1; if (x > SOFM_EXP_FIXED_INPUT_MAX) return INT32_MAX; - /* This returns number of right shifts needed to scale value x to |x| < 2. - * The behavior differs slightly for positive and negative values but it - * is not problem for sofm_exp_int32() function. E.g. - * - * x = 268435455 (1.9999999925), shift = 0 - * x = 268435456 (2.0000000000), shift = 1 - * x = 268435457 (2.0000000075), shift = 1 - * - * x = -268435457 (-2.0000000075), shift = 1 - * x = -268435456 (-2.0000000000), shift = 0 - * x = -268435455 (-1.9999999925), shift = 0 - * - * If the shift is zero, just return result from sofm_exp_int32() with - * input Q format and output Q format adjusts. - */ - shift = (int)AE_MAX32(0, 3 - AE_NSAZ32_L(x)); - if (!shift) - return AE_SRAI32R(sofm_exp_int32(AE_MOVAD32_L(AE_SLAI32S(x, 1))), 3); + /* No need to check for > 8, the input max is lower, about 7.6 */ + if (x < SOFM_EXP_FIXED_INPUT_MINUS8) { + /* Divide by 2, convert Q27 to Q28 is x as such */ + y0 = sofm_exp_approx(x); + /* Multiply gives q19 * q19 -> q38, without shift the rounded value + * would be q22 (38 - 16). For q20 shift right by 2. + */ + y1 = AE_ROUND32F48SASYM(AE_SRAI64(AE_MUL32_LL(y0, y0), 2)); + return y1; + } - /* Shifting x one less right to save an additional Q27 to Q28 conversion - * shift for sofm_exp_int32() - */ - n = 1 << shift; - xs = AE_SRAA32RS(x, shift - 1); + x0 = AE_SLAI32S(x, 1); + y0 = sofm_exp_approx(x0); + return AE_SLAI32S(y0, 1); +} +EXPORT_SYMBOL(sofm_exp_fixed); - /* sofm_exp_int32() input is Q4.28, while x1 is Q5.27 - * sofm_exp_int32() output is Q9.23, while y0 is Q12.20 - */ - y0 = AE_SRAI32R(sofm_exp_int32(xs), 3); - y = SOFM_EXP_ONE_Q20; +/* Decibels to linear conversion: The function uses exp() to calculate + * the linear value. The argument is multiplied by log(10)/20 to + * calculate equivalent of 10^(db/20). + * + * The error in conversion is less than 0.1 dB for -89..+66 dB range. Do not + * use the code for argument less than -100 dB. The code simply returns zero + * as linear value for such very small value. + * + * Input is Q8.24 (max 128.0) + * output is Q12.20 (max 2048.0) + */ - /* AE multiply returns Q41 from Q20 * Q20. To get Q20 it need to be - * shifted right by 21. Since the used round instruction is aligned - * to the high 32 bits it is shifted instead left by 32 - 21 = 11: - */ - for (i = 0; i < n; i++) { - p = AE_SLAI64S(AE_MULF32S_LL(y, y0), 11); - y = AE_ROUND32F64SASYM(p); - } +int32_t sofm_db2lin_fixed(int32_t db) +{ + int32_t arg; + + if (db > SOFM_DB2LIN_INPUT_MAX) + return INT32_MAX; - return y; + /* Q8.24 x Q5.27, result needs to be Q5.27 */ + arg = (int32_t)Q_MULTSR_32X32((int64_t)db, SOFM_EXP_LOG10_DIV20_Q27, 24, 27, 27); + return sofm_exp_fixed(arg); } -EXPORT_SYMBOL(sofm_exp_fixed); +EXPORT_SYMBOL(sofm_db2lin_fixed); #endif diff --git a/test/cmocka/src/math/arithmetic/exponential.c b/test/cmocka/src/math/arithmetic/exponential.c index f35299fd3f1b..5c378c971809 100644 --- a/test/cmocka/src/math/arithmetic/exponential.c +++ b/test/cmocka/src/math/arithmetic/exponential.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: BSD-3-Clause /* - * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2022-2025 Intel Corporation. * * Author: Shriram Shastry + * Seppo Ingalsuo */ #include @@ -22,110 +23,143 @@ #include #include -#define ULP_TOLERANCE 5.60032793 -#define ULP_SCALE 0.0000999999999749 +#define ULP_TOLERANCE 2.0e-6 +#define ULP_SCALE 1.0 #define NUMTESTSAMPLES 256 -#define NUMTESTSAMPLES_MIDDLE_TEST2 100 -#define NUMTESTSAMPLES_SMALL_TEST2 100 -#define NUMTESTSAMPLES_LARGE_TEST2 100 -#define ABS_DELTA_TOLERANCE_TEST1 1.2e-2 -#define REL_DELTA_TOLERANCE_TEST1 2.8e-4 -#define ABS_DELTA_TOLERANCE_TEST2 1.7e-6 -#define REL_DELTA_TOLERANCE_TEST2 3.7e-2 -#define ABS_DELTA_TOLERANCE_TEST3 1.2e-1 -#define REL_DELTA_TOLERANCE_TEST3 7.7e-5 +#define NUMTESTSAMPLES_TEST2 100 +#define ABS_DELTA_TOLERANCE_TEST2 2.0e-6 +#define REL_DELTA_TOLERANCE_TEST2 1000.0 /* rel. error is large with values near zero */ +#define NUMTESTSAMPLES_TEST3 100 +#define ABS_DELTA_TOLERANCE_TEST3 2.0e-6 +#define REL_DELTA_TOLERANCE_TEST3 10.0e-2 #define SOFM_EXP_FIXED_ARG_MIN -11.5 #define SOFM_EXP_FIXED_ARG_MAX 7.6245 -/* testvector in Q4.28, -5 to +5 */ -/* - * Arguments :double a - * double b - * double *r - * int32_t *b_i - * Return Type :void +#define NUMTESTSAMPLES_TEST4 100 +#define ABS_DELTA_TOLERANCE_TEST4 2.5e-5 +#define REL_DELTA_TOLERANCE_TEST4 1000.0 /* rel. error is large with values near zero */ + +/** + * Saturates input to 32 bits + * @param x Input value + * @return Saturated output value + */ +static int32_t saturate32(int64_t x) +{ + if (x < INT32_MIN) + return INT32_MIN; + else if (x > INT32_MAX) + return INT32_MAX; + + return x; +} + +/** + * Generates linearly spaced values for a vector with end points and number points in + * desired fractional Q-format for 32 bit integer. If the test values exceed int32_t + * range, the values are saturated to INT32_MIN to INT32_MAX range. + * + * @param a First value of test vector + * @param b Last value of test vector + * @param step_count Number of values in vector + * @param point Calculate n-th point of vector 0 .. step_count - 1 + * @param qformat Number of fractional bits y in Qx.y format + * @param fout Pointer to calculated test vector value, double + * @param iout Pointer to calculated test vector value, int32_t */ -static void gen_exp_testvector(double a, double b, double *r, int32_t *b_i) +static void gen_testvector_linspace_int32(double a, double b, int step_count, int point, + int qformat, double *fout, int32_t *iout) { - double a2; - double b2; - - *r = a + rand() % (int32_t)(b - a + 1); - a2 = *r * 268435456; - b2 = fabs(a2); - if (b2 < 4503599627370496LL) - a2 = floor(a2 + 0.5); - - if (a2 < 2147483648LL) - *b_i = (a2 >= -2147483648LL) ? a2 : INT32_MIN; - else - *b_i = INT32_MAX; + double fstep = (b - a) / (step_count - 1); + double fvalue = a + fstep * point; + int64_t itmp; + + itmp = (int64_t)round(fvalue * (double)(1 << qformat)); + *iout = saturate32(itmp); + *fout = (double)*iout / (1 << qformat); + return; } -static void test_function_sofm_exp_int32(void **state) +/** + * Test for sofm_exp_approx() + * @param state Cmocka internal state + */ +static void test_function_sofm_exp_approx(void **state) { (void)state; int32_t accum; int i; double a_i; - double max_ulp; - double a_tmp = -5.0123456789; - double b_tmp = 5.0123456789; - double r; + double max_ulp = 0; + double ulp; + double a_tmp = -8; + double b_tmp = 8; int32_t b_i; - srand((unsigned int)time(NULL)); for (i = 0; i < NUMTESTSAMPLES; i++) { - gen_exp_testvector(a_tmp, b_tmp, &r, &b_i); - a_i = (double)b_i / (1 << 28); + gen_testvector_linspace_int32(a_tmp, b_tmp, NUMTESTSAMPLES, i, 28, &a_i, &b_i); + accum = sofm_exp_approx(b_i); + ulp = fabs(exp(a_i) - (double)accum / (1 << 19)) / ULP_SCALE; + if (ulp > max_ulp) + max_ulp = ulp; - accum = sofm_exp_int32(b_i); - max_ulp = fabs(exp(a_i) - (double)accum / (1 << 23)) / ULP_SCALE; - - if (max_ulp > ULP_TOLERANCE) { + if (ulp > ULP_TOLERANCE) { printf("%s: ULP for %.16f: value = %.16f, Exp = %.16f\n", __func__, - max_ulp, (double)b_i / (1 << 28), (double)accum / (1 << 23)); - assert_true(max_ulp <= ULP_TOLERANCE); + ulp, (double)b_i / (1 << 28), (double)accum / (1 << 19)); + assert_true(false); } } + printf("%s: Worst-case ULP: %g ULP_SCALE %g\n", __func__, max_ulp, ULP_SCALE); } -static void gen_exp_testvector_linspace_q27(double a, double b, int step_count, - int point, int32_t *value) -{ - double fstep = (b - a) / (step_count - 1); - double fvalue = a + fstep * point; - - *value = (int32_t)round(fvalue * 134217728.0); -} -static double ref_exp(double value) +/** + * Calculate reference exponent value + * @param x Input value + * @param qformat Fractional bits y in Qx.y format + * @return Saturated exponent value to match fractional format + */ +static double ref_exp(double x, int qformat) { - if (value < SOFM_EXP_FIXED_ARG_MIN) - return 0.0; - else if (value > SOFM_EXP_FIXED_ARG_MAX) - return 2048.0; - else - return exp(value); + double yf; + int64_t yi; + + yf = exp(x); + yi = yf * (1 << qformat); + if (yi > INT32_MAX) + yi = INT32_MAX; + else if (yi < INT32_MIN) + yi = INT32_MIN; + + yf = (double)yi / (1 << qformat); + return yf; } +/** + * Calculates test exponent function and compares result to reference exponent. + * @param ivalue Fractional format input value Q5.27 + * @param iexp_value Fractional format output value Q12.20 + * @param abs_delta_max Calculated absolute error + * @param rel_delta_max Calculated relative error + * @param abs_delta_tolerance Tolerance for absolute error + * @param rel_delta_tolerance Tolerance for relative error + */ static void test_exp_with_input_value(int32_t ivalue, int32_t *iexp_value, double *abs_delta_max, double *rel_delta_max, double abs_delta_tolerance, double rel_delta_tolerance) { double fvalue, fexp_value, ref_exp_value; double rel_delta, abs_delta; + double eps = 1e-9; *iexp_value = sofm_exp_fixed(ivalue); fvalue = (double)ivalue / (1 << 27); /* Q5.27 */ fexp_value = (double)*iexp_value / (1 << 20); /* Q12.20 */ - /* printf("val = %10.6f, exp = %12.6f\n", fvalue, fexp_value); */ - - ref_exp_value = ref_exp(fvalue); + ref_exp_value = ref_exp(fvalue, 20); abs_delta = fabs(ref_exp_value - fexp_value); - rel_delta = abs_delta / ref_exp_value; + rel_delta = abs_delta / (ref_exp_value + eps); if (abs_delta > *abs_delta_max) *abs_delta_max = abs_delta; @@ -145,54 +179,110 @@ static void test_exp_with_input_value(int32_t ivalue, int32_t *iexp_value, } } +/** + * Test for sofm_exp_fixed() + * @param state Cmocka internal state + */ static void test_function_sofm_exp_fixed(void **state) { (void)state; - double rel_delta_max, abs_delta_max; + double tmp; int32_t ivalue, iexp_value; int i; + /* Test max int32_t range with coarse grid */ rel_delta_max = 0; abs_delta_max = 0; - for (i = 0; i < NUMTESTSAMPLES_MIDDLE_TEST2; i++) { - gen_exp_testvector_linspace_q27(-5, 5, NUMTESTSAMPLES_MIDDLE_TEST2, i, &ivalue); + for (i = 0; i < NUMTESTSAMPLES_TEST2; i++) { + gen_testvector_linspace_int32(-16, 16, NUMTESTSAMPLES_TEST2, i, 27, &tmp, &ivalue); test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max, - ABS_DELTA_TOLERANCE_TEST1, REL_DELTA_TOLERANCE_TEST1); - + ABS_DELTA_TOLERANCE_TEST2, REL_DELTA_TOLERANCE_TEST2); } - printf("%s: Absolute max error was %.6e (middle).\n", __func__, abs_delta_max); - printf("%s: Relative max error was %.6e (middle).\n", __func__, rel_delta_max); + printf("%s: Absolute max error was %.6e (max range).\n", __func__, abs_delta_max); + printf("%s: Relative max error was %.6e (max range).\n", __func__, rel_delta_max); + /* Test max int32_t middle range with fine grid */ rel_delta_max = 0; abs_delta_max = 0; - for (i = 0; i < NUMTESTSAMPLES_SMALL_TEST2; i++) { - gen_exp_testvector_linspace_q27(-16, -5, NUMTESTSAMPLES_SMALL_TEST2, i, &ivalue); + for (i = 0; i < NUMTESTSAMPLES_TEST3; i++) { + gen_testvector_linspace_int32(SOFM_EXP_FIXED_ARG_MIN, SOFM_EXP_FIXED_ARG_MAX, + NUMTESTSAMPLES_TEST3, i, 27, &tmp, &ivalue); test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max, - ABS_DELTA_TOLERANCE_TEST2, REL_DELTA_TOLERANCE_TEST2); + ABS_DELTA_TOLERANCE_TEST3, REL_DELTA_TOLERANCE_TEST3); } - printf("%s: Absolute max error was %.6e (small).\n", __func__, abs_delta_max); - printf("%s: Relative max error was %.6e (small).\n", __func__, rel_delta_max); + printf("%s: Absolute max error was %.6e (middle).\n", __func__, abs_delta_max); + printf("%s: Relative max error was %.6e (middle).\n", __func__, rel_delta_max); +} + +/** + * Reference function for dB to linear conversion + * @param x Input value + * @param qformat Fractional bits y in Qx.y format for saturation + * @return Saturated linear value + */ +static double ref_db2lin(double x, int qformat) +{ + double fref; + int64_t iref; + + fref = pow(10, x / 20); + iref = fref * (1 << qformat); + return (double)saturate32(iref) / (1 << qformat); +} + +/** + * Test for sofm_db2lin_fixed() + * @param state Cmocka internal state + */ +static void test_function_sofm_db2lin_fixed(void **state) +{ + (void)state; + double abs_delta, rel_delta, abs_delta_max, rel_delta_max; + double fin, fout, fref; + double eps = 1e-9; + int32_t iin, iout; + int i; rel_delta_max = 0; abs_delta_max = 0; - for (i = 0; i < NUMTESTSAMPLES_LARGE_TEST2; i++) { - gen_exp_testvector_linspace_q27(5, 15.9999, NUMTESTSAMPLES_LARGE_TEST2, i, &ivalue); - test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max, - ABS_DELTA_TOLERANCE_TEST3, REL_DELTA_TOLERANCE_TEST3); - } + for (i = 0; i < NUMTESTSAMPLES_TEST2; i++) { + gen_testvector_linspace_int32(-128, 128, NUMTESTSAMPLES_TEST4, i, 24, &fin, &iin); + iout = sofm_db2lin_fixed(iin); + fout = (double)iout / (1 << 20); + fref = ref_db2lin(fin, 20); + abs_delta = fabs(fref - fout); + rel_delta = abs_delta / (fref + eps); + if (abs_delta > abs_delta_max) + abs_delta_max = abs_delta; + + if (rel_delta > rel_delta_max) + rel_delta_max = rel_delta; + + if (abs_delta > ABS_DELTA_TOLERANCE_TEST4) { + printf("%s: Absolute error %g exceeds limit %g, input %g output %g.\n", + __func__, abs_delta, ABS_DELTA_TOLERANCE_TEST4, fin, fout); + assert_true(false); + } - printf("%s: Absolute max error was %.6e (large).\n", __func__, abs_delta_max); - printf("%s: Relative max error was %.6e (large).\n", __func__, rel_delta_max); + if (rel_delta > REL_DELTA_TOLERANCE_TEST4) { + printf("%s: Relative error %g exceeds limit %g, input %g output %g.\n", + __func__, rel_delta, REL_DELTA_TOLERANCE_TEST4, fin, fout); + assert_true(false); + } + } + printf("%s: Absolute max error was %.6e.\n", __func__, abs_delta_max); + printf("%s: Relative max error was %.6e.\n", __func__, rel_delta_max); } int main(void) { const struct CMUnitTest tests[] = { - cmocka_unit_test(test_function_sofm_exp_int32), + cmocka_unit_test(test_function_sofm_exp_approx), cmocka_unit_test(test_function_sofm_exp_fixed), + cmocka_unit_test(test_function_sofm_db2lin_fixed), }; cmocka_set_message_output(CM_OUTPUT_TAP);