From 31636d1656778eea5701efa2b55c27149af78294 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Fri, 17 Jan 2025 20:01:49 +0200
Subject: [PATCH] Math: Replace exp() function with more accurate and fast
 version

The function sofm_exp_int32() if replaced with new sofm_exp_approx()
function. It's not a direct replacement so the name is
changed. The input range changes from +/- 5 to +/- 8.

The wrapper function sofm_exp_fixed() with input range -16 .. +7.6
is updated to use the new function with a simpler range reduction
for large negative input values. All current exponent function
usage in SOF components is for the wrapper function that remains
compatible.

The function sofm_db2lin_fixed() is updated to handle larger
negative input decibels values made possible by the new more
accurate exponent function.

The accuracy in exponent function for input ranges from -8 to +8 or
-16 to 7.6 depending on used exponent function improves from
absolute error of 1 to 1000 ppm to constant less than 1 ppm.

The cycles count for exponent function calculate decreases from
about 369 max to 78 max on MTL platform for HiFi code version with
-O2 or -O3 optimization. The generic C version on MTL platform is
max 88 cycles.

The DRC component is a heavy user of the exponent function. The
saving in MTL build of DRC is 10.5 MCPS, it drops from 22.17 to
11.70 with profiler run:

"scripts/sof-testbench-helper.sh -x -m drc -p profile.txt"

The saving in MTL build of multiband DRC is 30.2 MCPS, it drops from
127.95 to 97.78 with profiler run:

"scripts/sof-testbench-helper.sh -x -m drc_multiband -p profile.txt"

This patch also updates the cmocka test. The test functions are
changed due to changed function name, changed achievable accuracy.
Also the random values test points vector is replaced fixed a
fixed linearly spaced vector for repeatable test and risk for
random failures.

Signed-off-by: Shriram Shastry <malladi.sastry@linux.intel.com>
Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
---
 src/include/sof/math/exp_fcn.h                |  61 +--
 src/math/exp_fcn.c                            | 363 ++++++--------
 src/math/exp_fcn_hifi.c                       | 469 +++++++-----------
 test/cmocka/src/math/arithmetic/exponential.c | 262 ++++++----
 4 files changed, 527 insertions(+), 628 deletions(-)

diff --git a/src/include/sof/math/exp_fcn.h b/src/include/sof/math/exp_fcn.h
index 425f400b23de..0aa3dbead623 100644
--- a/src/include/sof/math/exp_fcn.h
+++ b/src/include/sof/math/exp_fcn.h
@@ -1,8 +1,9 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
- * Copyright(c) 2022 Intel Corporation. All rights reserved.
+ * Copyright(c) 2022-2025 Intel Corporation.
  *
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+ *         Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  *
  */
 #ifndef __SOFM_EXP_FCN_H__
@@ -26,40 +27,40 @@
 
 #endif
 
-/* TODO: Is there a MCPS difference */
-#define USING_QCONVERT	1
+/* Q5.27 int32(round(log((2^31 - 1)/2^20) * 2^27)) */
+#define SOFM_EXP_FIXED_INPUT_MAX	1023359037
 
-#if USING_QCONVERT
+/* Q8.24 int32(round((log((2^31 - 1)/2^20) * 20 / log(10)) * 2^24)) */
+#define SOFM_DB2LIN_INPUT_MAX		1111097957
 
-#include <sof/audio/format.h>
-#define SOFM_EXP_FIXED_INPUT_MIN	Q_CONVERT_FLOAT(-11.5, 27)		/* Q5.27 */
-#define SOFM_EXP_FIXED_INPUT_MAX	Q_CONVERT_FLOAT(7.6245, 27)		/* Q5.27 */
-#define SOFM_EXP_TWO_Q27		Q_CONVERT_FLOAT(2.0, 27)		/* Q5.27 */
-#define SOFM_EXP_MINUS_TWO_Q27		Q_CONVERT_FLOAT(-2.0, 27)		/* Q5.27 */
-#define SOFM_EXP_ONE_Q20		Q_CONVERT_FLOAT(1.0, 20)		/* Q12.20 */
-#define SOFM_EXP_MINUS_100_Q24		Q_CONVERT_FLOAT(-100.0, 24)		/* Q8.24 */
-#define SOFM_EXP_LOG10_DIV20_Q27	Q_CONVERT_FLOAT(0.1151292546, 27)	/* Q5.27 */
-
-#else
-
-#define SOFM_EXP_FIXED_INPUT_MIN	-1543503872	/* Q_CONVERT_FLOAT(-11.5, 27) */
-#define SOFM_EXP_FIXED_INPUT_MAX	 1023343067	/* Q_CONVERT_FLOAT(7.6245, 27) */
-#define SOFM_EXP_TWO_Q27		 268435456	/* Q_CONVERT_FLOAT(2.0, 27) */
-#define SOFM_EXP_MINUS_TWO_Q27		-268435456	/* Q_CONVERT_FLOAT(-2.0, 27) */
-#define SOFM_EXP_ONE_Q20		 1048576	/* Q_CONVERT_FLOAT(1.0, 20) */
-#define SOFM_EXP_MINUS_100_Q24		-1677721600	/* Q_CONVERT_FLOAT(-100.0, 24) */
-#define SOFM_EXP_LOG10_DIV20_Q27	 15452387	/* Q_CONVERT_FLOAT(0.1151292546, 27) */
+/**
+ * Calculates exponent function exp(x) = e^x with accurate and efficient technique that
+ * includes range reduction operations, approximation with Taylor series, and reconstruction
+ * operations to compensate the range reductions.
+ * @param	x	The input argument as Q4.28 from -8 to +8
+ * @return		The calculated e^x value as Q13.19 from 3.3546e-04 to 2981.0
+ */
+int32_t sofm_exp_approx(int32_t x);
 
-#endif
+/**
+ * Calculated exponent function exp(x) = e^x by using sofm_exp_approx(). The input range for
+ * large arguments is internally reduced to -8 to +8 with rule exp(x) = exp(x/2) * exp(x/2)
+ * and reconstructed back. This function is essentially a wrapper for compatibility with
+ * existing usage of exp() function and Q-format choice. Note that the return value saturates
+ * to INT32_MAX with input arguments larger than 7.6246.
+ * @param	x	The input argument as Q5.27 from -16 to +16
+ * @return		The calculated e^x value as Q12.20
+ */
+int32_t sofm_exp_fixed(int32_t x);
 
-#define SOFM_EXP_BIT_MASK_LOW_Q27P5 0x0000000008000000
-#define SOFM_EXP_BIT_MASK_Q62P2 0x4000000000000000LL
-#define SOFM_EXP_QUOTIENT_SCALE 0x40000000
-#define SOFM_EXP_TERMS_Q23P9 0x800000
-#define SOFM_EXP_LSHIFT_BITS 0x2000
 
-int32_t sofm_exp_int32(int32_t x);
-int32_t sofm_exp_fixed(int32_t x);
+/**
+ * Converts a decibels value to liner amplitude lin = 10^(db/20) value with optimized
+ * equation exp(db * log(10)/20). Note that due to range limitation of sofm_exp_fixed()
+ * the output saturates to maximum with about +66 dB input.
+ * @param	db	Decibels value in Q8.24 format, from -128 to +66.226
+ * @return		Linear value in Q12.20 format, from 3.9811e-07 to 2048
+ */
 int32_t sofm_db2lin_fixed(int32_t db);
 
 #endif /* __SOFM_EXP_FCN_H__ */
diff --git a/src/math/exp_fcn.c b/src/math/exp_fcn.c
index 6ada84202212..9c194b80de86 100644
--- a/src/math/exp_fcn.c
+++ b/src/math/exp_fcn.c
@@ -1,228 +1,170 @@
 // SPDX-License-Identifier: BSD-3-Clause
 /*
- * Copyright(c) 2022 Intel Corporation. All rights reserved.
+ * Copyright(c) 2022-2025 Intel Corporation.
  *
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+ *         Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  *
  */
 
 #include <sof/audio/format.h>
 #include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
-#include <sof/common.h>
-#include <rtos/bit.h>
 #include <rtos/symbol.h>
-#include <stdbool.h>
+#include <sof/common.h>
 #include <stdint.h>
-#include <stddef.h>
-#include <errno.h>
 
 #if defined(EXPONENTIAL_GENERIC)
 
-#define SOFM_CONVERG_ERROR 28823037607936LL	// error smaller than 1e-4,1/2 ^ -44.7122876200884
+#define SOFM_EXP_ONE_OVER_LOG2_Q30	 1549082005	/* Q2.30 int32(round(1/log(2) * 2^30)) */
+#define SOFM_EXP_LOG2_Q31		 1488522236	/* Q1.31 int32(round(log(2) * 2^31)) */
+#define SOFM_EXP_FIXED_INPUT_MINUS8	-1073741824	/* Q5.27 int32(-8 * 2^27) */
+#define SOFM_EXP_FIXED_INPUT_PLUS8	 1073741823	/* Q5.27 int32(8 * 2^27) */
+#define SOFM_EXP_LOG10_DIV20_Q27	 15452387	/* Q5.27 int32(round(log(10)/20*2^27)) */
 
-/* inv multiplication lookup table */
-/* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */
-static const int64_t exp_iv_ilookup[19] = {
-	4611686018427387904LL,
-	1537228672809129301LL,
-	384307168202282325LL,
-	76861433640456465LL,
-	12810238940076077LL,
-	1830034134296582LL,
-	228754266787072LL,
-	25417140754119LL,
-	2541714075411LL,
-	231064915946LL,
-	19255409662LL,
-	1481185358LL,
-	105798954LL,
-	7053264LL,
-	440829LL,
-	25931LL,
-	1441LL,
-	76LL,
-	4LL
-};
-
-/* Function Declarations */
-/*
- * Arguments	: int64_t in_0
- *		  int64_t in_1
- *		  uint64_t *ptroutbitshi
- *		  uint64_t *ptroutbitslo
- * Multiplication of two signed int64 numbers
- * Return Type	: void
- * Description:Perform element-wise multiplication on in_0 and in_1
- * while keeping the required product word length and fractional
- * length in mind. mul_s64 function divide the 64-bit quantities
- * into two 32-bit words, multiply the low words to produce the
- * lowest and second-lowest words in the result, then both pairs
- * of low and high words from different numbers to produce the
- * second and third lowest words in the result, and finally both
- * high words to produce the two highest words in the outcome.
- * Add them all up, taking carry into consideration.
- *
- * The 64 x 64 bit multiplication of operands in_0 and in_1 is
- * shown in the image below. The 64-bit operand in_0, in_1 is
- * represented by the notation in0_H, in1_H for the top 32 bits
- * and in0_L, in1_L for the bottom 32 bits.
- *
- *				in0_H : in0_L
- *			x	in1_H : in1_L
- *			---------------------
- *	P0			in0_L x in1_L
- *	P1		in0_H x in1_L		64 bit inner multiplication
- *	P2		in0_L x in1_H		64 bit inner multiplication
- *	P3	in0_H x in1_H
- *			--------------------
- *			[64 x 64 bit multiplication] sum of inner products
- * All combinations are multiplied by one another and then added.
- * Each inner product is moved into its proper power location, given the names
- * of the inner products, redoing the addition where 000 represents 32 zero
- * bits. The inner products can be added together in 64 bit addition. The sum
- * of two 64-bit numbers yields a 65-bit output.
- *		   (P0H:P0L)
- *		P1H(P1L:000)
- *		P2H(P2L:000)
- *	P3H:P3L(000:000)
- *	.......(aaa:P0L)
- * By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0.
- * The partial result is then multiplied by P2L:000.
- * We call it CRY1 because it has the potential to carry again.
- *	(CRY0 + CRY1)P0H:P0L
- *	(	 P1H)P1L:000
- *	(	 P2H)P2L:000
- *	(P3H:	 P3L)000:000
- *	--------------------
- *	(ccc:bbb)aaa:P0L
- * P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1. This increase will
- * not result in an overflow.
+/* The table contains exponents of value v, where values of v
+ * are the 3 bit 2's complement signed values presented by bits
+ * of index 0..7.
  *
+ * v = [(0:3)/8 (-4:-1)/8];
+ * uint32(round(exp(v) * 2^31))
  */
-static inline void mul_s64(int64_t in_0, int64_t in_1, uint64_t *ptroutbitshi,
-			   uint64_t *ptroutbitslo)
-{
-	uint64_t absin0 = ABS(in_0);
-	uint64_t absin1 = ABS(in_1);
-	uint64_t in0hi = absin0 >> 32;
-	uint64_t in0lo = absin0 & UINT32_MAX;
-	uint64_t in1hi = absin1 >> 32;
-	uint64_t prodhilo;
-	uint64_t prodlohi;
-
-	absin0 = absin1 & UINT32_MAX;
-	/* multiplication */
-	prodhilo = in0hi * absin0;
-	prodlohi = in0lo * in1hi;
-	absin0 *= in0lo;
-
-	in0lo = absin0 + (prodlohi << 32);
-	absin1 = in0lo < absin0 ? 1 : 0;
-	absin0 = in0lo;
-	/* shift and add */
-	in0lo += prodhilo << 32;
-	if (in0lo < absin0)
-		absin1++;
-	/* carry */
-	absin0 = absin1 + in0hi * in1hi + (prodlohi >> 32) + (prodhilo >> 32);
-	/* 2's complement */
-	if (in_0  && in_1  && (in_0 > 0) != (in_1 > 0)) {
-		/* bit inversion */
-		absin0 = ~absin0;
-		in0lo = ~in0lo;
-		/* add to low byte */
-		in0lo++;
-		if (!in0lo)
-			absin0++;
-	}
-	/* pointer- output high and low bytes */
-	*ptroutbitshi = absin0;
-	*ptroutbitslo = in0lo;
-}
+static const uint32_t sofm_exp_3bit_lookup[8] = {
+	2147483648, 2433417774, 2757423586, 3124570271,
+	1302514674, 1475942488, 1672461947, 1895147668
+};
 
-/*
- * Arguments	: int64_t a
- *		  int64_t b
- * Return Type	: int64_t
+/* Taylor polynomial coefficients for x^3..x^6, calculated
+ * uint32(round(1 ./ factorial(3:6) * 2^32))
  */
-static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
-{
-	uint64_t u64_rhi;
-	uint64_t u64_rlo;
-
-	mul_s64(a, b, &u64_rhi, &u64_rlo);
-	const bool roundup = (u64_rlo & SOFM_EXP_BIT_MASK_LOW_Q27P5) != 0;
-
-	u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0);
-	return u64_rlo;
-}
+static const uint32_t sofm_exp_taylor_coeffs[4] = {
+	715827883, 178956971, 35791394, 5965232
+};
 
-/* function f(x) = a^x, x is variable and a is base
+/* function f(x) = e^x
  *
- * Arguments	: int32_t x(Q4.28)
- * input range -5 to 5
+ * Arguments	: int32_t x (Q4.28)
+ * input range -8 to 8
  *
- * Return Type	: int32_t ts(Q9.23)
- * output range 0.0067465305 to 148.4131488800
- *+------------------+-----------------+--------+--------+
- *| x		     | ts (returntype) |   x    |  ts    |
- *+----+-----+-------+----+----+-------+--------+--------+
- *|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
- *+----+-----+-------+----+----+-------+--------+--------+
- *| 32 | 28  |  1    | 32 | 23 |   0   | 4.28   | 9.23   |
- *+------------------+-----------------+--------+--------+
+ * Return Type	: int32_t (Q13.19)
+ * output range 3.3546e-04 to 2981.0
  */
-int32_t sofm_exp_int32(int32_t x)
+int32_t sofm_exp_approx(int32_t x)
 {
-	uint64_t ou0Hi;
-	uint64_t ou0Lo;
-	int64_t qt;
-	int32_t b_n;
-	int32_t ts =  SOFM_EXP_TERMS_Q23P9; /* Q23.9 */
-	int64_t dividend = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */
-	static const int32_t i_emin = -1342177280; /* Q4.28 */
-	static const int32_t o_emin = 56601; /* Q9.23 */
-	static const int32_t i_emax = 1342177280; /* Q4.28 */
-	static const int32_t o_emax = 1244979733; /* Q9.23 */
+	uint32_t taylor_first_2;
+	uint32_t exp_a_b_32bit;
+	uint32_t taylor_extra;
+	uint32_t rnd_one;
+	uint32_t b_f32;
+	uint32_t b_pow;
+	uint32_t exp_a;
+	uint32_t exp_b;
+	uint32_t term;
+	uint32_t b;
+	int32_t x_times_one_over_log2;
+	int32_t e_times_log2;
+	int32_t x_32bit;
+	int32_t y_32bit;
+	int32_t e;
+	int32_t r;
+	int shift_value;
+	int a;
+
+	/* -------------------------------------------------------------------------
+	 * FIRST RANGE REDUCTION ---------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 */
+	x_times_one_over_log2 = Q_MULTSR_32X32((int64_t)x, SOFM_EXP_ONE_OVER_LOG2_Q30, 28, 30, 26);
+	e = Q_SHIFT_RND(x_times_one_over_log2, 26, 0);
 
-	/* Out of range input(x>5, x<-5), */
-	/* return clipped value x > 5= e^5, and x<-5 = e^-5 */
-	if (x < i_emin)
-		return o_emin; /* 0.0067473649978638 in Q9.23 */
+	/* Q6.31, but we only keep the bottom 31 bits */
+	e_times_log2 = (uint32_t)e * SOFM_EXP_LOG2_Q31;
 
-	if (x > i_emax)
-		return o_emax; /* 148.4131494760513306 in Q9.23 */
+	/* -------------------------------------------------------------------------
+	 * SECOND RANGE REDUCTION --------------------------------------------------
+	 * y = a + b
+	 * -------------------------------------------------------------------------
+	 */
+	x_32bit = (int32_t)((uint32_t)x << 3);	/* S4.31 */
+	y_32bit = x_32bit - e_times_log2;	/* S0.31, in ~[-0.34, +0.34] */
+	a = (y_32bit >> 28) & 7;		/* just the 3 top bits of "y" */
+	b = y_32bit & 0x0FFFFFFF;		/* bottom 31-3 = 28 bits. format U-3.31 */
+	exp_a = sofm_exp_3bit_lookup[a];
+	b_f32 = (b << 1) | 0x4;			/* U0.32, align b on 32-bits of fraction */
+
+	/* Taylor approximation : base part + iterations
+	 * Base part      : 1 + b + b^2/2!
+	 * Iterative part : b^3/3! + b^4/4! + b^5/5! + b^6/6!
+	 *                : Term count determined dynamically using e.
+	 *
+	 * Base part: NOTE: delay adding the "1" in "1 + b + b^2/2" until after we
+	 * add the iterative part in. This gives us one more guard bit.
+	 * NOTE: u_int32 x u_int32 => {hi, lo}. We only need {hi} for b_pow.
+	 */
+	b_pow = (uint64_t)b_f32 * b_f32 >> 32;
+	taylor_first_2 = b_f32 + (b_pow >> 1); /* 0.32 */
+	taylor_extra = 0;
+	term = 1;
+	if (e < -10)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[0] >> 32;
+	taylor_extra += term;
+	if (e < -5)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[1] >> 32;
+	taylor_extra += term;
+	if (e < 0)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[2] >> 32;
+	taylor_extra += term;
+	if (e < 6)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[3] >> 32;
+	taylor_extra += term;
+
+ITER_END:
+
+	/* Implement rounding to 31 fractional bits.. */
+	taylor_first_2 = taylor_first_2 + taylor_extra + 1;
+
+	/* Add the missing "1" for the Taylor series "1+b+b^2/2+...." */
+	exp_b = ((uint32_t)1 << 31) + (taylor_first_2 >> 1); /* U1.31 */
+
+	/* -------------------------------------------------------------------------
+	 * FIRST RECONSTRUCTION ----------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 */
 
-	/* pre-computation of 1st & 2nd terms */
-	mul_s64(dividend, SOFM_EXP_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
-	qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */
-	ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) >> 18));
-	dividend = lomul_s64_sr_sat_near(dividend, x);
-	for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) {
-		mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo);
-		qt = (ou0Hi << 45) | (ou0Lo >> 19);
+	/* U1.31 * U1.31 = U2.62 */
+	exp_a_b_32bit = (uint64_t)exp_a * exp_b >> 31;
 
-		/* sum of the remaining terms */
-		ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) ? 1 : 0));
-		dividend = lomul_s64_sr_sat_near(dividend, x);
+	/* -------------------------------------------------------------------------
+	 * SECOND RECONSTRUCTION ---------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 */
 
-		qt  = ABS(qt);
-		/* For inputs between -5 and 5, (qt < SOFM_CONVERG_ERROR) is always true */
-		if (qt < SOFM_CONVERG_ERROR)
-			break;
-	}
-	return ts;
+	/* Rounding to nearest */
+	shift_value = 12 - e;
+	rnd_one = (shift_value > 0 ? (1 << (shift_value - 1)) : 0);
+	exp_a_b_32bit += rnd_one;
+	r = (int32_t)(exp_a_b_32bit >> shift_value);
+	return r;
 }
+EXPORT_SYMBOL(sofm_exp_approx);
 
-/* Fixed point exponent function for approximate range -11.5 .. 7.6
- * that corresponds to decibels range -100 .. +66 dB.
+/* Fixed point exponent function for approximate range -16 .. 7.6
+ * that corresponds to decibels range -120 .. +66 dB.
  *
  * The functions uses rule exp(x) = exp(x/2) * exp(x/2) to reduce
- * the input argument for private small value exp() function that is
- * accurate with input range -2.0 .. +2.0. The number of possible
- * divisions by 2 is computed into variable n. The returned value is
- * exp()^(2^n).
+ * the input argument for the exponent function.
  *
  * Input  is Q5.27, -16.0 .. +16.0, but note the input range limitation
  * Output is Q12.20, 0.0 .. +2048.0
@@ -230,39 +172,24 @@ int32_t sofm_exp_int32(int32_t x)
 
 int32_t sofm_exp_fixed(int32_t x)
 {
-	int32_t xs;
-	int32_t y;
-	int32_t y0;
-	int i;
-	int n = 0;
-
-	if (x < SOFM_EXP_FIXED_INPUT_MIN)
-		return 0;
+	int32_t x0, y0, y1;
 
 	if (x > SOFM_EXP_FIXED_INPUT_MAX)
 		return INT32_MAX;
 
-	/* x is Q5.27 */
-	xs = x;
-	while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
-		xs >>= 1;
-		n++;
+	if (x < SOFM_EXP_FIXED_INPUT_MINUS8 || x > SOFM_EXP_FIXED_INPUT_PLUS8) {
+		/* Divide by 2, convert Q27 to Q28 is x as such */
+		y0 = sofm_exp_approx(x);
+		y1 = Q_MULTSR_32X32((int64_t)y0, y0, 19, 19, 20);
+		return y1;
 	}
 
-	/* sofm_exp_int32() input is Q4.28, while x1 is Q5.27
-	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
-	 */
-	y0 = Q_SHIFT_RND(sofm_exp_int32(Q_SHIFT_LEFT(xs, 27, 28)), 23, 20);
-	y = SOFM_EXP_ONE_Q20;
-	for (i = 0; i < (1 << n); i++)
-		y = (int32_t)Q_MULTSR_32X32((int64_t)y, y0, 20, 20, 20);
-
-	return y;
+	x0 = sat_int32((int64_t)x << 1);
+	y0 = sofm_exp_approx(x0);
+	return sat_int32((int64_t)y0 << 1);
 }
 EXPORT_SYMBOL(sofm_exp_fixed);
 
-#endif /* EXPONENTIAL_GENERIC */
-
 /* Decibels to linear conversion: The function uses exp() to calculate
  * the linear value. The argument is multiplied by log(10)/20 to
  * calculate equivalent of 10^(db/20).
@@ -279,11 +206,13 @@ int32_t sofm_db2lin_fixed(int32_t db)
 {
 	int32_t arg;
 
-	if (db < SOFM_EXP_MINUS_100_Q24)
-		return 0;
+	if (db > SOFM_DB2LIN_INPUT_MAX)
+		return INT32_MAX;
 
 	/* Q8.24 x Q5.27, result needs to be Q5.27 */
 	arg = (int32_t)Q_MULTSR_32X32((int64_t)db, SOFM_EXP_LOG10_DIV20_Q27, 24, 27, 27);
 	return sofm_exp_fixed(arg);
 }
 EXPORT_SYMBOL(sofm_db2lin_fixed);
+
+#endif /* EXPONENTIAL_GENERIC */
diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c
index d41071335fd9..e1722d602110 100644
--- a/src/math/exp_fcn_hifi.c
+++ b/src/math/exp_fcn_hifi.c
@@ -1,20 +1,16 @@
 // SPDX-License-Identifier: BSD-3-Clause
 /*
- *Copyright(c) 2023 Intel Corporation. All rights reserved.
+ *Copyright(c) 2023-2025 Intel Corporation.
  *
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
- *
+ *         Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  */
 
+#include <sof/audio/format.h>
 #include <sof/math/exp_fcn.h>
 #include <sof/common.h>
 #include <rtos/symbol.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
 #include <stdint.h>
-#include <stddef.h>
-#include <errno.h>
 
 #if defined(SOFM_EXPONENTIAL_HIFI3) || defined(SOFM_EXPONENTIAL_HIFI4) || \
 	    defined(SOFM_EXPONENTIAL_HIFI5)
@@ -27,330 +23,213 @@
 #include <xtensa/tie/xt_hifi3.h>
 #endif
 
-#include <xtensa/tie/xt_hifi2.h>
-#include <xtensa/tie/xt_FP.h>
-
-#define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
+#define SOFM_EXP_ONE_OVER_LOG2_Q30	 1549082005	/* Q2.30 int32(round(1/log(2) * 2^30)) */
+#define SOFM_EXP_LOG2_Q31		 1488522236	/* Q1.31 int32(round(log(2) * 2^31)) */
+#define SOFM_EXP_FIXED_INPUT_MINUS8	-1073741824	/* Q5.27 int32(-8 * 2^27) */
+#define SOFM_EXP_FIXED_INPUT_PLUS8	 1073741823	/* Q5.27 int32(8 * 2^27) */
+#define SOFM_EXP_LOG10_DIV20_Q27	 15452387	/* Q5.27 int32(round(log(10)/20*2^27)) */
 
-/*
- * Arguments	: int64_t in_0
- *		  int64_t in_1
- *		  uint64_t *ptroutbitshi
- *		  uint64_t *ptroutbitslo
- * Return Type	: void
- * Description:Perform element-wise multiplication on in_0 and in_1
- * while keeping the required product word length and fractional
- * length in mind. mul_s64 function divide the 64-bit quantities
- * into two 32-bit words,multiply the low words to produce the
- * lowest and second-lowest words in the result, then both pairs
- * of low and high words from different numbers to produce the
- * second and third lowest words in the result, and finally both
- * high words to produce the two highest words in the outcome.
- * Add them all up, taking carry into consideration.
- *
- * The 64 x 64 bit multiplication of operands in_0 and in_1 is
- * shown in the image below. The 64-bit operand in_0,in_1 is
- * represented by the notation in0_H, in1_H for the top 32 bits
- * and in0_L, in1_L for the bottom 32 bits.
- *
- *				in0_H : in0_L
- *			x	in1_H : in1_L
- *			---------------------
- *	P0			in0_L x in1_L
- *	P1		in0_H x in1_L		64 bit inner multiplication
- *	P2		in0_L x in1_H		64 bit inner multiplication
- *	P3	in0_H x in1_H
- *			--------------------
- *			[64 x 64 bit multiplication] sum of inner products
- * All combinations are multiplied by one another and then added.
- * Each inner product is moved into its proper power location.given the names
- * of the inner products, redoing the addition where 000 represents 32 zero
- * bits.The inner products can be added together in 64 bit addition.The sum
- * of two 64-bit numbers yields a 65-bit output.
- *		   (P0H:P0L)
- *		P1H(P1L:000)
- *		P2H(P2L:000)
- *	P3H:P3L(000:000)
- *	.......(aaa:P0L)
- * By combining P0H:P0L and P1L:000. This can lead to a carry, denote as CRY0.
- * The partial result is then multiplied by P2L:000.
- * We call it CRY1 because it has the potential to carry again.
- *	(CRY0 + CRY1)P0H:P0L
- *	(	 P1H)P1L:000
- *	(	 P2H)P2L:000
- *	(P3H:	 P3L)000:000
- *	--------------------
- *	(ccc:bbb)aaa:P0L
- * P1H, P2H, and P3H:P3L are added to the carry CRY0 + CRY1.This increase will
- * not result in an overflow.
+/* The table contains exponents of value v, where values of v
+ * are the 3 bit 2's complement signed values presented by bits
+ * of index 0..7.
  *
+ * v = [(0:3)/8 (-4:-1)/8];
+ * uint32(round(exp(v) * 2^31))
  */
-static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *__restrict__ ptroutbitshi,
-		    ae_int64 *__restrict__ ptroutbitslo)
-{
-	ae_int64 producthihi, producthilo, productlolo;
-	ae_int64 producthi, product_hl_lh_h, product_hl_lh_l, carry;
-
-#if (SOFM_EXPONENTIAL_HIFI4 == 1 || SOFM_EXPONENTIAL_HIFI5 == 1)
-
-	ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(in_0);
-	ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(in_1);
-
-	ae_ep ep_lolo = AE_MOVEA(0);
-	ae_ep ep_hilo = AE_MOVEA(0);
-	ae_ep ep_HL_LH = AE_MOVEA(0);
-
-	producthihi = AE_MUL32_HH(in0_32, in1_32);
-
-	/* AE_MULZAAD32USEP.HL.LH - Unsigned lower parts and signed higher 32-bit parts dual */
-	/* multiply and accumulate operation on two 32x2 operands with 72-bit output */
-	/* Input-32x32-bit(in1_32xin0_32)into 72-bit multiplication operations */
-	/* Output-lower 64 bits of the result are stored in producthilo */
-	/* Output-upper eight bits are stored in ep_hilo */
-	AE_MULZAAD32USEP_HL_LH(ep_hilo, producthilo, in1_32, in0_32);
-	productlolo = AE_MUL32U_LL(in0_32, in1_32);
-
-	product_hl_lh_h = AE_SRAI72(ep_hilo, producthilo, 32);
-	product_hl_lh_l = AE_SLAI64(producthilo, 32);
-
-	/* The AE_ADD72 procedure adds two 72-bit elements. The first 72-bit value is created */
-	/* by concatenating the MSBs and LSBs of operands ep[7:0] and d[63:0]. Similarly, the */
-	/* second value is created by concatenating bits from operands ep1[7:0] and d1[63:0]. */
-	AE_ADD72(ep_lolo, productlolo, ep_HL_LH, product_hl_lh_l);
-
-	carry = AE_SRAI72(ep_lolo, productlolo, 32);
-
-	carry = AE_SRLI64(carry, 32);
-	producthi = AE_ADD64(producthihi, carry);
-
-	producthi = AE_ADD64(producthi, product_hl_lh_h);
-	*ptroutbitslo = productlolo;
-#elif SOFM_EXPONENTIAL_HIFI3 == 1
-
-	ae_int64 producthi_1c;
-	ae_int64 producthi_2c;
-	ae_int64 productlo_2c;
-	ae_int64 productlo;
-
-	ae_int64 s0 = AE_SRLI64(in_0, 63);
-	ae_int64 s1 = AE_SRLI64(in_1, 63);
-	bool x_or = (bool)((int)(int64_t)s0 ^ (int)(int64_t)s1);
-
-	ae_int32x2 in0_32 = AE_MOVINT32X2_FROMINT64(AE_ABS64(in_0));
-	ae_int32x2 in1_32 = AE_MOVINT32X2_FROMINT64(AE_ABS64(in_1));
-
-	producthihi = AE_MUL32_HH(in0_32, in1_32);
-	producthilo = AE_MUL32U_LL(in1_32, AE_MOVINT32X2_FROMINT64
-				  ((AE_MOVINT64_FROMINT32X2(in0_32) >> 32)));
-	producthilo += AE_MUL32U_LL(AE_MOVINT32X2_FROMINT64
-				  ((AE_MOVINT64_FROMINT32X2(in1_32) >> 32)), in0_32);
-	productlolo = AE_MUL32U_LL(in0_32, in1_32);
-
-	product_hl_lh_h = AE_SRAI64(producthilo, 32);
-	product_hl_lh_l = AE_SLAI64(producthilo, 32);
-
-	productlo = AE_ADD64(productlolo, product_hl_lh_l);
-	producthi = AE_ADD64(producthihi, product_hl_lh_h);
-
-	carry = AE_ADD64(AE_SRLI64(productlolo, 1), AE_SRLI64(product_hl_lh_l, 1));
-	carry = AE_SRLI64(carry, 63);
-	producthi = AE_ADD64(producthi, carry);
-
-	producthi_1c = AE_NOT64(producthi);
-	producthi_2c = AE_NEG64(producthi);
-	productlo_2c = AE_NEG64(productlo);
-
-	if (x_or) {
-		if (productlo == (ae_int64)0) {
-			producthi = producthi_2c;
-		} else {
-			producthi = producthi_1c;
-			productlo = productlo_2c;
-		}
-	}
-	*ptroutbitslo = productlo;
-#endif //(XCHAL_HAVE_HIFI4 || XCHAL_HAVE_HIFI5)
-
-	*ptroutbitshi = producthi;
-}
+static const uint32_t sofm_exp_3bit_lookup[8] = {
+	2147483648, 2433417774, 2757423586, 3124570271,
+	1302514674, 1475942488, 1672461947, 1895147668
+};
 
-/*
- * Arguments	: int64_t a
- *		  int64_t b
- * Return Type	: int64_t
+/* Taylor polynomial coefficients for x^3..x^6, calculated
+ * uint32(round(1 ./ factorial(3:6) * 2^32))
  */
-static int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
-{
-	ae_int64 u64_chi;
-	ae_int64 u64_clo;
-	ae_int64 temp;
-
-	mul_s64(a, b, &u64_chi, &u64_clo);
-
-	ae_int64 roundup = AE_AND64(u64_clo, SOFM_EXP_BIT_MASK_LOW_Q27P5);
-
-	roundup = AE_SRLI64(roundup, 27);
-	temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28));
-
-	return AE_ADD64(temp, roundup);
-}
-
-static const int64_t onebyfact_Q63[19] = {
-		4611686018427387904LL,
-		1537228672809129301LL,
-		384307168202282325LL,
-		76861433640456465LL,
-		12810238940076077LL,
-		1830034134296582LL,
-		228754266787072LL,
-		25417140754119LL,
-		2541714075411LL,
-		231064915946LL,
-		19255409662LL,
-		1481185358LL,
-		105798954LL,
-		7053264LL,
-		440829LL,
-		25931LL,
-		1441LL,
-		76LL,
-		4LL
+static const uint32_t sofm_exp_taylor_coeffs[4] = {
+	715827883, 178956971, 35791394, 5965232
 };
 
-/* f(x) = a^x, x is variable and a is base
+/* function f(x) = e^x
  *
- * Arguments	: int32_t x(Q4.28)
- * input range -5 to 5
+ * Arguments	: int32_t x (Q4.28)
+ * input range -8 to 8
  *
- * Return Type	: int32_t ts(Q9.23)
- * output range 0.0067465305 to 148.4131488800
- *+------------------+-----------------+--------+--------+
- *| x		     | ts (returntype) |   x	|  ts	 |
- *+----+-----+-------+----+----+-------+--------+--------+
- *|WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
- *+----+-----+-------+----+----+-------+--------+--------+
- *| 32 | 28  |	1    | 32 | 23 |   0   | 4.28	| 9.23	 |
- *+------------------+-----------------+--------+--------+
+ * Return Type	: int32_t (Q13.19)
+ * output range 3.3546e-04 to 2981.0
  */
-int32_t sofm_exp_int32(int32_t x)
+int32_t sofm_exp_approx(int32_t x)
 {
-	ae_int64 outhi;
-	ae_int64 outlo;
-	ae_int64 qt;
-	ae_int64 onebyfact;
-	ae_int64 temp;
-
-	ae_int64 *ponebyfact_Q63 = (ae_int64 *)onebyfact_Q63;
-	ae_int64 ts = SOFM_EXP_TERMS_Q23P9;
-	ae_int64 mp = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
-	xtbool flag;
-	int64_t b_n;
-
-	mul_s64(mp, SOFM_EXP_BIT_MASK_Q62P2, &outhi, &outlo);
-	qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18));
-
-	temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35);
-
-	ts = AE_ADD64(ts, temp);
-
-	mp = lomul_s64_sr_sat_near(mp, (int64_t)x);
-
-	for (b_n = 0; b_n < 64;) {
-		AE_L64_IP(onebyfact, ponebyfact_Q63, 8);
+	ae_int64 p;
+	uint32_t taylor_first_2;
+	uint32_t taylor_extra;
+	uint32_t b_f32;
+	uint32_t b_pow;
+	uint32_t exp_a;
+	uint32_t exp_b;
+	uint32_t term;
+	uint32_t b;
+	int32_t x_times_one_over_log2;
+	int32_t e_times_log2;
+	int32_t x_32bit;
+	int32_t y_32bit;
+	int32_t e;
+	int32_t r;
+	int a;
+
+	//x = 843314857;
+
+	/* -------------------------------------------------------------------------
+	 * FIRST RANGE REDUCTION ---------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 * Multiply gives q28 * q30 -> q58, without shift the rounded value
+	 * would be q42 (58 - 16). For q26 result, shift right by 16 (42 - 26).
+	 */
+	p = AE_MUL32_LL(x, SOFM_EXP_ONE_OVER_LOG2_Q30);
+	x_times_one_over_log2 = AE_ROUND32F48SASYM(AE_SRAI64(p, 16));
 
-		mul_s64(mp, onebyfact, &outhi, &outlo);
-		qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19));
+	/* Shift, round to q0 */
+	e = AE_SRAI32R(x_times_one_over_log2, 26);
 
-		temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35);
-		ts = AE_ADD64(ts, temp);
 
-		mp = lomul_s64_sr_sat_near(mp, (int64_t)x);
+	/* Q6.31, but we only keep the bottom 31 bits */
+	e_times_log2 = (uint32_t)e * SOFM_EXP_LOG2_Q31;
 
-		const ae_int64 sign = AE_NEG64(qt);
+	/* -------------------------------------------------------------------------
+	 * SECOND RANGE REDUCTION --------------------------------------------------
+	 * y = a + b
+	 * -------------------------------------------------------------------------
+	 */
+	x_32bit = AE_SLAI32(x, 3);		/* S4.31, overflow to S1.31 */
+	y_32bit = x_32bit - e_times_log2;	/* S0.31, in ~[-0.34, +0.34] */
+	a = (y_32bit >> 28) & 7;		/* just the 3 top bits of "y" */
+	b = y_32bit & 0x0FFFFFFF;		/* bottom 31-3 = 28 bits. format U-3.31 */
+	exp_a = sofm_exp_3bit_lookup[a];
+	b_f32 = (b << 1) | 0x4;			/* U0.32, align b on 32-bits of fraction */
+
+	/* Taylor approximation : base part + iterations
+	 * Base part      : 1 + b + b^2/2!
+	 * Iterative part : b^3/3! + b^4/4! + b^5/5! + b^6/6!
+	 *                : Term count determined dynamically using e.
+	 *
+	 * Base part: NOTE: delay adding the "1" in "1 + b + b^2/2" until after we
+	 * add the iterative part in. This gives us one more guard bit.
+	 * NOTE: u_int32 x u_int32 => {hi, lo}. We only need {hi} for b_pow.
+	 */
+	b_pow = (uint64_t)b_f32 * b_f32 >> 32;
+	taylor_first_2 = b_f32 + (b_pow >> 1); /* 0.32 */
+	taylor_extra = 0;
+	term = 1;
+	if (e < -10)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[0] >> 32;
+	taylor_extra += term;
+	if (e < -5)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[1] >> 32;
+	taylor_extra += term;
+	if (e < 0)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[2] >> 32;
+	taylor_extra += term;
+	if (e < 6)
+		goto ITER_END;
+
+	b_pow = (uint64_t)b_f32 * b_pow >> 32;
+	term = (uint64_t)b_pow * sofm_exp_taylor_coeffs[3] >> 32;
+	taylor_extra += term;
+
+ITER_END:
+
+	/* Implement rounding to 31 fractional bits.. */
+	taylor_first_2 = taylor_first_2 + taylor_extra + 1;
+
+	/* Add the missing "1" for the Taylor series "1+b+b^2/2+...." */
+	exp_b = ((uint32_t)1 << 31) + (taylor_first_2 >> 1); /* U1.31 */
+
+	/* -------------------------------------------------------------------------
+	 * FIRST RECONSTRUCTION ----------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 */
 
-		flag = AE_LT64(qt, 0);
-		AE_MOVT64(qt, sign, flag);
+	/* U1.31 * U1.31 = U2.62 */
+	p = (ae_int64)((uint64_t)exp_a * exp_b);
 
-		if (!(qt < (ae_int64)SOFM_CONVERG_ERROR))
-			b_n++;
-		else
-			b_n = 64;
-	}
+	/* -------------------------------------------------------------------------
+	 * SECOND RECONSTRUCTION ---------------------------------------------------
+	 * -------------------------------------------------------------------------
+	 */
 
-	return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
+	/* Rounding to nearest,
+	 * using f48 round with shift value for right shift is negative
+	 * q62 to q31 shift right is -31, for round instruction shift left is +16,
+	 * compensate shift e by right shift is -12: 16 - 31 - 12 = -27
+	 */
+	p = AE_SLAA64(p, e - 27);
+	r = AE_ROUND32F48SASYM(p);
+	return r;
 }
 
-/* Fixed point exponent function for approximate range -11.5 .. 7.6
- * that corresponds to decibels range -100 .. +66 dB.
+/* Fixed point exponent function for approximate range -16 .. 7.6246.
  *
  * The functions uses rule exp(x) = exp(x/2) * exp(x/2) to reduce
- * the input argument for private small value exp() function that is
- * accurate with input range -2.0 .. +2.0. The number of possible
- * divisions by 2 is computed into variable n. The returned value is
- * exp()^(2^n).
+ * the input argument for function sofm_exp_approx() with input
+ * range from -8 to +8.
  *
  * Input  is Q5.27, -16.0 .. +16.0, but note the input range limitation
  * Output is Q12.20, 0.0 .. +2048.0
  */
-
 int32_t sofm_exp_fixed(int32_t x)
 {
-	ae_f64 p;
-	ae_int32 y0;
-	ae_int32 y;
-	ae_int32 xs;
-	int32_t n;
-	int shift;
-	int i;
-
-	if (x < SOFM_EXP_FIXED_INPUT_MIN)
-		return 0;
+	int32_t x0, y0, y1;
 
 	if (x > SOFM_EXP_FIXED_INPUT_MAX)
 		return INT32_MAX;
 
-	/* This returns number of right shifts needed to scale value x to |x| < 2.
-	 * The behavior differs slightly for positive and negative values but it
-	 * is not problem for sofm_exp_int32() function. E.g.
-	 *
-	 * x =  268435455 (1.9999999925), shift = 0
-	 * x =  268435456 (2.0000000000), shift = 1
-	 * x =  268435457 (2.0000000075), shift = 1
-	 *
-	 * x = -268435457 (-2.0000000075), shift = 1
-	 * x = -268435456 (-2.0000000000), shift = 0
-	 * x = -268435455 (-1.9999999925), shift = 0
-	 *
-	 * If the shift is zero, just return result from sofm_exp_int32() with
-	 * input Q format and output Q format adjusts.
-	 */
-	shift = (int)AE_MAX32(0, 3 - AE_NSAZ32_L(x));
-	if (!shift)
-		return AE_SRAI32R(sofm_exp_int32(AE_MOVAD32_L(AE_SLAI32S(x, 1))), 3);
+	/* No need to check for > 8, the input max is lower, about 7.6 */
+	if (x < SOFM_EXP_FIXED_INPUT_MINUS8) {
+		/* Divide by 2, convert Q27 to Q28 is x as such */
+		y0 = sofm_exp_approx(x);
+		/* Multiply gives q19 * q19 -> q38, without shift the rounded value
+		 * would be q22 (38 - 16). For q20 shift right by 2.
+		 */
+		y1 = AE_ROUND32F48SASYM(AE_SRAI64(AE_MUL32_LL(y0, y0), 2));
+		return y1;
+	}
 
-	/* Shifting x one less right to save an additional Q27 to Q28 conversion
-	 * shift for sofm_exp_int32()
-	 */
-	n = 1 << shift;
-	xs = AE_SRAA32RS(x, shift - 1);
+	x0 = AE_SLAI32S(x, 1);
+	y0 = sofm_exp_approx(x0);
+	return AE_SLAI32S(y0, 1);
+}
+EXPORT_SYMBOL(sofm_exp_fixed);
 
-	/* sofm_exp_int32() input is Q4.28, while x1 is Q5.27
-	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
-	 */
-	y0 = AE_SRAI32R(sofm_exp_int32(xs), 3);
-	y = SOFM_EXP_ONE_Q20;
+/* Decibels to linear conversion: The function uses exp() to calculate
+ * the linear value. The argument is multiplied by log(10)/20 to
+ * calculate equivalent of 10^(db/20).
+ *
+ * The error in conversion is less than 0.1 dB for -89..+66 dB range. Do not
+ * use the code for argument less than -100 dB. The code simply returns zero
+ * as linear value for such very small value.
+ *
+ * Input is Q8.24 (max 128.0)
+ * output is Q12.20 (max 2048.0)
+ */
 
-	/* AE multiply returns Q41 from Q20 * Q20. To get Q20 it need to be
-	 * shifted right by 21. Since the used round instruction is aligned
-	 * to the high 32 bits it is shifted instead left by 32 - 21 = 11:
-	 */
-	for (i = 0; i < n; i++) {
-		p = AE_SLAI64S(AE_MULF32S_LL(y, y0), 11);
-		y = AE_ROUND32F64SASYM(p);
-	}
+int32_t sofm_db2lin_fixed(int32_t db)
+{
+	int32_t arg;
+
+	if (db > SOFM_DB2LIN_INPUT_MAX)
+		return INT32_MAX;
 
-	return y;
+	/* Q8.24 x Q5.27, result needs to be Q5.27 */
+	arg = (int32_t)Q_MULTSR_32X32((int64_t)db, SOFM_EXP_LOG10_DIV20_Q27, 24, 27, 27);
+	return sofm_exp_fixed(arg);
 }
-EXPORT_SYMBOL(sofm_exp_fixed);
+EXPORT_SYMBOL(sofm_db2lin_fixed);
 
 #endif
diff --git a/test/cmocka/src/math/arithmetic/exponential.c b/test/cmocka/src/math/arithmetic/exponential.c
index f35299fd3f1b..5c378c971809 100644
--- a/test/cmocka/src/math/arithmetic/exponential.c
+++ b/test/cmocka/src/math/arithmetic/exponential.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: BSD-3-Clause
 /*
- * Copyright(c) 2022 Intel Corporation. All rights reserved.
+ * Copyright(c) 2022-2025 Intel Corporation.
  *
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+ *         Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
  */
 
 #include <stdbool.h>
@@ -22,110 +23,143 @@
 #include <rtos/string.h>
 #include <sof/common.h>
 
-#define ULP_TOLERANCE 5.60032793
-#define ULP_SCALE 0.0000999999999749
+#define ULP_TOLERANCE 2.0e-6
+#define ULP_SCALE 1.0
 #define NUMTESTSAMPLES 256
 
-#define NUMTESTSAMPLES_MIDDLE_TEST2 100
-#define NUMTESTSAMPLES_SMALL_TEST2 100
-#define NUMTESTSAMPLES_LARGE_TEST2 100
-#define ABS_DELTA_TOLERANCE_TEST1 1.2e-2
-#define REL_DELTA_TOLERANCE_TEST1 2.8e-4
-#define ABS_DELTA_TOLERANCE_TEST2 1.7e-6
-#define REL_DELTA_TOLERANCE_TEST2 3.7e-2
-#define ABS_DELTA_TOLERANCE_TEST3 1.2e-1
-#define REL_DELTA_TOLERANCE_TEST3 7.7e-5
+#define NUMTESTSAMPLES_TEST2 100
+#define ABS_DELTA_TOLERANCE_TEST2 2.0e-6
+#define REL_DELTA_TOLERANCE_TEST2 1000.0 /* rel. error is large with values near zero */
+#define NUMTESTSAMPLES_TEST3 100
+#define ABS_DELTA_TOLERANCE_TEST3 2.0e-6
+#define REL_DELTA_TOLERANCE_TEST3 10.0e-2
 #define SOFM_EXP_FIXED_ARG_MIN -11.5
 #define SOFM_EXP_FIXED_ARG_MAX 7.6245
 
-/* testvector in Q4.28, -5 to +5 */
-/*
- * Arguments	:double a
- *		 double b
- *		 double *r
- *		 int32_t *b_i
- * Return Type	:void
+#define NUMTESTSAMPLES_TEST4 100
+#define ABS_DELTA_TOLERANCE_TEST4 2.5e-5
+#define REL_DELTA_TOLERANCE_TEST4 1000.0 /* rel. error is large with values near zero */
+
+/**
+ * Saturates input to 32 bits
+ * @param	x	Input value
+ * @return		Saturated output value
+ */
+static int32_t saturate32(int64_t x)
+{
+	if (x < INT32_MIN)
+		return INT32_MIN;
+	else if (x > INT32_MAX)
+		return INT32_MAX;
+
+	return x;
+}
+
+/**
+ * Generates linearly spaced values for a vector with end points and number points in
+ * desired fractional Q-format for 32 bit integer. If the test values exceed int32_t
+ * range, the values are saturated to INT32_MIN to INT32_MAX range.
+ *
+ * @param a		First value of test vector
+ * @param b		Last value of test vector
+ * @param step_count	Number of values in vector
+ * @param point		Calculate n-th point of vector 0 .. step_count - 1
+ * @param qformat	Number of fractional bits y in Qx.y format
+ * @param fout		Pointer to calculated test vector value, double
+ * @param iout		Pointer to calculated test vector value, int32_t
  */
-static void gen_exp_testvector(double a, double b, double *r, int32_t *b_i)
+static void gen_testvector_linspace_int32(double a, double b, int step_count, int point,
+					  int qformat, double *fout, int32_t *iout)
 {
-	double a2;
-	double b2;
-
-	*r = a + rand() % (int32_t)(b - a + 1);
-	a2 = *r * 268435456;
-	b2 = fabs(a2);
-	if (b2 < 4503599627370496LL)
-		a2 = floor(a2 + 0.5);
-
-	if (a2 < 2147483648LL)
-		*b_i = (a2 >= -2147483648LL) ? a2 : INT32_MIN;
-	else
-		*b_i = INT32_MAX;
+	double fstep = (b - a) / (step_count - 1);
+	double fvalue = a + fstep * point;
+	int64_t itmp;
+
+	itmp = (int64_t)round(fvalue * (double)(1 << qformat));
+	*iout = saturate32(itmp);
+	*fout = (double)*iout / (1 << qformat);
+	return;
 }
 
-static void test_function_sofm_exp_int32(void **state)
+/**
+ * Test for sofm_exp_approx()
+ * @param state		Cmocka internal state
+ */
+static void test_function_sofm_exp_approx(void **state)
 {
 	(void)state;
 
 	int32_t accum;
 	int i;
 	double a_i;
-	double max_ulp;
-	double a_tmp = -5.0123456789;
-	double b_tmp =  5.0123456789;
-	double r;
+	double max_ulp = 0;
+	double ulp;
+	double a_tmp = -8;
+	double b_tmp =  8;
 	int32_t b_i;
 
-	srand((unsigned int)time(NULL));
 	for (i = 0; i < NUMTESTSAMPLES; i++) {
-		gen_exp_testvector(a_tmp, b_tmp, &r, &b_i);
-		a_i = (double)b_i / (1 << 28);
+		gen_testvector_linspace_int32(a_tmp, b_tmp, NUMTESTSAMPLES, i, 28, &a_i, &b_i);
+		accum = sofm_exp_approx(b_i);
+		ulp = fabs(exp(a_i) - (double)accum / (1 << 19)) / ULP_SCALE;
+		if (ulp > max_ulp)
+			max_ulp = ulp;
 
-		accum = sofm_exp_int32(b_i);
-		max_ulp = fabs(exp(a_i) - (double)accum / (1 << 23)) / ULP_SCALE;
-
-		if (max_ulp > ULP_TOLERANCE) {
+		if (ulp > ULP_TOLERANCE) {
 			printf("%s: ULP for %.16f: value = %.16f, Exp = %.16f\n", __func__,
-			       max_ulp, (double)b_i / (1 << 28), (double)accum / (1 << 23));
-			assert_true(max_ulp <= ULP_TOLERANCE);
+			       ulp, (double)b_i / (1 << 28), (double)accum / (1 << 19));
+			assert_true(false);
 		}
 	}
+	printf("%s: Worst-case ULP: %g ULP_SCALE %g\n", __func__, max_ulp, ULP_SCALE);
 }
 
-static void gen_exp_testvector_linspace_q27(double a, double b, int step_count,
-					    int point, int32_t *value)
-{
-	double fstep = (b - a) / (step_count - 1);
-	double fvalue = a + fstep * point;
-
-	*value = (int32_t)round(fvalue * 134217728.0);
-}
 
-static double ref_exp(double value)
+/**
+ * Calculate reference exponent value
+ * @param x		Input value
+ * @param qformat	Fractional bits y in Qx.y format
+ * @return		Saturated exponent value to match fractional format
+ */
+static double ref_exp(double x, int qformat)
 {
-	if (value < SOFM_EXP_FIXED_ARG_MIN)
-		return 0.0;
-	else if (value > SOFM_EXP_FIXED_ARG_MAX)
-		return 2048.0;
-	else
-		return exp(value);
+	double yf;
+	int64_t yi;
+
+	yf = exp(x);
+	yi = yf * (1 << qformat);
+	if (yi > INT32_MAX)
+		yi = INT32_MAX;
+	else if (yi < INT32_MIN)
+		yi = INT32_MIN;
+
+	yf = (double)yi / (1 << qformat);
+	return yf;
 }
 
+/**
+ * Calculates test exponent function and compares result to reference exponent.
+ * @param ivalue		Fractional format input value Q5.27
+ * @param iexp_value		Fractional format output value Q12.20
+ * @param abs_delta_max		Calculated absolute error
+ * @param rel_delta_max		Calculated relative error
+ * @param abs_delta_tolerance	Tolerance for absolute error
+ * @param rel_delta_tolerance	Tolerance for relative error
+ */
 static void test_exp_with_input_value(int32_t ivalue, int32_t *iexp_value,
 				      double *abs_delta_max, double *rel_delta_max,
 				      double abs_delta_tolerance, double rel_delta_tolerance)
 {
 	double fvalue, fexp_value, ref_exp_value;
 	double rel_delta, abs_delta;
+	double eps = 1e-9;
 
 	*iexp_value = sofm_exp_fixed(ivalue);
 	fvalue = (double)ivalue / (1 << 27); /* Q5.27 */
 	fexp_value = (double)*iexp_value / (1 << 20); /* Q12.20 */
-	/* printf("val = %10.6f, exp = %12.6f\n", fvalue, fexp_value); */
-
-	ref_exp_value = ref_exp(fvalue);
+	ref_exp_value = ref_exp(fvalue, 20);
 	abs_delta = fabs(ref_exp_value - fexp_value);
-	rel_delta = abs_delta / ref_exp_value;
+	rel_delta = abs_delta / (ref_exp_value + eps);
 	if (abs_delta > *abs_delta_max)
 		*abs_delta_max = abs_delta;
 
@@ -145,54 +179,110 @@ static void test_exp_with_input_value(int32_t ivalue, int32_t *iexp_value,
 	}
 }
 
+/**
+ * Test for sofm_exp_fixed()
+ * @param state		Cmocka internal state
+ */
 static void test_function_sofm_exp_fixed(void **state)
 {
 	(void)state;
-
 	double rel_delta_max, abs_delta_max;
+	double tmp;
 	int32_t ivalue, iexp_value;
 	int i;
 
+	/* Test max int32_t range with coarse grid */
 	rel_delta_max = 0;
 	abs_delta_max = 0;
-	for (i = 0; i < NUMTESTSAMPLES_MIDDLE_TEST2; i++) {
-		gen_exp_testvector_linspace_q27(-5, 5, NUMTESTSAMPLES_MIDDLE_TEST2, i, &ivalue);
+	for (i = 0; i < NUMTESTSAMPLES_TEST2; i++) {
+		gen_testvector_linspace_int32(-16, 16, NUMTESTSAMPLES_TEST2, i, 27, &tmp, &ivalue);
 		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
-					  ABS_DELTA_TOLERANCE_TEST1, REL_DELTA_TOLERANCE_TEST1);
-
+					  ABS_DELTA_TOLERANCE_TEST2, REL_DELTA_TOLERANCE_TEST2);
 	}
 
-	printf("%s: Absolute max error was %.6e (middle).\n", __func__, abs_delta_max);
-	printf("%s: Relative max error was %.6e (middle).\n", __func__, rel_delta_max);
+	printf("%s: Absolute max error was %.6e (max range).\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e (max range).\n", __func__, rel_delta_max);
 
+	/* Test max int32_t middle range with fine grid */
 	rel_delta_max = 0;
 	abs_delta_max = 0;
-	for (i = 0; i < NUMTESTSAMPLES_SMALL_TEST2; i++) {
-		gen_exp_testvector_linspace_q27(-16, -5, NUMTESTSAMPLES_SMALL_TEST2, i, &ivalue);
+	for (i = 0; i < NUMTESTSAMPLES_TEST3; i++) {
+		gen_testvector_linspace_int32(SOFM_EXP_FIXED_ARG_MIN, SOFM_EXP_FIXED_ARG_MAX,
+					      NUMTESTSAMPLES_TEST3, i, 27, &tmp, &ivalue);
 		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
-					  ABS_DELTA_TOLERANCE_TEST2, REL_DELTA_TOLERANCE_TEST2);
+					  ABS_DELTA_TOLERANCE_TEST3, REL_DELTA_TOLERANCE_TEST3);
 	}
 
-	printf("%s: Absolute max error was %.6e (small).\n", __func__, abs_delta_max);
-	printf("%s: Relative max error was %.6e (small).\n", __func__, rel_delta_max);
+	printf("%s: Absolute max error was %.6e (middle).\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e (middle).\n", __func__, rel_delta_max);
+}
+
+/**
+ * Reference function for dB to linear conversion
+ * @param x		Input value
+ * @param qformat	Fractional bits y in Qx.y format for saturation
+ * @return		Saturated linear value
+ */
+static double ref_db2lin(double x, int qformat)
+{
+	double fref;
+	int64_t iref;
+
+	fref = pow(10, x / 20);
+	iref = fref * (1 << qformat);
+	return (double)saturate32(iref) / (1 << qformat);
+}
+
+/**
+ * Test for sofm_db2lin_fixed()
+ * @param state		Cmocka internal state
+ */
+static void test_function_sofm_db2lin_fixed(void **state)
+{
+	(void)state;
+	double abs_delta, rel_delta, abs_delta_max, rel_delta_max;
+	double fin, fout, fref;
+	double eps = 1e-9;
+	int32_t iin, iout;
+	int i;
 
 	rel_delta_max = 0;
 	abs_delta_max = 0;
-	for (i = 0; i < NUMTESTSAMPLES_LARGE_TEST2; i++) {
-		gen_exp_testvector_linspace_q27(5, 15.9999, NUMTESTSAMPLES_LARGE_TEST2, i, &ivalue);
-		test_exp_with_input_value(ivalue, &iexp_value, &abs_delta_max, &rel_delta_max,
-					  ABS_DELTA_TOLERANCE_TEST3, REL_DELTA_TOLERANCE_TEST3);
-	}
+	for (i = 0; i < NUMTESTSAMPLES_TEST2; i++) {
+		gen_testvector_linspace_int32(-128, 128, NUMTESTSAMPLES_TEST4, i, 24, &fin, &iin);
+		iout = sofm_db2lin_fixed(iin);
+		fout = (double)iout / (1 << 20);
+		fref = ref_db2lin(fin, 20);
+		abs_delta = fabs(fref - fout);
+		rel_delta = abs_delta / (fref + eps);
+		if (abs_delta > abs_delta_max)
+			abs_delta_max = abs_delta;
+
+		if (rel_delta > rel_delta_max)
+			rel_delta_max = rel_delta;
+
+		if (abs_delta > ABS_DELTA_TOLERANCE_TEST4) {
+			printf("%s: Absolute error %g exceeds limit %g, input %g output %g.\n",
+			       __func__, abs_delta, ABS_DELTA_TOLERANCE_TEST4, fin, fout);
+			assert_true(false);
+		}
 
-	printf("%s: Absolute max error was %.6e (large).\n", __func__, abs_delta_max);
-	printf("%s: Relative max error was %.6e (large).\n", __func__, rel_delta_max);
+		if (rel_delta > REL_DELTA_TOLERANCE_TEST4) {
+			printf("%s: Relative error %g exceeds limit %g, input %g output %g.\n",
+			       __func__, rel_delta, REL_DELTA_TOLERANCE_TEST4, fin, fout);
+			assert_true(false);
+		}
+	}
+	printf("%s: Absolute max error was %.6e.\n", __func__, abs_delta_max);
+	printf("%s: Relative max error was %.6e.\n", __func__, rel_delta_max);
 }
 
 int main(void)
 {
 	const struct CMUnitTest tests[] = {
-		cmocka_unit_test(test_function_sofm_exp_int32),
+		cmocka_unit_test(test_function_sofm_exp_approx),
 		cmocka_unit_test(test_function_sofm_exp_fixed),
+		cmocka_unit_test(test_function_sofm_db2lin_fixed),
 	};
 
 	cmocka_set_message_output(CM_OUTPUT_TAP);