From 34498a8a2ad6759c55af89e34a716b1a4fec5339 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 28 May 2024 14:12:21 +0530 Subject: [PATCH] SVD functions fixed point implementation --- lib_com/ivas_prot.h | 26 +- lib_com/options.h | 1 + lib_dec/ivas_dirac_output_synthesis_cov.c | 418 ++++---- lib_dec/ivas_svd_dec.c | 1159 ++++++++++++++++++++- 4 files changed, 1400 insertions(+), 204 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 11d2199a1..4bb9dcaa4 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -4823,7 +4823,9 @@ Word16 matrix_product_mant_exp( Word32 *Z_fx, /* o : resulting matrix after the matrix multiplication */ Word16 *Z_e /* o : resulting matrix after the matrix multiplication */ ); +#endif +#ifdef IVAS_FLOAT_FIXED void mat2svdMat_fx( const Word32 *mat, /* i : matrix as column ordered vector */ Word32 svdMat[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS], /* o : matrix as two-dimensional arry */ @@ -4838,7 +4840,7 @@ void svdMat2mat_fx( const Word16 nRows, /* i : number of rows of the matrix */ const Word16 mCols /* i : number of columns of the matrix */ ); -#endif +#else void mat2svdMat( const float *mat, /* i : matrix as column ordered vector */ @@ -4854,6 +4856,7 @@ void svdMat2mat( const int16_t nRows, /* i : number of rows of the matrix */ const int16_t mCols /* i : number of columns of the matrix */ ); +#endif int16_t matrix_diag_product( const float *X, /* i : left hand matrix */ @@ -4896,6 +4899,7 @@ void cmplx_matrix_square( float *imagZ /* o : imaginary part of the resulting matrix */ ); +#ifndef IVAS_FLOAT_FIXED int16_t computeMixingMatrices( const int16_t num_inputs, /* i : number of input channels */ const int16_t num_outputs, /* i : number of output channels */ @@ -4909,7 +4913,7 @@ int16_t computeMixingMatrices( float *Cr /* o : residual covariance matrix */ ); -#ifdef IVAS_FLOAT_FIXED +#else Word16 computeMixingMatrices_fx( const Word16 num_inputs, /* i : number of input channels */ const Word16 
num_outputs, /* i : number of output channels */ @@ -4931,6 +4935,7 @@ Word16 computeMixingMatrices_fx( ); #endif +#ifndef IVAS_FLOAT_FIXED int16_t computeMixingMatricesResidual( const int16_t num_outputs, /* i : number of output channels */ const float *Cx, /* i : vector containing the diagonal diffuse prototype covariance */ @@ -4940,7 +4945,7 @@ int16_t computeMixingMatricesResidual( float *mixing_matrix /* o : resulting residual mixing matrix */ ); -#ifdef IVAS_FLOAT_FIXED +#else Word16 computeMixingMatricesResidual_fx( const Word32 num_outputs, /* i : number of output channels */ const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance */ @@ -4956,6 +4961,19 @@ Word16 computeMixingMatricesResidual_fx( ); #endif +#ifdef IVAS_FLOAT_FIXED +/*! r: error or success */ +Word16 svd_fx( + Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) */ + Word16 InputMatrix_e, + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) */ + Word32 singularValues[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) */ + Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) */ + Word16 *singularValues_fx_e, + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed */ + const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed */ +); +#else /*! 
r: error or success */ int16_t svd( float InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) */ @@ -4965,6 +4983,8 @@ int16_t svd( const int16_t nChannelsL, /* i : number of rows in the matrix to be decomposed */ const int16_t nChannelsC /* i : number of columns in the matrix to be decomposed */ ); +#endif + #ifdef IVAS_FLOAT_FIXED ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx( DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters */ diff --git a/lib_com/options.h b/lib_com/options.h index 5f8aa90fe..3ebbc28ef 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -137,6 +137,7 @@ #define FIX_747_TDBWE_ENERGY_BURST #define FIX_770_DISCONTINUITIES_SW_TCX2ACELP // Fix discontinuities when switching from TCX to ACELP #define FIX_680_CNG_FRAME_BOUNDARIES_ISSUE /* Step was right shift by 2, which made the OVA wrong */ +#define NONBE_FIX_1069_SVD_TUNING /* FhG: issue 1069: tune SVD constants */ /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ diff --git a/lib_dec/ivas_dirac_output_synthesis_cov.c b/lib_dec/ivas_dirac_output_synthesis_cov.c index 21ae3761b..42d1c3ce9 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov.c @@ -1229,6 +1229,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( * compute a mixing matrix using the convariance synthesis approach *-------------------------------------------------------------------*/ +#ifndef IVAS_FLOAT_FIXED int16_t computeMixingMatrices( const int16_t num_inputs, /* i : number of input channels */ const int16_t num_outputs, /* i : number of output channels */ @@ -1489,7 +1490,7 @@ int16_t computeMixingMatrices( -#ifdef IVAS_FLOAT_FIXED +#else Word16 computeMixingMatrices_fx( const Word16 num_inputs, /* i : number of input channels */ const Word16 num_outputs, /* i : number of output channels */ @@ -1510,28 +1511,24 
@@ Word16 computeMixingMatrices_fx( Word16 *Cr_e ) { - int16_t i, j; - int16_t out = EXIT_SUCCESS; - int16_t nL, nC; - int16_t lengthCx = num_inputs; - int16_t lengthCy = num_outputs; - float svd_in_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_u_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_s_buffer[MAX_OUTPUT_CHANNELS]; - float svd_v_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word16 i, j; + Word16 out = EXIT_SUCCESS; + Word16 nL, nC; + Word16 lengthCx = num_inputs; + Word16 lengthCy = num_outputs; Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS]; Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS]; - Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; //Q31 out + Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; //Q31 out //Word16 mat_mult_buffer1_fx_e; Word16 Cx_fx_e; Word16 Cy_fx_e; - Word16 svd_u_buffer_fx_e[MAX_OUTPUT_CHANNELS]; - Word16 svd_v_buffer_fx_e[MAX_OUTPUT_CHANNELS]; + //Word16 svd_u_buffer_fx_e[MAX_OUTPUT_CHANNELS]; + //Word16 svd_v_buffer_fx_e[MAX_OUTPUT_CHANNELS]; Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS]; Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS]; @@ -1614,19 +1611,8 @@ Word16 computeMixingMatrices_fx( /* Processing the SVD */ mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 ); - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), Cy_fx_e, ( svd_in_buffer[g] ), lengthCy ); - } - svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, lengthCy, lengthCy ); - - f2me_buf(svd_s_buffer, svd_s_buffer_fx, &svd_s_buffer_fx_e, lengthCy); - FOR(Word32 g = 0; g < lengthCy; 
g++) - { - f2me_buf((svd_u_buffer[g]), (svd_u_buffer_fx[g]), &svd_u_buffer_fx_e[g], lengthCy); - f2me_buf((svd_v_buffer[g]), (svd_v_buffer_fx[g]), &svd_v_buffer_fx_e[g], lengthCy); - } + svd_fx(svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, lengthCy, lengthCy ); /* Computing Ky */ @@ -1639,7 +1625,7 @@ Word16 computeMixingMatrices_fx( tmp_e = svd_s_buffer_fx_e; L_tmp = Sqrt32(svd_s_buffer_fx[j], &tmp_e); Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); - Ky_fx_e[i + j * lengthCy] = svd_u_buffer_fx_e[i] + tmp_e; + Ky_fx_e[i + j * lengthCy] = + tmp_e; } } @@ -1664,21 +1650,8 @@ Word16 computeMixingMatrices_fx( /* Processing the SVD */ mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 ); - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), Cx_fx_e, ( svd_in_buffer[g] ), lengthCx ); - } - - - svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, lengthCx, lengthCx ); - f2me_buf(svd_s_buffer, svd_s_buffer_fx, &svd_s_buffer_fx_e, lengthCx); - FOR(Word32 g = 0; g < lengthCy; g++) - { - f2me_buf((svd_u_buffer[g]), (svd_u_buffer_fx[g]), &svd_u_buffer_fx_e[g], lengthCx); - f2me_buf((svd_v_buffer[g]), (svd_v_buffer_fx[g]), &svd_v_buffer_fx_e[g], lengthCx); - } - + svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, lengthCx, lengthCx ); FOR ( i = 0; i < lengthCx; ++i ) { @@ -1687,7 +1660,7 @@ Word16 computeMixingMatrices_fx( tmp_e = svd_s_buffer_fx_e; L_tmp = Sqrt32(svd_s_buffer_fx[j], &tmp_e); Kx_fx[i + j * lengthCx] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); - Kx_fx_e[i + j * lengthCx] = svd_u_buffer_fx_e[i] + tmp_e; + Kx_fx_e[i + j * lengthCx] = tmp_e; } } @@ -1776,7 +1749,7 @@ Word16 computeMixingMatrices_fx( FOR ( j = 0; j < lengthCx; ++j ) { Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1(svd_u_buffer_fx[j][i], reg_fac_fx ); - Kx_reg_inv_e[i + j * lengthCx] = svd_u_buffer_fx_e[j] + scale; + 
Kx_reg_inv_e[i + j * lengthCx] = scale; } } @@ -1906,17 +1879,17 @@ Word16 computeMixingMatrices_fx( mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 ); #ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED - FOR ( Word32 g = 0; g < lengthCy; g++ ) + /*FOR ( Word32 g = 0; g < lengthCy; g++ ) { me2f_buf( ( svd_in_buffer_fx[g] ), mat_mult_buffer1_e, ( svd_in_buffer[g] ), lengthCx ); - } + }*/ nL = lengthCy; move16(); nC = lengthCx; move16(); - svd( svd_in_buffer, svd_v_buffer, svd_s_buffer, svd_u_buffer, nL, nC ); + svd_fx(svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); #endif } ELSE @@ -1924,17 +1897,12 @@ Word16 computeMixingMatrices_fx( mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 ); #ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), mat_mult_buffer1_e, ( svd_in_buffer[g] ), lengthCy ); - } - nL = lengthCx; move16(); nC = lengthCy; move16(); - svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, nL, nC ); + svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); #endif } @@ -1943,53 +1911,11 @@ Word16 computeMixingMatrices_fx( /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */ - Word16 min_q = -1; - move16(); - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - f2me_buf( svd_v_buffer[g], svd_v_buffer_fx[g], &svd_v_buffer_fx_e[g], lengthCx ); - IF ( svd_v_buffer_fx_e[g] > min_q) - { - min_q = svd_v_buffer_fx_e[g]; - } - } - - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - FOR ( Word32 h = 0; h < lengthCx; h++ ) - { - svd_v_buffer_fx[g][h] = L_shr( svd_v_buffer_fx[g][h], min_q - svd_v_buffer_fx_e[g] ); - } - } - svd_v_buffer_fx_e[0] = min_q; - move16(); - - min_q = -1; - move16(); - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - 
f2me_buf( svd_u_buffer[g], svd_u_buffer_fx[g], &svd_u_buffer_fx_e[g], lengthCx ); - IF ( svd_u_buffer_fx_e[g] > min_q ) - { - min_q = svd_u_buffer_fx_e[g]; - move16(); - } - } - - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - FOR ( Word32 h = 0; h < lengthCx; h++ ) - { - svd_u_buffer_fx[g][h] = L_shr( svd_u_buffer_fx[g][h], min_q - svd_u_buffer_fx_e[g] ); - } - } - svd_u_buffer_fx_e[0] = min_q; - move16(); svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx ); svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx ); - mat_mult_buffer1_e = svd_v_buffer_fx_e[0]; - mat_mult_buffer2_e = svd_u_buffer_fx_e[0]; + mat_mult_buffer1_e = 0; + mat_mult_buffer2_e = 0; guard_bits = find_guarded_bits_fx(lengthCx + 1); FOR(i = 0; i < lengthCx * lengthCy; ++i) @@ -2151,6 +2077,7 @@ Word16 computeMixingMatrices_fx( * compute a residual mixing matrix using the covariance synthesis approach *-------------------------------------------------------------------*/ +#ifndef IVAS_FLOAT_FIXED int16_t computeMixingMatricesResidual( const int16_t num_outputs, /* i : number of output channels */ const float *Cx, /* i : vector containing the diagonal diffuse prototype covariance */ @@ -2343,7 +2270,7 @@ int16_t computeMixingMatricesResidual( return out; } -#ifdef IVAS_FLOAT_FIXED +#else Word16 computeMixingMatricesResidual_fx( const Word32 num_outputs, /* i : number of output channels */ const Word32 *Cx_fx, /* i : vector containing the diagonal diffuse prototype covariance */ @@ -2363,20 +2290,10 @@ Word16 computeMixingMatricesResidual_fx( Word16 lengthCx = extract_l(num_outputs); Word16 lengthCy = extract_l(num_outputs); -#ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED - float svd_in_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_u_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_s_buffer[MAX_OUTPUT_CHANNELS]; - float svd_v_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; -#endif - Word32 
svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - - Word16 svd_u_buffer_fx_e[MAX_OUTPUT_CHANNELS]; - Word16 svd_v_buffer_fx_e[MAX_OUTPUT_CHANNELS]; + Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; //Q31 out + Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; //Q31 out Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; @@ -2428,20 +2345,7 @@ Word16 computeMixingMatricesResidual_fx( /* linear array to svd buffer */ mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 ); -#ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), Cy_fx_e, ( svd_in_buffer[g] ), lengthCy ); - } - - svd(svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, lengthCy, lengthCy); - f2me_buf(svd_s_buffer, svd_s_buffer_fx, &svd_s_buffer_fx_e, lengthCy); - FOR(Word32 g = 0; g < lengthCy; g++) - { - f2me_buf((svd_u_buffer[g]), (svd_u_buffer_fx[g]), &svd_u_buffer_fx_e[g], lengthCy); - f2me_buf((svd_v_buffer[g]), (svd_v_buffer_fx[g]), &svd_v_buffer_fx_e[g], lengthCy); - } -#endif + svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, lengthCy, lengthCy ); /* Computing Ky */ FOR ( i = 0; i < lengthCy; ++i ) @@ -2451,7 +2355,7 @@ Word16 computeMixingMatricesResidual_fx( tmp_e = svd_s_buffer_fx_e; L_tmp = Sqrt32(svd_s_buffer_fx[j], &tmp_e); Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); - Ky_fx_e[i + j * lengthCy] = svd_u_buffer_fx_e[i] + tmp_e; + Ky_fx_e[i + j * lengthCy] = tmp_e; } } @@ -2646,61 +2550,20 @@ Word16 computeMixingMatricesResidual_fx( mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 ); - FOR ( Word32 g = 0; g < lengthCx; g++ ) + /*FOR ( 
Word32 g = 0; g < lengthCx; g++ ) { me2f_buf( ( svd_in_buffer_fx[g] ), mat_mult_buffer1_e, ( svd_in_buffer[g] ), lengthCy ); - } + }*/ -#ifdef IVAS_FLOAT_FIXED_TO_BE_REMOVED - svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, lengthCx, lengthCy ); + svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, lengthCx, lengthCy ); /* Actually Processing P */ - Word16 min_q = -1; - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - f2me_buf( svd_v_buffer[g], svd_v_buffer_fx[g], &svd_v_buffer_fx_e[g], lengthCx ); - IF ( svd_v_buffer_fx_e[g] > min_q ) - { - min_q = svd_v_buffer_fx_e[g]; - } - } - FOR ( Word32 g = 0; g < lengthCy; g++ ) - { - FOR ( Word32 h = 0; h < lengthCx; h++ ) - { - svd_v_buffer_fx[g][h] = L_shr( svd_v_buffer_fx[g][h], min_q - svd_v_buffer_fx_e[g] ); - } - } - svd_v_buffer_fx_e[0] = min_q; - - min_q = -1; - move16(); - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - f2me_buf( svd_u_buffer[g], svd_u_buffer_fx[g], &svd_u_buffer_fx_e[g], lengthCx ); - IF ( svd_u_buffer_fx_e[g] > min_q ) - { - min_q = svd_u_buffer_fx_e[g]; - move16(); - } - } - - FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - FOR ( Word32 h = 0; h < lengthCx; h++ ) - { - svd_u_buffer_fx[g][h] = L_shr( svd_u_buffer_fx[g][h], min_q - svd_u_buffer_fx_e[g] ); - } - } - svd_u_buffer_fx_e[0] = min_q; - move16(); -#endif svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx ); svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx ); - mat_mult_buffer2_e = svd_u_buffer_fx_e[0]; + mat_mult_buffer2_e = 0; move16(); - mat_mult_buffer1_e = svd_v_buffer_fx_e[0]; + mat_mult_buffer1_e = 0; move16(); Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 ); @@ -2728,7 +2591,7 @@ Word16 computeMixingMatricesResidual_fx( matrix_product_fx( mat_mult_buffer1_fx, lengthCy, lengthCx, 0, mat_mult_buffer2_fx, lengthCx, lengthCx, 1, mat_mult_buffer3_fx ); - mat_mult_buffer3_e = svd_v_buffer_fx_e[0] + 
svd_u_buffer_fx_e[0]; + mat_mult_buffer3_e = 0; /*-----------------------------------------------------------------* * Formulate M @@ -2835,7 +2698,7 @@ Word16 computeMixingMatricesResidual_fx( * * *-------------------------------------------------------------------*/ - +#ifdef IVAS_FLOAT_FIXED int16_t computeMixingMatricesISM( const int16_t num_inputs, const int16_t num_responses, @@ -2887,6 +2750,8 @@ int16_t computeMixingMatricesISM( Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS]; + Word16 svd_s_buffer_fx_e; Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word16 mat_mult_buffer1_fx_e; @@ -3034,36 +2899,44 @@ int16_t computeMixingMatricesISM( #ifdef IVAS_FLOAT_FIXED f2me_buf( mat_mult_buffer1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e, lengthCx * num_responses ); mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 ); - - FOR ( Word32 g = 0; g < num_responses; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), mat_mult_buffer1_fx_e, ( svd_in_buffer[g] ), lengthCx ); - } #else mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 1 ); #endif nL = num_responses; nC = lengthCx; - svd( svd_in_buffer, svd_v_buffer, svd_s_buffer, svd_u_buffer, nL, nC ); + svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); + + //local fix2float: to be removed + for ( i = 0; i < nL; i++) { + me2f_buf(svd_v_buffer_fx[i], 0, svd_v_buffer[i], nC); + } + for ( i = 0; i < nC; i++) { + me2f_buf(svd_u_buffer_fx[i], 0, svd_u_buffer[i], nC); + } + me2f_buf(svd_s_buffer_fx, svd_s_buffer_fx_e, svd_s_buffer, nC); } else { #ifdef IVAS_FLOAT_FIXED f2me_buf( mat_mult_buffer1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e, lengthCx * num_responses ); mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 ); - - 
FOR ( Word32 g = 0; g < lengthCx; g++ ) - { - me2f_buf( ( svd_in_buffer_fx[g] ), mat_mult_buffer1_fx_e, ( svd_in_buffer[g] ), num_responses ); - } #else mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 0 ); #endif nL = lengthCx; nC = num_responses; - svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, nL, nC ); + svd_fx(svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); + + //local fix2float: to be removed + for ( i = 0; i < nL; i++) { + me2f_buf(svd_u_buffer_fx[i], 0, svd_u_buffer[i], nC); + } + for ( i = 0; i < nC; i++) { + me2f_buf(svd_v_buffer_fx[i], 0, svd_v_buffer[i], nC); + } + me2f_buf(svd_s_buffer_fx, svd_s_buffer_fx_e, svd_s_buffer, nC); } /* Actually Processing P */ @@ -3188,3 +3061,180 @@ int16_t computeMixingMatricesISM( return out; } +#else +int16_t computeMixingMatricesISM( + const int16_t num_inputs, + const int16_t num_responses, + const int16_t num_outputs, + const float *responses, + const float *ener, + const float *Cx_diag, + const float *Cy_diag, + const float *Q, + const int16_t energy_compensation_flag, + const float reg_Sx, + const float reg_ghat, + float *mixing_matrix ) +{ + int16_t i, out; + int16_t lengthCx, lengthCy; + float *Cy_tilde_p; + float *adj; + float limit; + float svd_in_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + float svd_u_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + float svd_s_buffer[MAX_OUTPUT_CHANNELS]; + float svd_v_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; + float Kx[MAX_TRANSPORT_CHANNELS]; + float Ky[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + float Kx_reg_inv[MAX_TRANSPORT_CHANNELS]; + float Q_Cx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + float Cy_hat_diag[MAX_OUTPUT_CHANNELS]; + float G_hat[MAX_OUTPUT_CHANNELS]; + float mat_mult_buffer1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + float mat_mult_buffer2[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + float 
mat_mult_buffer3[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + int16_t nL, nC; + + push_wmops( "dirac_cov_mix_mat" ); + + out = EXIT_SUCCESS; + lengthCx = num_inputs; + lengthCy = num_outputs; + + set_zero( svd_s_buffer, MAX_OUTPUT_CHANNELS ); + for ( i = 0; i < MAX_OUTPUT_CHANNELS; i++ ) + { + set_zero( svd_in_buffer[i], MAX_OUTPUT_CHANNELS ); + set_zero( svd_u_buffer[i], MAX_OUTPUT_CHANNELS ); + set_zero( svd_v_buffer[i], MAX_OUTPUT_CHANNELS ); + } + + /* Decomposition of Cy = Ky*Ky' */ + /* Ky = responses*diag(ener) */ + matrix_diag_product( responses, lengthCy, num_responses, 0, ener, num_responses, Ky ); + + /* Decomposition of Cx -> Computing Kx */ + v_sqrt( Cx_diag, Kx, lengthCx ); + + /* Regularization of Sx */ + maximum( Kx, lengthCx, &limit ); + limit = limit * reg_Sx + EPSILON; + + for ( i = 0; i < lengthCx; ++i ) + { + svd_s_buffer[i] = ( ( Kx[i] > limit ) ? Kx[i] : limit ); + } + + limit = 0.0f; + + /* regularized Kx-1 */ + + for ( i = 0; i < lengthCx; ++i ) + { + float reg_fac = ( 1.0f / svd_s_buffer[i] ); + Kx_reg_inv[i] = reg_fac; + } + + /************************ normalization matrix G hat **********************/ + + /* Computing Q*Cx*Q' */ + matrix_diag_product( Q, lengthCy, lengthCx, 0, Cx_diag, lengthCx, Q_Cx ); + matrix_product_diag( Q_Cx, lengthCy, lengthCx, 0, Q, lengthCy, lengthCx, 1, Cy_hat_diag ); + + /* Computing Cy_hat_diag */ + for ( i = 0; i < lengthCy; ++i ) + { + if ( Cy_hat_diag[i] > limit ) + { + limit = Cy_hat_diag[i]; + } + } + + + limit = limit * reg_ghat + EPSILON; + + /* Computing G_hat */ + for ( i = 0; i < lengthCy; ++i ) + { + if ( limit > Cy_hat_diag[i] ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ + { + Cy_hat_diag[i] = limit; + } + G_hat[i] = sqrtf( Cy_diag[i] / Cy_hat_diag[i] ); + } + + /************************ Formulate optimal P **********************/ + + /* Computing the input matrix Kx'*Q'*G_hat'*Ky */ + diag_matrix_product( Kx, lengthCx, Q, lengthCy, lengthCx, 1, mat_mult_buffer1 ); + 
matrix_diag_product( mat_mult_buffer1, lengthCx, lengthCy, 0, G_hat, lengthCy, mat_mult_buffer2 ); + matrix_product( mat_mult_buffer2, lengthCx, lengthCy, 0, Ky, lengthCy, num_responses, 0, mat_mult_buffer1 ); + + if ( lengthCx < num_responses ) + { + mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 1 ); + nL = num_responses; + nC = lengthCx; + svd( svd_in_buffer, svd_v_buffer, svd_s_buffer, svd_u_buffer, nL, nC ); + } + else + { + mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 0 ); + nL = lengthCx; + nC = num_responses; + svd( svd_in_buffer, svd_u_buffer, svd_s_buffer, svd_v_buffer, nL, nC ); + } + + /* Actually Processing P */ + + /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */ + svdMat2mat( svd_v_buffer, mat_mult_buffer1, num_responses, lengthCx ); + svdMat2mat( svd_u_buffer, mat_mult_buffer2, lengthCx, lengthCx ); + + matrix_product( mat_mult_buffer1, num_responses, lengthCx, 0, mat_mult_buffer2, lengthCx, lengthCx, 1, mat_mult_buffer3 ); + + /************************ Formulate M **********************/ + + matrix_product( Ky, lengthCy, num_responses, 0, mat_mult_buffer3, num_responses, lengthCx, 0, mat_mult_buffer1 ); + + matrix_diag_product( mat_mult_buffer1, lengthCy, lengthCx, 0, Kx_reg_inv, lengthCx, mixing_matrix ); + + /*********************** Energy Compensation ****************/ + + /* Compute Cy_tilde = M*Cx*M' */ + matrix_diag_product( mixing_matrix, lengthCy, lengthCx, 0, Cx_diag, lengthCx, mat_mult_buffer1 ); + matrix_product( mat_mult_buffer1, lengthCy, lengthCx, 0, mixing_matrix, lengthCy, lengthCx, 1, mat_mult_buffer2 ); + + if ( energy_compensation_flag == 1 ) + { + adj = svd_s_buffer; + Cy_tilde_p = mat_mult_buffer2; + for ( i = 0; i < lengthCy; ++i ) + { + /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */ + if ( Cy_tilde_p[i + i * lengthCy] < 0.0f ) + { + adj[i] 
= 1.0f; + } + else + { + adj[i] = sqrtf( Cy_diag[i] / ( Cy_tilde_p[i + i * lengthCy] + EPSILON ) ); + } + + if ( adj[i] > 4.0f ) + { + adj[i] = 4.0f; + } + } + + diag_matrix_product( adj, lengthCy, mixing_matrix, lengthCy, lengthCx, 0, mat_mult_buffer3 ); + + mvr2r( mat_mult_buffer3, mixing_matrix, lengthCy * lengthCx ); + } + + pop_wmops(); + + return out; +} +#endif \ No newline at end of file diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index eb1f7e16b..a107a4614 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -38,6 +38,8 @@ #include "ivas_cnst.h" #include #include "wmc_auto.h" +#include "prot_fx1.h" +#include "prot_fx2.h" /*-----------------------------------------------------------------------* @@ -45,16 +47,27 @@ *-----------------------------------------------------------------------*/ /* The SVD is sensitive to changes to the following constants, so please be careful when trying to tune things */ +#define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */ +#ifndef IVAS_FLOAT_FIXED +#ifdef NONBE_FIX_1069_SVD_TUNING +#define SVD_MINIMUM_VALUE 1e-32f /* minimum value */ +#define CONVERGENCE_FACTOR 1.0e-04f /* factor for SVD convergence */ +#define SVD_ZERO_FLUSH_THRESHOLD 0.0f +#else #define SVD_MINIMUM_VALUE 1e-32f /* minimum value */ #define CONVERGENCE_FACTOR 1.19209290e-07f /* factor for SVD convergence */ -#define SVD_MAX_NUM_ITERATION 75 /* maximum number of interations before exiting the SVD */ #define SVD_ZERO_FLUSH_THRESHOLD 1.0e-20f - - +#endif +#else +#define SVD_MINIMUM_VALUE_FX ( 2 ) /* minimum value */ +#define SVD_ZERO_FLUSH_THRESHOLD_FX ( 0 ) +#define CONVERGENCE_FACTOR_FX 214748 /* factor for SVD convergence (as per latest float code: 1.0e-04f) */ +#endif /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ +#ifndef IVAS_FLOAT_FIXED static float 
GivensRotation( const float x, const float z ); static void biDiagonalReductionLeft( float singularVectors[][MAX_OUTPUT_CHANNELS], float singularValues[MAX_OUTPUT_CHANNELS], float secDiag[MAX_OUTPUT_CHANNELS], const int16_t nChannelsL, const int16_t nChannelsC, const int16_t currChannel, float *sig_x, float *g ); @@ -78,6 +91,127 @@ static float maxWithSign( const float a ); static void flushToZeroArray( float arr[MAX_OUTPUT_CHANNELS], const int16_t length ); static void flushToZeroMat( float mat[][MAX_OUTPUT_CHANNELS], const int16_t m, const int16_t n ); +#else +static void HouseholderReduction_fx( + Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], + Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], + Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], + Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], + Word16 singularVectors_Left_e, + Word16 *singularValues_fx_e, + Word16 *secDiag_fx_e, + const int16_t nChannelsL, + const int16_t nChannelsC, + Word32 *eps_x_fx, + Word16 *eps_x_fx_e); + +static void biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], + Word32 singularValues[MAX_OUTPUT_CHANNELS], + Word32 secDiag[MAX_OUTPUT_CHANNELS], + Word16 *singularVectors_e, + Word16 *singularValues_e, + Word16 *secDiag_e, + const Word16 nChannelsL, + const Word16 nChannelsC, + const Word16 currChannel, + Word32 *sig_x, + Word16 *sig_x_e, + Word32 *g); // Q31 + +static void biDiagonalReductionRight_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], + Word32 secDiag[MAX_OUTPUT_CHANNELS], + Word16 *singularVectors_e, + Word16 *secDiag_e, + const Word16 nChannelsL, + const Word16 nChannelsC, + const Word16 currChannel, + Word32 *sig_x, + Word16 *sig_x_e, + Word32 *g); //Q31 + +static void singularVectorsAccumulationLeft_fx( + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], //Q31 output + Word32 singularValues[MAX_OUTPUT_CHANNELS], + Word16 singularVectors_e, + Word16 singularValues_e, + const Word16 nChannelsL, + const Word16 nChannelsC); + +static 
void singularVectorsAccumulationRight_fx( + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], + Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], //Q31 output + Word32 secDiag[MAX_OUTPUT_CHANNELS], + Word16 singularVectors_e, + Word16 secDiag_e, + const Word16 nChannelsC); + +static Word32 maxWithSign_fx( + const Word32 a); + +#if 0 +static void flushToZeroArray_fx( + Word32 arr[MAX_OUTPUT_CHANNELS], + const Word16 length); + +static void flushToZeroMat_fx( + Word32 mat[][MAX_OUTPUT_CHANNELS], + const Word16 m, + const Word16 n ); +#endif + +static Word16 BidagonalDiagonalisation_fx( + Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) Q31 */ + Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ + Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) Q31 */ + Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: */ + Word16 *singularValues_fx_e, /* i/o: singular values vector (S) */ + Word16 *secDiag_fx_e, /* i/o: */ + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed */ + const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed */ + const Word32 eps_x, /* i : */ + const Word16 eps_x_e /* i : */ +); + +static void ApplyRotation_fx( + Word32 singularVector[][MAX_OUTPUT_CHANNELS], + const Word32 c, + const Word16 c_e, + const Word32 s, + const Word16 s_e, + Word32 x11, + Word16 x11_e, + Word32 x12, + Word16 x12_e, + Word32 *d, + Word16 *d_e, + Word32 *g, + Word16 *g_e, + const Word16 currentIndex1, + const Word16 currentIndex2, + const Word16 nChannels); + +static Word32 GivensRotation_fx( + const Word32 x, + const Word16 x_e, + const Word32 z, + const Word16 z_e, + Word16 *out_e); + +static void ApplyQRTransform_fx( + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) Q31 */ + Word32 singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ + Word32 
singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) Q31 */ + Word32 secDiag[MAX_OUTPUT_CHANNELS], /* i/o: */ + Word16 singularValues_e[MAX_OUTPUT_CHANNELS], + Word16 secDiag_e[MAX_OUTPUT_CHANNELS], + const Word16 startIndex, /* i : */ + const Word16 currentIndex, /* i : */ + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed */ + const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed */ +); +#endif #ifdef IVAS_FLOAT_FIXED /*------------------------------------------------------------------------- @@ -160,7 +294,7 @@ void svdMat2mat_fx( return; } -#endif +#else /*------------------------------------------------------------------------- * mat2svdMat() @@ -244,7 +378,7 @@ void svdMat2mat( return; } - +#endif /*------------------------------------------------------------------------- * svd() @@ -252,6 +386,90 @@ void svdMat2mat( * perform a singular value decomposition X=USV of a matrix X *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED + /*! r: error or success */ +Word16 svd_fx( + Word32 InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) */ + Word16 InputMatrix_e, + Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* o : left singular vectors (U) (Q31) */ + Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* o : singular values vector (S) */ + Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors (V) (Q31) */ + Word16 *singularValues_fx_e, + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed */ + const Word16 nChannelsC /* i : number of columns in the matrix to be decomposed */ +) +{ + Word16 iCh, jCh; + Word16 lengthSingularValues; + Word16 errorMessage, condition; + //int16_t max_length = ((nChannelsL > nChannelsC) ? 
nChannelsL : nChannelsC); + Word32 secDiag_fx[MAX_OUTPUT_CHANNELS]; + Word16 secDiag_fx_e = 0; + Word32 eps_x_fx = 0, temp_fx; + Word16 eps_x_fx_e = 0; + push_wmops("svd_fx"); + + set32_fx(secDiag_fx, 0, MAX_OUTPUT_CHANNELS); + + /* Collecting Values */ + FOR(iCh = 0; iCh < nChannelsL; iCh++) + { + FOR(jCh = 0; jCh < nChannelsC; jCh++) + { + singularVectors_Left_fx[iCh][jCh] = InputMatrix[iCh][jCh]; + } + } + + *singularValues_fx_e = 0; + + /* Householder reduction */ + HouseholderReduction_fx(singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, InputMatrix_e, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, &eps_x_fx, &eps_x_fx_e); + + /* Set extremely small values to zero if needed */ + //flushToZeroArray(singularValues, max_length); + //flushToZeroMat(singularVectors_Left, nChannelsL, nChannelsL); + //flushToZeroMat(singularVectors_Right, nChannelsC, nChannelsC); + + /* BidagonalDiagonalisation */ + errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e); + + /* Sort the singular values descending order */ + lengthSingularValues = min(nChannelsL, nChannelsC); + + DO + { + condition = 0; + FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ ) + { + IF( LT_32( singularValues_fx[iCh], singularValues_fx[iCh + 1] ) ) + { + condition = 1; + temp_fx = singularValues_fx[iCh]; + singularValues_fx[iCh] = singularValues_fx[iCh + 1]; + singularValues_fx[iCh + 1] = temp_fx; + + FOR(jCh = 0; jCh < nChannelsL; ++jCh) + { + temp_fx = singularVectors_Left_fx[jCh][iCh]; + singularVectors_Left_fx[jCh][iCh] = singularVectors_Left_fx[jCh][iCh + 1]; + singularVectors_Left_fx[jCh][iCh + 1] = temp_fx; + } + + FOR(jCh = 0; jCh < nChannelsC; ++jCh) + { + temp_fx = singularVectors_Right_fx[jCh][iCh]; + singularVectors_Right_fx[jCh][iCh] = singularVectors_Right_fx[jCh][iCh + 1]; + 
singularVectors_Right_fx[jCh][iCh + 1] = temp_fx; + } + } + } + } WHILE (EQ_16(condition, 1)); + + pop_wmops(); + return (errorMessage); +} + +#else /*! r: error or success */ int16_t svd( float InputMatrix[][MAX_OUTPUT_CHANNELS], /* i : matrix to be decomposed (M) */ @@ -328,7 +546,7 @@ int16_t svd( pop_wmops(); return ( errorMessage ); } - +#endif /*-----------------------------------------------------------------------* * Local functions @@ -340,6 +558,162 @@ int16_t svd( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word16 BidagonalDiagonalisation_fx( + Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) Q31 */ + Word32 singularValues_fx[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ + Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) Q31 */ + Word32 secDiag_fx[MAX_OUTPUT_CHANNELS], /* i/o: */ + Word16 *singularValues_fx_e, /* i/o: singular values vector (S) */ + Word16 *secDiag_fx_e, /* i/o: */ + const Word16 nChannelsL, /* i : number of rows in the matrix to be decomposed */ + const Word16 nChannelsC, /* i : number of columns in the matrix to be decomposed */ + const Word32 eps_x, /* i : */ + const Word16 eps_x_e /* i : */ +) +{ + Word16 kCh, nCh, iCh, jCh, split; + Word32 c, s, f1, f2; + Word16 c_e, s_e, f1_e, f2_e; + Word16 x11_e = 0, x12_e = 0; + Word16 temp_exp; + Word32 g = 0; + Word16 g_e = 0; + Word16 convergence, iteration, found_split; + Word16 error = 0; + Word16 singularValues_new_e[MAX_OUTPUT_CHANNELS], secDiag_new_e[MAX_OUTPUT_CHANNELS]; + set16_fx( singularValues_new_e, *singularValues_fx_e, MAX_OUTPUT_CHANNELS ); + set16_fx( secDiag_new_e, *secDiag_fx_e, MAX_OUTPUT_CHANNELS ); + + FOR( iCh = nChannelsC - 1; iCh >= 0; iCh-- ) /* nChannelsC */ + { + convergence = 0; + iteration = 0; + split = iCh - 1; + + WHILE ( EQ_16(convergence, 0) ) + { + iteration++; + found_split = 1; + + FOR( jCh = 
iCh; jCh >= 0; jCh-- ) + { + split = jCh - 1; + IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */ + { + found_split = 0; + BREAK; + } + IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */ + { + BREAK; + } + } + + convergence = ( jCh == iCh ) ? 1 : 0; + + IF ( found_split ) + { + s = MAX_32; + s_e = 0; + c = 0; + c_e = 0; + + FOR ( kCh = jCh; kCh <= iCh; kCh++ ) + { + g = Mpy_32_32( s, secDiag_fx[kCh] ); + g_e = add(s_e, secDiag_new_e[kCh]); + secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); + secDiag_new_e[kCh] = add(c_e, secDiag_new_e[kCh]); + IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */ + { + BREAK; + } + + c = singularValues_fx[kCh]; + c_e = singularValues_new_e[kCh]; + singularValues_fx[kCh] = GivensRotation_fx( g, g_e, singularValues_fx[kCh], singularValues_new_e[kCh], &singularValues_new_e[kCh] ); + c = BASOP_Util_Divide3232_Scale_cadence( c, maxWithSign_fx(singularValues_fx[kCh] ), &temp_exp ); + c_e = add( temp_exp, sub( c_e, singularValues_new_e[kCh] ) ); + IF( GT_16( c_e, 0 ) ) + { + c = L_shl_sat(c, c_e); // Q31 + c_e = 0; + } + s = BASOP_Util_Divide3232_Scale_cadence( -g, maxWithSign_fx(singularValues_fx[kCh] ), &temp_exp ); + s_e = add( temp_exp, sub( g_e, singularValues_new_e[kCh] ) ); + IF( GT_16( s_e, 0 ) ) + { + s = L_shl_sat(s, s_e); // Q31 + s_e = 0; + } + + ApplyRotation_fx(singularVectors_Left_fx, c, c_e, s, s_e, 0, x11_e, 0, x12_e, &f1, &f1_e, &f2, &f2_e, kCh, split, nChannelsL ); /* nChannelsL */ + } + } + + IF ( convergence ) + { + singularValues_fx[iCh] = (Word32) singularValues_fx[iCh]; + IF( LT_32( 
singularValues_fx[iCh], 0 ) ) + { + singularValues_fx[iCh] = L_negate(singularValues_fx[iCh]); + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */ + { + singularVectors_Right_fx[nCh][iCh] = L_negate(singularVectors_Right_fx[nCh][iCh]); + } + } + } + ELSE + { + IF( GT_16( iteration, SVD_MAX_NUM_ITERATION ) ) + { + IF( LT_32( singularValues_fx[iCh], 0 ) ) + { + singularValues_fx[iCh] = L_negate(singularValues_fx[iCh]); + + FOR( nCh = 0; nCh < nChannelsC; nCh++ ) /* nChannelsC */ + { + singularVectors_Right_fx[nCh][iCh] = L_negate(singularVectors_Right_fx[nCh][iCh]); + } + } + error = 1; + convergence = 1; + } + ELSE + { + ApplyQRTransform_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_new_e, secDiag_new_e, jCh, iCh, nChannelsL, nChannelsC ); /* nChannelsC */ + } + } + } + } + + //rescaling block + Word16 max_exp = -31; + FOR (iCh = 0; iCh < nChannelsC; iCh++) { + IF(singularValues_fx[iCh]) { + max_exp = s_max(max_exp,singularValues_new_e[iCh]); + } + } + *singularValues_fx_e = max_exp; + FOR(iCh = 0; iCh < nChannelsC; iCh++) { + singularValues_fx[iCh] = L_shr_r(singularValues_fx[iCh], sub(*singularValues_fx_e, singularValues_new_e[iCh])); + } + + max_exp = -31; + FOR(iCh = 0; iCh < nChannelsC; iCh++) { + IF(secDiag_fx[iCh]) { + max_exp = s_max(max_exp, secDiag_new_e[iCh]); + } + } + *secDiag_fx_e = max_exp; + FOR(iCh = 0; iCh < nChannelsC; iCh++) { + secDiag_fx[iCh] = L_shr_r(secDiag_fx[iCh], sub(*secDiag_fx_e, secDiag_new_e[iCh])); + } + + return ( error ); +} +#else static int16_t BidagonalDiagonalisation( float singularVectors_Left[][MAX_OUTPUT_CHANNELS], /* i/o: left singular vectors (U) */ float singularValues[MAX_OUTPUT_CHANNELS], /* i/o: singular values vector (S) */ @@ -444,7 +818,7 @@ static int16_t BidagonalDiagonalisation( return ( error ); } - +#endif /*------------------------------------------------------------------------- * ApplyQRTransform() @@ -452,6 +826,145 @@ static int16_t 
BidagonalDiagonalisation(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void ApplyQRTransform_fx( /* one QR sweep (chain of Givens rotations) over the bidiagonal block startIndex..currentIndex; all scalars are mantissa/exponent pairs */
+    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U) Q31 */
+    Word32 singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S), mantissas */
+    Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], /* i/o: right singular vectors (V) Q31 */
+    Word32 secDiag[MAX_OUTPUT_CHANNELS],                 /* i/o: secDiag vector, mantissas */
+    Word16 singularValues_e[MAX_OUTPUT_CHANNELS],        /* i/o: per-element exponents of singularValues */
+    Word16 secDiag_e[MAX_OUTPUT_CHANNELS],               /* i/o: per-element exponents of secDiag */
+    const Word16 startIndex,                             /* i : first index of the block that is rotated */
+    const Word16 currentIndex,                           /* i : last index of the block that is rotated */
+    const Word16 nChannelsL,                             /* i : number of rows in the matrix to be decomposed */
+    const Word16 nChannelsC                              /* i : number of columns in the matrix to be decomposed */
+)
+{
+    Word16 ch, split;
+    Word32 d = 0, g = 0, r = 0, x_ii = 0, x_split = 0, x_kk = 0, mu = 0, aux = 0;
+    Word16 d_e = 0, g_e = 0, r_e = 0, x_ii_e = 0, x_split_e = 0, x_kk_e = 0, mu_e = 0, aux_e = 0;
+    Word32 L_temp1, L_temp2, L_temp3, L_temp4;
+    Word16 L_temp1_e, L_temp2_e, L_temp3_e, L_temp4_e, temp_exp;
+    Word32 c = MAX_32; /* rotation cosine, starts at ~1.0 in Q31 */
+    Word16 c_e = 0;
+    Word32 s = MAX_32; /* rotation sine, starts at ~1.0 in Q31 */
+    Word16 s_e = 0;
+
+    x_kk = singularValues[currentIndex];
+    x_kk_e = singularValues_e[currentIndex];
+    x_ii = singularValues[startIndex];
+    x_ii_e = singularValues_e[startIndex];
+    split = currentIndex - 1;
+
+    x_split = singularValues[split];
+    x_split_e = singularValues_e[split];
+    g = secDiag[split];
+    g_e = secDiag_e[split];
+    r = secDiag[currentIndex];
+    r_e = secDiag_e[currentIndex];
+
+    //d = (x_split + x_kk) * (x_split - x_kk) + (g + r) * (g - r);
+    L_temp1 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, x_kk, x_kk_e, &L_temp1_e );
+    L_temp2 = BASOP_Util_Add_Mant32Exp( x_split, x_split_e, L_negate( x_kk ), x_kk_e, &L_temp2_e ); /* L_negate saturates; a raw unary minus overflows for MIN_32 */
+    L_temp3 = BASOP_Util_Add_Mant32Exp( g, g_e, r, r_e, &L_temp3_e );
+    L_temp4 = BASOP_Util_Add_Mant32Exp( g, g_e, L_negate( r ), r_e, &L_temp4_e );
+    d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( L_temp3, L_temp4 ), add( L_temp3_e, L_temp4_e ), &d_e );
+
+    //d /= maxWithSign((r + r) * x_split);
+    L_temp1 = BASOP_Util_Add_Mant32Exp( r, r_e, r, r_e, &L_temp1_e );
+    L_temp1 = maxWithSign_fx( Mpy_32_32( L_temp1, x_split ) );
+    L_temp1_e = add( L_temp1_e, x_split_e );
+    d = BASOP_Util_Divide3232_Scale_cadence( d, L_temp1, &temp_exp );
+    d_e = add( temp_exp, sub( d_e, L_temp1_e ) );
+
+    g = GivensRotation_fx(MAX_32, 0, d, d_e, &g_e);
+
+    //mu = x_split / maxWithSign(d + (d >= 0.0f ? 1 : (-1)) * fabsf(g)) - r;
+    L_temp1 = GE_32( d, 0 ) ? L_abs( g ) : L_negate( L_abs( g ) ); /* |g| carrying the sign of d */
+    L_temp1_e = g_e;
+    L_temp2 = maxWithSign_fx( BASOP_Util_Add_Mant32Exp( d, d_e, L_temp1, L_temp1_e, &L_temp2_e ) );
+    mu = BASOP_Util_Divide3232_Scale_cadence( x_split, L_temp2, &mu_e );
+    mu_e = add( mu_e, sub( x_split_e, L_temp2_e ) );
+    mu = BASOP_Util_Add_Mant32Exp( mu, mu_e, L_negate( r ), r_e, &mu_e );
+
+    //d = ((x_ii + x_kk) * (x_ii - x_kk) + r * mu) / maxWithSign(x_ii);
+    L_temp1 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, x_kk, x_kk_e, &L_temp1_e );
+    L_temp2 = BASOP_Util_Add_Mant32Exp( x_ii, x_ii_e, L_negate( x_kk ), x_kk_e, &L_temp2_e );
+    d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( L_temp1, L_temp2 ), add( L_temp1_e, L_temp2_e ), Mpy_32_32( r, mu ), add( r_e, mu_e ), &d_e );
+    d = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( x_ii ), &temp_exp );
+    d_e = add( temp_exp, sub( d_e, x_ii_e ) );
+
+    /*QR transformation*/
+    FOR(ch = startIndex; ch <= split; ch++)
+    {
+        r = Mpy_32_32(s, secDiag[ch + 1]);
+        r_e = add(s_e,secDiag_e[ch + 1]);
+        g = Mpy_32_32(c, secDiag[ch + 1]);
+        g_e = add(c_e,secDiag_e[ch + 1]);
+
+        secDiag[ch] = GivensRotation_fx( d, d_e, r, r_e, &secDiag_e[ch] );
+        c = BASOP_Util_Divide3232_Scale_cadence( d, maxWithSign_fx( secDiag[ch] ), &c_e );
+        c_e = add(c_e, sub( d_e, secDiag_e[ch] ) );
+        IF( GT_16( c_e, 0 ) ) /* only positive exponents are folded into the mantissa (with saturation); non-positive ones are kept */
+        {
+            c = L_shl_sat(c, c_e); // Q31
+            c_e = 0;
+        }
+        s = BASOP_Util_Divide3232_Scale_cadence( r, maxWithSign_fx( secDiag[ch] ), &s_e );
+        s_e = add(s_e, sub( r_e, secDiag_e[ch] ) );
+        IF( GT_16( s_e, 0 ) )
+        {
+            s = L_shl_sat(s, s_e); // Q31
+            s_e = 0;
+        }
+
+        r = Mpy_32_32(s, singularValues[ch + 1]);
+        r_e = add(s_e,singularValues_e[ch + 1]);
+        x_split = Mpy_32_32(c, singularValues[ch + 1]);
+        x_split_e = add(c_e,singularValues_e[ch + 1]);
+
+        aux = g;
+        aux_e = g_e;
+
+        //ApplyRotation(singularVectors_Right, c, s, x_ii, aux, &d, &g, ch + 1, ch, nChannelsC);
+        ApplyRotation_fx( singularVectors_Right, c, c_e, s, s_e, x_ii, x_ii_e, aux, aux_e, &d, &d_e, &g, &g_e, ch + 1, ch, nChannelsC );
+
+        singularValues[ch] = GivensRotation_fx( d, d_e, r, r_e, &singularValues_e[ch] );
+        IF( GT_32( L_abs( singularValues[ch] ), Mpy_32_32( CONVERGENCE_FACTOR_FX, L_abs( singularValues[ch] ) ) ) ) /* false only for singularValues[ch] == 0; c and s then keep their previous values */
+        {
+            aux = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, singularValues[ch], &aux_e );
+            aux_e = add( aux_e, sub( 1, singularValues_e[ch] ) );
+
+            c = Mpy_32_32( d, aux );
+            c_e = add( d_e, aux_e );
+            IF( GT_16( c_e, 0 ) )
+            {
+                c = L_shl_sat(c, c_e); // Q31
+                c_e = 0;
+            }
+
+            s = Mpy_32_32( r, aux );
+            s_e = add( r_e, aux_e );
+            IF( GT_16( s_e, 0 ) )
+            {
+                s = L_shl_sat(s, s_e); // Q31
+                s_e = 0;
+            }
+        }
+
+        //ApplyRotation(singularVectors_Left, c, s, g, x_split, &d, &x_ii, ch + 1, ch, nChannelsL);
+        ApplyRotation_fx(singularVectors_Left, c, c_e, s, s_e, g, g_e, x_split, x_split_e, &d, &d_e, &x_ii, &x_ii_e, ch + 1, ch, nChannelsL);
+    }
+
+    secDiag[startIndex] = 0;
+    secDiag[currentIndex] = d;
+    secDiag_e[currentIndex] = d_e;
+    singularValues[currentIndex] = x_ii;
+    singularValues_e[currentIndex] = x_ii_e;
+
+    return;
+}
+#else
 static void ApplyQRTransform(
     float singularVectors_Left[][MAX_OUTPUT_CHANNELS],  /* i/o: left singular vectors (U) */
     float singularValues[MAX_OUTPUT_CHANNELS],          /* i/o: singular values vector (S) */
@@ -515,7 +1028,7 @@ static void ApplyQRTransform(
     return;
 }
 
-
+#endif
 
 /*-------------------------------------------------------------------------
  * ApplyRotation()
@@ -523,6
+1036,44 @@ static void ApplyQRTransform(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void ApplyRotation_fx( /* applies the plane rotation (c, s) to the scalar pair (x11, x12) and to two columns of singularVector */
+    Word32 singularVector[][MAX_OUTPUT_CHANNELS], /* i/o: matrix whose columns currentIndex1/currentIndex2 are rotated */
+    const Word32 c,             /* i : rotation cosine, mantissa */
+    const Word16 c_e,           /* i : rotation cosine, exponent */
+    const Word32 s,             /* i : rotation sine, mantissa */
+    const Word16 s_e,           /* i : rotation sine, exponent */
+    Word32 x11,                 /* i : first scalar, mantissa (reused as scratch in the column loop) */
+    Word16 x11_e,               /* i : first scalar, exponent */
+    Word32 x12,                 /* i : second scalar, mantissa (reused as scratch in the column loop) */
+    Word16 x12_e,               /* i : second scalar, exponent */
+    Word32 *d,                  /* o : c * x11 + s * x12, mantissa */
+    Word16 *d_e,                /* o : exponent of *d */
+    Word32 *g,                  /* o : c * x12 - s * x11, mantissa */
+    Word16 *g_e,                /* o : exponent of *g */
+    const Word16 currentIndex1, /* i : column that receives c * col1 - s * col2 */
+    const Word16 currentIndex2, /* i : column that receives c * col2 + s * col1 */
+    const Word16 nChannels )    /* i : number of rows that are rotated */
+{
+    Word16 ch;
+    Word16 temp_exp;
+
+    *d = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), add(c_e,x11_e), Mpy_32_32( s, x12 ), add(s_e,x12_e), d_e );
+    *g = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), add(c_e,x12_e), Mpy_32_32( L_negate( s ), x11 ), add(s_e,x11_e), g_e ); /* L_negate: callers saturate s with L_shl_sat, so s may be MIN_32 and a raw -s would overflow */
+
+    FOR( ch = 0; ch < nChannels; ch++ )
+    {
+        x11 = singularVector[ch][currentIndex2];
+        x12 = singularVector[ch][currentIndex1];
+        singularVector[ch][currentIndex2] = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x11 ), c_e, Mpy_32_32( s, x12 ), s_e, &temp_exp ); /* matrix entries are used with exponent 0, so only c_e / s_e enter here */
+        singularVector[ch][currentIndex2] = L_shl_sat(singularVector[ch][currentIndex2], temp_exp); /* back to exponent 0, saturating */
+        singularVector[ch][currentIndex1] = BASOP_Util_Add_Mant32Exp( Mpy_32_32( c, x12 ), c_e, Mpy_32_32( L_negate( s ), x11 ), s_e, &temp_exp);
+        singularVector[ch][currentIndex1] = L_shl_sat(singularVector[ch][currentIndex1], temp_exp);
+    }
+
+    return;
+}
+#else
 static void ApplyRotation(
     float singularVector[][MAX_OUTPUT_CHANNELS],
     const float c,
@@ -550,7 +1101,7 @@ static void ApplyRotation(
     return;
 }
 
-
+#endif
 
 /*-------------------------------------------------------------------------
  * HouseholderReduction()
@@ -558,6 +1109,48 @@ static void ApplyRotation(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void HouseholderReduction_fx( /* runs the left/right bidiagonal reduction for every column, tracks eps_x, then accumulates the right and left singular vectors */
+    Word32 singularVectors_Left_fx[][MAX_OUTPUT_CHANNELS],  /* i/o: matrix to reduce on entry, overwritten in place */
+    Word32 singularValues_fx[MAX_OUTPUT_CHANNELS],          /* o : singular values vector, mantissas */
+    Word32 singularVectors_Right_fx[][MAX_OUTPUT_CHANNELS], /* o : right singular vectors */
+    Word32 secDiag_fx[MAX_OUTPUT_CHANNELS],                 /* i/o: secDiag vector, mantissas */
+    Word16 singularVectors_Left_e,                          /* i : common exponent of singularVectors_Left_fx (local copy is updated by the reduction steps) */
+    Word16 *singularValues_fx_e,                            /* i/o: common exponent of singularValues_fx */
+    Word16 *secDiag_fx_e,                                   /* i/o: common exponent of secDiag_fx */
+    const int16_t nChannelsL,                               /* i : number of rows */
+    const int16_t nChannelsC,                               /* i : number of columns */
+    Word32 *eps_x_fx,                                       /* i/o: running maximum of |singularValues[n]| + |secDiag[n]|, mantissa */
+    Word16 *eps_x_fx_e)                                     /* i/o: exponent of *eps_x_fx */
+{
+    int16_t nCh;
+    /* g_fx and sig_x_fx are carried from one reduction call to the next: biDiagonalReductionLeft_fx reads them before resetting them */
+    Word32 g_fx = 0, sig_x_fx = 0;
+    Word16 sig_x_fx_e = 0;
+
+    /* Bidiagonal Reduction for every channel */
+    FOR(nCh = 0; nCh < nChannelsC; nCh++) /* nChannelsC */
+    {
+        biDiagonalReductionLeft_fx(singularVectors_Left_fx, singularValues_fx, secDiag_fx, &singularVectors_Left_e, singularValues_fx_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx);
+        biDiagonalReductionRight_fx(singularVectors_Left_fx, secDiag_fx, &singularVectors_Left_e, secDiag_fx_e, nChannelsL, nChannelsC, nCh, &sig_x_fx, &sig_x_fx_e, &g_fx);
+
+        Word16 L_temp_e;
+        Word32 L_temp = BASOP_Util_Add_Mant32Exp( L_abs( singularValues_fx[nCh] ), *singularValues_fx_e, L_abs( secDiag_fx[nCh] ), *secDiag_fx_e, &L_temp_e);
+        IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( L_temp, L_temp_e, *eps_x_fx, *eps_x_fx_e ), 1 ) ) /* keep the larger of the two */
+        {
+            *eps_x_fx = L_temp;
+            *eps_x_fx_e = L_temp_e;
+        }
+    }
+
+    /* Singular vector accumulation */
+    singularVectorsAccumulationRight_fx( singularVectors_Left_fx, singularVectors_Right_fx, secDiag_fx, singularVectors_Left_e, *secDiag_fx_e, nChannelsC );
+
+    singularVectorsAccumulationLeft_fx(singularVectors_Left_fx, singularValues_fx, singularVectors_Left_e, *singularValues_fx_e, nChannelsL, nChannelsC);
+
+    return;
+}
+#else
 static void HouseholderReduction(
     float singularVectors_Left[][MAX_OUTPUT_CHANNELS],
     float singularValues[MAX_OUTPUT_CHANNELS],
@@ -584,7 +1177,7 @@ static void HouseholderReduction(
     return;
 }
 
-
+#endif
 
 /*-------------------------------------------------------------------------
  * biDiagonalReductionLeft()
@@ -592,6 +1185,153 @@ static void HouseholderReduction(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void
biDiagonalReductionLeft_fx( + Word32 singularVectors[][MAX_OUTPUT_CHANNELS], + Word32 singularValues[MAX_OUTPUT_CHANNELS], + Word32 secDiag[MAX_OUTPUT_CHANNELS], + Word16 *singularVectors_e, + Word16 *singularValues_e, + Word16 *secDiag_e, + const Word16 nChannelsL, + const Word16 nChannelsC, + const Word16 currChannel, + Word32 *sig_x, + Word16 *sig_x_e, + Word32 *g ) // Q31 +{ + Word16 iCh, jCh, idx; + Word32 norm_x, f, r; + Word16 norm_x_e, f_e, r_e; + Word16 sing_exp[MAX_OUTPUT_CHANNELS]; + Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; + Word32 L_temp; + Word16 L_temp_e; + FOR (jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++) { + set_s( sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS); + } + + secDiag[currChannel] = Mpy_32_32( *sig_x, *g ); + //rescaling block + IF( GT_16( *sig_x_e, *secDiag_e ) ) + { + FOR ( Word16 i = 0; i < MAX_OUTPUT_CHANNELS; i++ ) + { + IF( NE_16( i, currChannel ) ) + { + secDiag[i] = L_shl( secDiag[i], sub( *secDiag_e, *sig_x_e ) ); + } + } + *secDiag_e = *sig_x_e; + } + ELSE IF ( LT_16( *sig_x_e, *secDiag_e ) ) + { + secDiag[currChannel] = L_shr_r( secDiag[currChannel], sub( *secDiag_e, *sig_x_e ) ); + } + + /* Setting values to 0 */ + ( *sig_x ) = 0; + ( *g ) = 0; + + IF( LT_16( currChannel, nChannelsL ) ) /* i <= m */ + { + idx = currChannel; + + FOR ( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[jCh][currChannel] ), *singularVectors_e, sig_x_e ); + } + + IF ( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */ + { + norm_x = 0; + norm_x_e = 0; + FOR ( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + singularVectors[jCh][currChannel] = BASOP_Util_Divide3232_Scale_cadence( singularVectors[jCh][currChannel], maxWithSign_fx( *sig_x ), &sing_exp[jCh] ); + sing_exp[jCh] = add( sing_exp[jCh], sub( *singularVectors_e, *sig_x_e ) ); + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( 
singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sing_exp[jCh], 1 ), &norm_x_e ); + } + IF( GT_16( norm_x_e, 0 ) ) + { + norm_x = MAX_32; + norm_x_e = 0; + } + L_temp_e = norm_x_e; + L_temp = Sqrt32( norm_x, &L_temp_e ); + L_temp = L_shl_r( L_temp, L_temp_e ); // Q31 + ( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) ); + + r = BASOP_Util_Add_Mant32Exp( Mpy_32_32( ( *g ), singularVectors[currChannel][idx] ), sing_exp[currChannel], -norm_x, norm_x_e, &r_e ); + singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp( singularVectors[currChannel][idx], sing_exp[idx], -( *g ), 0, &sing_exp[currChannel] ); + + FOR ( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ + { + norm_x = 0; + norm_x_e = 0; + FOR ( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( sing_exp[jCh], *singularVectors_e ), &norm_x_e ); + } + + f = BASOP_Util_Divide3232_Scale_cadence( norm_x, maxWithSign_fx( r ), &f_e ); + f_e = add( f_e, sub( norm_x_e, r_e ) ); + + FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + singularVectors[jCh][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors[jCh][iCh], *singularVectors_e, Mpy_32_32( f, singularVectors[jCh][currChannel] ), add( f_e, sing_exp[jCh] ), &sing_exp2[jCh][iCh] ); + } + } + + + FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + singularVectors[jCh][currChannel] = Mpy_32_32( singularVectors[jCh][currChannel], ( *sig_x ) ); + sing_exp2[jCh][currChannel] = add( sing_exp[jCh], *sig_x_e ); + } + + //rescaling block + Word16 exp_max = *singularVectors_e; + FOR(iCh = 0; iCh < nChannelsC; iCh++) + { + FOR(jCh = 0; jCh < nChannelsL; jCh++) + { + exp_max = s_max(exp_max, sing_exp2[jCh][iCh]); + } + } + + FOR(iCh = 0; iCh < nChannelsC; iCh++) + { + FOR(jCh = 0; jCh < nChannelsL; jCh++) + { + 
singularVectors[jCh][iCh] = L_shr_r(singularVectors[jCh][iCh], sub(exp_max, sing_exp2[jCh][iCh]));
+                }
+            }
+            *singularVectors_e = exp_max;
+        }
+
+        //rescaling block
+        singularValues[currChannel] = Mpy_32_32( ( *sig_x ), ( *g ) );
+        IF( GT_16( *sig_x_e, *singularValues_e ) )
+        {
+            FOR( Word16 i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
+            {
+                IF( NE_16( i, currChannel ) )
+                {
+                    singularValues[i] = L_shl( singularValues[i], sub( *singularValues_e, *sig_x_e ) );
+                }
+            }
+            *singularValues_e = *sig_x_e;
+        }
+        ELSE IF ( LT_16( *sig_x_e, *singularValues_e ) )
+        {
+            singularValues[currChannel] = L_shr_r( singularValues[currChannel], sub( *singularValues_e, *sig_x_e ) );
+        }
+    }
+
+    return;
+}
+#else
 static void biDiagonalReductionLeft(
     float singularVectors[][MAX_OUTPUT_CHANNELS],
     float singularValues[MAX_OUTPUT_CHANNELS],
@@ -662,7 +1402,7 @@ static void biDiagonalReductionLeft(
     return;
 }
 
-
+#endif
 
 /*-------------------------------------------------------------------------
  * biDiagonalReductionRight()
@@ -670,6 +1410,129 @@ static void biDiagonalReductionLeft(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void biDiagonalReductionRight_fx( /* reduction step on row currChannel for the columns right of the diagonal; secDiag[currChannel+1..] is used as scratch */
+    Word32 singularVectors[][MAX_OUTPUT_CHANNELS], /* i/o: matrix mantissas sharing the exponent *singularVectors_e */
+    Word32 secDiag[MAX_OUTPUT_CHANNELS],           /* i/o: secDiag mantissas sharing the exponent *secDiag_e */
+    Word16 *singularVectors_e,                     /* i/o: common exponent of singularVectors */
+    Word16 *secDiag_e,                             /* i/o: common exponent of secDiag */
+    const Word16 nChannelsL,                       /* i : number of rows */
+    const Word16 nChannelsC,                       /* i : number of columns */
+    const Word16 currChannel,                      /* i : row that is processed */
+    Word32 *sig_x,                                 /* o : sum of |singularVectors[currChannel][j]| for j > currChannel, mantissa */
+    Word16 *sig_x_e,                               /* i/o: exponent of *sig_x */
+    Word32 *g )                                    /* o : signed root of the normalised row energy, Q31 */
+{
+    Word16 iCh, jCh, idx;
+    Word32 norm_x, r;
+    Word16 norm_x_e, r_e;
+    Word16 sing_exp[MAX_OUTPUT_CHANNELS];
+    Word16 secDiag_exp[MAX_OUTPUT_CHANNELS];
+    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
+    Word32 L_temp;
+    Word16 L_temp_e;
+    FOR (jCh = 0; jCh < MAX_OUTPUT_CHANNELS; jCh++) {
+        set_s(sing_exp2[jCh], *singularVectors_e, MAX_OUTPUT_CHANNELS);
+    }
+    set_s(secDiag_exp, *secDiag_e, MAX_OUTPUT_CHANNELS);
+
+    /* Setting values to 0 */
+    ( *sig_x ) = 0;
+    ( *g ) = 0;
+
+    IF( LT_16( currChannel, nChannelsL ) && NE_16( currChannel, sub( nChannelsC, 1 ) ) ) /* i <=m && i !=n */
+    {
+        idx = add(currChannel, 1);
+
+        FOR ( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
+        {
+            ( *sig_x ) = BASOP_Util_Add_Mant32Exp( *sig_x, *sig_x_e, L_abs( singularVectors[currChannel][jCh]), *singularVectors_e, sig_x_e );
+        }
+
+        IF ( ( *sig_x ) ) /*(fabsf(*sig_x) > EPSILON * fabsf(*sig_x)) { */
+        {
+            norm_x = 0;
+            norm_x_e = 0;
+
+            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /*nChannelsC */
+            {
+                singularVectors[currChannel][jCh] = BASOP_Util_Divide3232_Scale_cadence(singularVectors[currChannel][jCh], maxWithSign_fx(*sig_x), &sing_exp[jCh]);
+                sing_exp[jCh] = add(sing_exp[jCh], sub(*singularVectors_e, *sig_x_e));
+                norm_x = BASOP_Util_Add_Mant32Exp(norm_x, norm_x_e, Mpy_32_32(singularVectors[currChannel][jCh], singularVectors[currChannel][jCh]), shl(sing_exp[jCh], 1), &norm_x_e);
+            }
+            IF( GT_16( norm_x_e, 0 ) ) /* clamp the energy to just below 1.0 so that it fits Q31 */
+            {
+                norm_x = MAX_32;
+                norm_x_e = 0;
+            }
+            L_temp_e = norm_x_e;
+            L_temp = Sqrt32(norm_x, &L_temp_e);
+            L_temp = L_shl_r(L_temp, L_temp_e); // Q31
+            ( *g ) = L_negate( GE_32( singularVectors[currChannel][idx], 0 ) ? L_temp : L_negate( L_temp ) );
+
+            r = BASOP_Util_Add_Mant32Exp(Mpy_32_32((*g), singularVectors[currChannel][idx]), sing_exp[idx], -norm_x, norm_x_e, &r_e);
+            singularVectors[currChannel][idx] = BASOP_Util_Add_Mant32Exp(singularVectors[currChannel][idx], sing_exp[idx], -(*g), 0, &sing_exp[idx]);
+
+            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
+            {
+                secDiag[jCh] = BASOP_Util_Divide3232_Scale_cadence(singularVectors[currChannel][jCh], maxWithSign_fx(r), &secDiag_exp[jCh]);
+                secDiag_exp[jCh] = add( secDiag_exp[jCh], sub( sing_exp[jCh], r_e) );
+            }
+
+            FOR( iCh = currChannel + 1; iCh < nChannelsL; iCh++ ) /* nChannelsL */
+            {
+                norm_x = 0;
+                norm_x_e = 0;
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
+                {
+                    norm_x = BASOP_Util_Add_Mant32Exp(norm_x, norm_x_e, Mpy_32_32(singularVectors[iCh][jCh], singularVectors[currChannel][jCh]), add( *singularVectors_e, sing_exp[jCh] ), &norm_x_e);
+                }
+
+                FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
+                {
+                    singularVectors[iCh][jCh] = BASOP_Util_Add_Mant32Exp(singularVectors[iCh][jCh], *singularVectors_e, Mpy_32_32(norm_x, secDiag[jCh]), add(norm_x_e, secDiag_exp[jCh]), &sing_exp2[iCh][jCh]);
+                }
+            }
+
+            FOR( jCh = idx; jCh < nChannelsC; jCh++ ) /* nChannelsC */
+            {
+                singularVectors[currChannel][jCh] = Mpy_32_32( singularVectors[currChannel][jCh], ( *sig_x ) );
+                sing_exp2[currChannel][jCh] = add( sing_exp[jCh], *sig_x_e );
+            }
+
+            /*rescaling block*/
+            Word16 exp_max = *secDiag_e;
+            FOR(jCh = 0; jCh < nChannelsC; jCh++) {
+                exp_max = s_max(exp_max, secDiag_exp[jCh]);
+            }
+            FOR(jCh = 0; jCh < nChannelsC; jCh++) {
+                secDiag[jCh] = L_shr_r( secDiag[jCh], sub( exp_max, secDiag_exp[jCh] ) );
+            }
+            *secDiag_e = exp_max; /* every secDiag entry was just aligned to exp_max, so the shared exponent must follow (same as for singularVectors below) */
+
+            exp_max = *singularVectors_e;
+            FOR(iCh = 0; iCh < nChannelsL; iCh++)
+            {
+                FOR(jCh = 0; jCh < nChannelsC; jCh++)
+                {
+                    exp_max = s_max(exp_max, sing_exp2[iCh][jCh]);
+                }
+            }
+
+            FOR(iCh = 0; iCh < nChannelsL ; iCh++)
+            {
+                FOR(jCh = 0; jCh < nChannelsC; jCh++)
+                {
+                    singularVectors[iCh][jCh] = L_shr_r(singularVectors[iCh][jCh], sub(exp_max, sing_exp2[iCh][jCh]));
+                }
+            }
+            *singularVectors_e = exp_max;
+        }
+    }
+
+    return;
+}
+#else
 static void biDiagonalReductionRight(
     float singularVectors[][MAX_OUTPUT_CHANNELS],
     float secDiag[MAX_OUTPUT_CHANNELS],
@@ -736,7 +1599,7 @@ static void biDiagonalReductionRight(
     return;
 }
 
-
+#endif
 
 /*-------------------------------------------------------------------------
  * singularVectorsAccumulationLeft()
@@ -744,6 +1607,85 @@ static void biDiagonalReductionRight(
  *
  *-------------------------------------------------------------------------*/
 
+#ifdef IVAS_FLOAT_FIXED
+static void singularVectorsAccumulationLeft_fx(
+    Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], //Q31 output
+    Word32 singularValues[MAX_OUTPUT_CHANNELS],
+    Word16 singularVectors_e,
+    Word16 singularValues_e,
+    const Word16 nChannelsL,
+    const Word16 nChannelsC)
+{
+    Word16 nCh, iCh, k;
+    Word16 nChannels;
+    Word32 norm_y, t_jj, t_ii;
+    Word16 norm_y_e, t_jj_e, t_ii_e, temp_exp;
+    Word16 sing_exp2[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 };
+    FOR(nCh = 0; nCh < MAX_OUTPUT_CHANNELS; nCh++) {
+        set_s(sing_exp2[nCh], singularVectors_e, MAX_OUTPUT_CHANNELS);
+    }
+
+    /* Processing */
+    nChannels = min(nChannelsL, nChannelsC); /* min(nChannelsL,ChannelsC) */
+    //FILE *fp = fopen("t_ii_out.txt","a");
+    FOR(nCh = nChannels - 1; nCh >= 0; nCh--) /* min(nChannelsL,ChannelsC) */
+    {
+        t_ii = singularValues[nCh];
+        t_ii_e = singularValues_e;
+
+        FOR(iCh = nCh + 1; iCh < nChannelsC; iCh++) /* nChannelsC */
+        {
+            singularVectors_Left[nCh][iCh] = 0;
+        }
+
+        IF(t_ii) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/
+        {
+            t_ii = BASOP_Util_Divide3232_Scale_cadence(ONE_IN_Q30, maxWithSign_fx(t_ii), &temp_exp);
+            t_ii_e = add( 1, sub( temp_exp, t_ii_e ) );
+            //fprintf( fp, "%e\n", me2f( t_ii, t_ii_e ) );
+            FOR(iCh = nCh + 1; iCh < nChannelsC; iCh++) /* nChannelsC */
+            {
+                norm_y = 0;
+                norm_y_e = 0;
+                FOR(k = nCh + 1; k < nChannelsL; k++) /* nChannelsL */
+                {
+                    norm_y
= BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[k][nCh], singularVectors_Left[k][iCh] ), add( sing_exp2[k][nCh], sing_exp2[k][iCh] ), &norm_y_e ); + } + t_jj = BASOP_Util_Divide3232_Scale_cadence( Mpy_32_32( t_ii, norm_y ), maxWithSign_fx( singularVectors_Left[nCh][nCh] ), &temp_exp ); //t_ii_e+norm_y_e-*singularVectors_e, + t_jj_e = add( temp_exp, sub( add( t_ii_e, norm_y_e ), sing_exp2[nCh][nCh] ) ); + + FOR(k = nCh; k < nChannelsL; k++) /* nChannelsL */ + { + singularVectors_Left[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[k][iCh], sing_exp2[k][iCh], Mpy_32_32( t_jj, singularVectors_Left[k][nCh] ), add( t_jj_e, sing_exp2[k][nCh] ), &sing_exp2[k][iCh] ); + } + } + + FOR(iCh = nCh; iCh < nChannelsL; iCh++) /* nChannelsL */ + { + singularVectors_Left[iCh][nCh] = Mpy_32_32( singularVectors_Left[iCh][nCh], t_ii ); + sing_exp2[iCh][nCh] = add( sing_exp2[iCh][nCh], t_ii_e ); + } + } + ELSE + { + FOR(iCh = nCh; iCh < nChannelsL; iCh++) /* nChannelsL */ + { + singularVectors_Left[iCh][nCh] = 0; + } + } + + singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], sing_exp2[nCh][nCh], ONE_IN_Q30, 1, &sing_exp2[nCh][nCh] ); + } + //fclose(fp); + FOR(nCh = 0; nCh < nChannelsL; nCh++) { + FOR(iCh = 0; iCh < nChannelsC; iCh++) { + singularVectors_Left[nCh][iCh] = L_shl_sat( singularVectors_Left[nCh][iCh], sing_exp2[nCh][iCh] ); + } + } + + return; +} +#else static void singularVectorsAccumulationLeft( float singularVectors_Left[][MAX_OUTPUT_CHANNELS], float singularValues[MAX_OUTPUT_CHANNELS], @@ -804,7 +1746,7 @@ static void singularVectorsAccumulationLeft( return; } - +#endif /*------------------------------------------------------------------------- * singularVectorsAccumulationRight() @@ -812,6 +1754,72 @@ static void singularVectorsAccumulationLeft( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static void 
singularVectorsAccumulationRight_fx( + Word32 singularVectors_Left[][MAX_OUTPUT_CHANNELS], + Word32 singularVectors_Right[][MAX_OUTPUT_CHANNELS], //Q31 + Word32 secDiag[MAX_OUTPUT_CHANNELS], + Word16 singularVectors_e, + Word16 secDiag_e, + const Word16 nChannelsC ) +{ + Word16 nCh, iCh, k; + Word16 nChannels; + Word32 norm_y, t_ii, ratio_float; + Word16 norm_y_e, temp_exp1, sing_right_exp[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS] = { 0 }; + + /* Processing */ + nChannels = nChannelsC; /* nChannelsC */ + + /* avoid compiler warning */ + t_ii = secDiag[nChannels - 1]; + + FOR( nCh = nChannels - 1; nCh >= 0; nCh-- ) /* nChannelsC, min(nChannelsLmnChannelsC) otherwise */ + { + + IF( LT_16( nCh, sub( nChannelsC, 1 ) ) ) /* nChannelsC */ + { + IF( t_ii ) /*if (fabsf(t_ii) > EPSILON *fabsf(t_ii)) {*/ + { + + FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC*/ + { + ratio_float = BASOP_Util_Divide3232_Scale_cadence( singularVectors_Left[nCh][iCh], maxWithSign_fx( singularVectors_Left[nCh][nCh + 1]), &temp_exp1); + singularVectors_Right[iCh][nCh] = BASOP_Util_Divide3232_Scale_cadence( ratio_float, maxWithSign_fx( t_ii ), &sing_right_exp[iCh][nCh]); + sing_right_exp[iCh][nCh] = add(sing_right_exp[iCh][nCh], sub( temp_exp1, secDiag_e ) ); + //singularVectors_Right[iCh][nCh] = L_shl_sat( singularVectors_Right[iCh][nCh], temp_exp2 ); + } + + FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ + { + norm_y = 0; + norm_y_e = 0; + + FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ + { + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_e, sing_right_exp[k][iCh] ), &norm_y_e ); + } + + FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ + { + singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), 
&sing_right_exp[k][iCh] ); + singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); + sing_right_exp[k][iCh] = 0; + } + } + } + + FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ + { + singularVectors_Right[nCh][iCh] = singularVectors_Right[iCh][nCh] = 0; + } + } + singularVectors_Right[nCh][nCh] = MAX_32; + t_ii = secDiag[nCh]; + } + return; +} +#else static void singularVectorsAccumulationRight( float singularVectors_Left[][MAX_OUTPUT_CHANNELS], float singularVectors_Right[][MAX_OUTPUT_CHANNELS], @@ -869,7 +1877,7 @@ static void singularVectorsAccumulationRight( return; } - +#endif /*------------------------------------------------------------------------- * GivensRotation() @@ -877,6 +1885,60 @@ static void singularVectorsAccumulationRight( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word32 GivensRotation_fx( + const Word32 x, + const Word16 x_e, + const Word32 z, + const Word16 z_e, + Word16 *out_e) +{ + Word32 x_abs, z_abs; + Word32 cotan, tan, r; + Word16 temp_exp; + Word32 L_temp; + x_abs = L_abs( x ); + z_abs = L_abs( z ); + IF( LE_32( x_abs, Mpy_32_32( CONVERGENCE_FACTOR_FX, x_abs ) ) && LE_32( z_abs, Mpy_32_32( CONVERGENCE_FACTOR_FX, z_abs ) ) ) + { + r = 0; + } + ELSE IF( GE_16( BASOP_Util_Cmp_Mant32Exp( x_abs, x_e, z_abs, z_e ), 0 ) ) + { + IF( LE_32( x_abs, SVD_MINIMUM_VALUE_FX ) ) + { + r = 0; + } + ELSE + { + cotan = BASOP_Util_Divide3232_Scale_cadence( z_abs, x_abs, &temp_exp ); + temp_exp = add( temp_exp, sub( z_e, x_e ) ); + L_temp = BASOP_Util_Add_Mant32Exp( ONE_IN_Q30, 1, Mpy_32_32( cotan, cotan ), 2*temp_exp, &temp_exp ); + L_temp = Sqrt32( L_temp, &temp_exp ); + r = Mpy_32_32( x_abs, L_temp ); + *out_e = add( x_e, temp_exp ); + } + } + ELSE + { + IF( LE_32( z_abs, SVD_MINIMUM_VALUE_FX ) ) + { + r = 0; + } + ELSE + { + tan = BASOP_Util_Divide3232_Scale_cadence( x_abs, z_abs, &temp_exp ); + temp_exp = add( temp_exp, sub( 
x_e, z_e ) ); + L_temp = BASOP_Util_Add_Mant32Exp( ONE_IN_Q30, 1, Mpy_32_32( tan, tan ), 2*temp_exp, &temp_exp ); + L_temp = Sqrt32( L_temp, &temp_exp ); + r = Mpy_32_32( z_abs, L_temp ); + *out_e = add( z_e, temp_exp ); + } + } + + return ( r ); +} +#else static float GivensRotation( const float x, const float z ) @@ -916,7 +1978,7 @@ static float GivensRotation( return ( r ); } - +#endif /*------------------------------------------------------------------------- * maxWithSign() @@ -924,6 +1986,24 @@ static float GivensRotation( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +static Word32 maxWithSign_fx( + const Word32 a ) +{ + IF( GT_32( L_abs( a ), SVD_MINIMUM_VALUE_FX ) ) + { + return a; + } + ELSE IF( LT_32( a, 0 ) ) + { + return -SVD_MINIMUM_VALUE_FX; + } + ELSE + { + return SVD_MINIMUM_VALUE_FX; + } +} +#else static float maxWithSign( const float a ) { @@ -940,7 +2020,7 @@ static float maxWithSign( return SVD_MINIMUM_VALUE; } } - +#endif /*------------------------------------------------------------------------- * flushToZeroArray() @@ -948,6 +2028,26 @@ static float maxWithSign( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +#if 0 +static void flushToZeroArray_fx( + Word32 arr[MAX_OUTPUT_CHANNELS], + const Word16 length ) +{ + Word16 i; + + FOR ( i = 0; i < length; ++i ) + { + IF( LT_32(L_abs( arr[i] ), SVD_ZERO_FLUSH_THRESHOLD_FX )) + { + arr[i] = 0; + } + } + + return; +} +#endif +#else static void flushToZeroArray( float arr[MAX_OUTPUT_CHANNELS], const int16_t length ) @@ -964,7 +2064,7 @@ static void flushToZeroArray( return; } - +#endif /*------------------------------------------------------------------------- * flushToZeroMat() @@ -972,6 +2072,30 @@ static void flushToZeroArray( * *-------------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +#if 0 +static void flushToZeroMat_fx( + Word32 
mat[][MAX_OUTPUT_CHANNELS], + const Word16 m, + const Word16 n ) +{ + Word16 i, j; + + FOR( i = 0; i < m; ++i ) + { + FOR( j = 0; j < n; ++j ) + { + IF( LT_32( L_abs( mat[i][j] ), SVD_ZERO_FLUSH_THRESHOLD_FX ) ) + { + mat[i][j] = 0; + } + } + } + + return; +} +#endif +#else static void flushToZeroMat( float mat[][MAX_OUTPUT_CHANNELS], const int16_t m, @@ -992,3 +2116,4 @@ static void flushToZeroMat( return; } +#endif -- GitLab