diff --git a/lib_com/cnst.h b/lib_com/cnst.h index 3d49d2738b86fabd3d6a1069b9d0b7caac3ea63a..8b0f4438370f0b35481cb44027a66c5bdbd0cb03 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -2016,6 +2016,8 @@ typedef enum _DCTTYPE #define EPSILON_FX_SMALL 1 #define EPSILON_FIX (1) #define EPSILON_FX (Word32)1 +#define EPSILON_FX_M 1208925819 +#define EPSILON_FX_E -49 #define MAX_SEGMENT_LENGTH 480 diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index e762c13d548635366839fb44e10403df48913997..5d458df883c98929c78b044ecb139c45bb08d01a 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1333,10 +1333,10 @@ void ivas_ism_dec_digest_tc( ); #ifdef IVAS_FLOAT_FIXED -void ivas_param_ism_dec_digest_tc( +void ivas_param_ism_dec_digest_tc_fx( Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - const UWord16 nCldfbSlots, /* i : number of CLFBS slots in the transport channels */ - Word32 *transport_channels_f[] /* i : synthesized core-coder transport channels/DirAC output */ + const UWord16 nCldfbSlots, /* i : number of CLFBS slots in the transport channels */ + Word32 *transport_channels_f[] /* i : synthesized core-coder transport channels/DirAC output */ ); #else void ivas_param_ism_dec_digest_tc( @@ -3436,6 +3436,15 @@ void v_min( const int16_t N /* i : Vector length */ ); +#ifdef IVAS_FLOAT_FIXED +void v_sqrt_fx( + const Word32 x[], /* i : Input vector */ + Word16 exp[], + Word32 y[], /* o : Output vector that contains sqrt(x) */ + const Word16 N /* i : Vector length */ +); +#endif + void v_sqrt( const float x[], /* i : Input vector */ float y[], /* o : Output vector that contains sqrt(x) */ @@ -4807,6 +4816,21 @@ int16_t matrix_product( ); #ifdef IVAS_FLOAT_FIXED +Word16 matrix_product_mant_exp_fx( + const Word32 *X_fx, /* i : left hand matrix */ + const Word16 X_fx_e, /* i : left hand matrix */ + const Word16 rowsX, /* i : number of rows of the left hand matrix */ + const Word16 colsX, /* i : number of columns of the left hand matrix */ + const Word16 
transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication */ + const Word32 *Y_fx, /* i : right hand matrix */ + const Word16 Y_fx_e, /* i : right hand matrix */ + const Word16 rowsY, /* i : number of rows of the right hand matrix */ + const Word16 colsY, /* i : number of columns of the right hand matrix */ + const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication */ + Word32 *Z_fx, /* o : resulting matrix after the matrix multiplication */ + Word16 *Z_fx_e /* o : resulting matrix after the matrix multiplication */ +); + Word16 matrix_product_fx( const Word32 *X_fx, /* i : left hand matrix */ const Word16 rowsX, /* i : number of rows of the left hand matrix */ @@ -5114,6 +5138,26 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx( ); #endif +#ifdef IVAS_FLOAT_FIXED +Word16 computeMixingMatricesISM_fx( + const Word16 num_inputs, + const Word16 num_responses, + const Word16 num_outputs, + const Word32 *responses_fx, + const Word16 responses_e, + const Word32 *ener_fx, + const Word16 ener_e, + const Word32 *Cx_diag_fx, + const Word16 Cx_diag_e, + const Word32 *Cy_diag_fx, + const Word16 Cy_diag_e, + const Word16 *Q_16fx, //Q15 + const Word16 energy_compensation_flag, + const Word32 reg_Sx_fx, + const Word32 reg_ghat_fx, + Word32 *mixing_matrix_fx, + Word16 *mixing_matrix_e); +#else int16_t computeMixingMatricesISM( const int16_t num_inputs, const int16_t num_responses, @@ -5128,6 +5172,7 @@ int16_t computeMixingMatricesISM( const float reg_ghat, float *mixing_matrix ); +#endif void FdCngEncodeDiracMDCTStereoSID( CPE_ENC_HANDLE hCPE /* i/o: CPE encoder state structure */ diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index da11a7ac9b1ad848b7ea42812425f394207ffe0e..91824fff94516d18d318267cf8b8013c56aecd93 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -778,6 +778,24 @@ void v_min( * * square root of vector 
*-------------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED +void v_sqrt_fx( + const Word32 x[], /* i : Input vector */ + Word16 exp[], + Word32 y[], /* o : Output vector that contains sqrt(x) */ + const Word16 N /* i : Vector length */ +) +{ + Word16 i; + + FOR( i = 0; i < N; i++ ) + { + y[i] = Sqrt32( x[i], &exp[i] ); + } + + return; +} +#endif void v_sqrt( const float x[], /* i : Input vector */ @@ -1275,6 +1293,159 @@ int16_t matrix_product( } #ifdef IVAS_FLOAT_FIXED +Word16 matrix_product_mant_exp_fx( + const Word32 *X_fx, /* i : left hand matrix */ + const Word16 X_fx_e, /* i : left hand matrix */ + const Word16 rowsX, /* i : number of rows of the left hand matrix */ + const Word16 colsX, /* i : number of columns of the left hand matrix */ + const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication */ + const Word32 *Y_fx, /* i : right hand matrix */ + const Word16 Y_fx_e, /* i : right hand matrix */ + const Word16 rowsY, /* i : number of rows of the right hand matrix */ + const Word16 colsY, /* i : number of columns of the right hand matrix */ + const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication */ + Word32 *Z_fx, /* o : resulting matrix after the matrix multiplication */ + Word16 *Z_fx_e /* o : resulting matrix after the matrix multiplication */ +) +{ + Word16 i, j, k; + Word32 *Zp_fx = Z_fx; + Word16 out_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; + Word16 *Zp_fx_e = out_e; + Word16 row, col; + + /* Processing */ + test(); + test(); + test(); + IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + { + IF( NE_16( rowsX, rowsY ) ) + { + return EXIT_FAILURE; + } + FOR( j = 0; j < colsY; ++j ) + { + FOR( i = 0; i < colsX; ++i ) + { + ( *Zp_fx ) = 0; + move32(); + ( *Zp_fx_e ) = 0; + move16(); + FOR( k = 0; k < rowsX; ++k ) + { + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, 
*Zp_fx_e, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + } + Zp_fx++; + Zp_fx_e++; + } + } + row = colsY; + col = colsX; + } + ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + { + IF( NE_16( colsX, colsY ) ) + { + return EXIT_FAILURE; + } + FOR( j = 0; j < rowsY; ++j ) + { + FOR( i = 0; i < rowsX; ++i ) + { + ( *Zp_fx ) = 0; + move32(); + ( *Zp_fx_e ) = 0; + move16(); + FOR( k = 0; k < colsX; ++k ) + { + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + } + Zp_fx++; + Zp_fx_e++; + } + } + row = rowsY; + col = rowsX; + } + ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */ + { + IF( NE_16( rowsX, colsY ) ) + { + return EXIT_FAILURE; + } + FOR( j = 0; j < rowsY; ++j ) + { + FOR( i = 0; i < colsX; ++i ) + { + ( *Zp_fx ) = 0; + move32(); + ( *Zp_fx_e ) = 0; + move16(); + FOR( k = 0; k < colsX; ++k ) + { + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + } + + Zp_fx++; + Zp_fx_e++; + } + } + row = rowsY; + col = colsX; + } + ELSE /* Regular case */ + { + IF( NE_16( colsX, rowsY ) ) + { + return EXIT_FAILURE; + } + + FOR( j = 0; j < colsY; ++j ) + { + FOR( i = 0; i < rowsX; ++i ) + { + ( *Zp_fx ) = 0; + move32(); + ( *Zp_fx_e ) = 0; + move16(); + FOR( k = 0; k < colsX; ++k ) + { + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + } + Zp_fx++; + Zp_fx_e++; + } + } + row = colsY; + col = rowsX; + } + Zp_fx = Z_fx; + Zp_fx_e = out_e; + Word16 max_exp = -31; + FOR( j = 0; j < row; ++j ) + { + FOR( i = 0; i < col; ++i ) + { + max_exp = s_max( max_exp, *Zp_fx_e ); + Zp_fx_e++; + } + } + Zp_fx_e = out_e; + *Z_fx_e = max_exp; + FOR( j = 0; j < row; ++j ) + { + FOR( i = 0; i < col; ++i 
) + { + *Zp_fx = L_shr_r( *Zp_fx, sub( *Z_fx_e, *Zp_fx_e ) ); + Zp_fx++; + Zp_fx_e++; + } + } + + return EXIT_SUCCESS; +} + Word16 matrix_product_fx( const Word32 *X_fx, /* i : left hand matrix */ const Word16 rowsX, /* i : number of rows of the left hand matrix */ diff --git a/lib_dec/ivas_dirac_output_synthesis_cov.c b/lib_dec/ivas_dirac_output_synthesis_cov.c index 34bed34af3ac0de7b7febde5c69d63e8d0153bb3..b9774af5bfa5a3eb4b20982d72a787d4408275a4 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov.c @@ -2694,370 +2694,321 @@ Word16 computeMixingMatricesResidual_fx( * *-------------------------------------------------------------------*/ #ifdef IVAS_FLOAT_FIXED -int16_t computeMixingMatricesISM( - const int16_t num_inputs, - const int16_t num_responses, - const int16_t num_outputs, - const float *responses, - const float *ener, - const float *Cx_diag, - const float *Cy_diag, - const float *Q, - const int16_t energy_compensation_flag, - const float reg_Sx, - const float reg_ghat, - float *mixing_matrix ) +Word16 computeMixingMatricesISM_fx( + const Word16 num_inputs, + const Word16 num_responses, + const Word16 num_outputs, + const Word32 *responses_fx, + const Word16 responses_e, + const Word32 *ener_fx, + const Word16 ener_e, + const Word32 *Cx_diag_fx, + const Word16 Cx_diag_e, + const Word32 *Cy_diag_fx, + const Word16 Cy_diag_e, + const Word16 *Q_16fx, // Q15 + const Word16 energy_compensation_flag, + const Word32 reg_Sx_fx, + const Word32 reg_ghat_fx, + Word32 *mixing_matrix_fx, + Word16 *mixing_matrix_e ) { - int16_t i, out; - int16_t lengthCx, lengthCy; - float *Cy_tilde_p; - float *adj; - float limit; - float svd_in_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_u_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float svd_s_buffer[MAX_OUTPUT_CHANNELS]; - float svd_v_buffer[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - float Kx[MAX_TRANSPORT_CHANNELS]; - float Ky[MAX_OUTPUT_CHANNELS * 
MAX_OUTPUT_CHANNELS]; - float Kx_reg_inv[MAX_TRANSPORT_CHANNELS]; - float Cy_hat_diag[MAX_OUTPUT_CHANNELS]; - float G_hat[MAX_OUTPUT_CHANNELS]; - float mat_mult_buffer1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - float mat_mult_buffer2[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - float mat_mult_buffer3[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - int16_t nL, nC; -#ifdef IVAS_FLOAT_FIXED - Word32 responses_fx[PARAM_ISM_MAX_CHAN * MAX_NUM_OBJECTS]; - Word32 ener_fx[MAX_NUM_OBJECTS]; + Word16 i, out; + Word16 lengthCx, lengthCy; + Word16 nL, nC; + + Word32 *Cy_tilde_p_fx; + Word32 *adj_fx; + Word32 limit_fx; + Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - Word32 Cx_diag_fx[MAX_OUTPUT_CHANNELS]; Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 G_hat_fx[MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS]; - Word32 mixing_matrix_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; - Word32 adj_fx[MAX_OUTPUT_CHANNELS]; Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word32 Kx_fx[MAX_TRANSPORT_CHANNELS]; - Word16 responses_e, ener_e, Ky_e, Q_e, Cx_diag_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, mixing_matrix_e, adj_e, mat_mult_buffer3_e, Kx_e; + Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e; Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; -#ifdef MSAN_FIX - Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 }; -#else Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS]; -#endif // MSAN_FIX Word16 svd_s_buffer_fx_e; Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; - Word16 mat_mult_buffer1_fx_e; - Word16 
svd_u_buffer_fx_e[MAX_OUTPUT_CHANNELS]; - Word16 svd_v_buffer_fx_e[MAX_OUTPUT_CHANNELS]; -#else - float Q_Cx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; -#endif + Word16 temp_e[MAX_OUTPUT_CHANNELS]; push_wmops( "dirac_cov_mix_mat" ); out = EXIT_SUCCESS; lengthCx = num_inputs; lengthCy = num_outputs; - set_zero( svd_s_buffer, MAX_OUTPUT_CHANNELS ); - for ( i = 0; i < MAX_OUTPUT_CHANNELS; i++ ) + FOR( i = 0; i < lengthCy * lengthCx; i++ ) { - set_zero( svd_in_buffer[i], MAX_OUTPUT_CHANNELS ); - set_zero( svd_u_buffer[i], MAX_OUTPUT_CHANNELS ); - set_zero( svd_v_buffer[i], MAX_OUTPUT_CHANNELS ); + IF( Q_16fx[i] == MAX_16 ) + { + Q_fx[i] = MAX_32; + } + ELSE + { + Q_fx[i] = L_deposit_h( Q_16fx[i] ); + } + } + Q_e = 0; + set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS ); + FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ ) + { + set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS ); + set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS ); + set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS ); } /* Decomposition of Cy = Ky*Ky' */ /* Ky = responses*diag(ener) */ -#ifdef IVAS_FLOAT_FIXED - f2me_buf( responses, responses_fx, &responses_e, lengthCy * num_responses ); - f2me_buf( ener, ener_fx, &ener_e, num_responses ); - matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e ); - me2f_buf( Ky_fx, Ky_e, Ky, lengthCy * num_responses ); -#else - matrix_diag_product( responses, lengthCy, num_responses, 0, ener, num_responses, Ky ); -#endif - /* Decomposition of Cx -> Computing Kx */ - v_sqrt( Cx_diag, Kx, lengthCx ); + set16_fx( temp_e, Cx_diag_e, lengthCx ); + v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx ); + Kx_e = temp_e[0]; + FOR( i = 1; i < lengthCx; i++ ) + { + Kx_e = s_max( Kx_e, temp_e[i] ); + } + FOR( i = 0; i < lengthCx; i++ ) + { + Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); + } /* Regularization of Sx */ - maximum( Kx, lengthCx, &limit ); - limit = limit * reg_Sx + EPSILON; + 
maximum_32_fx( Kx_fx, lengthCx, &limit_fx ); + limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); - for ( i = 0; i < lengthCx; ++i ) + FOR( i = 0; i < lengthCx; ++i ) { - svd_s_buffer[i] = ( ( Kx[i] > limit ) ? Kx[i] : limit ); + svd_s_buffer_fx[i] = ( ( Kx_fx[i] > limit_fx ) ? Kx_fx[i] : limit_fx ); } + svd_s_buffer_fx_e = Kx_e; - limit = 0.0f; + limit_fx = 0; /* regularized Kx-1 */ - for ( i = 0; i < lengthCx; ++i ) + FOR( i = 0; i < lengthCx; ++i ) { - float reg_fac = ( 1.0f / svd_s_buffer[i] ); - Kx_reg_inv[i] = reg_fac; + IF( svd_s_buffer_fx[i] ) + { + Word32 reg_fac = BASOP_Util_Divide3232_Scale_cadence( MAX_32, svd_s_buffer_fx[i], &temp_e[i] ); + Kx_reg_inv_fx[i] = reg_fac; + temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e ); + } + ELSE + { + Word32 reg_fac = BASOP_Util_Divide3232_Scale_cadence( MAX_32, EPSILON_FX_M, &temp_e[i] ); + Kx_reg_inv_fx[i] = reg_fac; + temp_e[i] = sub( temp_e[i], EPSILON_FX_E ); + } + } + Kx_reg_inv_e = temp_e[0]; + FOR( i = 1; i < lengthCx; i++ ) + { + Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] ); + } + FOR( i = 0; i < lengthCx; i++ ) + { + Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); } /************************ normalization matrix G hat **********************/ /* Computing Q*Cx*Q' */ -#ifdef IVAS_FLOAT_FIXED Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS]; Word16 Cy_hat_diag_e; - f2me_buf( Q, Q_fx, &Q_e, lengthCy * lengthCx ); - f2me_buf( Cx_diag, Cx_diag_fx, &Cx_diag_e, lengthCx ); - matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e ); Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 ); - for ( i = 0; i < lengthCy * lengthCx; ++i ) + FOR( i = 0; i < lengthCy * lengthCx; ++i ) { - if ( Q_Cx_e > Q_e ) + IF( GT_16( Q_Cx_e, Q_e ) ) { Q_fx[i] = L_shr( Q_fx[i], guard_bits ); } - else + ELSE { Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits ); } } - if ( Q_Cx_e > Q_e ) + IF( GT_16( Q_Cx_e, Q_e ) ) { Q_e += guard_bits; } - else + ELSE { Q_Cx_e += 
guard_bits; } matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e ); - me2f_buf( Cy_hat_diag_fx, Cy_hat_diag_e, Cy_hat_diag, lengthCy ); -#else - matrix_diag_product( Q, lengthCy, lengthCx, 0, Cx_diag, lengthCx, Q_Cx ); - matrix_product_diag( Q_Cx, lengthCy, lengthCx, 0, Q, lengthCy, lengthCx, 1, Cy_hat_diag ); -#endif - /* Computing Cy_hat_diag */ - for ( i = 0; i < lengthCy; ++i ) + FOR( i = 0; i < lengthCy; ++i ) { - if ( Cy_hat_diag[i] > limit ) + IF( GT_32( Cy_hat_diag_fx[i], limit_fx ) ) { - limit = Cy_hat_diag[i]; + limit_fx = Cy_hat_diag_fx[i]; } } - - limit = limit * reg_ghat + EPSILON; + limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); /* Computing G_hat */ - for ( i = 0; i < lengthCy; ++i ) + FOR( i = 0; i < lengthCy; ++i ) { - if ( limit > Cy_hat_diag[i] ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ + IF( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ { - Cy_hat_diag[i] = limit; + Cy_hat_diag_fx[i] = limit_fx; } - G_hat[i] = sqrtf( Cy_diag[i] / Cy_hat_diag[i] ); + IF( Cy_diag_fx[i] ) + { + IF( Cy_hat_diag_fx[i] ) + { + G_hat_fx[i] = BASOP_Util_Divide3232_Scale_cadence( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] ); + temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) ); + G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] ); + } + ELSE + { + G_hat_fx[i] = BASOP_Util_Divide3232_Scale_cadence( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] ); + temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) ); + G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] ); + } + } + ELSE + { + G_hat_fx[i] = 0; + temp_e[i] = 0; + } + // G_hat[i] = sqrtf( Cy_diag[i] / Cy_hat_diag[i] ); + } + G_hat_e = temp_e[0]; + FOR( i = 1; i < lengthCy; i++ ) + { + G_hat_e = s_max( G_hat_e, temp_e[i] ); + } + FOR( i = 0; i < lengthCy; i++ ) + { + G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); } /************************ Formulate optimal P 
**********************/ /* Computing the input matrix Kx'*Q'*G_hat'*Ky */ -#ifdef IVAS_FLOAT_FIXED - f2me_buf( Kx, Kx_fx, &Kx_e, lengthCx ); - f2me_buf( Q, Q_fx, &Q_e, lengthCy * lengthCx ); - diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e ); - f2me_buf( G_hat, G_hat_fx, &G_hat_e, lengthCy ); - matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e ); - me2f_buf( mat_mult_buffer2_fx, mat_mult_buffer2_e, mat_mult_buffer2, lengthCx * lengthCy ); -#else - diag_matrix_product( Kx, lengthCx, Q, lengthCy, lengthCx, 1, mat_mult_buffer1 ); - matrix_diag_product( mat_mult_buffer1, lengthCx, lengthCy, 0, G_hat, lengthCy, mat_mult_buffer2 ); -#endif - matrix_product( mat_mult_buffer2, lengthCx, lengthCy, 0, Ky, lengthCy, num_responses, 0, mat_mult_buffer1 ); + matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e ); - if ( lengthCx < num_responses ) + IF( LT_16( lengthCx, num_responses ) ) { - -#ifdef IVAS_FLOAT_FIXED - f2me_buf( mat_mult_buffer1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e, lengthCx * num_responses ); mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 ); -#else - mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 1 ); -#endif nL = num_responses; nC = lengthCx; - svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); - - // local fix2float: to be removed - for ( i = 0; i < nL; i++ ) - { - me2f_buf( svd_v_buffer_fx[i], 0, svd_v_buffer[i], nC ); - } - for ( i = 0; i < nC; i++ ) - { - me2f_buf( svd_u_buffer_fx[i], 0, svd_u_buffer[i], nC ); - } - me2f_buf( svd_s_buffer_fx, svd_s_buffer_fx_e, svd_s_buffer, nC ); + svd_fx( svd_in_buffer_fx, 
mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); } - else + ELSE { -#ifdef IVAS_FLOAT_FIXED - f2me_buf( mat_mult_buffer1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e, lengthCx * num_responses ); mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 ); -#else - mat2svdMat( mat_mult_buffer1, svd_in_buffer, lengthCx, num_responses, 0 ); -#endif nL = lengthCx; nC = num_responses; - svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); - - // local fix2float: to be removed - for ( i = 0; i < nL; i++ ) - { - me2f_buf( svd_u_buffer_fx[i], 0, svd_u_buffer[i], nC ); - } - for ( i = 0; i < nC; i++ ) - { - me2f_buf( svd_v_buffer_fx[i], 0, svd_v_buffer[i], nC ); - } - me2f_buf( svd_s_buffer_fx, svd_s_buffer_fx_e, svd_s_buffer, nC ); + svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, &svd_s_buffer_fx_e, nL, nC ); } /* Actually Processing P */ /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */ -#ifdef IVAS_FLOAT_FIXED - Word16 min_q = -1; - FOR( Word32 g = 0; g < num_responses; g++ ) - { - f2me_buf( svd_v_buffer[g], svd_v_buffer_fx[g], &svd_v_buffer_fx_e[g], lengthCx ); - IF( svd_v_buffer_fx_e[g] > min_q ) - { - min_q = svd_v_buffer_fx_e[g]; - } - } - - FOR( Word32 g = 0; g < num_responses; g++ ) - { - FOR( Word32 h = 0; h < lengthCx; h++ ) - { - svd_v_buffer_fx[g][h] = L_shr( svd_v_buffer_fx[g][h], min_q - svd_v_buffer_fx_e[g] ); - } - } - svd_v_buffer_fx_e[0] = min_q; - - min_q = -1; - FOR( Word32 g = 0; g < lengthCx; g++ ) - { - f2me_buf( svd_u_buffer[g], svd_u_buffer_fx[g], &svd_u_buffer_fx_e[g], lengthCx ); - IF( svd_u_buffer_fx_e[g] > min_q ) - { - min_q = svd_u_buffer_fx_e[g]; - } - } - - FOR( Word32 g = 0; g < lengthCx; g++ ) - { - FOR( Word32 h = 0; h < lengthCx; h++ ) - { - 
svd_u_buffer_fx[g][h] = L_shr( svd_u_buffer_fx[g][h], min_q - svd_u_buffer_fx_e[g] ); - } - } - svd_u_buffer_fx_e[0] = min_q; svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx ); + mat_mult_buffer1_e = 0; svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx ); + mat_mult_buffer2_e = 0; - me2f_buf( mat_mult_buffer1_fx, svd_v_buffer_fx_e[0], mat_mult_buffer1, num_responses * lengthCx ); - me2f_buf( mat_mult_buffer2_fx, svd_u_buffer_fx_e[0], mat_mult_buffer2, lengthCx * lengthCx ); -#else - svdMat2mat( svd_v_buffer, mat_mult_buffer1, num_responses, lengthCx ); - svdMat2mat( svd_u_buffer, mat_mult_buffer2, lengthCx, lengthCx ); -#endif - matrix_product( mat_mult_buffer1, num_responses, lengthCx, 0, mat_mult_buffer2, lengthCx, lengthCx, 1, mat_mult_buffer3 ); + matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e ); /************************ Formulate M **********************/ - matrix_product( Ky, lengthCy, num_responses, 0, mat_mult_buffer3, num_responses, lengthCx, 0, mat_mult_buffer1 ); - -#ifdef IVAS_FLOAT_FIXED - f2me_buf( mat_mult_buffer1, mat_mult_buffer1_fx, &mat_mult_buffer1_e, lengthCx * lengthCy ); - f2me_buf( Kx_reg_inv, Kx_reg_inv_fx, &Kx_reg_inv_e, lengthCx ); - - matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, &mixing_matrix_e ); + matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e ); - me2f_buf( mixing_matrix_fx, mixing_matrix_e, mixing_matrix, lengthCy * lengthCx ); -#else - matrix_diag_product( mat_mult_buffer1, lengthCy, lengthCx, 0, Kx_reg_inv, lengthCx, mixing_matrix ); -#endif + matrix_diag_product_fx( mat_mult_buffer1_fx, 
mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e ); /*********************** Energy Compensation ****************/ /* Compute Cy_tilde = M*Cx*M' */ -#ifdef IVAS_FLOAT_FIXED - f2me_buf( mixing_matrix, mixing_matrix_fx, &mixing_matrix_e, lengthCx * lengthCy ); - f2me_buf( Cx_diag, Cx_diag_fx, &Cx_diag_e, lengthCx ); + matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e ); - matrix_diag_product_fx( mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e ); + matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e ); - me2f_buf( mat_mult_buffer1_fx, mat_mult_buffer1_e, mat_mult_buffer1, lengthCx * lengthCy ); -#else - matrix_diag_product( mixing_matrix, lengthCy, lengthCx, 0, Cx_diag, lengthCx, mat_mult_buffer1 ); -#endif - matrix_product( mat_mult_buffer1, lengthCy, lengthCx, 0, mixing_matrix, lengthCy, lengthCx, 1, mat_mult_buffer2 ); - - if ( energy_compensation_flag == 1 ) + IF( EQ_16( energy_compensation_flag, 1 ) ) { - adj = svd_s_buffer; - Cy_tilde_p = mat_mult_buffer2; - for ( i = 0; i < lengthCy; ++i ) + adj_fx = svd_s_buffer_fx; + Cy_tilde_p_fx = mat_mult_buffer2_fx; + FOR( i = 0; i < lengthCy; ++i ) { /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */ - if ( Cy_tilde_p[i + i * lengthCy] < 0.0f ) + IF( LT_32( Cy_tilde_p_fx[i + i * lengthCy], 0 ) ) { - adj[i] = 1.0f; + adj_fx[i] = MAX_32; + temp_e[i] = 0; } - else + ELSE { - adj[i] = sqrtf( Cy_diag[i] / ( Cy_tilde_p[i + i * lengthCy] + EPSILON ) ); + IF( Cy_diag_fx[i] ) + { + IF( Cy_tilde_p_fx[i + i * lengthCy] ) + { + adj_fx[i] = BASOP_Util_Divide3232_Scale_cadence( Cy_diag_fx[i], 
Cy_tilde_p_fx[i + i * lengthCy], &temp_e[i] ); + temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) ); + adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] ); + } + ELSE + { + adj_fx[i] = BASOP_Util_Divide3232_Scale_cadence( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] ); + temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) ); + adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] ); + } + } + ELSE + { + adj_fx[i] = 0; + temp_e[i] = 0; + } } - if ( adj[i] > 4.0f ) + // if ( adj[i] > 4.0f ) + IF( GT_16( BASOP_Util_Cmp_Mant32Exp( adj_fx[i], temp_e[i], MAX_32, 2 ), 0 ) ) { - adj[i] = 4.0f; + adj_fx[i] = MAX_32; + temp_e[i] = 2; } } + adj_e = temp_e[0]; + FOR( i = 1; i < lengthCy; i++ ) + { + adj_e = s_max( adj_e, temp_e[i] ); + } + FOR( i = 0; i < lengthCy; i++ ) + { + adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); + } -#ifdef IVAS_FLOAT_FIXED - f2me_buf( adj, adj_fx, &adj_e, lengthCy ); - f2me_buf( mixing_matrix, mixing_matrix_fx, &mixing_matrix_e, lengthCy * lengthCx ); - - diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e ); - - me2f_buf( mat_mult_buffer3_fx, mat_mult_buffer3_e, mat_mult_buffer3, lengthCx * lengthCy ); -#else - diag_matrix_product( adj, lengthCy, mixing_matrix, lengthCy, lengthCx, 0, mat_mult_buffer3 ); -#endif + diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e ); - mvr2r( mat_mult_buffer3, mixing_matrix, lengthCy * lengthCx ); + Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, lengthCy * lengthCx ); + *mixing_matrix_e = mat_mult_buffer3_e; } pop_wmops(); diff --git a/lib_dec/ivas_ism_param_dec.c b/lib_dec/ivas_ism_param_dec.c index 6d540c937c3506bfd58aecd3a9230950508c235f..a1c7c23566cae9eac6ea295c6e9805400daec541 100644 --- a/lib_dec/ivas_ism_param_dec.c +++ b/lib_dec/ivas_ism_param_dec.c @@ -300,11 +300,17 @@ static void 
ivas_param_ism_collect_slot_fx( Word16 brange[2]; Word32 tmp_fx; Word16 exp_tmp; + Word16 i, j; /* loop over parameter bands to collect transport channel energies */ Word16 exp_ref_power_buf[CLDFB_NO_CHANNELS_MAX]; Word16 exp_cx_diag_buf[CLDFB_NO_CHANNELS_MAX][PARAM_ISM_MAX_DMX]; + set16_fx( exp_ref_power_buf, *exp_ref_power, CLDFB_NO_CHANNELS_MAX ); + for ( i = 0; i < CLDFB_NO_CHANNELS_MAX; i++ ) + { + set16_fx( exp_cx_diag_buf[i], *exp_cx_diag, PARAM_ISM_MAX_DMX ); + } FOR( band_idx = 0; band_idx < hParamIsmDec->hParamIsm->nbands; band_idx++ ) { brange[0] = hParamIsmDec->hParamIsm->band_grouping[band_idx]; @@ -321,9 +327,9 @@ static void ivas_param_ism_collect_slot_fx( tmp_fx = BASOP_Util_Add_Mant32Exp( tmp_fx, exp_tmp, var2, add( exp_imag, exp_imag ), &exp_tmp ); Word16 exp_cx_diag_new = 0, exp_ref_power_new = 0; - cx_diag_fx[bin_idx][ch] = BASOP_Util_Add_Mant32Exp( cx_diag_fx[bin_idx][ch], *exp_cx_diag, tmp_fx, exp_tmp, &exp_cx_diag_new ); + cx_diag_fx[bin_idx][ch] = BASOP_Util_Add_Mant32Exp( cx_diag_fx[bin_idx][ch], exp_cx_diag_buf[bin_idx][ch], tmp_fx, exp_tmp, &exp_cx_diag_new ); move32(); - ref_power_fx[bin_idx] = BASOP_Util_Add_Mant32Exp( ref_power_fx[bin_idx], *exp_ref_power, tmp_fx, exp_tmp, &exp_ref_power_new ); + ref_power_fx[bin_idx] = BASOP_Util_Add_Mant32Exp( ref_power_fx[bin_idx], exp_ref_power_buf[bin_idx], tmp_fx, exp_tmp, &exp_ref_power_new ); move32(); exp_cx_diag_buf[bin_idx][ch] = exp_cx_diag_new; move16(); @@ -333,29 +339,23 @@ static void ivas_param_ism_collect_slot_fx( } /*make common exponent*/ - Word16 max_exp_cx_diag = 0, max_exp_ref_power = 0; - Word16 a, b; - FOR( band_idx = 0; band_idx < hParamIsmDec->hParamIsm->nbands; band_idx++ ) + Word16 max_exp_cx_diag = exp_cx_diag_buf[0][0], max_exp_ref_power = exp_ref_power_buf[0]; + FOR( i = 0; i < CLDFB_NO_CHANNELS_MAX; i++ ) { - a = hParamIsmDec->hParamIsm->band_grouping[band_idx]; - b = hParamIsmDec->hParamIsm->band_grouping[band_idx + 1]; - FOR( bin_idx = a; bin_idx < b; bin_idx++ ) + 
FOR( j = 0; j < PARAM_ISM_MAX_DMX; j++ ) { - max_exp_cx_diag = s_max( max_exp_cx_diag, exp_cx_diag_buf[bin_idx][ch] ); - max_exp_ref_power = s_max( max_exp_ref_power, exp_ref_power_buf[bin_idx] ); + max_exp_cx_diag = s_max( max_exp_cx_diag, exp_cx_diag_buf[i][j] ); } + max_exp_ref_power = s_max( max_exp_ref_power, exp_ref_power_buf[i] ); } - - FOR( band_idx = 0; band_idx < hParamIsmDec->hParamIsm->nbands; band_idx++ ) + FOR( i = 0; i < CLDFB_NO_CHANNELS_MAX; i++ ) { - a = hParamIsmDec->hParamIsm->band_grouping[band_idx]; - b = hParamIsmDec->hParamIsm->band_grouping[band_idx + 1]; - FOR( bin_idx = a; bin_idx < b; bin_idx++ ) + FOR( j = 0; j < PARAM_ISM_MAX_DMX; j++ ) { - cx_diag_fx[bin_idx][ch] = L_shr( cx_diag_fx[bin_idx][ch], sub( max_exp_cx_diag, exp_cx_diag_buf[bin_idx][ch] ) ); - ref_power_fx[bin_idx] = L_shr( ref_power_fx[bin_idx], sub( max_exp_ref_power, exp_ref_power_buf[bin_idx] ) ); + cx_diag_fx[i][j] = L_shr_r( cx_diag_fx[i][j], sub( max_exp_cx_diag, exp_cx_diag_buf[i][j] ) ); } + ref_power_fx[i] = L_shr_r( ref_power_fx[i], sub( max_exp_ref_power, exp_ref_power_buf[i] ) ); } *exp_cx_diag = max_exp_cx_diag; @@ -397,6 +397,143 @@ static void ivas_param_ism_collect_slot( } #endif
+#ifdef IVAS_FLOAT_FIXED
+static void ivas_param_ism_compute_mixing_matrix_fx( /* fixed-point: fills one mixing matrix (mantissas + exponent) per frequency bin of every parameter band */
+    const int16_t nchan_ism,           /* i  : number of ISM channels                             */
+    PARAM_ISM_DEC_HANDLE hParamIsmDec, /* i/o: decoder ParamISM handle                            */
+    ISM_DTX_DATA_DEC hISMDTX,          /* i  : ISM DTX handle                                     */
+    Word32 direct_response_fx[MAX_NUM_OBJECTS][PARAM_ISM_MAX_CHAN], // Q30
+    const Word16 nchan_transport, /* i  : transport channel count, passed on as num_inputs; asserted to be 2 */
+    const Word16 nchan_out_woLFE, /* i  : output channel count, passed on as num_outputs (presumably excluding LFE - confirm) */
+    Word32 cx_diag_fx[][PARAM_ISM_MAX_DMX], /* i  : per-bin Cx diagonal mantissas; row bin_idx is passed on as Cx_diag_fx */
+    Word16 cx_diag_e, /* i  : exponent shared by all cx_diag_fx entries */
+    Word32 ref_power_fx[], /* i  : per-bin reference power mantissas */
+    Word16 ref_power_e, /* i  : exponent shared by all ref_power_fx entries */
+    Word32 mixing_matrix_fx[CLDFB_NO_CHANNELS_MAX][PARAM_ISM_MAX_CHAN * PARAM_ISM_MAX_DMX], /* o  : per-bin mixing matrix mantissas */
+    Word16 mixing_matrix_e[CLDFB_NO_CHANNELS_MAX] ) /* o  : per-bin mixing matrix exponent */
+{
+    Word16 band_idx, bin_idx;
+    Word16 i, w, obj_indx;
+    Word16 brange[2];
+    Word32 direct_power_fx[MAX_NUM_OBJECTS];
+    Word32 cy_diag_fx[PARAM_ISM_MAX_CHAN];
+    Word32 cy_diag_tmp_fx[MAX_NUM_OBJECTS][PARAM_ISM_MAX_CHAN];
+    Word32 *dir_res_ptr_fx;
+    Word16 *proto_matrix_fx;
+    Word32 response_matrix_fx[PARAM_ISM_MAX_CHAN * MAX_NUM_OBJECTS];
+    Word16 num_wave;
+    Word16 dir_res_ptr_e, cy_diag_e, cy_diag_e_arr[PARAM_ISM_MAX_CHAN], cy_diag_tmp_e[MAX_NUM_OBJECTS], response_matrix_e, direct_power_e, temp_e[PARAM_ISM_MAX_CHAN];
+
+    proto_matrix_fx = hParamIsmDec->hParamIsmRendering->proto_matrix_fx;
+
+    assert( ( nchan_ism == 3 ) || ( nchan_ism == 4 ) );
+    assert( nchan_transport == 2 );
+
+    IF( hParamIsmDec->hParamIsm->flag_noisy_speech || hISMDTX.dtx_flag ) /* noisy speech / DTX: one wave per ISM channel */
+    {
+        num_wave = nchan_ism;
+    }
+    ELSE
+    {
+        num_wave = MAX_PARAM_ISM_WAVE;
+    }
+    set32_fx( response_matrix_fx, 0, PARAM_ISM_MAX_CHAN * MAX_NUM_OBJECTS );
+    response_matrix_e = 0;
+
+    /* loop over parameter bands to compute the mixing matrix */
+    FOR( band_idx = 0; band_idx < hParamIsmDec->hParamIsm->nbands; band_idx++ )
+    {
+        brange[0] = hParamIsmDec->hParamIsm->band_grouping[band_idx]; /* first bin of the band */
+        brange[1] = hParamIsmDec->hParamIsm->band_grouping[band_idx + 1]; /* one past the last bin of the band */
+
+        /* Compute covariance matrix from direct response*/
+        FOR( w = 0; w < num_wave; w++ )
+        {
+            set32_fx( cy_diag_tmp_fx[w], 0, nchan_out_woLFE );
+
+            IF( hParamIsmDec->hParamIsm->flag_noisy_speech || hISMDTX.dtx_flag )
+            {
+                dir_res_ptr_fx = direct_response_fx[w];
+            }
+            ELSE
+            {
+                obj_indx = hParamIsmDec->hParamIsm->obj_indices[band_idx][0][w];
+                dir_res_ptr_fx = direct_response_fx[obj_indx];
+            }
+            Copy32( dir_res_ptr_fx, response_matrix_fx + w * nchan_out_woLFE, nchan_out_woLFE ); // Q30
+            dir_res_ptr_e = 1; /* Q30 mantissa <=> exponent 31 - 30 = 1 */
+            response_matrix_e = 1; /* response_matrix_fx holds copies of the same Q30 data */
+            /* we only need the diagonal of Cy*/
+            matrix_product_diag_fx( dir_res_ptr_fx, dir_res_ptr_e, nchan_out_woLFE, 1, 0, dir_res_ptr_fx, dir_res_ptr_e, 1, nchan_out_woLFE, 0, cy_diag_tmp_fx[w], &cy_diag_tmp_e[w] );
+        }
+
+        FOR( bin_idx = brange[0]; bin_idx < brange[1]; bin_idx++ )
+        {
+
+            set32_fx( cy_diag_fx, 0, nchan_out_woLFE );
+            set16_fx( cy_diag_e_arr, 0, nchan_out_woLFE );
+            FOR( w = 0; w < num_wave; w++ )
+            {
+                IF( hParamIsmDec->hParamIsm->flag_noisy_speech || hISMDTX.dtx_flag )
+                {
+                    // direct_power[w] = ( 1.0f / nchan_ism ) * ref_power[bin_idx];
+                    SWITCH( nchan_ism )
+                    {
+                        case 2: /* 1/2 */
+                            direct_power_fx[w] = L_shr_r( ref_power_fx[bin_idx], 1 );
+                            BREAK;
+                        case 3: /* 1/3 */
+                            direct_power_fx[w] = Mpy_32_16_1( ref_power_fx[bin_idx], 10923 ); /* 10923 = round( 32768 / 3 ), i.e. 1/3 in Q15 (24576 would be 0.75) */
+                            BREAK;
+                        case 4: /* 1/4 */
+                            direct_power_fx[w] = L_shr_r( ref_power_fx[bin_idx], 2 );
+                            BREAK;
+                    }
+                }
+                ELSE
+                {
+                    direct_power_fx[w] = Mpy_32_16_1( ref_power_fx[bin_idx], hParamIsmDec->power_ratios_fx[band_idx][0][w] ); /* power ratio (Q15) times reference power */
+                }
+                direct_power_e = ref_power_e; /* scaling by a Q15 factor keeps the exponent of ref_power_fx */
+                IF( NE_32( direct_power_fx[w], 0 ) )
+                {
+                    FOR( i = 0; i < nchan_out_woLFE; i++ )
+                    {
+                        cy_diag_fx[i] = BASOP_Util_Add_Mant32Exp( cy_diag_fx[i], cy_diag_e_arr[i], Mpy_32_32( direct_power_fx[w], cy_diag_tmp_fx[w][i] ), add( direct_power_e, cy_diag_tmp_e[w] ), &cy_diag_e_arr[i] ); /* cy_diag[i] += direct_power[w] * cy_diag_tmp[w][i] */
+                    }
+                }
+                temp_e[w] = direct_power_e;
+                direct_power_fx[w] = Sqrt32( direct_power_fx[w], &temp_e[w] ); /* square root of the direct power is what gets passed on as ener_fx */
+            }
+            cy_diag_e = cy_diag_e_arr[0]; /* bring all cy_diag_fx entries to their largest exponent */
+            FOR( i = 1; i < nchan_out_woLFE; i++ )
+            {
+                cy_diag_e = s_max( cy_diag_e, cy_diag_e_arr[i] );
+            }
+            FOR( i = 0; i < nchan_out_woLFE; i++ )
+            {
+                cy_diag_fx[i] = L_shr_r( cy_diag_fx[i], sub( cy_diag_e, cy_diag_e_arr[i] ) );
+            }
+
+            direct_power_e = temp_e[0]; /* same alignment for the square-rooted direct powers */
+            FOR( w = 1; w < num_wave; w++ )
+            {
+                direct_power_e = s_max( direct_power_e, temp_e[w] );
+            }
+            FOR( w = 0; w < num_wave; w++ )
+            {
+                direct_power_fx[w] = L_shr_r( direct_power_fx[w], sub( direct_power_e, temp_e[w] ) );
+            }
+
+            /* Compute mixing matrix */
+            computeMixingMatricesISM_fx( nchan_transport, num_wave, nchan_out_woLFE, response_matrix_fx, response_matrix_e, direct_power_fx, direct_power_e, cx_diag_fx[bin_idx], cx_diag_e, cy_diag_fx, cy_diag_e, proto_matrix_fx, 1,
+                                         PARAM_MC_REG_SX_FX, PARAM_MC_REG_GHAT_FX, mixing_matrix_fx[bin_idx], &mixing_matrix_e[bin_idx] );
+        }
+    }
+
+    return;
+}
+#else
 static void ivas_param_ism_compute_mixing_matrix( const int16_t
nchan_ism, /* i : number of ISM channels */ PARAM_ISM_DEC_HANDLE hParamIsmDec, /* i/o: decoder ParamISM handle */ @@ -418,11 +555,6 @@ static void ivas_param_ism_compute_mixing_matrix( float *proto_matrix; float response_matrix[PARAM_ISM_MAX_CHAN * MAX_NUM_OBJECTS]; int16_t num_wave; -#ifdef IVAS_FLOAT_FIXED - Word32 dir_res_ptr_fx[PARAM_ISM_MAX_CHAN]; - Word32 cy_diag_tmp_fx[PARAM_ISM_MAX_CHAN]; - Word16 dir_res_ptr_e, cy_diag_tmp_e; -#endif proto_matrix = hParamIsmDec->hParamIsmRendering->proto_matrix; @@ -461,24 +593,7 @@ static void ivas_param_ism_compute_mixing_matrix( } mvr2r( dir_res_ptr, response_matrix + w * nchan_out_woLFE, nchan_out_woLFE ); /* we only need the diagonal of Cy*/ -#ifdef IVAS_FLOAT_FIXED - f2me_buf( dir_res_ptr, dir_res_ptr_fx, &dir_res_ptr_e, nchan_out_woLFE ); - - Word16 guard_bits = 1; - - for ( i = 0; i < nchan_out_woLFE; ++i ) - { - dir_res_ptr_fx[i] = L_shr( dir_res_ptr_fx[i], guard_bits ); - } - - dir_res_ptr_e += guard_bits; - - matrix_product_diag_fx( dir_res_ptr_fx, dir_res_ptr_e, nchan_out_woLFE, 1, 0, dir_res_ptr_fx, dir_res_ptr_e, 1, nchan_out_woLFE, 0, cy_diag_tmp_fx, &cy_diag_tmp_e ); - - me2f_buf( cy_diag_tmp_fx, cy_diag_tmp_e, cy_diag_tmp[w], nchan_out_woLFE ); -#else matrix_product_diag( dir_res_ptr, nchan_out_woLFE, 1, 0, dir_res_ptr, 1, nchan_out_woLFE, 0, cy_diag_tmp[w] ); -#endif } for ( bin_idx = brange[0]; bin_idx < brange[1]; bin_idx++ ) @@ -493,11 +608,7 @@ static void ivas_param_ism_compute_mixing_matrix( } else { -#ifndef IVAS_FLOAT_FIXED direct_power[w] = hParamIsmDec->power_ratios[band_idx][0][w] * ref_power[bin_idx]; -#else - direct_power[w] = fix16_to_float( hParamIsmDec->power_ratios_fx[band_idx][0][w], Q15 ) * ref_power[bin_idx]; -#endif } if ( direct_power[w] != 0.f ) @@ -518,6 +629,7 @@ static void ivas_param_ism_compute_mixing_matrix( return; } +#endif #ifdef IVAS_FLOAT_FIXED static void ivas_param_ism_render_slot_fx( @@ -1975,7 +2087,7 @@ void ivas_ism_dec_digest_tc( * 
*-------------------------------------------------------------------------*/ #ifdef IVAS_FLOAT_FIXED -void ivas_param_ism_dec_digest_tc( +void ivas_param_ism_dec_digest_tc_fx( Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ const UWord16 nCldfbSlots, /* i : number of CLFBS slots in the transport channels */ Word32 *transport_channels[] /* i : synthesized core-coder transport channels/DirAC output */ @@ -1984,9 +2096,7 @@ void ivas_param_ism_dec_digest_tc( Word16 exp_ref_power = 31, exp_cx_diag = 31; Word16 exp_real_tmp = 0, exp_imag_tmp = 0; Word32 ref_power_fx[CLDFB_NO_CHANNELS_MAX]; - Word16 ref_power_e[CLDFB_NO_CHANNELS_MAX]; Word32 cx_diag_fx[CLDFB_NO_CHANNELS_MAX][PARAM_ISM_MAX_DMX]; - Word16 cx_diag_e[CLDFB_NO_CHANNELS_MAX][PARAM_ISM_MAX_DMX]; Word16 q_tc = 13; Word16 ch, nchan_transport, nchan_out, nchan_out_woLFE, i; Word16 slot_idx, bin_idx; @@ -1997,7 +2107,6 @@ void ivas_param_ism_dec_digest_tc( PARAM_ISM_DEC_HANDLE hParamIsmDec; SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom; Word16 fade_len; - Word16 max_exp_ref_power, max_exp_cx_diag; /* Initialization */ hParamIsmDec = st_ivas->hParamIsmDec; @@ -2035,10 +2144,10 @@ void ivas_param_ism_dec_digest_tc( FOR( bin_idx = 0; bin_idx < CLDFB_NO_CHANNELS_MAX; bin_idx++ ) { set_zero_fx( cx_diag_fx[bin_idx], PARAM_ISM_MAX_DMX ); - set16_fx( cx_diag_e[bin_idx], 31, PARAM_ISM_MAX_DMX ); } + exp_cx_diag = 0; set_zero_fx( ref_power_fx, CLDFB_NO_CHANNELS_MAX ); - set16_fx( ref_power_e, 31, CLDFB_NO_CHANNELS_MAX ); + exp_ref_power = 0; /* Frame-level Processing */ /* De-quantization */ @@ -2116,12 +2225,8 @@ void ivas_param_ism_dec_digest_tc( q_tc = 13; } - exp_ref_power = 31; - max_exp_ref_power = 0; FOR( ch = 0; ch < nchan_transport; ch++ ) { - max_exp_cx_diag = 0; - exp_cx_diag = 31; /* CLDFB Analysis */ FOR( slot_idx = 0; slot_idx < nCldfbSlots; slot_idx++ ) { @@ -2158,27 +2263,6 @@ void ivas_param_ism_dec_digest_tc( exp_imag_tmp += scale_factor_imag; scale_sig32( 
&hParamIsmDec->hParamIsmRendering->Cldfb_RealBuffer_tc_fx[slot_idx * hSpatParamRendCom->num_freq_bands * nchan_transport + ch * hSpatParamRendCom->num_freq_bands], hSpatParamRendCom->num_freq_bands, -scale_factor_real ); scale_sig32( &hParamIsmDec->hParamIsmRendering->Cldfb_ImagBuffer_tc_fx[slot_idx * hSpatParamRendCom->num_freq_bands * nchan_transport + ch * hSpatParamRendCom->num_freq_bands], hSpatParamRendCom->num_freq_bands, -scale_factor_imag ); - max_exp_ref_power = s_max( max_exp_ref_power, exp_ref_power ); - max_exp_cx_diag = s_max( max_exp_cx_diag, exp_cx_diag ); - FOR( Word16 band_idx = 0; band_idx < hParamIsmDec->hParamIsm->nbands; band_idx++ ) - { - Word16 a = hParamIsmDec->hParamIsm->band_grouping[band_idx]; - Word16 b = hParamIsmDec->hParamIsm->band_grouping[band_idx + 1]; - FOR( bin_idx = a; bin_idx < b; bin_idx++ ) - { - cx_diag_e[bin_idx][ch] = exp_cx_diag; - ref_power_e[bin_idx] = exp_ref_power; - } - } - FOR( bin_idx = 0; bin_idx < CLDFB_NO_CHANNELS_MAX; bin_idx++ ) - { - cx_diag_fx[bin_idx][ch] = L_shl( cx_diag_fx[bin_idx][ch], cx_diag_e[bin_idx][ch] - max_exp_cx_diag ); - cx_diag_e[bin_idx][ch] = max_exp_cx_diag; - ref_power_fx[bin_idx] = L_shl( ref_power_fx[bin_idx], ref_power_e[bin_idx] - max_exp_ref_power ); - ref_power_e[bin_idx] = max_exp_ref_power; - } - exp_ref_power = max_exp_ref_power; - exp_cx_diag = max_exp_cx_diag; } } /* Obtain Mixing Matrix on a frame-level */ @@ -2194,9 +2278,6 @@ void ivas_param_ism_dec_digest_tc( } #endif #if 1 /*TODO : To be removed (Fixed to float) */ - float ref_power[CLDFB_NO_CHANNELS_MAX]; - float cx_diag[CLDFB_NO_CHANNELS_MAX][PARAM_ISM_MAX_DMX]; - float direct_response[MAX_NUM_OBJECTS][PARAM_ISM_MAX_CHAN]; FOR( ch = 0; ch < nchan_transport; ch++ ) { scale_sig32( st_ivas->cldfbAnaDec[ch]->cldfb_state_fx, st_ivas->cldfbAnaDec[ch]->cldfb_size, sub( Q11, Q13 ) ); @@ -2208,25 +2289,17 @@ void ivas_param_ism_dec_digest_tc( fixedToFloat_arrL( &hParamIsmDec->hParamIsmRendering->Cldfb_ImagBuffer_tc_fx[slot_idx * 
hSpatParamRendCom->num_freq_bands * nchan_transport + ch * hSpatParamRendCom->num_freq_bands], &hParamIsmDec->hParamIsmRendering->Cldfb_ImagBuffer_tc[slot_idx * hSpatParamRendCom->num_freq_bands * nchan_transport + ch * hSpatParamRendCom->num_freq_bands], Q8, hSpatParamRendCom->num_freq_bands ); } } - FOR( bin_idx = 0; bin_idx < CLDFB_NO_CHANNELS_MAX; bin_idx++ ) - { - set_f( hParamIsmDec->hParamIsmRendering->mixing_matrix_lin[bin_idx], 0.0f, PARAM_ISM_MAX_CHAN * PARAM_ISM_MAX_DMX ); /*To be removed when dependency on mixing_matrix_lin is removed*/ - } - FOR( Word16 band_idx = 0; band_idx < CLDFB_NO_CHANNELS_MAX; band_idx++ ) - { - FOR( bin_idx = 0; bin_idx < PARAM_ISM_MAX_DMX; bin_idx++ ) - { - cx_diag[band_idx][bin_idx] = me2f( cx_diag_fx[band_idx][bin_idx], cx_diag_e[band_idx][bin_idx] ); - } - ref_power[band_idx] = me2f( ref_power_fx[band_idx], max_exp_ref_power ); - } - FOR( i = 0; i < st_ivas->nchan_ism; i++ ) - { - fixedToFloat_arrL( direct_response_fx[i], direct_response[i], Q30, PARAM_ISM_MAX_CHAN ); - } #endif /* Compute mixing matrix */ - ivas_param_ism_compute_mixing_matrix( st_ivas->nchan_ism, hParamIsmDec, st_ivas->hISMDTX, direct_response, nchan_transport, nchan_out_woLFE, cx_diag, ref_power, hParamIsmDec->hParamIsmRendering->mixing_matrix_lin ); + Word16 temp_exp[CLDFB_NO_CHANNELS_MAX]; + ivas_param_ism_compute_mixing_matrix_fx( st_ivas->nchan_ism, hParamIsmDec, st_ivas->hISMDTX, direct_response_fx, nchan_transport, nchan_out_woLFE, cx_diag_fx, exp_cx_diag, ref_power_fx, exp_ref_power, hParamIsmDec->hParamIsmRendering->mixing_matrix_lin_fx, temp_exp ); + + /////////fix2float (to be removed) + for ( i = 0; i < CLDFB_NO_CHANNELS_MAX; i++ ) + { + me2f_buf( hParamIsmDec->hParamIsmRendering->mixing_matrix_lin_fx[i], temp_exp[i], hParamIsmDec->hParamIsmRendering->mixing_matrix_lin[i], PARAM_ISM_MAX_CHAN * PARAM_ISM_MAX_DMX ); + } + ////////////////////////////////// pop_wmops(); diff --git a/lib_dec/ivas_jbm_dec.c b/lib_dec/ivas_jbm_dec.c index 
88692037ef02e18311f6c4dd4d9a930b9f5a336e..81f83a3afd0f59fc63b41cd9e39b36564b4c49c5 100644 --- a/lib_dec/ivas_jbm_dec.c +++ b/lib_dec/ivas_jbm_dec.c @@ -2200,7 +2200,7 @@ void ivas_jbm_dec_feed_tc_to_renderer( } } #endif - ivas_param_ism_dec_digest_tc( st_ivas, n_render_timeslots, p_data_f_fx ); + ivas_param_ism_dec_digest_tc_fx( st_ivas, n_render_timeslots, p_data_f_fx ); fixedToFloat_arrL( p_data_f_fx[0], p_data_f[0], Q13, MAX_CLDFB_DIGEST_CHANNELS * MAX_JBM_L_FRAME48k ); #else ivas_param_ism_dec_digest_tc( st_ivas, n_render_timeslots, p_data_f ); diff --git a/lib_dec/ivas_svd_dec.c b/lib_dec/ivas_svd_dec.c index 98d876cadbe557db858cb56edc3df27c6052c2af..218be76403458facef2ad088b6d5bda5a63d0b03 100644 --- a/lib_dec/ivas_svd_dec.c +++ b/lib_dec/ivas_svd_dec.c @@ -434,7 +434,7 @@ Word16 svd_fx( errorMessage = BidagonalDiagonalisation_fx( singularVectors_Left_fx, singularValues_fx, singularVectors_Right_fx, secDiag_fx, singularValues_fx_e, &secDiag_fx_e, nChannelsL, nChannelsC, eps_x_fx, eps_x_fx_e ); /* Sort the singular values descending order */ - lengthSingularValues = min( nChannelsL, nChannelsC ); + lengthSingularValues = s_min( nChannelsL, nChannelsC ); DO {