LTV test optimizations - 01072025 (d8cd608d) · Commits · SA4 / Audio / IVAS BASOP

lib_com/ivas_rom_com_fx.c

+8 −8

Original line number	Diff line number	Diff line
		@@ -485,14 +485,14 @@ const Word32 dft_res_gains_q_fx[][2] = {

		// Q13
		const Word16 McMASA_LFEGain_vectors_fx_q13[64] = {
		3112, 2703, 1556, 1638, -1310, -1802, -2867, -2785,
		4096, 4096, 4096, 4096, -6553, -3276, 8355, 819,
		-4096, -4096, -4096, -4096, -4587, -983, -6389, 11141,
		-8355, 9666, -4669, 2703, 5898, -9256, 7946, -5079,
		-7454, 7618, 8192, -9011, 14172, -1884, -6389, -6881,
		7782, -13107, -2785, 7618, 7127, 3850, -15564, 4259,
		5488, 11632, -7946, -10158, 6799, 4751, 4997, -16711,
		-6553, -12943, 6717, 11632, -17530, 2129, 6881, 8355
		3113, 2703, 1556, 1638, -1311, -1802, -2867, -2785,
		4096, 4096, 4096, 4096, -6554, -3277, 8356, 819,
		-4096, -4096, -4096, -4096, -4588, -983, -6390, 11141,
		-8356, 9667, -4669, 2703, 5898, -9257, 7946, -5079,
		-7455, 7619, 8192, -9011, 14172, -1884, -6390, -6881,
		7782, -13107, -2785, 7619, 7127, 3850, -15565, 4260,
		5489, 11633, -7946, -10158, 6799, 4751, 4997, -16712,
		-6554, -12943, 6717, 11633, -17531, 2130, 6881, 8356
		};

		// Q25

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -79,6 +79,7 @@


		/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
		#define OPT_MCH_DEC_V1_NBE
		#define OPT_MCH_DEC_V1_BE
		#define OPT_MCT_ENC_V2_NBE
		#define OPT_SBA_DEC_V2_NBE

lib_dec/ivas_dirac_output_synthesis_cov_fx.c

+43 −0

Original line number	Diff line number	Diff line
		@@ -886,7 +886,11 @@ Word16 computeMixingMatrices_fx(
		move16();
		FOR( i = 1; i < lengthCx; i++ )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		limit_fx = svd_s_buffer_fx[i];
		move32();
		@@ -896,6 +900,7 @@ Word16 computeMixingMatrices_fx(
		}

		limit_e = add( limit_e, reg_Sx_e );

		#ifdef OPT_MCH_DEC_V1_BE
		limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_Sx_fx );
		#else /* OPT_MCH_DEC_V1_BE */
		@@ -905,7 +910,11 @@ Word16 computeMixingMatrices_fx(

		FOR( i = 0; i < lengthCx; ++i )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		svd_s_buffer_fx[i] = limit_fx;
		move32();
		@@ -950,9 +959,16 @@ Word16 computeMixingMatrices_fx(
		matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );


		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = sub( limit_e, Cy_hat_diag_e );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( i = 0; i < lengthCy; ++i )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		limit_fx = Cy_hat_diag_fx[i];
		move32();
		@@ -968,11 +984,19 @@ Word16 computeMixingMatrices_fx(
		#endif /* OPT_MCH_DEC_V1_BE */
		limit_e = add( limit_e, reg_ghat_e );

		#ifdef OPT_MCH_DEC_V1_NBE
		com_e = sub( Cy_hat_diag_e, limit_e );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( i = 0; i < lengthCy; ++i )
		{
		Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
		move16();

		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		Cy_hat_diag_fx[i] = limit_fx;
		move32();
		@@ -1392,7 +1416,11 @@ Word16 computeMixingMatricesResidual_fx(

		FOR( i = 0; i < lengthCx; ++i )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		div_tmp = Kx_fx[i];
		move32();
		@@ -1433,9 +1461,16 @@ Word16 computeMixingMatricesResidual_fx(
		Cy_hat_diag_e = Cx_e;
		move16();

		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = sub( limit_e, Cy_hat_diag_e );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( i = 0; i < lengthCy; ++i )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		limit_fx = Cy_hat_diag_fx[i];
		move32();
		@@ -1453,11 +1488,19 @@ Word16 computeMixingMatricesResidual_fx(
		limit_e = add( limit_e, reg_ghat_e );

		/* Computing G_hat */

		#ifdef OPT_MCH_DEC_V1_NBE
		com_e = sub( Cy_hat_diag_e, limit_e );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( i = 0; i < lengthCy; ++i )
		{
		Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
		move16();
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		Cy_hat_diag_fx[i] = limit_fx;
		move32();

lib_dec/ivas_ism_param_dec_fx.c

+5 −5

Original line number	Diff line number	Diff line
		@@ -1339,14 +1339,14 @@ void ivas_ism_param_dec_tc_gain_ajust_fx(
		{
		gain_fx = 0;
		move16();
		tmp_e1 = 31;
		tmp_e1 = 0;
		move16();
		}
		ELSE
		{ /* handling denominator equals to zero */
		gain_fx = 1;
		ELSE /* handling denominator equals to zero */
		{
		gain_fx = 32767; //(max value of Word16 in Q0)
		move16();
		tmp_e1 = -32767; //(-1.0f in Q15) + 1
		tmp_e1 = 15;
		move16();
		}
		}

lib_dec/ivas_svd_dec_fx.c

+114 −4

Original line number	Diff line number	Diff line
		@@ -322,7 +322,11 @@ Word16 svd_fx(
		move16();
		FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ )
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 1], singularValues_fx_e[iCh + 1] ) < 0 )
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		condition = 1;
		move16();
		@@ -428,13 +432,23 @@ static Word16 BidagonalDiagonalisation_fx(
		FOR( jCh = iCh; jCh >= 0; jCh-- )
		{
		split = sub( jCh, 1 ); /* Q0 */
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e );
		IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		found_split = 0;
		move16();
		BREAK;
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		com_e = s_max( singularValues_new_e[split], eps_x_e );
		IF( LE_32( L_shr( L_abs( singularValues_fx[split] ), sub( com_e, singularValues_new_e[split] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[split] vanishing compared to eps_x */
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		BREAK;
		}
		@@ -469,7 +483,12 @@ static Word16 BidagonalDiagonalisation_fx(
		g_e = add( s_e, secDiag_new_e[kCh] );
		secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */
		secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] );
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 com_e = s_max( g_e, eps_x_e );
		IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) )
		#else /* OPT_MCH_DEC_V1_NBE */
		IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is g vanishing compared to eps_x */
		#endif /* OPT_MCH_DEC_V1_NBE */
		{
		BREAK;
		}
		@@ -929,9 +948,15 @@ static void biDiagonalReductionLeft_fx(
		Word16 invVal_e;
		Word32 invVal;
		invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e );
		#ifdef OPT_MCH_DEC_V1_NBE
		Word64 temp = 0;
		move64();
		Word16 max_e = MIN_16;
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = 0;
		move32();
		norm_x_e = 0;
		#endif /* OPT_MCH_DEC_V1_NBE */
		move16();
		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		@@ -940,8 +965,25 @@ static void biDiagonalReductionLeft_fx(
		move32();
		singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e );
		move16();
		#ifdef OPT_MCH_DEC_V1_NBE
		max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		}

		#ifdef OPT_MCH_DEC_V1_NBE
		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) );
		}

		Word16 nrm = W_norm( temp );
		nrm = sub( nrm, 32 );
		norm_x = W_shl_sat_l( temp, nrm );
		norm_x_e = sub( add( max_e, max_e ), nrm );
		#endif /* OPT_MCH_DEC_V1_NBE */

		IF( GT_16( norm_x_e, 0 ) )
		{
		norm_x = MAX_32;
		@@ -969,6 +1011,30 @@ static void biDiagonalReductionLeft_fx(

		FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 max2_e = MIN_16;
		max_e = MIN_16;
		move16();
		move16();
		temp = 0;
		move64();

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */
		max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] ); /* exp(norm_x_e) */
		}
		max_e = add( max_e, max2_e );

		FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */
		{
		temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) );
		}
		nrm = W_norm( temp );
		nrm = sub( nrm, 32 );
		norm_x = W_shl_sat_l( temp, nrm );
		norm_x_e = sub( max_e, nrm );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_x = 0;
		move32();
		norm_x_e = 0;
		@@ -977,6 +1043,7 @@ static void biDiagonalReductionLeft_fx(
		{
		norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */
		}
		#endif /* OPT_MCH_DEC_V1_NBE */

		f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */
		f_e = add( invVal_e, sub( norm_x_e, r_e ) );
		@@ -1228,8 +1295,16 @@ static void singularVectorsAccumulationLeft_fx(
		move32();
		}
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 );
		singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */
		move32();
		singularVectors_Left_e[nCh][nCh] = exp;
		move16();
		#else /* OPT_MCH_DEC_V1_NBE */
		singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */
		move32();
		#endif /* OPT_MCH_DEC_V1_NBE */
		}
		// fclose(fp);
		FOR( nCh = 0; nCh < nChannelsL; nCh++ )
		@@ -1292,21 +1367,56 @@ static void singularVectorsAccumulationRight_fx(

		FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word64 norm_val = 0;
		move64();
		Word16 maxL_e = MIN_16;
		Word16 maxR_e = MIN_16;
		Word16 maxR2_e = MIN_16;
		move16();
		move16();
		move16();
		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] );
		maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] );
		maxR2_e = s_max( maxR2_e, sing_right_exp[k][nCh] );
		}
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_y = 0;
		move32();
		norm_y_e = 0;
		move16();
		#endif /* OPT_MCH_DEC_V1_NBE */

		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) );
		#else /* OPT_MCH_DEC_V1_NBE */
		norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */
		#endif /* OPT_MCH_DEC_V1_NBE */
		}
		#ifdef OPT_MCH_DEC_V1_NBE
		norm_y_e = W_norm( norm_val );
		norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) );
		norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e );

		Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) );
		#endif /* OPT_MCH_DEC_V1_NBE */
		FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */
		{
		#ifdef OPT_MCH_DEC_V1_NBE
		Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] );
		Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) );
		singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */
		move32();
		singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */
		#else /* OPT_MCH_DEC_V1_NBE */
		singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* exp(sing_right_exp) */
		move32();
		singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */
		#endif /* OPT_MCH_DEC_V1_NBE */
		move32();
		sing_right_exp[k][iCh] = 0;
		move16();