From 53143c0b4e42bedcd7e3f7f77ab4dadc5fc32455 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 4 Jul 2025 16:51:28 +0530 Subject: [PATCH] Multichannel path decoder optimizations: Removal of BASOP_Util_Add_Mant32Exp and BASOP_Util_Cmp_Mant32Exp --- lib_com/options.h | 1 + lib_dec/ivas_dirac_output_synthesis_cov_fx.c | 45 ++++++- lib_dec/ivas_svd_dec_fx.c | 130 +++++++++++++++++-- lib_rend/ivas_dirac_decorr_dec_fx.c | 99 +++++++++++++- 4 files changed, 263 insertions(+), 12 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 5c1552fc3..de20f6959 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -79,6 +79,7 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ +#define OPT_MCH_DEC_V1_NBE #define OPT_MCH_DEC_V1_BE #define OPT_MCT_ENC_V2_NBE #define OPT_SBA_DEC_V2_NBE diff --git a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c index ea03e1ddb..238ca3704 100644 --- a/lib_dec/ivas_dirac_output_synthesis_cov_fx.c +++ b/lib_dec/ivas_dirac_output_synthesis_cov_fx.c @@ -886,7 +886,11 @@ Word16 computeMixingMatrices_fx( move16(); FOR( i = 1; i < lengthCx; i++ ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { limit_fx = svd_s_buffer_fx[i]; move32(); @@ -896,6 +900,7 @@ Word16 computeMixingMatrices_fx( } limit_e = add( limit_e, reg_Sx_e ); + #ifdef OPT_MCH_DEC_V1_BE limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_Sx_fx ); #else /* OPT_MCH_DEC_V1_BE */ @@ -905,7 +910,11 @@ Word16 computeMixingMatrices_fx( FOR( i = 0; i < lengthCx; ++i ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( 
svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { svd_s_buffer_fx[i] = limit_fx; move32(); @@ -950,9 +959,16 @@ Word16 computeMixingMatrices_fx( matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e ); +#ifdef OPT_MCH_DEC_V1_NBE + Word16 com_e = sub( limit_e, Cy_hat_diag_e ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( i = 0; i < lengthCy; ++i ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { limit_fx = Cy_hat_diag_fx[i]; move32(); @@ -968,11 +984,19 @@ Word16 computeMixingMatrices_fx( #endif /* OPT_MCH_DEC_V1_BE */ limit_e = add( limit_e, reg_ghat_e ); +#ifdef OPT_MCH_DEC_V1_NBE + com_e = sub( Cy_hat_diag_e, limit_e ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( i = 0; i < lengthCy; ++i ) { Cy_hat_diag_buff_e[i] = Cy_hat_diag_e; move16(); + +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ +#endif /* OPT_MCH_DEC_V1_NBE */ { Cy_hat_diag_fx[i] = limit_fx; move32(); @@ -1392,7 +1416,11 @@ Word16 computeMixingMatricesResidual_fx( FOR( i = 0; i < lengthCx; ++i ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { div_tmp = Kx_fx[i]; move32(); @@ -1433,9 +1461,16 @@ Word16 computeMixingMatricesResidual_fx( Cy_hat_diag_e = Cx_e; move16(); +#ifdef OPT_MCH_DEC_V1_NBE + Word16 com_e = sub( limit_e, Cy_hat_diag_e ); +#endif /* OPT_MCH_DEC_V1_NBE */ 
FOR( i = 0; i < lengthCy; ++i ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { limit_fx = Cy_hat_diag_fx[i]; move32(); @@ -1453,11 +1488,19 @@ Word16 computeMixingMatricesResidual_fx( limit_e = add( limit_e, reg_ghat_e ); /* Computing G_hat */ + +#ifdef OPT_MCH_DEC_V1_NBE + com_e = sub( Cy_hat_diag_e, limit_e ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( i = 0; i < lengthCy; ++i ) { Cy_hat_diag_fx_e[i] = Cy_hat_diag_e; move16(); +#ifdef OPT_MCH_DEC_V1_NBE + IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */ +#endif /* OPT_MCH_DEC_V1_NBE */ { Cy_hat_diag_fx[i] = limit_fx; move32(); @@ -1483,7 +1526,7 @@ Word16 computeMixingMatricesResidual_fx( #ifdef OPT_MCH_DEC_V1_BE Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e)) #else /* OPT_MCH_DEC_V1_BE */ - L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e)) + L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e)) Kx_fx[i] = L_tmp; #endif /* OPT_MCH_DEC_V1_BE */ move32(); diff --git a/lib_dec/ivas_svd_dec_fx.c b/lib_dec/ivas_svd_dec_fx.c index 2afe48db6..ba65c4b7b 100644 --- a/lib_dec/ivas_svd_dec_fx.c +++ b/lib_dec/ivas_svd_dec_fx.c @@ -322,7 +322,11 @@ Word16 svd_fx( move16(); FOR( iCh = 0; iCh < lengthSingularValues - 1; iCh++ ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( LT_32( L_shl_sat( singularValues_fx[iCh], sub( singularValues_fx_e[iCh], singularValues_fx_e[iCh + 1] ) ), singularValues_fx[iCh + 1] ) ) +#else /* OPT_MCH_DEC_V1_NBE */ IF( BASOP_Util_Cmp_Mant32Exp( singularValues_fx[iCh], singularValues_fx_e[iCh], singularValues_fx[iCh + 
1], singularValues_fx_e[iCh + 1] ) < 0 ) +#endif /* OPT_MCH_DEC_V1_NBE */ { condition = 1; move16(); @@ -427,14 +431,24 @@ static Word16 BidagonalDiagonalisation_fx( FOR( jCh = iCh; jCh >= 0; jCh-- ) { - split = sub( jCh, 1 ); /* Q0 */ - IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */ +#ifdef OPT_MCH_DEC_V1_NBE + Word16 com_e = s_max( secDiag_new_e[jCh], eps_x_e ); + IF( LE_32( L_shr( L_abs( secDiag_fx[jCh] ), sub( com_e, secDiag_new_e[jCh] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is secDiag[ch] vanishing compared to eps_x */ +#else /* OPT_MCH_DEC_V1_NBE */ + split = sub( jCh, 1 ); /* Q0 */ + IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( secDiag_fx[jCh] ), secDiag_new_e[jCh], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is secDiag[ch] vanishing compared to eps_x */ +#endif /* OPT_MCH_DEC_V1_NBE */ { found_split = 0; move16(); BREAK; } +#ifdef OPT_MCH_DEC_V1_NBE + com_e = s_max( singularValues_new_e[jCh - 1], eps_x_e ); + IF( LE_32( L_shr( L_abs( singularValues_fx[jCh - 1] ), sub( com_e, singularValues_new_e[jCh - 1] ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) /* is singularValues[jCh - 1] vanishing compared to eps_x */ +#else /* OPT_MCH_DEC_V1_NBE */ IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( singularValues_fx[split] ), singularValues_new_e[split], Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */ +#endif /* OPT_MCH_DEC_V1_NBE */ { BREAK; } @@ -462,14 +476,21 @@ static Word16 BidagonalDiagonalisation_fx( move32(); c_e = 0; move16(); - +#ifdef OPT_MCH_DEC_V1_NBE + split = sub( jCh, 1 ); /* Q0 */ +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( kCh = jCh; kCh <= iCh; kCh++ ) { g = Mpy_32_32( s, secDiag_fx[kCh] ); /* exp(s_e + secDiag_new_e) */ g_e = add( s_e,
secDiag_new_e[kCh] ); secDiag_fx[kCh] = Mpy_32_32( c, secDiag_fx[kCh] ); /* exp(c_e + secDiag_new_e) */ secDiag_new_e[kCh] = add( c_e, secDiag_new_e[kCh] ); - IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */ +#ifdef OPT_MCH_DEC_V1_NBE + Word16 com_e = s_max( g_e, eps_x_e ); + IF( LE_32( L_shr( L_abs( g ), sub( com_e, g_e ) ), L_shr( Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), sub( com_e, eps_x_e ) ) ) ) +#else /* OPT_MCH_DEC_V1_NBE */ + IF( LE_16( BASOP_Util_Cmp_Mant32Exp( L_abs( g ), g_e, Mpy_32_32( CONVERGENCE_FACTOR_FX, eps_x ), eps_x_e ), 0 ) ) /* is singularValues[split] vanishing compared to eps_x */ +#endif /* OPT_MCH_DEC_V1_NBE */ { BREAK; } @@ -794,8 +815,8 @@ static void ApplyRotation_fx( #ifdef OPT_MCH_DEC_V1_BE singularVector[ch][currentIndex2] = W_shl_sat_l( temp, op_e ); // Q(singularVector) #else /* OPT_MCH_DEC_V1_BE */ - temp = W_shr( temp, op_e ); // Q(singularVector) - singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector) + temp = W_shr( temp, op_e ); // Q(singularVector) + singularVector[ch][currentIndex2] = W_sat_l( temp ); // Q(singularVector) #endif /* OPT_MCH_DEC_V1_BE */ move32(); @@ -803,8 +824,8 @@ static void ApplyRotation_fx( #ifdef OPT_MCH_DEC_V1_BE singularVector[ch][currentIndex1] = W_shl_sat_l( temp, op_e ); // Q(singularVector) #else /* OPT_MCH_DEC_V1_BE */ - temp = W_shr( temp, op_e ); // Q(singularVector) - singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector) + temp = W_shr( temp, op_e ); // Q(singularVector) + singularVector[ch][currentIndex1] = W_sat_l( temp ); // Q(singularVector) #endif /* OPT_MCH_DEC_V1_BE */ move32(); } @@ -929,9 +950,15 @@ static void biDiagonalReductionLeft_fx( Word16 invVal_e; Word32 invVal; invVal = BASOP_Util_Divide3232_Scale_newton( MAXVAL_WORD32, maxWithSign_fx( *sig_x ), &invVal_e ); +#ifdef OPT_MCH_DEC_V1_NBE + Word64 temp = 0; + 
move64(); + Word16 max_e = MIN_16; +#else /* OPT_MCH_DEC_V1_NBE */ norm_x = 0; move32(); norm_x_e = 0; +#endif /* OPT_MCH_DEC_V1_NBE */ move16(); FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ { @@ -940,8 +967,25 @@ static void biDiagonalReductionLeft_fx( move32(); singularVectors2_e[jCh][currChannel] = sub( add( invVal_e, sub( singularVectors2_e[jCh][currChannel], *sig_x_e ) ), temp_e ); move16(); +#ifdef OPT_MCH_DEC_V1_NBE + max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); +#else /* OPT_MCH_DEC_V1_NBE */ norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( singularVectors2_e[jCh][currChannel], 1 ), &norm_x_e ); /* exp(norm_x_e) */ +#endif /* OPT_MCH_DEC_V1_NBE */ } + +#ifdef OPT_MCH_DEC_V1_NBE + FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][currChannel] ), shl( sub( max_e, singularVectors2_e[jCh][currChannel] ), 1 ) ) ); + } + + Word16 nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( add( max_e, max_e ), nrm ); +#endif /* OPT_MCH_DEC_V1_NBE */ + IF( GT_16( norm_x_e, 0 ) ) { norm_x = MAX_32; @@ -969,6 +1013,30 @@ static void biDiagonalReductionLeft_fx( FOR( iCh = currChannel + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { +#ifdef OPT_MCH_DEC_V1_NBE + Word16 max2_e = MIN_16; + max_e = MIN_16; + move16(); + move16(); + temp = 0; + move64(); + + FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + max_e = s_max( max_e, singularVectors2_e[jCh][currChannel] ); /* exp(norm_x_e) */ + max2_e = s_max( max2_e, singularVectors2_e[jCh][iCh] ); /* exp(norm_x_e) */ + } + max_e = add( max_e, max2_e ); + + FOR( jCh = idx; jCh < nChannelsL; jCh++ ) /* nChannelsL */ + { + temp = W_add( temp, L_shr( Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), sub( max_e, add( 
singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ) ) ) ); + } + nrm = W_norm( temp ); + nrm = sub( nrm, 32 ); + norm_x = W_shl_sat_l( temp, nrm ); + norm_x_e = sub( max_e, nrm ); +#else /* OPT_MCH_DEC_V1_NBE */ norm_x = 0; move32(); norm_x_e = 0; @@ -977,6 +1045,7 @@ static void biDiagonalReductionLeft_fx( { norm_x = BASOP_Util_Add_Mant32Exp( norm_x, norm_x_e, Mpy_32_32( singularVectors[jCh][currChannel], singularVectors[jCh][iCh] ), add( singularVectors2_e[jCh][currChannel], singularVectors2_e[jCh][iCh] ), &norm_x_e ); /* exp(norm_x_e) */ } +#endif /* OPT_MCH_DEC_V1_NBE */ f = Mpy_32_32( norm_x, invVal ); /* invVal_e + (norm_x_e - r_e) */ f_e = add( invVal_e, sub( norm_x_e, r_e ) ); @@ -1228,8 +1297,16 @@ static void singularVectorsAccumulationLeft_fx( move32(); } } +#ifdef OPT_MCH_DEC_V1_NBE + Word16 exp = s_max( singularVectors_Left_e[nCh][nCh], 1 ); + singularVectors_Left[nCh][nCh] = L_sub( L_shr( singularVectors_Left[nCh][nCh], sub( exp, singularVectors_Left_e[nCh][nCh] ) ), L_shr( MINUS_ONE_IN_Q31, exp ) ); /* exp(sing_exp2) */ + move32(); + singularVectors_Left_e[nCh][nCh] = exp; + move16(); +#else /* OPT_MCH_DEC_V1_NBE */ singularVectors_Left[nCh][nCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Left[nCh][nCh], singularVectors_Left_e[nCh][nCh], ONE_IN_Q30, 1, &singularVectors_Left_e[nCh][nCh] ); /* exp(sing_exp2) */ move32(); +#endif /* OPT_MCH_DEC_V1_NBE */ } // fclose(fp); FOR( nCh = 0; nCh < nChannelsL; nCh++ ) @@ -1292,21 +1369,56 @@ static void singularVectorsAccumulationRight_fx( FOR( iCh = nCh + 1; iCh < nChannelsC; iCh++ ) /* nChannelsC */ { +#ifdef OPT_MCH_DEC_V1_NBE + Word64 norm_val = 0; + move64(); + Word16 maxL_e = MIN_16; + Word16 maxR_e = MIN_16; + Word16 maxR2_e = MIN_16; + move16(); + move16(); + move16(); + FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ + { + maxL_e = s_max( maxL_e, singularVectors_Left_e[nCh][k] ); + maxR_e = s_max( maxR_e, sing_right_exp[k][iCh] ); + maxR2_e = s_max( maxR2_e, 
sing_right_exp[k][nCh] ); + } +#else /* OPT_MCH_DEC_V1_NBE */ norm_y = 0; move32(); norm_y_e = 0; move16(); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ { - norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#ifdef OPT_MCH_DEC_V1_NBE + norm_val = W_mac_32_32( norm_val, L_shr( singularVectors_Left[nCh][k], sub( maxL_e, singularVectors_Left_e[nCh][k] ) ), L_shr( singularVectors_Right[k][iCh], sub( maxR_e, sing_right_exp[k][iCh] ) ) ); +#else /* OPT_MCH_DEC_V1_NBE */ + norm_y = BASOP_Util_Add_Mant32Exp( norm_y, norm_y_e, Mpy_32_32( singularVectors_Left[nCh][k], singularVectors_Right[k][iCh] ), add( singularVectors_Left_e[nCh][k], sing_right_exp[k][iCh] ), &norm_y_e ); /* exp(norm_y_e) */ +#endif /* OPT_MCH_DEC_V1_NBE */ } +#ifdef OPT_MCH_DEC_V1_NBE + norm_y_e = W_norm( norm_val ); + norm_y = W_extract_h( W_shl( norm_val, norm_y_e ) ); + norm_y_e = sub( add( maxL_e, maxR_e ), norm_y_e ); + Word16 max_new = s_max( maxR_e, add( maxR2_e, norm_y_e ) ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( k = nCh + 1; k < nChannelsC; k++ ) /* nChannelsC */ { +#ifdef OPT_MCH_DEC_V1_NBE + Word32 temp = Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ); + Word32 op2 = L_shr( temp, sub( max_new, add( norm_y_e, sing_right_exp[k][nCh] ) ) ); + singularVectors_Right[k][iCh] = L_add_sat( L_shr( singularVectors_Right[k][iCh], sub( max_new, sing_right_exp[k][iCh] ) ), op2 ); /* exp(sing_right_exp) */ + move32(); + singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], max_new ); /* Q31 */ +#else /* OPT_MCH_DEC_V1_NBE */ singularVectors_Right[k][iCh] = BASOP_Util_Add_Mant32Exp( singularVectors_Right[k][iCh], sing_right_exp[k][iCh], Mpy_32_32( norm_y, singularVectors_Right[k][nCh] ), add( norm_y_e, sing_right_exp[k][nCh] ), &sing_right_exp[k][iCh] ); /* 
exp(sing_right_exp) */ move32(); singularVectors_Right[k][iCh] = L_shl_sat( singularVectors_Right[k][iCh], sing_right_exp[k][iCh] ); /* Q31 */ +#endif /* OPT_MCH_DEC_V1_NBE */ move32(); sing_right_exp[k][iCh] = 0; move16(); diff --git a/lib_rend/ivas_dirac_decorr_dec_fx.c b/lib_rend/ivas_dirac_decorr_dec_fx.c index b7c0ebbbc..28c3cd5e7 100644 --- a/lib_rend/ivas_dirac_decorr_dec_fx.c +++ b/lib_rend/ivas_dirac_decorr_dec_fx.c @@ -514,9 +514,9 @@ void ivas_dirac_dec_decorr_process_fx( max_band_decorr = h_freq_domain_decorr_ap_params->max_band_decorr; move16(); -#ifdef OPT_MCH_DEC_V1_BE +#if ( defined OPT_MCH_DEC_V1_NBE || defined OPT_MCH_DEC_V1_BE ) Word16 decorX2 = shl( max_band_decorr, 1 ); -#endif /* OPT_MCH_DEC_V1_BE */ +#endif set32_fx( onset_filter_fx, ONE_IN_Q31, imult1616( num_protos_diff, num_freq_bands ) ); @@ -740,6 +740,7 @@ void ivas_dirac_dec_decorr_process_fx( Word64 aux_64[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; Word16 e_reverb_energy_smooth, e_direct_energy_smooth; Word16 offset1; + #ifndef OPT_MCH_DEC_V1_BE Word16 offset2; #endif /* OPT_MCH_DEC_V1_BE */ @@ -868,15 +869,25 @@ void ivas_dirac_dec_decorr_process_fx( Word16 max_e = s_max( aux_e, e_reverb_energy_smooth ); Word16 shr_aux = sub( max_e, aux_e ); /* Note: headroom is zero */ Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */ +#ifdef OPT_MCH_DEC_V1_NBE + Word32 temp1 = L_shr( ONE_M_DIRAC_DUCK_ALPHA, shr_aux ); + Word32 temp2 = L_shr( DIRAC_DUCK_ALPHA_FX, shr_res ); +#endif /* OPT_MCH_DEC_V1_NBE */ /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ /* => a multiplication with this values does not change the q/e value. 
*/ FOR( Word16 i = 0; i < len; i++ ) { +#ifdef OPT_MCH_DEC_V1_NBE + h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = Madd_32_32( + Mpy_32_32( aux_buffer_fx[i], temp1 ), + h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], temp2 ); +#else /* OPT_MCH_DEC_V1_NBE */ h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); +#endif /* OPT_MCH_DEC_V1_NBE */ move32(); } e_reverb_energy_smooth = max_e; @@ -889,12 +900,22 @@ void ivas_dirac_dec_decorr_process_fx( Word16 max_x = s_max( den_e, e_direct_energy_smooth ); Word16 shr_den = sub( max_x, den_e ); /* Note: headroom is zero */ Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */ +#ifdef OPT_MCH_DEC_V1_NBE + temp1 = L_shr( ONE_M_DIRAC_DUCK_ALPHA, shr_den ); + temp2 = L_shr( DIRAC_DUCK_ALPHA_FX, shr_des ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( Word16 i = 0; i < len; i++ ) { +#ifdef OPT_MCH_DEC_V1_NBE + h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = Madd_32_32( + Mpy_32_32( direct_energy_fx[i], temp1 ), + h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], temp2 ); +#else /* OPT_MCH_DEC_V1_NBE */ h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); +#endif /* OPT_MCH_DEC_V1_NBE */ move32(); } e_direct_energy_smooth = max_x; @@ -903,6 +924,7 @@ void ivas_dirac_dec_decorr_process_fx( move16(); // scaling energy buffers for better precision for higher values// +#ifndef OPT_MCH_DEC_V1_NBE q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); IF( q_shift != 0 ) { @@ -917,6 +939,7 @@ void 
ivas_dirac_dec_decorr_process_fx( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); } +#endif h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = s_min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = s_min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); @@ -928,15 +951,32 @@ void ivas_dirac_dec_decorr_process_fx( move16(); FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { +#ifdef OPT_MCH_DEC_V1_NBE + q_shift = s_min( q_shift, + L_norm_arr( &frame_dec_fx[2 * ch_idx * num_freq_bands], decorX2 ) ); +#else /* OPT_MCH_DEC_V1_NBE */ q_shift = s_min( q_shift, sub( L_norm_arr( &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ) ), Q2 ) ); +#endif /* OPT_MCH_DEC_V1_NBE */ } +#ifdef OPT_MCH_DEC_V1_NBE + q_shift = sub( q_shift, 2 ); + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) + { + Scale_sig32( &frame_dec_fx[2 * ch_idx * num_freq_bands], decorX2, q_shift ); + } + q_frame_f = add( q_frame_f, q_shift ); + Word16 diff1 = sub( e_direct_energy_smooth, e_reverb_energy_smooth ); + Word16 diff2 = add( Q30, diff1 ); + diff1 = sub( Q30, diff1 ); +#else /* OPT_MCH_DEC_V1_NBE */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { Scale_sig32( &frame_dec_fx[shl( imult1616( ch_idx, num_freq_bands ), 1 )], shl( max_band_decorr, 1 ), q_shift ); } q_frame_f = add( q_frame_f, q_shift ); +#endif /* OPT_MCH_DEC_V1_NBE */ FOR( ch_idx = 0; ch_idx < num_channels; ch_idx++ ) { @@ -956,6 +996,14 @@ void ivas_dirac_dec_decorr_process_fx( move32(); move32(); +#ifdef OPT_MCH_DEC_V1_NBE + Word64 temp_1 = W_sub( W_shl( reverb_energy_loc, diff1 ), W_mult0_32_32( direct_energy_loc, DIRAC_DUCK_GAMMA_FX ) ); + Word64 temp_2 = W_sub( W_shl( direct_energy_loc, diff2 ), W_mult0_32_32( reverb_energy_loc, DIRAC_DUCK_GAMMA_FX ) ); + + IF( temp_1 > 0 ) + { + duck_gain = BASOP_Util_Divide3232_Scale( 
Mpy_32_32( direct_energy_loc, DIRAC_DUCK_GAMMA_FX ), L_add( reverb_energy_loc, EPSILON_FX ), &e_duck_gain ); +#else /* OPT_MCH_DEC_V1_NBE */ Word32 temp_1 = Mpy_32_32( direct_energy_loc, DIRAC_DUCK_GAMMA_FX ); // e+1 Word32 temp_2 = Mpy_32_32( reverb_energy_loc, DIRAC_DUCK_GAMMA_FX ); // e+1 Word16 comp_flag_1 = BASOP_Util_Cmp_Mant32Exp( reverb_energy_loc, e_reverb_energy_smooth, temp_1, add( e_direct_energy_smooth, 1 ) ); @@ -963,6 +1011,7 @@ void ivas_dirac_dec_decorr_process_fx( IF( EQ_16( comp_flag_1, 1 ) ) { duck_gain = BASOP_Util_Divide3232_Scale( temp_1, L_add( reverb_energy_loc, EPSILON_FX ), &e_duck_gain ); +#endif /* OPT_MCH_DEC_V1_NBE */ e_duck_gain = add( e_duck_gain, sub( add( e_direct_energy_smooth, 1 ), e_reverb_energy_smooth ) ); duck_gain = Sqrt16( duck_gain, &e_duck_gain ); @@ -974,10 +1023,17 @@ void ivas_dirac_dec_decorr_process_fx( move32(); move32(); } +#ifdef OPT_MCH_DEC_V1_NBE + ELSE IF( temp_2 > 0 ) + { + + duck_gain = BASOP_Util_Divide3232_Scale( direct_energy_loc, L_add( Mpy_32_32( reverb_energy_loc, DIRAC_DUCK_GAMMA_FX ), EPSILON_FX ), &e_duck_gain ); +#else /* OPT_MCH_DEC_V1_NBE */ ELSE IF( EQ_16( comp_flag_2, 1 ) ) { duck_gain = BASOP_Util_Divide3232_Scale( direct_energy_loc, L_add( temp_2, EPSILON_FX ), &e_duck_gain ); +#endif /* OPT_MCH_DEC_V1_NBE */ e_duck_gain = add( e_duck_gain, sub( e_direct_energy_smooth, add( e_reverb_energy_smooth, 1 ) ) ); duck_gain = Sqrt16( duck_gain, &e_duck_gain ); @@ -1048,6 +1104,44 @@ void ivas_dirac_dec_decorr_process_fx( IF( EQ_16( h_freq_domain_decorr_ap_params->add_back_onsets_on, 1 ) ) { +#ifdef OPT_MCH_DEC_V1_NBE + IF( q_if_local ) + { + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) + { + offset = imult1616( proto_index_dir[ch_idx], num_freq_bands ); + + FOR( k = 0; k < max_band_decorr; ++k ) + { + Word32 op2 = L_shr( L_sub( ONE_IN_Q31, onset_filter_fx[offset + k] ), q_if_local ); + aux_buffer_fx[2 * k] = Mpy_32_32( input_frame_fx[2 * ( offset + k )], op2 ); + aux_buffer_fx[2 * k + 1] = 
Mpy_32_32( input_frame_fx[2 * ( offset + k ) + 1], op2 ); // q_frame_f + move32(); + move32(); + } + + v_add_fx( &frame_dec_fx[2 * ch_idx * num_freq_bands], aux_buffer_fx, &frame_dec_fx[2 * ch_idx * num_freq_bands], decorX2 ); + } + } + ELSE + { + FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) + { + offset = imult1616( proto_index_dir[ch_idx], num_freq_bands ); + + FOR( k = 0; k < max_band_decorr; ++k ) + { + Word32 op2 = L_sub( ONE_IN_Q31, onset_filter_fx[offset + k] ); + aux_buffer_fx[2 * k] = Mpy_32_32( input_frame_fx[2 * ( offset + k )], op2 ); + aux_buffer_fx[2 * k + 1] = Mpy_32_32( input_frame_fx[2 * ( offset + k ) + 1], op2 ); // q_frame_f + move32(); + move32(); + } + + v_add_fx( &frame_dec_fx[2 * ch_idx * num_freq_bands], aux_buffer_fx, &frame_dec_fx[2 * ch_idx * num_freq_bands], decorX2 ); + } + } +#else /* OPT_MCH_DEC_V1_NBE */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { offset = imult1616( proto_index_dir[ch_idx], num_freq_bands ); @@ -1062,6 +1156,7 @@ void ivas_dirac_dec_decorr_process_fx( v_add_fx( &frame_dec_fx[2 * ch_idx * num_freq_bands], aux_buffer_fx, &frame_dec_fx[2 * ch_idx * num_freq_bands], shl( max_band_decorr, 1 ) ); } +#endif /* OPT_MCH_DEC_V1_NBE */ } /* avoid decorrelation above maximum frequency -> set to zero the remaining frequencies*/ -- GitLab