Loading lib_com/cldfb.c +414 −36 File changed.Preview size limit exceeded, changes collapsed. Show changes lib_com/ivas_dirac_com_fx.c +105 −2 Original line number Diff line number Diff line Loading @@ -917,15 +917,16 @@ void computeDiffuseness_fixed( move32(); Word32 *p_tmp; const Word32 *p_tmp_c; #ifndef OPT_SBA_DEC_PATH Word16 min_q_shift1, min_q_shift2, exp1, exp2, q_tmp; Word16 q_ene, q_intensity, q_intensity_slow; #endif /* OPT_SBA_DEC_PATH */ /* Compute Intensity slow and energy slow buffer_intensity and buffer_energy */ set_zero_fx( intensity_slow, i_mult( DIRAC_NUM_DIMS, CLDFB_NO_CHANNELS_MAX ) ); set_zero_fx( intensity_slow_abs, CLDFB_NO_CHANNELS_MAX ); set_zero_fx( energy_slow, CLDFB_NO_CHANNELS_MAX ); #ifndef OPT_SBA_DEC_PATH /* Calculate max possible shift for the buffer buffer_energy and buffer_intensity */ min_q_shift1 = Q31; move16(); Loading Loading @@ -1064,6 +1065,108 @@ void computeDiffuseness_fixed( move32(); } } #else /* OPT_SBA_DEC_PATH */ Word16 gaurd_bits = find_guarded_bits_fx( DIRAC_NO_COL_AVG_DIFF ); Word16 norm_arr = getScaleFactor32( buffer_energy, i_mult( DIRAC_NO_COL_AVG_DIFF, num_freq_bands ) ); Word16 shift_ene = sub( norm_arr, gaurd_bits ); Word16 q_ene = q_factor_energy[0]; // = add(*q_factor_energy, shift_ene); move16(); norm_arr = 31; move16(); FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) { FOR( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ ) { norm_arr = s_min( norm_arr, getScaleFactor32( buffer_intensity[i][j], num_freq_bands ) ); } } Word16 shift_inten = sub( norm_arr, gaurd_bits ); Word16 q_inten = q_factor_intensity[0]; // = add(*q_factor_intensity, shift_inten); move16(); FOR( i = 1; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { q_ene = s_min( q_ene, q_factor_energy[i] ); q_inten = s_min( q_inten, q_factor_intensity[i] ); } q_ene = add( q_ene, shift_ene ); q_inten = add( q_inten, shift_inten ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ p_tmp_c = buffer_energy + i * num_freq_bands; shift_ene = sub( q_ene, q_factor_energy[i] ); shift_inten = sub( q_inten, q_factor_intensity[i] ); FOR( k = 0; k < num_freq_bands; k++ ) { energy_slow[k] = L_add( energy_slow[k], L_shl( *p_tmp_c, shift_ene ) ); move32(); p_tmp_c++; } /* Intensity slow */ FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { p_tmp = buffer_intensity[j][i]; FOR( k = 0; k < num_freq_bands; k++ ) { intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shl( *p_tmp, shift_inten ) ); move32(); p_tmp++; } } } gaurd_bits = shr( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 1 ); norm_arr = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); Word16 shift = sub( norm_arr, gaurd_bits ); scale_sig32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ), shift ); q_inten = add( q_inten, shift ); /* intensity_slow.^2 + intensity_slow_abs*/ FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { p_tmp = intensity_slow + j * num_freq_bands; FOR( k = 0; k < num_freq_bands; k++ ) { intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] ); move32(); } } Word16 q_inten_slow = sub( add( q_inten, q_inten ), 31 ); /* Compute Diffuseness */ Word16 exp1, exp2, q_tmp; Word16 diff = sub( 62, q_ene ); p_tmp = intensity_slow_abs; FOR( i = 0; i < num_freq_bands; ++i ) { exp1 = sub( 31, q_inten_slow ); tmp = Sqrt32( p_tmp[i], &exp1 ); tmp = BASOP_Util_Divide3232_Scale_cadence( tmp, L_add( energy_slow[i], EPSILLON_FX ), &exp2 ); q_tmp = sub( diff, add( exp1, exp2 ) ); // bring to Q30 tmp = L_shl_sat( tmp, sub( Q30, q_tmp ) ); diffuseness[i] = L_sub( ONE_IN_Q30, tmp ); move32(); if ( LT_32( diffuseness[i], 0 ) ) { diffuseness[i] = 0; move32(); } } #endif /* OPT_SBA_DEC_PATH */ *q_diffuseness = Q30; move16(); Loading lib_com/ivas_filters_fx.c +14 −3 Original line number Diff line number Diff line Loading @@ -223,7 +223,6 @@ void ivas_filter_process_exp_fx( return; } /*-----------------------------------------------------------------------------------------* * Function ivas_iir_2_filter() * Loading Loading @@ -263,13 +262,25 @@ static void ivas_iir_2_filter_fx( { L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ); // Q31-L_prod_e L_prod_e = add( filter_state->num_e[stage][j], tmp_pIn_buf_i_e ); #ifndef OPT_SBA_DEC_PATH L_tmp = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][j], filter_state->state_e[stage][j], L_tmp_prod, L_prod_e, &L_tmp_e ); // Q31 - L_tmp_e #else /* OPT_SBA_DEC_PATH */ L_tmp_e = s_max( filter_state->state_e[stage][j], L_prod_e ); L_tmp_e = add( L_tmp_e, 1 ); L_tmp = L_add( L_shr( filter_state->state_fx[stage][j], sub( L_tmp_e, filter_state->state_e[stage][j] ) ), L_shr( L_tmp_prod, sub( L_tmp_e, L_prod_e ) ) ); #endif /* OPT_SBA_DEC_PATH */ L_tmp_prod = Mpy_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ); // Q31 - ( pIn_Out_e[i]+filter_state->den_e[stage][j] ) L_prod_e = add( pIn_Out_e[i], filter_state->den_e[stage][j] ); #ifndef OPT_SBA_DEC_PATH filter_state->state_fx[stage][j - 1] = BASOP_Util_Add_Mant32Exp( L_tmp, L_tmp_e, L_negate( L_tmp_prod ), L_prod_e, &filter_state->state_e[stage][j - 1] ); // Q31 - filter_state->state_e[stage][j - 1] #else /* OPT_SBA_DEC_PATH */ Word16 com_e = s_max( L_tmp_e, L_prod_e ); filter_state->state_e[stage][j - 1] = add( com_e, 1 ); filter_state->state_fx[stage][j - 1] = L_sub( L_shr( L_tmp, sub( filter_state->state_e[stage][j - 1], L_tmp_e ) ), L_shr( L_tmp_prod, sub( filter_state->state_e[stage][j - 1], L_prod_e ) ) ); move16(); #endif /* OPT_SBA_DEC_PATH */ move32(); /*In case when exponent is less than -31 the value is very small and negligible hence resetting it to zero to avoid exponent overflow*/ IF( LT_16( filter_state->state_e[stage][j - 1], -31 ) ) Loading lib_com/ivas_transient_det_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -403,7 +403,15 @@ void ivas_td_decorr_get_ducking_gains_fx( FOR( i = 0; i < frame_len; i++ ) { // e_fast_fx[i] = L_add( L_abs( e_fast_fx[i] ), L_shr( IVAS_TDET_PARM_ENV_EPS_fx, q_factor_diff ) ); /*Q14*/ #ifndef OPT_SBA_DEC_PATH e_fast_fx[i] = BASOP_Util_Add_Mant32Exp( L_abs( e_fast_fx[i] ), e_fast_e[i], IVAS_TDET_PARM_ENV_EPS_fx, 0, &e_fast_e[i] ); #else /* OPT_SBA_DEC_PATH */ Word32 tmp = L_abs( e_fast_fx[i] ); Word16 nrm = norm_l( tmp ); nrm = sub( nrm, 1 ); // 1 headroom for add e_fast_e[i] = sub( e_fast_e[i], nrm ); e_fast_fx[i] = L_add( L_shl( tmp, nrm ), L_shr( IVAS_TDET_PARM_ENV_EPS_fx, e_fast_e[i] ) ); #endif /* OPT_SBA_DEC_PATH */ move32(); e_slow_fx[i] = e_fast_fx[i]; move32(); Loading lib_com/options.h +2 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,8 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */ #define OPT_SBA_DEC_PATH /* Optimization made in SBA decoding path */ #define OPT_IVAS_FILTER_ROM /* Optimization made in IVAS filter table */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ Loading Loading
lib_com/ivas_dirac_com_fx.c +105 −2 Original line number Diff line number Diff line Loading @@ -917,15 +917,16 @@ void computeDiffuseness_fixed( move32(); Word32 *p_tmp; const Word32 *p_tmp_c; #ifndef OPT_SBA_DEC_PATH Word16 min_q_shift1, min_q_shift2, exp1, exp2, q_tmp; Word16 q_ene, q_intensity, q_intensity_slow; #endif /* OPT_SBA_DEC_PATH */ /* Compute Intensity slow and energy slow buffer_intensity and buffer_energy */ set_zero_fx( intensity_slow, i_mult( DIRAC_NUM_DIMS, CLDFB_NO_CHANNELS_MAX ) ); set_zero_fx( intensity_slow_abs, CLDFB_NO_CHANNELS_MAX ); set_zero_fx( energy_slow, CLDFB_NO_CHANNELS_MAX ); #ifndef OPT_SBA_DEC_PATH /* Calculate max possible shift for the buffer buffer_energy and buffer_intensity */ min_q_shift1 = Q31; move16(); Loading Loading @@ -1064,6 +1065,108 @@ void computeDiffuseness_fixed( move32(); } } #else /* OPT_SBA_DEC_PATH */ Word16 gaurd_bits = find_guarded_bits_fx( DIRAC_NO_COL_AVG_DIFF ); Word16 norm_arr = getScaleFactor32( buffer_energy, i_mult( DIRAC_NO_COL_AVG_DIFF, num_freq_bands ) ); Word16 shift_ene = sub( norm_arr, gaurd_bits ); Word16 q_ene = q_factor_energy[0]; // = add(*q_factor_energy, shift_ene); move16(); norm_arr = 31; move16(); FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) { FOR( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ ) { norm_arr = s_min( norm_arr, getScaleFactor32( buffer_intensity[i][j], num_freq_bands ) ); } } Word16 shift_inten = sub( norm_arr, gaurd_bits ); Word16 q_inten = q_factor_intensity[0]; // = add(*q_factor_intensity, shift_inten); move16(); FOR( i = 1; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { q_ene = s_min( q_ene, q_factor_energy[i] ); q_inten = s_min( q_inten, q_factor_intensity[i] ); } q_ene = add( q_ene, shift_ene ); q_inten = add( q_inten, shift_inten ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ p_tmp_c = buffer_energy + i * num_freq_bands; shift_ene = sub( q_ene, q_factor_energy[i] ); shift_inten = sub( q_inten, q_factor_intensity[i] ); FOR( k = 0; k < num_freq_bands; k++ ) { energy_slow[k] = L_add( energy_slow[k], L_shl( *p_tmp_c, shift_ene ) ); move32(); p_tmp_c++; } /* Intensity slow */ FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { p_tmp = buffer_intensity[j][i]; FOR( k = 0; k < num_freq_bands; k++ ) { intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shl( *p_tmp, shift_inten ) ); move32(); p_tmp++; } } } gaurd_bits = shr( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 1 ); norm_arr = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); Word16 shift = sub( norm_arr, gaurd_bits ); scale_sig32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ), shift ); q_inten = add( q_inten, shift ); /* intensity_slow.^2 + intensity_slow_abs*/ FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { p_tmp = intensity_slow + j * num_freq_bands; FOR( k = 0; k < num_freq_bands; k++ ) { intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] ); move32(); } } Word16 q_inten_slow = sub( add( q_inten, q_inten ), 31 ); /* Compute Diffuseness */ Word16 exp1, exp2, q_tmp; Word16 diff = sub( 62, q_ene ); p_tmp = intensity_slow_abs; FOR( i = 0; i < num_freq_bands; ++i ) { exp1 = sub( 31, q_inten_slow ); tmp = Sqrt32( p_tmp[i], &exp1 ); tmp = BASOP_Util_Divide3232_Scale_cadence( tmp, L_add( energy_slow[i], EPSILLON_FX ), &exp2 ); q_tmp = sub( diff, add( exp1, exp2 ) ); // bring to Q30 tmp = L_shl_sat( tmp, sub( Q30, q_tmp ) ); diffuseness[i] = L_sub( ONE_IN_Q30, tmp ); move32(); if ( LT_32( diffuseness[i], 0 ) ) { diffuseness[i] = 0; move32(); } } #endif /* OPT_SBA_DEC_PATH */ *q_diffuseness = Q30; move16(); Loading
lib_com/ivas_filters_fx.c +14 −3 Original line number Diff line number Diff line Loading @@ -223,7 +223,6 @@ void ivas_filter_process_exp_fx( return; } /*-----------------------------------------------------------------------------------------* * Function ivas_iir_2_filter() * Loading Loading @@ -263,13 +262,25 @@ static void ivas_iir_2_filter_fx( { L_tmp_prod = Mpy_32_32( filter_state->num_fx[stage][j], tmp_pIn_buf_i_fx ); // Q31-L_prod_e L_prod_e = add( filter_state->num_e[stage][j], tmp_pIn_buf_i_e ); #ifndef OPT_SBA_DEC_PATH L_tmp = BASOP_Util_Add_Mant32Exp( filter_state->state_fx[stage][j], filter_state->state_e[stage][j], L_tmp_prod, L_prod_e, &L_tmp_e ); // Q31 - L_tmp_e #else /* OPT_SBA_DEC_PATH */ L_tmp_e = s_max( filter_state->state_e[stage][j], L_prod_e ); L_tmp_e = add( L_tmp_e, 1 ); L_tmp = L_add( L_shr( filter_state->state_fx[stage][j], sub( L_tmp_e, filter_state->state_e[stage][j] ) ), L_shr( L_tmp_prod, sub( L_tmp_e, L_prod_e ) ) ); #endif /* OPT_SBA_DEC_PATH */ L_tmp_prod = Mpy_32_32( filter_state->den_fx[stage][j], pOut_fx[i] ); // Q31 - ( pIn_Out_e[i]+filter_state->den_e[stage][j] ) L_prod_e = add( pIn_Out_e[i], filter_state->den_e[stage][j] ); #ifndef OPT_SBA_DEC_PATH filter_state->state_fx[stage][j - 1] = BASOP_Util_Add_Mant32Exp( L_tmp, L_tmp_e, L_negate( L_tmp_prod ), L_prod_e, &filter_state->state_e[stage][j - 1] ); // Q31 - filter_state->state_e[stage][j - 1] #else /* OPT_SBA_DEC_PATH */ Word16 com_e = s_max( L_tmp_e, L_prod_e ); filter_state->state_e[stage][j - 1] = add( com_e, 1 ); filter_state->state_fx[stage][j - 1] = L_sub( L_shr( L_tmp, sub( filter_state->state_e[stage][j - 1], L_tmp_e ) ), L_shr( L_tmp_prod, sub( filter_state->state_e[stage][j - 1], L_prod_e ) ) ); move16(); #endif /* OPT_SBA_DEC_PATH */ move32(); /*In case when exponent is less than -31 the value is very small and negligible hence resetting it to zero to avoid exponent overflow*/ IF( LT_16( filter_state->state_e[stage][j - 1], -31 ) ) Loading
lib_com/ivas_transient_det_fx.c +8 −0 Original line number Diff line number Diff line Loading @@ -403,7 +403,15 @@ void ivas_td_decorr_get_ducking_gains_fx( FOR( i = 0; i < frame_len; i++ ) { // e_fast_fx[i] = L_add( L_abs( e_fast_fx[i] ), L_shr( IVAS_TDET_PARM_ENV_EPS_fx, q_factor_diff ) ); /*Q14*/ #ifndef OPT_SBA_DEC_PATH e_fast_fx[i] = BASOP_Util_Add_Mant32Exp( L_abs( e_fast_fx[i] ), e_fast_e[i], IVAS_TDET_PARM_ENV_EPS_fx, 0, &e_fast_e[i] ); #else /* OPT_SBA_DEC_PATH */ Word32 tmp = L_abs( e_fast_fx[i] ); Word16 nrm = norm_l( tmp ); nrm = sub( nrm, 1 ); // 1 headroom for add e_fast_e[i] = sub( e_fast_e[i], nrm ); e_fast_fx[i] = L_add( L_shl( tmp, nrm ), L_shr( IVAS_TDET_PARM_ENV_EPS_fx, e_fast_e[i] ) ); #endif /* OPT_SBA_DEC_PATH */ move32(); e_slow_fx[i] = e_fast_fx[i]; move32(); Loading
lib_com/options.h +2 −0 Original line number Diff line number Diff line Loading @@ -72,6 +72,8 @@ /* Note: each compile switch (FIX_1101_...) is independent from the other ones */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ #define OPT_AVOID_STATE_BUF_RESCALE /* Optimization made to avoid rescale of synth state buffer */ #define OPT_SBA_DEC_PATH /* Optimization made in SBA decoding path */ #define OPT_IVAS_FILTER_ROM /* Optimization made in IVAS filter table */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ /* Both following 2 macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST*) are independent from each other, they refer to different code blocks */ Loading