Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -99,4 +99,5 @@ #define FIX_1107_VADDINC /* FhG: Optimize v_add_inc_fx() for most frequent case */ #define FIX_1009_OPT_PARAMMC_RENDER /* FhG: Optimize ivas_param_mc_dec_render_fx() */ #define FIX_1109_OPTIM_MCT_STEREO_IGF_DEC /* FhG: optimize mctStereoIGF_dec_fx() */ #define FIX_1110_OPTIM_DIRAC_DECORR_PROC /* FhG: optimize ivas_dirac_dec_decorr_process_fx() */ #endif lib_rend/ivas_dirac_decorr_dec.c +160 −3 Original line number Diff line number Diff line Loading @@ -57,6 +57,12 @@ #define DIRAC_DUCK_ALPHA_FX 1717986944 /* Q31 */ #define ONE_M_DIRAC_DUCK_ALPHA 429496736 /* Q31 */ #ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* Maximal useful q-format, represents range of 2^-126 (float min) */ #define MAX_Q_FX 157 #endif /*------------------------------------------------------------------------- * Local function prototypes *------------------------------------------------------------------------*/ Loading Loading @@ -583,6 +589,8 @@ void ivas_dirac_dec_decorr_process_fx( Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels ); guarded_bits = 0; #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ ) { IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 ) Loading @@ -593,6 +601,23 @@ void ivas_dirac_dec_decorr_process_fx( q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); q_decorr_buf = add( q_decorr_buf, q_shift ); #else Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ); if ( is_zero == 0 ) { guarded_bits = 3; } IF( is_zero == 0 ) { q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); IF( q_shift != 0 ) { 
Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); q_decorr_buf = add( q_decorr_buf, q_shift ); } } #endif q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) ); Loading Loading @@ -655,10 +680,15 @@ void ivas_dirac_dec_decorr_process_fx( /* MA part of filter impulse response */ FOR( l = 0; l < filter_length; l++ ) { #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_decorr // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_decorr // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf #else frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_decorr frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_decorr #endif move32(); move32(); } Loading @@ -672,13 +702,21 @@ void ivas_dirac_dec_decorr_process_fx( /*get values for AR part */ filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 ); #else Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 ); decorr_buffer_ptr_fx += decorr_buffer_step2x; move16(); #endif FOR( l = 1; l < filter_length; l++ ) { // q adjustment needed// #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ); // q_decorr Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_1 = L_shl( temp_1, 3 ); // q_decorr Loading @@ -691,7 +729,20 @@ void ivas_dirac_dec_decorr_process_fx( 
move32(); move32(); move32(); #else Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_1 = L_shl( temp_1, 3 ); // q_decorr decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_decorr move32(); Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_2 = L_shl( temp_2, 3 ); // q_decorr decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr move32(); decorr_buffer_ptr_fx += decorr_buffer_step2x; move16(); #endif } } } Loading Loading @@ -748,6 +799,11 @@ void ivas_dirac_dec_decorr_process_fx( q_direct_energy = q_aux_buffer; move16(); #ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* Attention: this loop reports norm=0, whenever any data is 0. */ /* Therefore, useful left-shifts are skipped, accuracy is lost. */ #endif #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* calculate the power of the decorrelated signal */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { Loading @@ -760,6 +816,37 @@ void ivas_dirac_dec_decorr_process_fx( norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) ); } } #else /* calculate the power of the decorrelated signal */ Word64 *m64_aux = aux_64; move32(); Word64 min64 = (Word64) 0; move64(); Word32 *m32_frame_dec_fx = frame_dec_fx; move32(); offset1 = shl( num_freq_bands, 1 ); offset2 = shl( max_band_decorr, 1 ); FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { FOR( Word16 i = 0; i < offset2; i++ ) { m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] ); move64(); if ( GT_64( m64_aux[i], min64 ) ) { min64 = m64_aux[i]; move64(); } } m64_aux += offset2; m32_frame_dec_fx += offset1; move64(); move32(); } norm = W_norm( min64 ); #endif FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ ) { Loading @@ -775,6 +862,8 @@ void ivas_dirac_dec_decorr_process_fx( } /* smooth energies */ #ifndef 
FIX_1110_OPTIM_DIRAC_DECORR_PROC v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( num_channels, max_band_decorr ) ); // q_aux_buffer v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q Loading @@ -801,6 +890,64 @@ void ivas_dirac_dec_decorr_process_fx( Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); #else Word16 len = imult1616( num_channels, max_band_decorr ); Word16 aux_e = sub( 31, q_aux_buffer ); Word16 max_e = s_max( aux_e, e_reverb_energy_smooth ); Word16 shr_aux = sub( max_e, aux_e ); /* Note: headroom is zero */ Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */ /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ /* => a multiplication with these values does not change the q/e value. 
*/ FOR( Word16 i = 0; i < len; i++ ) { h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); move32(); } e_reverb_energy_smooth = max_e; move16(); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); move16(); len = imult1616( num_protos_dir, max_band_decorr ); Word16 den_e = sub( 31, q_direct_energy ); Word16 max_x = s_max( den_e, e_direct_energy_smooth ); Word16 shr_den = sub( max_x, den_e ); /* Note: headroom is zero */ Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */ FOR( Word16 i = 0; i < len; i++ ) { h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); move32(); } e_direct_energy_smooth = max_x; move16(); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth ); move16(); // scaling energy buffers for better precision for higher values// q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); move16(); } q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = 
add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); } h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); #endif e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); Loading Loading @@ -856,8 +1003,13 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14 #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f #else frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f #endif move32(); move32(); } Loading @@ -878,8 +1030,13 @@ void ivas_dirac_dec_decorr_process_fx( { duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13 } #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec #else frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec #endif move32(); move32(); } Loading Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -99,4 +99,5 @@ #define FIX_1107_VADDINC /* FhG: Optimize v_add_inc_fx() for most frequent case */ #define FIX_1009_OPT_PARAMMC_RENDER /* FhG: Optimize ivas_param_mc_dec_render_fx() */ #define FIX_1109_OPTIM_MCT_STEREO_IGF_DEC /* FhG: optimize mctStereoIGF_dec_fx() */ #define FIX_1110_OPTIM_DIRAC_DECORR_PROC /* FhG: optimize ivas_dirac_dec_decorr_process_fx() */ #endif
lib_rend/ivas_dirac_decorr_dec.c +160 −3 Original line number Diff line number Diff line Loading @@ -57,6 +57,12 @@ #define DIRAC_DUCK_ALPHA_FX 1717986944 /* Q31 */ #define ONE_M_DIRAC_DUCK_ALPHA 429496736 /* Q31 */ #ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* Maximal useful q-format, represents range of 2^-126 (float min) */ #define MAX_Q_FX 157 #endif /*------------------------------------------------------------------------- * Local function prototypes *------------------------------------------------------------------------*/ Loading Loading @@ -583,6 +589,8 @@ void ivas_dirac_dec_decorr_process_fx( Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels ); guarded_bits = 0; #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ ) { IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 ) Loading @@ -593,6 +601,23 @@ void ivas_dirac_dec_decorr_process_fx( q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); q_decorr_buf = add( q_decorr_buf, q_shift ); #else Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ); if ( is_zero == 0 ) { guarded_bits = 3; } IF( is_zero == 0 ) { q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits ); IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift ); q_decorr_buf = add( q_decorr_buf, q_shift ); } } #endif q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) ); Loading Loading @@ -655,10 +680,15 @@ void ivas_dirac_dec_decorr_process_fx( /* MA part of filter impulse response */ FOR( l = 0; l < filter_length; l++ ) { #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC 
frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_decorr // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_decorr // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf #else frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_decorr frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_decorr #endif move32(); move32(); } Loading @@ -672,13 +702,21 @@ void ivas_dirac_dec_decorr_process_fx( /*get values for AR part */ filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 ); #else Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 ); decorr_buffer_ptr_fx += decorr_buffer_step2x; move16(); #endif FOR( l = 1; l < filter_length; l++ ) { // q adjustment needed// #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ); // q_decorr Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_1 = L_shl( temp_1, 3 ); // q_decorr Loading @@ -691,7 +729,20 @@ void ivas_dirac_dec_decorr_process_fx( move32(); move32(); move32(); #else Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_1 = L_shl( temp_1, 3 ); // q_decorr decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_decorr move32(); Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] ); // q_decorr - 3 temp_2 = L_shl( temp_2, 3 ); // q_decorr decorr_buffer_ptr_fx[1] = 
L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr move32(); decorr_buffer_ptr_fx += decorr_buffer_step2x; move16(); #endif } } } Loading Loading @@ -748,6 +799,11 @@ void ivas_dirac_dec_decorr_process_fx( q_direct_energy = q_aux_buffer; move16(); #ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* Attention: this loop reports norm=0, whenever any data is 0. */ /* Therefore, useful left-shifts are skipped, accuracy is lost. */ #endif #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC /* calculate the power of the decorrelated signal */ FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { Loading @@ -760,6 +816,37 @@ void ivas_dirac_dec_decorr_process_fx( norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) ); } } #else /* calculate the power of the decorrelated signal */ Word64 *m64_aux = aux_64; move32(); Word64 min64 = (Word64) 0; move64(); Word32 *m32_frame_dec_fx = frame_dec_fx; move32(); offset1 = shl( num_freq_bands, 1 ); offset2 = shl( max_band_decorr, 1 ); FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx ) { FOR( Word16 i = 0; i < offset2; i++ ) { m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] ); move64(); if ( GT_64( m64_aux[i], min64 ) ) { min64 = m64_aux[i]; move64(); } } m64_aux += offset2; m32_frame_dec_fx += offset1; move64(); move32(); } norm = W_norm( min64 ); #endif FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ ) { Loading @@ -775,6 +862,8 @@ void ivas_dirac_dec_decorr_process_fx( } /* smooth energies */ #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( num_channels, max_band_decorr ) ); // q_aux_buffer v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q Loading @@ -801,6 +890,64 @@ void ivas_dirac_dec_decorr_process_fx( Scale_sig32( 
h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); #else Word16 len = imult1616( num_channels, max_band_decorr ); Word16 aux_e = sub( 31, q_aux_buffer ); Word16 max_e = s_max( aux_e, e_reverb_energy_smooth ); Word16 shr_aux = sub( max_e, aux_e ); /* Note: headroom is zero */ Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */ /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */ /* => a multiplication with these values does not change the q/e value. */ FOR( Word16 i = 0; i < len; i++ ) { h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) ); move32(); } e_reverb_energy_smooth = max_e; move16(); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth ); move16(); len = imult1616( num_protos_dir, max_band_decorr ); Word16 den_e = sub( 31, q_direct_energy ); Word16 max_x = s_max( den_e, e_direct_energy_smooth ); Word16 shr_den = sub( max_x, den_e ); /* Note: headroom is zero */ Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */ FOR( Word16 i = 0; i < len; i++ ) { h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add( L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ), L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) ); move32(); } e_direct_energy_smooth = max_x; move16(); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth ); move16(); // scaling energy buffers for better precision for higher values// q_shift = L_norm_arr( 
h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) ); IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift ); move16(); } q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); IF( q_shift != 0 ) { Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift ); h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift ); move16(); } h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); #endif e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth ); e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth ); Loading Loading @@ -856,8 +1003,13 @@ void ivas_dirac_dec_decorr_process_fx( duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14 #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f #else frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 ); // q_frame_f frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f #endif move32(); move32(); } Loading @@ -878,8 +1030,13 @@ void 
ivas_dirac_dec_decorr_process_fx( { duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13 } #ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec #else frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 ); // q_frame_dec frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec #endif move32(); move32(); } Loading