Commit e1432023 authored by Sandesh Venkatesh
Browse files

Merge branch '1110-complexity-optimize-ivas_dirac_dec_decorr_process_fx' into 'main'

Resolve "[Complexity] Optimize ivas_dirac_dec_decorr_process_fx()"

Closes #1110

See merge request !882
parents 4d073acc 0cd5c2e1
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -99,4 +99,5 @@
#define FIX_1107_VADDINC                        /* FhG: Optimize v_add_inc_fx() for most frequent case */
#define FIX_1009_OPT_PARAMMC_RENDER             /* FhG: Optimize ivas_param_mc_dec_render_fx() */
#define FIX_1109_OPTIM_MCT_STEREO_IGF_DEC       /* FhG: optimize mctStereoIGF_dec_fx() */
#define FIX_1110_OPTIM_DIRAC_DECORR_PROC        /* FhG: optimize ivas_dirac_dec_decorr_process() */
#endif
+160 −3
Original line number Diff line number Diff line
@@ -57,6 +57,12 @@
#define DIRAC_DUCK_ALPHA_FX    1717986944 /* Q31 */
#define ONE_M_DIRAC_DUCK_ALPHA 429496736  /* Q31 */

#ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC
/* Largest useful Q-format: 157 = 31 + 126. With this Q-value a full-scale  */
/* 32-bit value corresponds to about 2^-126, i.e. the smallest normalized   */
/* single-precision float, so larger Q-values add no representable range.   */
/* Used as the upper bound when clamping the stored Q-values of the         */
/* smoothed reverb/direct energy buffers after renormalization.             */
#define MAX_Q_FX 157


#endif
/*-------------------------------------------------------------------------
 * Local function prototypes
 *------------------------------------------------------------------------*/
@@ -583,6 +589,8 @@ void ivas_dirac_dec_decorr_process_fx(

        Word16 decorr_buff_tot_len = imult1616( imult1616( shl( decorr_buffer_len, 1 ), max_band_decorr ), num_channels );
        guarded_bits = 0;

#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
        FOR( Word16 i = 0; i < decorr_buff_tot_len; i++ )
        {
            IF( h_freq_domain_decorr_ap_state->decorr_buffer_fx[i] != 0 )
@@ -593,6 +601,23 @@ void ivas_dirac_dec_decorr_process_fx(
        q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits );
        Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift );
        q_decorr_buf = add( q_decorr_buf, q_shift );
#else
        Flag is_zero = is_zero_arr( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len );
        if ( is_zero == 0 )
        {
            guarded_bits = 3;
        }

        IF( is_zero == 0 )
        {
            q_shift = sub( getScaleFactor32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len ), guarded_bits );
            IF( q_shift != 0 )
            {
                Scale_sig32( h_freq_domain_decorr_ap_state->decorr_buffer_fx, decorr_buff_tot_len, q_shift );
                q_decorr_buf = add( q_decorr_buf, q_shift );
            }
        }
#endif

        q_shift = getScaleFactor32( aux_buffer_fx, imult1616( imult1616( 2, num_protos_dir ), max_band_decorr_temp ) );

@@ -655,10 +680,15 @@ void ivas_dirac_dec_decorr_process_fx(
                    /* MA part of filter impulse response */
                    FOR( l = 0; l < filter_length; l++ )
                    {
#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                        frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] ); // Q_qux -3 = q_deorr
                        // frame_ma_fx[2 * l] = L_shr(frame_ma_fx[2 * l],3); // scaling to q_decorr_buf
                        frame_ma_fx[add( shl( l, 1 ), 1 )] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr
                                                                                                                        // frame_ma_fx[2 * l + 1] = L_shr(frame_ma_fx[2 * l + 1], 3); // scaling to q_decorr_buf
#else
                        frame_ma_fx[2 * l] = Mpy_32_16_1( input_real_fx, filter_coeff_num_real_fx[l] );     // Q_qux -3 = q_deorr
                        frame_ma_fx[2 * l + 1] = Mpy_32_16_1( input_imag_fx, filter_coeff_num_real_fx[l] ); // Q_qux - 3 = q_deorr
#endif
                        move32();
                        move32();
                    }
@@ -672,13 +702,21 @@ void ivas_dirac_dec_decorr_process_fx(

                    /*get values for AR part */
                    filter_frame_real_fx = decorr_buffer_ptr_fx[0]; // q_decorr
                    filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_deocrr
                    filter_frame_imag_fx = decorr_buffer_ptr_fx[1]; // q_decorr

#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                    decorr_buffer_ptr_fx += shl( decorr_buffer_step, 1 );
#else
                    Word16 decorr_buffer_step2x = shl( decorr_buffer_step, 1 );

                    decorr_buffer_ptr_fx += decorr_buffer_step2x;
                    move16();
#endif

                    FOR( l = 1; l < filter_length; l++ )
                    {
                        // q adjustment needed//
#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                        decorr_buffer_ptr_fx[0] = L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] );                 // q_decorr
                        Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] );               // q_decorr - 3
                        temp_1 = L_shl( temp_1, 3 );                                                                    // q_decorr
@@ -691,7 +729,20 @@ void ivas_dirac_dec_decorr_process_fx(
                        move32();
                        move32();
                        move32();
#else
                        Word32 temp_1 = Mpy_32_16_1( filter_frame_real_fx, filter_coeff_den_real_fx[l] );                // q_decorr - 3
                        temp_1 = L_shl( temp_1, 3 );                                                                     // q_decorr
                        decorr_buffer_ptr_fx[0] = L_sub( L_add( decorr_buffer_ptr_fx[0], frame_ma_fx[2 * l] ), temp_1 ); // q_deocor
                        move32();

                        Word32 temp_2 = Mpy_32_16_1( filter_frame_imag_fx, filter_coeff_den_real_fx[l] );                    // q_decorr - 3
                        temp_2 = L_shl( temp_2, 3 );                                                                         // q_decorr
                        decorr_buffer_ptr_fx[1] = L_sub( L_add( decorr_buffer_ptr_fx[1], frame_ma_fx[2 * l + 1] ), temp_2 ); // q_decorr
                        move32();

                        decorr_buffer_ptr_fx += decorr_buffer_step2x;
                        move16();
#endif
                    }
                }
            }
@@ -748,6 +799,11 @@ void ivas_dirac_dec_decorr_process_fx(
            q_direct_energy = q_aux_buffer;
            move16();

#ifdef FIX_1110_OPTIM_DIRAC_DECORR_PROC
            /* Attention: this loop reports norm=0, whenever any data is 0. */
            /* Therefore, useful left-shifts are skipped, accuracy is lost. */
#endif
#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
            /* calculate the power of the decorrelated signal */
            FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
            {
@@ -760,6 +816,37 @@ void ivas_dirac_dec_decorr_process_fx(
                    norm = s_min( norm, W_norm( aux_64[add( offset2, i )] ) );
                }
            }
#else
            /* calculate the power of the decorrelated signal */
            Word64 *m64_aux = aux_64;
            move32();
            Word64 min64 = (Word64) 0;
            move64();
            Word32 *m32_frame_dec_fx = frame_dec_fx;
            move32();
            offset1 = shl( num_freq_bands, 1 );
            offset2 = shl( max_band_decorr, 1 );


            FOR( ch_idx = 0; ch_idx < num_channels; ++ch_idx )
            {
                FOR( Word16 i = 0; i < offset2; i++ )
                {
                    m64_aux[i] = W_mult0_32_32( m32_frame_dec_fx[i], m32_frame_dec_fx[i] );
                    move64();
                    if ( GT_64( m64_aux[i], min64 ) )
                    {
                        min64 = m64_aux[i];
                        move64();
                    }
                }
                m64_aux += offset2;
                m32_frame_dec_fx += offset1;
                move64();
                move32();
            }
            norm = W_norm( min64 );
#endif

            FOR( Word16 i = 0; i < shl( imult1616( num_channels, max_band_decorr ), 1 ); i++ )
            {
@@ -775,6 +862,8 @@ void ivas_dirac_dec_decorr_process_fx(
            }

            /* smooth energies */

#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
            v_multc_fixed( aux_buffer_fx, ONE_M_DIRAC_DUCK_ALPHA, aux_buffer_fx, imult1616( num_channels, max_band_decorr ) ); // q_aux_buffer

            v_multc_fixed( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, DIRAC_DUCK_ALPHA_FX, h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) ); // same-q
@@ -801,6 +890,64 @@ void ivas_dirac_dec_decorr_process_fx(
            Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift );
            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift );
            move16();
#else
            Word16 len = imult1616( num_channels, max_band_decorr );
            Word16 aux_e = sub( 31, q_aux_buffer );
            Word16 max_e = s_max( aux_e, e_reverb_energy_smooth );
            Word16 shr_aux = sub( max_e, aux_e );                  /* Note: headroom is zero */
            Word16 shr_res = sub( max_e, e_reverb_energy_smooth ); /* Note: headroom is zero */

            /* Note: DIRAC_DUCK_ALPHA_FX and ONE_M_DIRAC_DUCK_ALPHA are both in Q31 (e=0) */
            /*       => a multiplication with this values does not change the q/e value.  */

            FOR( Word16 i = 0; i < len; i++ )
            {
                h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i] = L_add(
                    L_shr( Mpy_32_32( aux_buffer_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_aux ),
                    L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_res ) );
                move32();
            }
            e_reverb_energy_smooth = max_e;
            move16();
            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = sub( 31, e_reverb_energy_smooth );
            move16();

            len = imult1616( num_protos_dir, max_band_decorr );
            Word16 den_e = sub( 31, q_direct_energy );
            Word16 max_x = s_max( den_e, e_direct_energy_smooth );
            Word16 shr_den = sub( max_x, den_e );                  /* Note: headroom is zero */
            Word16 shr_des = sub( max_x, e_direct_energy_smooth ); /* Note: headroom is zero */

            FOR( Word16 i = 0; i < len; i++ )
            {
                h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i] = L_add(
                    L_shr( Mpy_32_32( direct_energy_fx[i], ONE_M_DIRAC_DUCK_ALPHA ), shr_den ),
                    L_shr( Mpy_32_32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx[i], DIRAC_DUCK_ALPHA_FX ), shr_des ) );
                move32();
            }
            e_direct_energy_smooth = max_x;
            move16();
            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = sub( 31, e_direct_energy_smooth );
            move16();

            // scaling energy buffers for better precision for higher values//
            q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ) );
            IF( q_shift != 0 )
            {
                Scale_sig32( h_freq_domain_decorr_ap_state->direct_energy_smooth_fx, imult1616( num_protos_dir, max_band_decorr ), q_shift );
                h_freq_domain_decorr_ap_state->q_direct_energy_smooth = add( h_freq_domain_decorr_ap_state->q_direct_energy_smooth, q_shift );
                move16();
            }
            q_shift = L_norm_arr( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ) );
            IF( q_shift != 0 )
            {
                Scale_sig32( h_freq_domain_decorr_ap_state->reverb_energy_smooth_fx, imult1616( num_channels, max_band_decorr ), q_shift );
                h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = add( h_freq_domain_decorr_ap_state->q_reverb_energy_smooth, q_shift );
                move16();
            }
            h_freq_domain_decorr_ap_state->q_reverb_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth );
            h_freq_domain_decorr_ap_state->q_direct_energy_smooth = min( MAX_Q_FX, h_freq_domain_decorr_ap_state->q_direct_energy_smooth );
#endif

            e_reverb_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_reverb_energy_smooth );
            e_direct_energy_smooth = sub( 31, h_freq_domain_decorr_ap_state->q_direct_energy_smooth );
@@ -856,8 +1003,13 @@ void ivas_dirac_dec_decorr_process_fx(

                        duck_gain = shl( duck_gain, sub( e_duck_gain, 1 ) ); // Q14

#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );                                 // q_frame_f
                        frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 1 ); // q_frame_f
#else
                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 1 );         // q_frame_f
                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 1 ); // q_frame_f
#endif
                        move32();
                        move32();
                    }
@@ -878,8 +1030,13 @@ void ivas_dirac_dec_decorr_process_fx(
                        {
                            duck_gain = shl( duck_gain, sub( e_duck_gain, 2 ) ); // Q13
                        }
#ifndef FIX_1110_OPTIM_DIRAC_DECORR_PROC
                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );                                 // q_frame_dec
                        frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[add( shl( band_idx, 1 ), 1 )], duck_gain ), 2 ); // q_frame_dec
#else
                        frame_dec_fx_ptr[2 * band_idx] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx], duck_gain ), 2 );         // q_frame_dec
                        frame_dec_fx_ptr[2 * band_idx + 1] = L_shl( Mpy_32_16_1( frame_dec_fx_ptr[2 * band_idx + 1], duck_gain ), 2 ); // q_frame_dec
#endif
                        move32();
                        move32();
                    }