Commit 7acfb9db authored by Arthur Tritthart's avatar Arthur Tritthart Committed by Manuel Jander
Browse files

Reduce the high complexity of param_mc_prm_est: MC/7-1-4/128kBit reduced by 166 WMOPS

parent dbb240eb
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -68,9 +68,18 @@
#endif

/* Note: each compile switch (FIX_1101_...) is independent of the others */
//#define OPT_STEREO_32KBPS_V1                    /* Optimization made in stereo decoding path for 32kbps decoding */
#define OPT_AVOID_STATE_BUF_RESCALE             /* Optimization made to avoid rescale of synth state buffer */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx                 /* FhG: WMOPS tuning, nonbe */
#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot   /* FhG: WMOPS tuning, nonbe */
#define FIX_1379_MASA_ANGLE_ROUND               /* NOTE(review): no description given here; presumably changes rounding of MASA angles (issue 1379) - confirm */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_MADD_ADD_WEIGHTS  /* FhG: Defines 1.0f-weight variables, uses Madd operation instead of L_add_sat */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_SPLIT_LOOPS           /* FhG: Splits single loop with IF-statements into two low-complex loops */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_HQ_CONSTANTS          /* FhG: IMPROVE PRECISION: Uses 1/6 and 1/20 in full-precise Q31 constants instead of Q15 */
#define FIX_1101_IVAS_SPAR_DEC_UPMIXER_SF_USE_UNIQUE_SHL        /* FhG: Uses unique shift amount in each loop iteration */
#define FIX_11_1_IVAS_SPAR_DEC_UPMIXER_SF_RND_COEFFS            /* FhG: ivas_spar_com.c: Zeroes very small negative coeffs via L_shr_r (was L_shr) */
#define FIX_ISSUE_1237                          /* VA: replace the Copy_Scale_sig_16_32_DEPREC() calls that do a 16-bit left shift with Copy_Scale_sig_16_32_no_sat() */
#define FIX_ISSUE_1237_KEEP_EVS_BE              /* VA: Fix to keep EVS bit-exactness to 26.444 */
#define FIX_ISSUE_1214                          /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB */
#define FIX_881_HILBERT_FILTER                  /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
#define FIX_ISSUE_1245                          /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps */
#define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
#define SVD_WMOPS_OPT                           /* Ittiam: SVD-related optimizations */
#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST  /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */
#endif
+83 −0
Original line number Diff line number Diff line
@@ -720,9 +720,16 @@ static void ivas_param_mc_param_est_enc_fx(
        }
    }

#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
    Word16 gb = find_guarded_bits_fx( l_ts );
    Word16 add20gb = add( 20, gb );
#endif

    FOR( ts = start_ts; ts < num_time_slots; ts++ )
    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
        Word16 gb = find_guarded_bits_fx( l_ts );
#endif
        ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb );
        ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans );

@@ -807,10 +814,25 @@ static void ivas_param_mc_param_est_enc_fx(
                {
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
                        b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
                        c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
                        d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
#else
                        a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]);
                        a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e);
                        a_e = sub(add20gb, a_e);
                        b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
                        b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
                        b_e = sub( add20gb, b_e );
                        c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
                        c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
                        c_e = sub( add20gb, c_e );
                        d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
                        d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
                        d_e = sub( add20gb, d_e );
#endif

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
@@ -841,6 +863,7 @@ static void ivas_param_mc_param_est_enc_fx(

                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    dmx_real_fx[ch_idx1] = 0;
                    move32();
                    dmx_real_e[ch_idx1] = 0;
@@ -860,13 +883,49 @@ static void ivas_param_mc_param_est_enc_fx(
                        move32();
                        p_dmx_fac_fx++;
                    }
#else
                    Word32 real_fx = L_add(0, 0);
                    Word16 real_e = 0;
                    move16();
                    Word32 imag_fx = L_add( 0, 0 );
                    Word16 imag_e = 0;
                    move16();

                    /* Accumulate the downmix of all input channels for this transport channel:
                       each real/imag bin is weighted by the current downmix factor and added to
                       the running mantissa/exponent sums. The products are added with exponent
                       add20gb, i.e. add( 20, gb ) computed once ahead of the time-slot loop, so
                       the addition is not repeated for every input channel. */
                    FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ )
                    {
                        L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
                        real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e );
                        L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
                        imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e );
                        p_dmx_fac_fx++; /* one downmix factor is consumed per input channel */
                    }
                    dmx_real_fx[ch_idx1] = real_fx;
                    move32();
                    dmx_real_e[ch_idx1] = real_e;
                    move16();
                    dmx_imag_fx[ch_idx1] = imag_fx;
                    move32();
                    dmx_imag_e[ch_idx1] = imag_e;
                    move16();
#endif
                }

                /* Cx for transport channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    a_fx = dmx_real_fx[ch_idx1];
                    move32();
                    a_e = dmx_real_e[ch_idx1];
                    move16();
                    b_fx = dmx_imag_fx[ch_idx1];
                    move32();
                    b_e = dmx_imag_e[ch_idx1];
                    move16();
#endif
                    FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = dmx_real_fx[ch_idx1];
                        move32();
                        a_e = dmx_real_e[ch_idx1];
@@ -888,6 +947,12 @@ static void ivas_param_mc_param_est_enc_fx(
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#else
                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#endif
                        move32();
                    }
                }
@@ -895,12 +960,30 @@ static void ivas_param_mc_param_est_enc_fx(
                /* Cy for input channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
                {
#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] );
                    a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e );
                    a_e = sub( add20gb, a_e );
                    b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
                    b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
                    b_e = sub( add20gb, b_e );
#endif
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
                        b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
                        c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
                        d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
#else

                        c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
                        c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
                        c_e = sub( add20gb, c_e );
                        d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
                        d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
                        d_e = sub( add20gb, d_e );
#endif

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );