Commit fd003f8c authored by Arthur Tritthart's avatar Arthur Tritthart
Browse files

Reduce the high complexity of param_mc_prm_est: MC/7-1-4/128kBit reduced by 166 WMOPS

parent b8499d44
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -154,4 +154,5 @@
#define FIX_ISSUE_1245                          /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/
#define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
#define SVD_WMOPS_OPT                           /* Ittiam : SVD related optimizations */
#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST  /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */
#endif
+83 −0
Original line number Diff line number Diff line
@@ -723,9 +723,16 @@ static void ivas_param_mc_param_est_enc_fx(
        }
    }

#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
    /* The guard bits are derived from l_ts only, so they are computed once here
     * instead of once per time slot inside the loop below. */
    Word16 gb = find_guarded_bits_fx( l_ts );
    /* Pre-computed 20 + gb: the exponent that accompanies the slot-frame spectrum
     * values in the covariance and downmix computations further down. */
    Word16 add20gb = add( 20, gb );
#endif

    FOR( ts = start_ts; ts < num_time_slots; ts++ )
    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
        Word16 gb = find_guarded_bits_fx( l_ts );
#endif
        ivas_fb_mixer_get_windowed_fr_fx( hParamMC->hFbMixer, pcm_in_fx, p_slot_frame_f_real_fx, p_slot_frame_f_imag_fx, l_ts, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans, gb );
        ivas_fb_mixer_update_prior_input_fx( hParamMC->hFbMixer, pcm_in_fx, l_ts, hParamMC->hFbMixer->fb_cfg->num_in_chans );

@@ -810,10 +817,25 @@ static void ivas_param_mc_param_est_enc_fx(
                {
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
                        b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
                        c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
                        d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
#else
                        /* Build mantissa/exponent pairs for the two complex bins directly:
                         * norm_l() yields the left-shift count, L_shl() applies it to the
                         * mantissa, and the same count is subtracted from the common input
                         * exponent add20gb (= 20 + gb). This replaces the per-value call
                         * BASOP_Util_Add_Mant32Exp( x, 20 + gb, 0, 0, &e ) of the old branch. */

                        /* a + ib : bin of channel ch_idx1 */
                        a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band]);
                        a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e);
                        a_e = sub(add20gb, a_e);
                        b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
                        b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
                        b_e = sub( add20gb, b_e );
                        /* c + id : bin of channel ch_idx2 */
                        c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
                        c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
                        c_e = sub( add20gb, c_e );
                        d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
                        d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
                        d_e = sub( add20gb, d_e );
#endif

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
@@ -844,6 +866,7 @@ static void ivas_param_mc_param_est_enc_fx(

                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    dmx_real_fx[ch_idx1] = 0;
                    move32();
                    dmx_real_e[ch_idx1] = 0;
@@ -863,13 +886,49 @@ static void ivas_param_mc_param_est_enc_fx(
                        move32();
                        p_dmx_fac_fx++;
                    }
#else
                    Word32 real_fx = L_add(0, 0);
                    Word16 real_e = 0;
                    move16();
                    Word32 imag_fx = L_add( 0, 0 );
                    Word16 imag_e = 0;
                    move16();

                    /* Accumulate the weighted sum of all input channels for transport
                     * channel ch_idx1 in local mantissa/exponent pairs. Every product is
                     * added with the hoisted exponent add20gb (= 20 + gb), so no add()
                     * is evaluated inside this inner loop. */
                    FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ )
                    {
                        L_tmp = Mpy_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
                        real_fx = BASOP_Util_Add_Mant32Exp( real_fx, real_e, L_tmp, add20gb, &real_e );
                        L_tmp = Mpy_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) );
                        /* same value as add( 20, gb ), taken from the pre-computed constant */
                        imag_fx = BASOP_Util_Add_Mant32Exp( imag_fx, imag_e, L_tmp, add20gb, &imag_e );
                        p_dmx_fac_fx++;
                    }
                    dmx_real_fx[ch_idx1] = real_fx;
                    move32();
                    dmx_real_e[ch_idx1] = real_e;
                    move16();
                    dmx_imag_fx[ch_idx1] = imag_fx;
                    move32();
                    dmx_imag_e[ch_idx1] = imag_e;
                    move16();
#endif
                }

                /* Cx for transport channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    a_fx = dmx_real_fx[ch_idx1];
                    move32();
                    a_e = dmx_real_e[ch_idx1];
                    move16();
                    b_fx = dmx_imag_fx[ch_idx1];
                    move32();
                    b_e = dmx_imag_e[ch_idx1];
                    move16();
#endif
                    FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = dmx_real_fx[ch_idx1];
                        move32();
                        a_e = dmx_real_e[ch_idx1];
@@ -891,6 +950,12 @@ static void ivas_param_mc_param_est_enc_fx(
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#else
                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#endif
                        move32();
                    }
                }
@@ -898,12 +963,30 @@ static void ivas_param_mc_param_est_enc_fx(
                /* Cy for input channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
                {
#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                    /* a + ib depends on ch_idx1 only, so it is normalised once here,
                     * outside the ch_idx2 loop: norm_l() yields the left-shift count,
                     * L_shl() applies it, and the count is subtracted from the common
                     * input exponent add20gb (= 20 + gb). */
                    a_e = norm_l( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band] );
                    a_fx = L_shl( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], a_e );
                    a_e = sub( add20gb, a_e );
                    b_e = norm_l( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band] );
                    b_fx = L_shl( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], b_e );
                    b_e = sub( add20gb, b_e );
#endif
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST
                        a_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &a_e );
                        b_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band], add( 20, gb ), 0, 0, &b_e );
                        c_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &c_e );
                        d_fx = BASOP_Util_Add_Mant32Exp( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], add( 20, gb ), 0, 0, &d_e );
#else

                        c_e = norm_l( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band] );
                        c_fx = L_shl( slot_frame_f_real_fx[ch_idx2][cur_cldfb_band], c_e );
                        c_e = sub( add20gb, c_e );
                        d_e = norm_l( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band] );
                        d_fx = L_shl( slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band], d_e );
                        d_e = sub( add20gb, d_e );
#endif

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );