Commit 82971b4d authored by Sandesh Venkatesh
Browse files

Merge branch '1462-improve-wmops-performance-of-ivas_param_mc_param_est_enc_fx-step-1' into 'main'

Resolve "Improve WMOPS Performance of ivas_param_mc_param_est_enc_fx() step 1"

Closes #1462

See merge request !1378
parents 0b3eb12b e586b634
Loading
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -78,10 +78,10 @@
#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot   /*FhG: WMOPS tuning, nonbe*/
/* The following two macros (IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_*) are independent of each other; they refer to different code blocks. */
#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE    /* FhG: reduces WMOPS of param_mc_prm_est, bit-exact to previous version */
#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version */
//#define IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE /* FhG: reduces WMOPS of param_mc_prm_est, not bit-exact to previous version. Obsoleted by MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */
#define HARM_PUSH_BIT
#define HARM_ENC_INIT
//#define HARM_SCE_INIT
#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 

#define	MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */
#endif
+139 −10
Original line number Diff line number Diff line
@@ -59,7 +59,7 @@ static void ivas_param_mc_range_encoder_fx( const Word16 *seq_in, const Word16 n


#define ATTACKTHRESHOLD_E 4
static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] );
static void ivas_param_mc_quantize_ilds_fx( PARAM_MC_ENC_HANDLE hParamMC, Word32 Cy_fx[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word16 Cy_e[MAX_CICP_CHANNELS][MAX_CICP_CHANNELS], Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], Word16 Cx_e[PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS], const Word16 freq_idx, const Word16 nchan_input, const Word16 nchan_transport, Word16 *ILD_idx_out, Word16 ILD_q[PARAM_MC_SZ_ILD_MAP] );

static void ivas_param_mc_parameter_quantizer_fx( const Word32 *x, const Word16 *x_e, const Word16 L, const Word16 sz_quantizer, const Word16 *quantizer_fx, const Word16 Q_quant, Word16 *quant_idx, Word16 *y );

@@ -655,8 +655,13 @@ static void ivas_param_mc_param_est_enc_fx(
    Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS];  /* Downmix channel - Imag Part */
    Word32 a_fx, b_fx, c_fx, d_fx;                    /* Tmp complex values */
    Word16 a_e, b_e, c_e, d_e;                        /* Tmp complex values */
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
    Word64 Cy_sum_real_64[PARAM_MC_MAX_PARAMETER_BANDS][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS];
    Word64 Cy_sum_imag_64[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS];
#else
    Word32 Cy_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS];
    Word16 Cy_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][MAX_CICP_CHANNELS][MAX_CICP_CHANNELS];
#endif
    Word32 Cx_sum_imag_fx[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS];
    Word16 Cx_sum_imag_e[PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC][PARAM_MC_MAX_TRANSPORT_CHANS][PARAM_MC_MAX_TRANSPORT_CHANS];
    Word32 real_part_fx, imag_part_fx;
@@ -685,13 +690,26 @@ static void ivas_param_mc_param_est_enc_fx(
    move16();
    band_step = 1;
    move16();
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
    FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ )
    {
        FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ )
        {
            set64_fx( Cy_sum_real_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
        }
    }
#endif

    FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAM_BAND_ABS_COV_ENC; cur_param_band++ )
    {
        FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ )
        {
            set32_fx( Cy_sum_imag_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
            set16_fx( Cy_sum_imag_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
            set64_fx( Cy_sum_imag_64[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
#else
            set32_fx( Cy_sum_fx[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
            set16_fx( Cy_sum_e[cur_param_band][ch_idx1], 0, MAX_CICP_CHANNELS );
#endif
        }

        FOR( ch_idx1 = 0; ch_idx1 < PARAM_MC_MAX_TRANSPORT_CHANS; ch_idx1++ )
@@ -832,7 +850,33 @@ static void ivas_param_mc_param_est_enc_fx(
                        move32();
                    }
                }
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
                {
                    a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band];
                    b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band];
                    move32();
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
                        Word16 norm;
                        c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band];
                        d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band];
                        move32();
                        // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc)
                        Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2],
                                                                                  W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) );
                        move64();
                        Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2],
                                                                                  W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) );
                        move64();

                        // convert the 64 bit fixpoint back into the 48 bit float format
                        norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                        Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                        Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm );
                    }
                }
#else
                /* Cy for input channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
                {
@@ -890,6 +934,7 @@ static void ivas_param_mc_param_est_enc_fx(
                        move32();
                    }
                }
#endif
            }
        }

@@ -1023,6 +1068,28 @@ static void ivas_param_mc_param_est_enc_fx(
                        move16();
                    }
#endif
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                    a_fx = slot_frame_f_real_fx[ch_idx1][cur_cldfb_band];
                    b_fx = slot_frame_f_imag_fx[ch_idx1][cur_cldfb_band];
                    move32();
                    move32();
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
                        Word16 norm;
                        c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band];
                        d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band];
                        move32();
                        move32();
                        // Conjugated complex multiplication (a-ib)(c+id) = ac+bd + i(ad-bc)
                        Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2],
                                                                                  W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) );
                        move64();
                        // convert the 64 bit fixpoint back into the 48 bit float format
                        norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                        Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                        Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm );
                    }
#else
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE
@@ -1054,6 +1121,7 @@ static void ivas_param_mc_param_est_enc_fx(
                                                                                                &Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] );
                        move32();
                    }
#endif
                }
            }
        }
@@ -1075,6 +1143,16 @@ static void ivas_param_mc_param_est_enc_fx(
                move32();
                Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move16();
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move64();
                Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move64();
                Cy_sum_imag_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move64();
                Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move64();
#else
                Cy_sum_imag_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_imag_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
@@ -1083,6 +1161,7 @@ static void ivas_param_mc_param_est_enc_fx(
                move32();
                Cy_sum_imag_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move16();
#endif
            }
        }

@@ -1090,6 +1169,12 @@ static void ivas_param_mc_param_est_enc_fx(
        {
            FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
            {
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move64();
                Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move64();
#endif
                Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
@@ -1134,10 +1219,18 @@ static void ivas_param_mc_param_est_enc_fx(
                /* get ICLDs */
                FOR( k = 0; k < nchan_input; ++k )
                {
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                    Word16 norm;
                    // convert the 64 bit fixpoint back into the 48 bit float format
                    norm = W_norm( Cy_sum_real_64[cur_param_band][k][k] );
                    Nrg_fx[k] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][k][k], norm ) );
                    Nrg_e[k] = sub( sub( 62, gb ), norm );
#else
                    Nrg_fx[k] = Cy_sum_fx[cur_param_band][k][k];
                    move32();
                    Nrg_e[k] = Cy_sum_e[cur_param_band][k][k];
                    move16();
#endif
                }
                FOR( k = 0; k < num_ilds_to_code; ++k )
                {
@@ -1204,6 +1297,13 @@ static void ivas_param_mc_param_est_enc_fx(
            {
                FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                {
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                    Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                    move64();
                    Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_imag_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] );
                    move64();

#else
                    Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2],
                                                                                                Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2],
                                                                                                &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] );
@@ -1212,6 +1312,7 @@ static void ivas_param_mc_param_est_enc_fx(
                                                                                                     Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2],
                                                                                                     &Cy_sum_imag_e[cur_param_band - 1][ch_idx1][ch_idx2] );
                    move32();
#endif
                }
            }
        }
@@ -1235,10 +1336,14 @@ static void ivas_param_mc_param_est_enc_fx(
                {
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                        Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
#else
                        Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cy_sum_fx[cur_param_band - 1][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2],
                                                                                                    Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cy_sum_e[cur_param_band][ch_idx1][ch_idx2],
                                                                                                    &Cy_sum_e[cur_param_band - 1][ch_idx1][ch_idx2] );
                        move32();
#endif
                    }
                }
            }
@@ -1247,7 +1352,24 @@ static void ivas_param_mc_param_est_enc_fx(
        band_step = 2;
        move16();
    }

#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
    {
        // convert the 64 bit fixpoint back into the 48 bit float format
        FOR( cur_param_band = 0; cur_param_band < PARAM_MC_MAX_PARAMETER_BANDS; cur_param_band++ )
        {
            FOR( ch_idx1 = 0; ch_idx1 < MAX_CICP_CHANNELS; ch_idx1++ )
            {
                FOR( ch_idx2 = 0; ch_idx2 < MAX_CICP_CHANNELS; ch_idx2++ )
                {
                    Word16 norm;
                    norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                    Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                    Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm );
                }
            }
        }
    }
#endif

    /* map complex covariances to real values */
    FOR( cur_param_band = 0; cur_param_band < hParamMC->max_param_band_abs_cov; cur_param_band += band_step )
@@ -1284,15 +1406,22 @@ static void ivas_param_mc_param_est_enc_fx(
        {
            FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ch_idx2++ )
            {
                real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2];
                move32();
                real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2];
                move16();
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                Word16 norm;
                // convert the 64 bit fixpoint back into the 48 bit float format
                norm = W_norm( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2] );
                imag_part_fx = W_extract_h( W_shl( Cy_sum_imag_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                imag_part_e = sub( sub( 62, gb ), norm );
#else
                imag_part_fx = Cy_sum_imag_fx[cur_param_band][ch_idx1][ch_idx2];
                move32();
                imag_part_e = Cy_sum_imag_e[cur_param_band][ch_idx1][ch_idx2];
                move16();

#endif
                real_part_fx = Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2];
                move32();
                real_part_e = Cy_sum_e[cur_param_band][ch_idx1][ch_idx2];
                move16();
                real_part_fx = Mpy_32_32( real_part_fx, real_part_fx );
                imag_part_fx = Mpy_32_32( imag_part_fx, imag_part_fx );