Commit 8e892511 authored by Sandesh Venkatesh
Browse files

Merge branch 'ivas_mcmasa_dmx_fx_nbe_opt' into 'main'

Optimization changes for ivas_mcmasa_dmx_fx function - non bit exact [allow regression]

See merge request !1866
parents a878b04b be8a2c4b
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -80,6 +80,7 @@

/* Note: each compile switch (FIX_1101_...) is independent from the other ones */
#define OPT_MCH_DEC_V1_NBE
#define OPT_MCT_ENC_48KB_NBE
#define OPT_MCH_DEC_V1_BE
#define OPT_MCT_ENC_V2_NBE
#define OPT_SBA_DEC_V2_NBE
+61 −3
Original line number Diff line number Diff line
@@ -1942,9 +1942,17 @@ static void ivas_mcmasa_dmx_fx(
    Word32 alpha_fx, L_tmp, L_tmp1;
    Word16 multiChEne_e, scale, downmixEne_e = 0, prevEQ_e, tmp, currEQ_e, instEQ_e;
    move16();
#ifdef OPT_MCT_ENC_48KB_NBE
    Word16 max_exp, tmp_exp, separateChannelFlag;
    Word64 tmp_64;
    Word64 multiChEne_64_fx = 0;
    Word64 downmixEne_64_fx = 0;
    move64();
    move64();
#endif

    numAnalysisChannels = sub( nchan_inp, 1 );
    IF( hMcMasa->separateChannelEnabled )
    if ( hMcMasa->separateChannelEnabled )
    {
        numAnalysisChannels = sub( nchan_inp, 2 );
    }
@@ -1957,22 +1965,45 @@ static void ivas_mcmasa_dmx_fx(
    {
        FOR( i = 0; i < input_frame; i++ )
        {
#ifdef OPT_MCT_ENC_48KB_NBE
            multiChEne_64_fx = W_mac_32_32( multiChEne_64_fx, data_fx[j][i], data_fx[j][i] ); // exp: 2*data_e
#else
            L_tmp1 = BASOP_Util_Add_Mant32Exp( data_fx[j][i], data_e, 0, 0, &scale );
            L_tmp = Mpy_32_32( L_tmp1, L_tmp1 ); // data_e + data_e
            multiChEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, scale + scale, multiChEne_fx, multiChEne_e, &scale );
            multiChEne_e = scale;
            move16();
#endif
        }
    }

#ifdef OPT_MCT_ENC_48KB_NBE
    tmp = shl( data_e, 1 );
#endif
    IF( EQ_16( nchan_transport, 2 ) )
    {
        Word16 numSideChannels; /* Channels other than left, right, center */
        Word16 leftIndex, rightIndex;
#ifdef OPT_MCT_ENC_48KB_NBE
        Word16 tmp_16;

        separateChannelFlag = 1;
        move16();
        if ( hMcMasa->separateChannelEnabled )
        {
            separateChannelFlag = 0;
            move16();
        }
#endif

        numSideChannels = sub( shr( numAnalysisChannels, 1 ), 1 );
        FOR( j = 0; j < numSideChannels; j++ )
        {
#ifdef OPT_MCT_ENC_48KB_NBE
            tmp_16 = add( shl( j, 1 ), 2 );

            leftIndex = add( tmp_16, separateChannelFlag );
            rightIndex = add( add( tmp_16, 1 ), separateChannelFlag );
#else
            IF( hMcMasa->separateChannelEnabled )
            {
                leftIndex = add( shl( j, 1 ), 2 );
@@ -1983,7 +2014,7 @@ static void ivas_mcmasa_dmx_fx(
                leftIndex = add( shl( j, 1 ), 3 );
                rightIndex = add( shl( j, 1 ), 4 );
            }

#endif
            FOR( i = 0; i < input_frame; i++ )
            {
                data_fx[0][i] = L_add( data_fx[0][i], data_fx[leftIndex][i] ); // data_e
@@ -2023,15 +2054,28 @@ static void ivas_mcmasa_dmx_fx(
    {
        FOR( i = 0; i < input_frame; i++ )
        {
#ifdef OPT_MCT_ENC_48KB_NBE
            downmixEne_64_fx = W_mac_32_32( downmixEne_64_fx, data_fx[j][i], data_fx[j][i] ); // exp: 2*data_e
#else
            L_tmp1 = BASOP_Util_Add_Mant32Exp( data_fx[j][i], data_e, 0, 0, &scale );
            L_tmp = Mpy_32_32( L_tmp1, L_tmp1 ); // data_e + data_e
            downmixEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, scale + scale, downmixEne_fx, downmixEne_e, &downmixEne_e );
#endif
        }
    }

    alpha_fx = 214748364; // 0.1 in Q31
    move32();

#ifdef OPT_MCT_ENC_48KB_NBE
    scale = W_norm( multiChEne_64_fx );
    multiChEne_fx = W_extract_h( W_shl( multiChEne_64_fx, scale ) );
    multiChEne_e = sub( tmp, scale );

    scale = W_norm( downmixEne_64_fx );
    downmixEne_fx = W_extract_h( W_shl( downmixEne_64_fx, scale ) );
    downmixEne_e = sub( tmp, scale );
#endif
    L_tmp = Mpy_32_32( alpha_fx, multiChEne_fx );
    L_tmp1 = Mpy_32_32( 1932735284 /* 0.9f in Q31 */, hMcMasa->prevMultiChEne_fx );
    hMcMasa->prevMultiChEne_fx = BASOP_Util_Add_Mant32Exp( L_tmp, multiChEne_e, L_tmp1, hMcMasa->prevMultiChEne_e, &hMcMasa->prevMultiChEne_e );
@@ -2056,12 +2100,26 @@ static void ivas_mcmasa_dmx_fx(
    hMcMasa->prevEQ_e = currEQ_e;
    move16();

#ifdef OPT_MCT_ENC_48KB_NBE
    max_exp = s_max( prevEQ_e, currEQ_e );
    prevEQ_fx = L_shl( prevEQ_fx, sub( prevEQ_e, max_exp ) ); // exp:max_exp
    currEQ_fx = L_shl( currEQ_fx, sub( currEQ_e, max_exp ) ); // exp:max_exp
    tmp_exp = add( max_exp, 16 );
#endif

    FOR( i = 0; i < input_frame; i++ )
    {
#ifdef OPT_MCT_ENC_48KB_NBE
        tmp_64 = W_mac_32_32( W_mult_32_16( currEQ_fx, hMcMasa->interpolator_fx[i] ), prevEQ_fx, L_sub( ONE_IN_Q15, hMcMasa->interpolator_fx[i] ) ); // exp:max_exp +16
        scale = W_norm( tmp_64 );
        instEQ_fx = W_extract_h( W_shl( tmp_64, scale ) );
        instEQ_e = sub( tmp_exp, scale );
#else
        L_tmp = Mpy_32_32( L_deposit_h( hMcMasa->interpolator_fx[i] ), currEQ_fx );
        L_tmp1 = L_sub( 1073741824 /* 1 in Q30 */, L_lshr( L_deposit_h( hMcMasa->interpolator_fx[i] ), 1 ) );
        L_tmp1 = Mpy_32_32( L_tmp1, prevEQ_fx );
        instEQ_fx = BASOP_Util_Add_Mant32Exp( L_tmp, currEQ_e, L_tmp1, add( prevEQ_e, 1 ), &instEQ_e );
#endif

        FOR( j = 0; j < nchan_transport; j++ )
        {