Commit 41f49e06 authored by Sandesh Venkatesh
Browse files

Merge branch '1439-complexity-overhead-stereo-to-stereo-32kbps-fb-float-vs-basop-BE' into 'main'

Resolve "Complexity Overhead Stereo to Stereo - 32kbps FB FLOAT vs BASOP" - Bitexact Changes

Closes #1439

See merge request !1375
parents 6bedfe13 f07a6f96
Loading
Loading
Loading
Loading
Loading
+7 −2
Original line number Diff line number Diff line
@@ -86,10 +86,15 @@
//#define HARM_SCE_INIT
#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 
#define	MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */
#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat            /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_synthesise_fb_high_band_fx             /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1481_HARDCODE_DIV                          /* FhG: hardcode division results in stereo_dmx_evs_init_encoder_fx() */

#define TEST_HR


#define REMOVE_EVS_DUPLICATES                   /* remove core-coder duplicated functions, ACELP low-band decoder */

#endif
+45 −1
Original line number Diff line number Diff line
@@ -6714,7 +6714,6 @@ void elliptic_bpf_48k_generic_fx(
        memory_fx[0][i] = shl_sat( memory_fx0[0][i], sub( *Q_input_fx, memory_fx_Q[0] ) );
        memory2_fx[1][i] = L_shl_sat( memory_fx2[1][i], sub( add( *Q_input_fx, 11 ), memory_fx_Q[1] ) );
        memory2_fx[2][i] = L_shl_sat( memory_fx2[2][i], sub( add( *Q_input_fx, 6 ), memory_fx_Q[2] ) );
        memory2_fx[3][i] = L_shl_sat( memory_fx2[3][i], sub( add( *Q_input_fx, 1 ), memory_fx_Q[3] ) );
        move32();
        move32();
        move32();
@@ -7074,6 +7073,50 @@ void synthesise_fb_high_band_fx(
    tmp3 = add( sub( Qout, add( sub( 1, exp ), exp_tmp ) ), 16 ); /*Qout - (1 -exp +exp_tmp) + 16 */
    FOR( i = 0; i < L_FRAME48k; i++ )
    {
#ifdef FIX_1439_SPEEDUP_synthesise_fb_high_band_fx
        /* Scale one sample by ratio2, then shift by tmp3 and keep the upper 16 bits.
         * The sign of L_tmp selects one of three mutually exclusive paths, so tmp16 is
         * written exactly once per iteration before it is stored to output[i]. */
        L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */
        Word32 L_tmp32;
        Word16 tmp16;

        /* Negative sample: negate, shift with saturation, take the high word, negate back.
         * The same condition is repeated so that every `if` guards a single operation.
         * NOTE(review): presumably this lets each step count as a conditional operation
         * rather than a branch in the WMOPS instrumentation - confirm against the BASOP
         * coding guidelines. */
        if ( L_tmp < 0 )
        {
            L_tmp32 = L_negate( L_tmp );
        }
        if ( L_tmp < 0 )
        {
            L_tmp32 = L_shl_sat( L_tmp32, tmp3 );
        }
        if ( L_tmp < 0 )
        {
            tmp16 = extract_h( L_tmp32 );
        }
        if ( L_tmp < 0 )
        {
            tmp16 = negate( tmp16 );
        }

        /* Zero sample: the output is zero; no shift is needed. */
        if ( L_tmp == 0 )
        {
            tmp16 = 0;
            move16();
        }

        /* Positive sample: shift with saturation and take the high word directly. */
        if ( L_tmp > 0 )
        {
            L_tmp32 = L_shl_sat( L_tmp, tmp3 );
        }
        if ( L_tmp > 0 )
        {
            tmp16 = extract_h( L_tmp32 );
        }

        output[i] = tmp16; /*Qout*/
        move16();

#else
        L_tmp = Mult_32_16( ratio2, tmp[i] ); /* Q(16-exp+exp_tmp-15 = 1-exp+exp_tmp) */
        IF( L_tmp < 0 )
        {
@@ -7085,6 +7128,7 @@ void synthesise_fb_high_band_fx(
            output[i] = extract_h( L_shl_sat( L_tmp, tmp3 ) ); /*Qout*/
            move16();
        }
#endif
    }
    return;
}
+25 −1
Original line number Diff line number Diff line
@@ -794,14 +794,38 @@ void Copy_Scale_sig_16_32_no_sat(
        }
        return;
    }
#ifdef FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat
    /* Power-of-two scale factor built from exp0 - 1. It is loop-invariant, so the
     * choice between the wide and the narrow multiply is made once, outside the loops. */
    L_tmp = L_shl_o( 1, exp0 - 1, &Overflow );

    IF( L_tmp >= 0x7FFF )
    {
        /* Factor does not fit a Word16 operand: keep the 32x16 wide multiply and
         * take the low 32 bits of the product, exactly as in the #else branch. */
        FOR( i = 0; i < lg; i++ )
        {
            // y[i] = L_mult0(x[i], L_tmp);
            y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) );
            move32(); /* Overflow can occur here */
        }
        return;
    }
    // ELSE
    /* The IF branch above returns, so this plain block plays the role of the ELSE. */
    {
        /* Factor fits in 16 bits: one L_mult per sample replaces the wide multiply + extract. */
        Word16 tmp = extract_l( L_tmp );
        FOR( i = 0; i < lg; i++ )
        {
            y[i] = L_mult( x[i], tmp );
            move32();
        }
    }
#else
    L_tmp = L_shl_o( 1, exp0 - 1, &Overflow );
    FOR( i = 0; i < lg; i++ )
    {
        // y[i] = L_mult0(x[i], L_tmp);
        y[i] = W_extract_l( W_mult_32_16( L_tmp, x[i] ) );
        move32(); /* Overflow can occur here */
    }
#endif
}

void Copy_Scale_sig_32_16(
    const Word32 x[], /* i  : signal to scale input           Qx        */
+10 −0
Original line number Diff line number Diff line
@@ -904,6 +904,9 @@ void stereo_icBWE_dec_fx(
    winSlope_fx = div_s( 1, winLen_fx );                       /* Q15 */
    alpha_fx = winSlope_fx;                                    /* Q15 */
    move16();
#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx
    /* Upper bound used by the alpha_fx update inside the FOR loop below.
     * It does not depend on the loop index, so it is computed once here
     * instead of once per iteration. */
    Word16 winSlope_fx_ = sub( 32767 /* 1.0 in Q15*/, winSlope_fx );
#endif
    FOR( i = 0; i < winLen_fx; i++ )
    {
        L_tmp = L_mult0( alpha_fx, icbweM2Ref_fx );                                                        /* Q29 */
@@ -911,10 +914,17 @@ void stereo_icBWE_dec_fx(
        tmp = shl( round_fx( L_tmp ), 1 );                                                                 /* Q14 */
        synthRef_fx[i] = Mpy_32_16_1( synthRef_fx[i], tmp );                                               /* Qsyn - 1 */
        move32();
#ifdef FIX_1439_SPEEDUP_stereo_icBWE_dec_fx
        /* Step alpha_fx by one window slope only while it is still at or below
         * 32767 - winSlope_fx; that threshold is held in winSlope_fx_ (set before
         * the loop), so no sub() is evaluated per iteration.
         * NOTE(review): the lowercase `if` presumably marks this as a conditional
         * single operation for the complexity counters - confirm. */
        if ( LE_16( alpha_fx, winSlope_fx_ ) )
        {
            alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */
        }
#else
        /* Reference form: same test, with the threshold recomputed on every iteration. */
        IF( LE_16( alpha_fx, sub( 32767 /* 1.0 in Q15*/, winSlope_fx ) ) )
        {
            alpha_fx = add( alpha_fx, winSlope_fx ); /* Q15 */
        }
#endif
    }

    FOR( ; i < NS2SA_FX2( st->output_Fs, FRAME_SIZE_NS ); i++ )
+10 −0
Original line number Diff line number Diff line
@@ -7022,14 +7022,24 @@ void ivas_swb_tbe_dec_fx(

            tmp1 = 0;
            move16();

#ifdef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx
    /* Fixed-point reciprocal used to map a sample index to a GainShape_fx index
     * without a per-sample division. 0x00333333 shifted right by 10 is about 3276.8;
     * assuming L_shr_r rounds to nearest, idx32 = 3277, i.e. ~0.05 in Q16. */
            Word32 idx32 = L_shr_r( 0x00333333, 10 ); /*NUM_SHB_SUBFR/L_FRAME16k*/ // Q16
#endif

            FOR( i = 0; i < L_FRAME16k; i++ )
            {
#ifndef FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx
                /* Reference form: idx = (NUM_SHB_SUBFR * i) / L_FRAME16k by integer division,
                 * with i == 0 special-cased so the division is skipped for the first sample. */
                Word16 idx = 0;
                move16();
                IF( i != 0 )
                {
                    idx = idiv1616( i_mult( NUM_SHB_SUBFR, i ), L_FRAME16k );
                }
#else
                /* Speed-up form: multiply i by the Q16 reciprocal idx32 (set before the loop)
                 * and keep the upper 16 bits, which drops the 16 fractional bits.
                 * NOTE(review): intended to give the same idx as the division above for every
                 * i in this loop (the switch is labelled bit-exact) - verify if the loop bound
                 * or the constant ever changes. */
                Word16 idx;
                idx = extract_h( imult3216( idx32, i ) ); /*Q0*/
#endif
                L_tmp1 = Mult_32_16( L_tmp, GainShape_fx[idx] );                           /* Q : 18 + tmp +15 -15*/
                White_exc16k_fx[i] = round_fx( Mult_32_16( L_tmp1, White_exc16k_fx[i] ) ); /* 18 + tmp +*Q_white_exc -15 -16 */
                move16();