Commit 56a9c797 authored by Anjaneyulu Sana
Browse files

Optimization changes for Encoder Decoder

parent b4a8a48f
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1536,6 +1536,7 @@ enum
/* NOTE(review): the replacement list of SHB_OVERLAP_LEN is not wrapped in outer parentheses,
 * so operators adjacent to the macro at a use site bind to its individual terms. */
#define SHB_OVERLAP_LEN                     ( L_FRAME16k - L_SHB_LAHEAD ) / ( NUM_SHB_SUBFR - 1 )
#define QUANT_DIST_INIT                     ( 10000000000.0f )      /* Quantiser search distance initialisation */
/* NOTE(review): HIBND_ACB_L_FAC expands to the bare tokens `5 / 2`. Written as `n * HIBND_ACB_L_FAC`
 * it yields ( n * 5 ) / 2, but passed on its own as an integer argument it evaluates to 2
 * (integer division), not 2.5. */
#define HIBND_ACB_L_FAC                     5 / 2                   /* SHB Interpolation Factor */
/* Same factor as a plain integer: 5 == 2.5 in Q1. Presumably meant to be used as
 * multiply-by-5 followed by a right shift by 1 - confirm at the call sites. */
#define HIBND_ACB_L_FAC_Q1                  ( 5 )                   /* SHB Interpolation Factor Q1 */
#define NUM_HILBERTS                        2
#define HILBERT_ORDER1                      5
#define HILBERT_ORDER2                      4
+21 −21
Original line number Diff line number Diff line
@@ -4978,15 +4978,15 @@ static void fft_len20_fx(
    cmplx tt[4];
    cmplx y[20];

    xx[0] = CL_shr( x[0], SCALEFACTOR20 ); // Qx
    xx[0] = x[0]; // CL_shr( x[0], SCALEFACTOR20 ); // Qx
    move64();
    xx[1] = CL_shr( x[16], SCALEFACTOR20 ); // Qx
    xx[1] = x[16]; // CL_shr( x[16], SCALEFACTOR20 ); // Qx
    move64();
    xx[2] = CL_shr( x[12], SCALEFACTOR20 ); // Qx
    xx[2] = x[12]; // CL_shr( x[12], SCALEFACTOR20 ); // Qx
    move64();
    xx[3] = CL_shr( x[8], SCALEFACTOR20 ); // Qx
    xx[3] = x[8]; // CL_shr( x[8], SCALEFACTOR20 ); // Qx
    move64();
    xx[4] = CL_shr( x[4], SCALEFACTOR20 ); // Qx
    xx[4] = x[4]; // CL_shr( x[4], SCALEFACTOR20 ); // Qx
    move64();

    s[0] = CL_add( xx[1], xx[4] );
@@ -5023,15 +5023,15 @@ static void fft_len20_fx(
    y[12] = CL_msu_j( s[2], s[3] );
    move64();

    xx[0] = CL_shr( x[5], SCALEFACTOR20 );
    xx[0] = x[5]; // CL_shr( x[5], SCALEFACTOR20 );
    move64();
    xx[1] = CL_shr( x[1], SCALEFACTOR20 );
    xx[1] = x[1]; // CL_shr( x[1], SCALEFACTOR20 );
    move64();
    xx[2] = CL_shr( x[17], SCALEFACTOR20 );
    xx[2] = x[17]; // CL_shr( x[17], SCALEFACTOR20 );
    move64();
    xx[3] = CL_shr( x[13], SCALEFACTOR20 );
    xx[3] = x[13]; // CL_shr( x[13], SCALEFACTOR20 );
    move64();
    xx[4] = CL_shr( x[9], SCALEFACTOR20 );
    xx[4] = x[9]; // CL_shr( x[9], SCALEFACTOR20 );
    move64();

    s[0] = CL_add( xx[1], xx[4] );
@@ -5068,15 +5068,15 @@ static void fft_len20_fx(
    y[13] = CL_msu_j( s[2], s[3] );
    move64();

    xx[0] = CL_shr( x[10], SCALEFACTOR20 );
    xx[0] = x[10];  // CL_shr( x[10], SCALEFACTOR20 );
    move64();
    xx[1] = CL_shr( x[6], SCALEFACTOR20 );
    xx[1] = x[6]; // CL_shr( x[6], SCALEFACTOR20 );
    move64();
    xx[2] = CL_shr( x[2], SCALEFACTOR20 );
    xx[2] = x[2]; // CL_shr( x[2], SCALEFACTOR20 );
    move64();
    xx[3] = CL_shr( x[18], SCALEFACTOR20 );
    xx[3] = x[18]; // CL_shr( x[18], SCALEFACTOR20 );
    move64();
    xx[4] = CL_shr( x[14], SCALEFACTOR20 );
    xx[4] = x[14]; // CL_shr( x[14], SCALEFACTOR20 );
    move64();

    s[0] = CL_add( xx[1], xx[4] );
@@ -5113,15 +5113,15 @@ static void fft_len20_fx(
    y[14] = CL_msu_j( s[2], s[3] );
    move64();

    xx[0] = CL_shr( x[15], SCALEFACTOR20 );
    xx[0] = x[15]; // CL_shr( x[15], SCALEFACTOR20 );
    move64();
    xx[1] = CL_shr( x[11], SCALEFACTOR20 );
    xx[1] = x[11]; // CL_shr( x[11], SCALEFACTOR20 );
    move64();
    xx[2] = CL_shr( x[7], SCALEFACTOR20 );
    xx[2] = x[7]; // CL_shr( x[7], SCALEFACTOR20 );
    move64();
    xx[3] = CL_shr( x[3], SCALEFACTOR20 );
    xx[3] = x[3]; // CL_shr( x[3], SCALEFACTOR20 );
    move64();
    xx[4] = CL_shr( x[19], SCALEFACTOR20 );
    xx[4] = x[19]; // CL_shr( x[19], SCALEFACTOR20 );
    move64();

    s[0] = CL_add( xx[1], xx[4] );
@@ -7173,7 +7173,7 @@ void rfft_fx(
                move32();
                x[( length - ( i << 1 ) )] = Mpy_32_16_1( L_add( t1, t3 ), 16384 /*0.5.Q15*/ );
                move32();
                x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( L_negate( L_add( t2, t4 ) ), 16384 /*0.5.Q15*/ );
                x[( ( length - ( i << 1 ) ) + 1 )] = Mpy_32_16_1( ( L_add( t2, t4 ) ), -16384 /*-0.5.Q15*/ );
                move32();
            }

+29 −86
Original line number Diff line number Diff line
@@ -469,8 +469,9 @@ void hp20_fx_32(
{
    Word16 i;
    Word32 a1_fx, a2_fx, b1_fx, b2_fx;
    Word16 Qx0, Qx1, Qx2, Qy1, Qprev_y1, Qy2, Qprev_y2, Qmin;
    Word64 x0_fx64, x1_fx64, x2_fx64, y0_fx64, y1_fx64, y2_fx64, R1, R2, R3, R4, R5;
    Word16 Qy1, Qy2, Qmin;
    Word64 y0_fx64, y1_fx64, y2_fx64;
    Word32 x0, x1, x2;

    IF( EQ_32( Fs, 8000 ) )
    {
@@ -521,20 +522,22 @@ void hp20_fx_32(
    move32();
    move32();

    Qprev_y1 = extract_l( mem_fx[4] );
    Qprev_y2 = extract_l( mem_fx[5] );
    y1_fx64 = W_deposit32_l( mem_fx[0] );
    y2_fx64 = W_deposit32_l( mem_fx[1] );
    x0_fx64 = W_deposit32_l( mem_fx[2] );
    x1_fx64 = W_deposit32_l( mem_fx[3] );
    y1_fx64 = W_add( W_deposit32_l( mem_fx[0] ), W_deposit32_h( mem_fx[1] ) );
    y2_fx64 = W_add( W_deposit32_l( mem_fx[2] ), W_deposit32_h( mem_fx[3] ) );

    x0 = mem_fx[4];
    move32();
    x1 = mem_fx[5];
    move32();

    FOR( i = 0; i < lg; i++ )
    {
        x2_fx64 = x1_fx64;
        move64();
        x1_fx64 = x0_fx64;
        move64();
        x0_fx64 = W_deposit32_l( signal_fx[i] );
        x2 = x1;
        move32();
        x1 = x0;
        move32();
        x0 = signal_fx[i];
        move32();

        Qy1 = W_norm( y1_fx64 );
        if ( y1_fx64 == 0 )
@@ -542,9 +545,6 @@ void hp20_fx_32(
            Qy1 = 62;
            move16();
        }
        Qy1 = sub( Qy1, 34 );
        R1 = W_mult0_32_32( W_extract_l( W_shl( y1_fx64, Qy1 ) ), a1_fx );
        Qy1 = add( Qy1, Qprev_y1 );

        Qy2 = W_norm( y2_fx64 );
        if ( y2_fx64 == 0 )
@@ -552,89 +552,32 @@ void hp20_fx_32(
            Qy2 = 62;
            move16();
        }
        Qy2 = sub( Qy2, 34 );
        R2 = W_mult0_32_32( W_extract_l( W_shl( y2_fx64, Qy2 ) ), a2_fx );
        Qy2 = add( Qy2, Qprev_y2 );

        Qx0 = W_norm( x0_fx64 );
        if ( x0_fx64 == 0 )
        {
            Qx0 = 62;
            move16();
        }
        Qx0 = sub( Qx0, 34 );
        R3 = W_mult0_32_32( W_extract_l( W_shl( x0_fx64, Qx0 ) ), b2_fx );

        Qx1 = W_norm( x1_fx64 );
        if ( x1_fx64 == 0 )
        {
            Qx1 = 62;
            move16();
        }
        Qx1 = sub( Qx1, 34 );
        R4 = W_mult0_32_32( W_extract_l( W_shl( x1_fx64, Qx1 ) ), b1_fx );

        Qx2 = W_norm( x2_fx64 );
        if ( x2_fx64 == 0 )
        {
            Qx2 = 62;
            move16();
        }
        Qx2 = sub( Qx2, 34 );
        R5 = W_mult0_32_32( W_extract_l( W_shl( x2_fx64, Qx2 ) ), b2_fx );

        Qmin = s_min( Qy1, Qy2 );

        y0_fx64 = W_add( W_shr( R1, sub( Qy1, Qmin ) ), W_shr( R2, sub( Qy2, Qmin ) ) );

        Qmin = s_min( Qmin, Qx0 );
        Qmin = s_min( Qmin, Qx1 );
        Qmin = s_min( Qmin, Qx2 );
        Qmin = sub( Qmin, 34 );

        y0_fx64 = W_add( W_shr( y0_fx64, sub( s_min( Qy1, Qy2 ), Qmin ) ), W_add( W_shr( R3, sub( Qx0, Qmin ) ), W_add( W_shr( R4, sub( Qx1, Qmin ) ), W_shr( R5, sub( Qx2, Qmin ) ) ) ) );
        y0_fx64 = W_mac_32_32( W_mult_32_32( W_shl_sat_l( y1_fx64, Qmin ), a1_fx ), W_shl_sat_l( y2_fx64, Qmin ), a2_fx ); // Qmin + Q29 + Q30 + 1

        y0_fx64 = W_shr( y0_fx64, 29 );

        signal_fx[i] = W_extract_l( W_shr( y0_fx64, Qmin ) );
        move32();
        IF( signal_fx[i] < 0 )
        {
            signal_fx[i] = L_add( signal_fx[i], 1 );
        Word64 temp = W_mac_32_32( W_mac_32_32( W_mult_32_32( x2, b2_fx ), x1, b1_fx ), x0, b2_fx ); // Q30
        Word64 y0_fx = W_shr( y0_fx64, add( Qmin, Q30 ) );                                           // Q30
        y0_fx64 = W_add( temp, y0_fx );                                                              // Q30
        signal_fx[i] = W_extract_l( W_shr( y0_fx64, Q30 ) );
        move32();
        }

        y2_fx64 = y1_fx64;
        y1_fx64 = y0_fx64;
        Qprev_y2 = Qprev_y1;
        Qprev_y1 = Qmin;
        move64();
        y1_fx64 = y0_fx64;
        move64();
        move16();
        move16();
    }

    Qy1 = W_norm( y1_fx64 );
    test();
    IF( y1_fx64 != 0 && LT_16( Qy1, 32 ) )
    {
        y1_fx64 = W_shr( y1_fx64, sub( 32, Qy1 ) );
        Qprev_y1 = sub( Qprev_y1, sub( 32, Qy1 ) );
    }

    Qy2 = W_norm( y2_fx64 );
    test();
    IF( y2_fx64 != 0 && LT_16( Qy2, 32 ) )
    {
        y2_fx64 = W_shr( y2_fx64, sub( 32, Qy2 ) );
        Qprev_y2 = sub( Qprev_y2, sub( 32, Qy2 ) );
    }

    mem_fx[0] = W_extract_l( y1_fx64 );
    mem_fx[1] = W_extract_l( y2_fx64 );
    mem_fx[2] = W_extract_l( x0_fx64 );
    mem_fx[3] = W_extract_l( x1_fx64 );
    mem_fx[4] = Qprev_y1;
    mem_fx[5] = Qprev_y2;
    mem_fx[1] = W_extract_h( y1_fx64 );
    mem_fx[2] = W_extract_l( y2_fx64 );
    mem_fx[3] = W_extract_h( y2_fx64 );
    mem_fx[4] = x0;
    mem_fx[5] = x1;

    move32();
    move32();
    move32();
+1 −0
Original line number Diff line number Diff line
@@ -156,6 +156,7 @@
#define FIX_ISSUE_1214                          /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/
#define FIX_881_HILBERT_FILTER                  /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */
#define FIX_ISSUE_1245                          /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/
#define FIX_ISSUE_1291                          /* Ittiam: Wrong use of imult1616() in ACELP rescaling */
#define FIX_920_IGF_INIT_ERROR                  /* FhG: issue 920: fix bitrate mismatch in initial IGF config to avoid error message in same cases */
#define FIX_MINOR_SVD_WMOPS_MR1010X             /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */
#define SVD_WMOPS_OPT                           /* Ittiam : SVD related optimizations */
+5 −1
Original line number Diff line number Diff line
@@ -748,7 +748,11 @@ ivas_error acelp_core_dec_ivas_fx(
            IF( st->hMusicPF && st->hGSCDec )
            {
                Rescale_exc( st->hMusicPF->dct_post_old_exc_fx, exc_fx, bwe_exc_fx, st->hGSCDec->last_exc_dct_in_fx, st->L_frame,
#ifdef FIX_ISSUE_1291
                             shr( imult1616( st->L_frame, HIBND_ACB_L_FAC_Q1 ), 1 ), 0, &( st->Q_exc ), st->Q_subfr, NULL, 0, INACTIVE );
#else
                             imult1616( st->L_frame, HIBND_ACB_L_FAC ), 0, &( st->Q_exc ), st->Q_subfr, NULL, 0, INACTIVE );
#endif
            }
            IF( st->hPFstat != NULL )
            {
@@ -799,7 +803,7 @@ ivas_error acelp_core_dec_ivas_fx(
            Copy( syn1_fx + st->L_frame - L_SYN_MEM_CLAS_ESTIM, st->mem_syn_clas_estim_fx, L_SYN_MEM_CLAS_ESTIM );

            /* save and delay synthesis to be used by SWB BWE */
            Copy_Scale_sig( syn1_fx, temp_buf_fx, st->L_frame, sub( -1, st->Q_syn ) ); // Q_syn
            Copy_Scale_sig( syn1_fx, temp_buf_fx, st->L_frame, sub( -1, st->Q_syn ) ); // Q_syn -> Q(-1)
            IF( st->hBWE_FD != NULL )
            {
#ifdef FIX_ISSUE_1290
Loading