Commit 4e6820a1 authored by Sandesh Venkatesh's avatar Sandesh Venkatesh
Browse files

LTV testing for optimizations - 13032025

parent 31b60873
Loading
Loading
Loading
Loading
Loading
+437 −14
Original line number Diff line number Diff line
@@ -400,8 +400,8 @@ void cldfbAnalysis_ts_fx(
            rr12_fx = L_sub( r1_fx, r2_fx );             // q -1
            ri12_fx = L_negate( L_add( i1_fx, i2_fx ) ); // q - 1
            /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/
            rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) );     // q - 3
            rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3
            rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] );     // q - 3
            rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3
            move32();
            move32();

@@ -409,8 +409,8 @@ void cldfbAnalysis_ts_fx(
            ir12_fx = L_add( r1_fx, r2_fx ); // q - 1
            ii12_fx = L_sub( i1_fx, i2_fx ); // q - 1
            /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/
            iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) );     // q - 3
            iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3
            iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] );     // q - 3
            iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3
            move32();
            move32();
        }
@@ -451,8 +451,8 @@ void cldfbAnalysis_ts_fx(
            rr12_fx = L_add( r1_fx, r2_fx ); // q - 1
            ri12_fx = L_sub( i1_fx, i2_fx ); // q - 1
            /*cplxMult(&rBuffer[2*k],&rBuffer[2*k+1],rr12,ri12,rot_vctr_re[k],rot_vctr_im[k]);*/
            rBuffer_fx[2 * k] = L_sub( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_im_fx[k] ) );     // q - 3
            rBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ri12_fx, rot_vctr_re_fx[k] ) ); // q - 3
            rBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( rr12_fx, rot_vctr_re_fx[k] ), ri12_fx, rot_vctr_im_fx[k] );     // q - 3
            rBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( rr12_fx, rot_vctr_im_fx[k] ), ri12_fx, rot_vctr_re_fx[k] ); // q - 3
            move32();
            move32();

@@ -460,8 +460,8 @@ void cldfbAnalysis_ts_fx(
            ir12_fx = L_sub( r1_fx, r2_fx ); // q - 1
            ii12_fx = L_add( i1_fx, i2_fx ); // q - 1
            /*cplxMult(&iBuffer[2*k],&iBuffer[2*k+1],ir12,ii12,rot_vctr_re[k],rot_vctr_im[k]);*/
            iBuffer_fx[2 * k] = L_sub( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_im_fx[k] ) );     // q - 3
            iBuffer_fx[2 * k + 1] = L_add( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), Mpy_32_32( ii12_fx, rot_vctr_re_fx[k] ) ); // q - 3
            iBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( ir12_fx, rot_vctr_re_fx[k] ), ii12_fx, rot_vctr_im_fx[k] );     // q - 3
            iBuffer_fx[2 * k + 1] = Madd_32_32( Mpy_32_32( ir12_fx, rot_vctr_im_fx[k] ), ii12_fx, rot_vctr_re_fx[k] ); // q - 3
            move32();
            move32();
        }
@@ -490,8 +490,8 @@ void cldfbAnalysis_ts_fx(
        FOR( k = 0; k < M2; k++ )
        {
            /*cplxMult(&realBuffer[M1-1-(2*k)],&realBuffer[2*k],rBuffer[2*k],rBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/
            realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = L_sub( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) ); // q - 5
            realBuffer_fx[2 * k] = L_add( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) );                      // q - 5
            realBuffer_fx[( ( M1 - 1 ) - ( k * 2 ) )] = Msub_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_re_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ); // q - 5
            realBuffer_fx[2 * k] = Madd_32_32( Mpy_32_32( rBuffer_fx[2 * k], rot_vctr_im_fx[k] ), rBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] );                      // q - 5
            move32();
            move32();
        }
@@ -520,8 +520,8 @@ void cldfbAnalysis_ts_fx(
        {
            /* do it inplace */
            /*cplxMult(&imagBuffer[2*k],&imagBuffer[M1-1-(2*k)],iBuffer[2*k],iBuffer[2*k+1],rot_vctr_re[k],rot_vctr_im[k]);*/
            imagBuffer_fx[2 * k] = L_sub( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] ) );                  // q - 5
            imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = L_add( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), Mpy_32_32( iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ) ); // q - 5
            imagBuffer_fx[2 * k] = Msub_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_re_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_im_fx[k] );                  // q - 5
            imagBuffer_fx[( M1 - 1 ) - ( k * 2 )] = Madd_32_32( Mpy_32_32( iBuffer_fx[2 * k], rot_vctr_im_fx[k] ), iBuffer_fx[2 * k + 1], rot_vctr_re_fx[k] ); // q - 5
            move32();
            move32();
        }
@@ -542,8 +542,8 @@ void cldfbAnalysis_ts_fx(
                    /*cplxMult(&realBuffer[k], &imagBuffer[k], realBuffer[k], imagBuffer[k], rot_vctr_delay_re[k], rot_vctr_delay_im[k]);*/
                    /*realBuffer[k] = rBuffer[k];
                    imagBuffer[k] = iBuffer[k];*/
                    cplx_aux_fx = L_sub( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_im_fx[k] ) );      // q - 5
                    imagBuffer_fx[k] = L_add( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), Mpy_32_32( imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ) ); // q - 5
                    cplx_aux_fx = Msub_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_re_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_im_fx[k] );      // q - 5
                    imagBuffer_fx[k] = Madd_32_32( Mpy_32_32( realBuffer_fx[k], rot_vctr_delay_im_fx[k] ), imagBuffer_fx[k], rot_vctr_delay_re_fx[k] ); // q - 5
                    realBuffer_fx[k] = cplx_aux_fx;
                    move32();
                    move32();
@@ -1305,6 +1305,429 @@ void cldfbSynthesis_ivas_fx(
    return;
}

/*
 * cldfbSynthesis_ivas_fx_2()
 *
 * Fixed-point CLDFB synthesis of up to h_cldfb->no_col time slots.
 * For every slot k the routine
 *   1. optionally rotates the complex input in place with the delay rotation
 *      vectors (only for CLDFB_PROTOTYPE_5_00MS and a non-NULL delay vector),
 *   2. zeroes the input bands Mz .. M1-1 in place,
 *   3. pre-modulates into rBuffer_fx / iBuffer_fx and runs fft_cldfb_fx on each,
 *   4. post-modulates and folds the results into new_samples_fx (2 * M1 values),
 *   5. feeds the new samples through the prototype filter into the synthesis
 *      buffer and writes M1 output samples to timeOut_fx.
 * The filter memory is loaded from h_cldfb->cldfb_state_fx at entry and written
 * back before returning.
 *
 * NOTE: realBuffer_fx and imagBuffer_fx are modified by steps 1 and 2.
 */
void cldfbSynthesis_ivas_fx_2(
    Word32 **realBuffer_fx,          /* i/o: real values, indexed [slot][band]; rotated/zeroed in place  Qx*/
    Word32 **imagBuffer_fx,          /* i/o: imag values, indexed [slot][band]; rotated/zeroed in place  Qx*/
    Word32 *timeOut_fx,              /* o  : output time domain samples, M1 per processed slot           Qx - 1*/
    const Word16 samplesToProcess,   /* i  : number of samples to process; limits no_col when > -1       */
    const Word16 shift,              /* i  : saturating left shift applied to the scaled new samples     */
    HANDLE_CLDFB_FILTER_BANK h_cldfb /* i/o: filter bank state (cldfb_state_fx is updated)               */
)
{
    Word16 i;
    Word16 k;
    Word16 L2;
    Word16 M1;
    Word16 M2;
    Word16 M41;
    Word16 M42;
    Word16 Mz;

    Word32 rBuffer_fx[2 * CLDFB_NO_CHANNELS_MAX];
    Word32 iBuffer_fx[2 * CLDFB_NO_CHANNELS_MAX];
    const Word32 *rot_vctr_re_fx;
    const Word32 *rot_vctr_im_fx;
    const Word32 *rot_vctr_delay_re_fx;
    const Word32 *rot_vctr_delay_im_fx;
    Word32 rr12_fx, ir12_fx;
    Word32 ri12_fx, ii12_fx;

    Word32 *synthesisBuffer_fx, buffer_fx[( CLDFB_NO_CHANNELS_MAX * CLDFB_NO_COL_MAX ) + ( 10 * CLDFB_NO_CHANNELS_MAX )];
    Word32 new_samples_fx[2 * CLDFB_NO_CHANNELS_MAX];

    Word32 *ptr_time_out_fx;
    const Word16 *p_filter;
    Word16 p_filter_sf;

    Word32 accu0, accu1, accu2, accu3, accu4;
    Word16 no_col = h_cldfb->no_col;
    move16();

    /* derived sizes: L2 = 2*M1, M2 = M1/2, M41 + M42 = M2, Mz = first band to be zeroed */
    M1 = h_cldfb->no_channels;
    move16();
    L2 = shl( M1, 1 );
    M2 = shr( M1, 1 );
    M41 = shr( M2, 1 );
    M42 = sub( M2, M41 );
    Mz = sub( M1, h_cldfb->bandsToZero );

    /* only process needed cols */
    /* i.e. no_col = min( no_col, ( samplesToProcess + no_channels - 1 ) / no_channels ) */
    IF( GT_16( samplesToProcess, -1 ) )
    {
        no_col = s_min( no_col, idiv1616( sub( add( samplesToProcess, h_cldfb->no_channels ), 1 ), h_cldfb->no_channels ) );
        move16();
    }

    rot_vctr_re_fx = h_cldfb->rot_vec_syn_re_fx;
    rot_vctr_im_fx = h_cldfb->rot_vec_syn_im_fx;

    rot_vctr_delay_re_fx = h_cldfb->rot_vec_syn_delay_re_fx;
    rot_vctr_delay_im_fx = h_cldfb->rot_vec_syn_delay_im_fx;

    /* place the stored filter memory behind M1 * no_col free entries of the local buffer */
    synthesisBuffer_fx = buffer_fx;
    Copy32( h_cldfb->cldfb_state_fx, synthesisBuffer_fx + i_mult( M1, no_col ), h_cldfb->p_filter_length );

    p_filter = h_cldfb->p_filter;
    p_filter_sf = h_cldfb->p_filter_sf; // Q14
    move16();
    ptr_time_out_fx = timeOut_fx;

    /*synthesisBuffer += M1 * h_cldfb->no_col;*/
    synthesisBuffer_fx += i_mult( M1, no_col );

    FOR( k = 0; k < no_col; k++ )
    {
        IF( EQ_16( h_cldfb->prototype, CLDFB_PROTOTYPE_5_00MS ) )
        {
            /* rotation due to delay*/
            /*if(h_cldfb->ds != M1)*/
            IF( rot_vctr_delay_re_fx != NULL )
            {
                FOR( i = 0; i < M1; i++ )
                {
                    Word32 cplx_aux;
                    /* delay */
                    /*cplxMult(&rBuffer[i], &iBuffer[i], realBuffer[k][i], imagBuffer[k][i], cos((EVS_PI/M1)*(i+0.5)*(-(h_cldfb->ds-M1)*0.5)),
                            sin((EVS_PI/M1)*(i+0.5)*(-(h_cldfb->ds-M1)*0.5)));*/
                    /*cplxMult(&rBuffer[i], &iBuffer[i], realBuffer[k][i], imagBuffer[k][i], rot_vctr_delay_re[i], rot_vctr_delay_im[i]);*/
                    /*cplxMult(&realBuffer[k][i], &imagBuffer[k][i], realBuffer[k][i], imagBuffer[k][i], rot_vctr_delay_re[i], rot_vctr_delay_im[i]);*/
                    /* the real part goes through cplx_aux because the imag update below still reads the old realBuffer_fx[k][i] */
                    cplx_aux = Msub_32_32( Mpy_32_32( realBuffer_fx[k][i], rot_vctr_delay_re_fx[i] ), imagBuffer_fx[k][i], rot_vctr_delay_im_fx[i] ); // Qx
                    imagBuffer_fx[k][i] = Madd_32_32( Mpy_32_32( realBuffer_fx[k][i], rot_vctr_delay_im_fx[i] ), imagBuffer_fx[k][i], rot_vctr_delay_re_fx[i] );
                    move32();
                    realBuffer_fx[k][i] = cplx_aux;
                    move32();
                    /*realBuffer[k][i] = rBuffer[i];*/
                    /*imagBuffer[k][i] = iBuffer[i];*/
                }
            }
        }
        /* clear the bands Mz .. M1-1 of the current slot (in the caller's buffers) */
        FOR( i = Mz; i < M1; i++ )
        {
            realBuffer_fx[k][i] = 0;
            move32();
            imagBuffer_fx[k][i] = 0;
            move32();
        }

        /* pair band 2*i with band M1-1-2*i and rotate each pair by ( rot_vctr_re[i], rot_vctr_im[i] ) */
        FOR( i = 0; i < M2; i++ )
        {
            /* pre modulation of DST IV */
            /*cplxMult(&rBuffer[2*i], &rBuffer[2*i+1], realBuffer[k][2*i], realBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
            rBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( realBuffer_fx[k][2 * i], rot_vctr_re_fx[i] ), realBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
            move32();
            rBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( realBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ), realBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx
            move32();

            /* pre modulation of DCT IV */
            /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
            /* the first operand is negated here, hence the second output uses Msub instead of Madd */
            iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
            move32();
            iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx
            move32();
        }

        /* FFT of DST IV */
        fft_cldfb_fx( rBuffer_fx, M2 );

        /* FFT of DCT IV */
        fft_cldfb_fx( iBuffer_fx, M2 );

        /* folding */
        /* first loop: the M41 FFT pairs taken from the upper end of rBuffer_fx / iBuffer_fx */
        FOR( i = 0; i < M41; i++ )
        {
            /* post modulation of DST IV */
            rr12_fx = Msub_32_32( Mpy_32_32( rBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ), rBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ); // Qx
            ri12_fx = Madd_32_32( Mpy_32_32( rBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ), rBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ); // Qx

            /* post modulation of DCT IV */
            ir12_fx = Msub_32_32( Mpy_32_32( iBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ), iBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ); // Qx
            ii12_fx = Madd_32_32( Mpy_32_32( iBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ), iBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ); // Qx

            new_samples_fx[M1 + M2 + 1 + 2 * i] = L_negate( L_add( rr12_fx, ii12_fx ) ); // Qx
            move32();
            new_samples_fx[M2 - 2 - 2 * i] = L_negate( L_add( ri12_fx, ir12_fx ) ); // Qx
            move32();

            new_samples_fx[M1 + M2 - 2 - 2 * i] = L_sub( rr12_fx, ii12_fx ); // Qx
            move32();
            new_samples_fx[M2 + 1 + 2 * i] = L_sub( ir12_fx, ri12_fx ); // Qx
            move32();
        }

        /* second loop: the M42 FFT pairs taken from the lower end; fills the remaining new_samples_fx entries */
        FOR( i = 0; i < M42; i++ )
        {
            /* post modulation of DST IV */
            rr12_fx = Msub_32_32( Mpy_32_32( rBuffer_fx[2 * i], rot_vctr_re_fx[i] ), rBuffer_fx[2 * i + 1], rot_vctr_im_fx[i] ); // Qx
            ri12_fx = Madd_32_32( Mpy_32_32( rBuffer_fx[2 * i], rot_vctr_im_fx[i] ), rBuffer_fx[2 * i + 1], rot_vctr_re_fx[i] ); // Qx

            /* post modulation of DCT IV */
            ir12_fx = Msub_32_32( Mpy_32_32( iBuffer_fx[2 * i], rot_vctr_re_fx[i] ), iBuffer_fx[2 * i + 1], rot_vctr_im_fx[i] ); // Qx
            ii12_fx = Madd_32_32( Mpy_32_32( iBuffer_fx[2 * i], rot_vctr_im_fx[i] ), iBuffer_fx[2 * i + 1], rot_vctr_re_fx[i] ); // Qx

            new_samples_fx[M1 + M2 + 2 * i] = L_add( ri12_fx, ir12_fx ); // Qx
            move32();
            new_samples_fx[M2 - 1 - 2 * i] = L_add( rr12_fx, ii12_fx ); // Qx
            move32();

            new_samples_fx[M1 + M2 - 1 - 2 * i] = L_sub( ir12_fx, ri12_fx ); // Qx
            move32();
            new_samples_fx[M2 + 2 * i] = L_sub( rr12_fx, ii12_fx ); // Qx
            move32();
        }

        /* synthesis prototype filter */
        /* each new sample (read in reversed order) is scaled by p_filter_sf, shifted by 'shift' with saturation,
           and then combined with five filter taps spaced L2 apart into the synthesis buffer */
        FOR( i = 0; i < L2; i++ )
        {
            Word32 prod = L_shl_sat( Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter_sf ), shift );
            accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] );                       // Qx - 1
            accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx - 1
            accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx - 1
            accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx - 1
            accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx - 1

            synthesisBuffer_fx[i] = accu0;
            move32();
            synthesisBuffer_fx[1 * L2 + i] = accu1;
            move32();
            synthesisBuffer_fx[2 * L2 + i] = accu2;
            move32();
            synthesisBuffer_fx[3 * L2 + i] = accu3;
            move32();
            synthesisBuffer_fx[4 * L2 + i] = accu4;
            move32();
        }

        /* output: the M1 entries starting at offset 4 * L2 + M1, written in reversed order */
        FOR( i = 0; i < M1; i++ )
        {
            ptr_time_out_fx[( M1 - 1 ) - i] = synthesisBuffer_fx[4 * L2 + M1 + i];
            move32();
        }

        ptr_time_out_fx += M1;

        /* slide the working window M1 entries towards the start of buffer_fx and clear the new head */
        synthesisBuffer_fx -= M1;

        set32_fx( synthesisBuffer_fx, 0, M1 );
    }

    /* update memory */
    /* after no_col slots the working window is back at the start of buffer_fx */
    Copy32( buffer_fx, h_cldfb->cldfb_state_fx, h_cldfb->p_filter_length );

    return;
}

/*
 * cldfbSynthesis_ivas_fx_3()
 *
 * Fixed-point CLDFB synthesis of up to h_cldfb->no_col time slots, with an
 * additional saturating left shift (out_shift) applied to every output sample.
 * For every slot k the routine
 *   1. optionally rotates the complex input in place with the delay rotation
 *      vectors (only for CLDFB_PROTOTYPE_5_00MS and a non-NULL delay vector),
 *   2. zeroes the input bands Mz .. M1-1 in place,
 *   3. pre-modulates into rBuffer_fx / iBuffer_fx and runs fft_cldfb_fx on each,
 *   4. post-modulates and folds the results into new_samples_fx (2 * M1 values),
 *   5. feeds the new samples through the prototype filter into the synthesis
 *      buffer and writes M1 shifted output samples to timeOut_fx.
 * The filter memory is loaded from h_cldfb->cldfb_state_fx at entry and written
 * back (unshifted) before returning.
 *
 * NOTE: realBuffer_fx and imagBuffer_fx are modified by steps 1 and 2.
 */
void cldfbSynthesis_ivas_fx_3(
    Word32 **realBuffer_fx,          /* i/o: real values, indexed [slot][band]; rotated/zeroed in place  Qx*/
    Word32 **imagBuffer_fx,          /* i/o: imag values, indexed [slot][band]; rotated/zeroed in place  Qx*/
    Word32 *timeOut_fx,              /* o  : output time domain samples, M1 per slot, shifted by out_shift */
    const Word16 samplesToProcess,   /* i  : number of samples to process; limits no_col when > -1       */
    const Word16 shift,              /* i  : saturating left shift applied to the scaled new samples     */
    const Word16 out_shift,          /* i  : saturating left shift applied to each output sample         */
    HANDLE_CLDFB_FILTER_BANK h_cldfb /* i/o: filter bank state (cldfb_state_fx is updated)               */
)
{
    Word16 i;
    Word16 k;
    Word16 L2;
    Word16 M1;
    Word16 M2;
    Word16 M41;
    Word16 M42;
    Word16 Mz;

    Word32 rBuffer_fx[2 * CLDFB_NO_CHANNELS_MAX];
    Word32 iBuffer_fx[2 * CLDFB_NO_CHANNELS_MAX];
    const Word32 *rot_vctr_re_fx;
    const Word32 *rot_vctr_im_fx;
    const Word32 *rot_vctr_delay_re_fx;
    const Word32 *rot_vctr_delay_im_fx;
    Word32 rr12_fx, ir12_fx;
    Word32 ri12_fx, ii12_fx;

    Word32 *synthesisBuffer_fx, buffer_fx[( CLDFB_NO_CHANNELS_MAX * CLDFB_NO_COL_MAX ) + ( 10 * CLDFB_NO_CHANNELS_MAX )];
    Word32 new_samples_fx[2 * CLDFB_NO_CHANNELS_MAX];

    Word32 *ptr_time_out_fx;
    const Word16 *p_filter;
    Word16 p_filter_sf;

    Word32 accu0, accu1, accu2, accu3, accu4;
    Word16 no_col = h_cldfb->no_col;
    move16();

    /* derived sizes: L2 = 2*M1, M2 = M1/2, M41 + M42 = M2, Mz = first band to be zeroed */
    M1 = h_cldfb->no_channels;
    move16();
    L2 = shl( M1, 1 );
    M2 = shr( M1, 1 );
    M41 = shr( M2, 1 );
    M42 = sub( M2, M41 );
    Mz = sub( M1, h_cldfb->bandsToZero );

    /* only process needed cols */
    /* i.e. no_col = min( no_col, ( samplesToProcess + no_channels - 1 ) / no_channels ) */
    IF( GT_16( samplesToProcess, -1 ) )
    {
        no_col = s_min( no_col, idiv1616( sub( add( samplesToProcess, h_cldfb->no_channels ), 1 ), h_cldfb->no_channels ) );
        move16();
    }

    rot_vctr_re_fx = h_cldfb->rot_vec_syn_re_fx;
    rot_vctr_im_fx = h_cldfb->rot_vec_syn_im_fx;

    rot_vctr_delay_re_fx = h_cldfb->rot_vec_syn_delay_re_fx;
    rot_vctr_delay_im_fx = h_cldfb->rot_vec_syn_delay_im_fx;

    /* place the stored filter memory behind M1 * no_col free entries of the local buffer */
    synthesisBuffer_fx = buffer_fx;
    Copy32( h_cldfb->cldfb_state_fx, synthesisBuffer_fx + i_mult( M1, no_col ), h_cldfb->p_filter_length );

    p_filter = h_cldfb->p_filter;
    p_filter_sf = h_cldfb->p_filter_sf; // Q14
    move16();
    ptr_time_out_fx = timeOut_fx;

    /*synthesisBuffer += M1 * h_cldfb->no_col;*/
    synthesisBuffer_fx += i_mult( M1, no_col );

    FOR( k = 0; k < no_col; k++ )
    {
        IF( EQ_16( h_cldfb->prototype, CLDFB_PROTOTYPE_5_00MS ) )
        {
            /* rotation due to delay*/
            /*if(h_cldfb->ds != M1)*/
            IF( rot_vctr_delay_re_fx != NULL )
            {
                FOR( i = 0; i < M1; i++ )
                {
                    Word32 cplx_aux;
                    /* delay */
                    /*cplxMult(&rBuffer[i], &iBuffer[i], realBuffer[k][i], imagBuffer[k][i], cos((EVS_PI/M1)*(i+0.5)*(-(h_cldfb->ds-M1)*0.5)),
                            sin((EVS_PI/M1)*(i+0.5)*(-(h_cldfb->ds-M1)*0.5)));*/
                    /*cplxMult(&rBuffer[i], &iBuffer[i], realBuffer[k][i], imagBuffer[k][i], rot_vctr_delay_re[i], rot_vctr_delay_im[i]);*/
                    /*cplxMult(&realBuffer[k][i], &imagBuffer[k][i], realBuffer[k][i], imagBuffer[k][i], rot_vctr_delay_re[i], rot_vctr_delay_im[i]);*/
                    /* the real part goes through cplx_aux because the imag update below still reads the old realBuffer_fx[k][i] */
                    cplx_aux = Msub_32_32( Mpy_32_32( realBuffer_fx[k][i], rot_vctr_delay_re_fx[i] ), imagBuffer_fx[k][i], rot_vctr_delay_im_fx[i] ); // Qx
                    imagBuffer_fx[k][i] = Madd_32_32( Mpy_32_32( realBuffer_fx[k][i], rot_vctr_delay_im_fx[i] ), imagBuffer_fx[k][i], rot_vctr_delay_re_fx[i] );
                    move32();
                    realBuffer_fx[k][i] = cplx_aux;
                    move32();
                    /*realBuffer[k][i] = rBuffer[i];*/
                    /*imagBuffer[k][i] = iBuffer[i];*/
                }
            }
        }
        /* clear the bands Mz .. M1-1 of the current slot (in the caller's buffers) */
        FOR( i = Mz; i < M1; i++ )
        {
            realBuffer_fx[k][i] = 0;
            move32();
            imagBuffer_fx[k][i] = 0;
            move32();
        }

        /* pair band 2*i with band M1-1-2*i and rotate each pair by ( rot_vctr_re[i], rot_vctr_im[i] ) */
        FOR( i = 0; i < M2; i++ )
        {
            /* pre modulation of DST IV */
            /*cplxMult(&rBuffer[2*i], &rBuffer[2*i+1], realBuffer[k][2*i], realBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
            rBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( realBuffer_fx[k][2 * i], rot_vctr_re_fx[i] ), realBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
            move32();
            rBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( realBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ), realBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx
            move32();

            /* pre modulation of DCT IV */
            /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
            /* the first operand is negated here, hence the second output uses Msub instead of Madd */
            iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
            move32();
            iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx
            move32();
        }

        /* FFT of DST IV */
        fft_cldfb_fx( rBuffer_fx, M2 );

        /* FFT of DCT IV */
        fft_cldfb_fx( iBuffer_fx, M2 );

        /* folding */
        /* first loop: the M41 FFT pairs taken from the upper end of rBuffer_fx / iBuffer_fx */
        FOR( i = 0; i < M41; i++ )
        {
            /* post modulation of DST IV */
            rr12_fx = Msub_32_32( Mpy_32_32( rBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ), rBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ); // Qx
            ri12_fx = Madd_32_32( Mpy_32_32( rBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ), rBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ); // Qx

            /* post modulation of DCT IV */
            ir12_fx = Msub_32_32( Mpy_32_32( iBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ), iBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ); // Qx
            ii12_fx = Madd_32_32( Mpy_32_32( iBuffer_fx[( M1 - 2 ) - ( i * 2 )], rot_vctr_im_fx[( M2 - 1 ) - i] ), iBuffer_fx[( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[( M2 - 1 ) - i] ); // Qx

            new_samples_fx[M1 + M2 + 1 + 2 * i] = L_negate( L_add( rr12_fx, ii12_fx ) ); // Qx
            move32();
            new_samples_fx[M2 - 2 - 2 * i] = L_negate( L_add( ri12_fx, ir12_fx ) ); // Qx
            move32();

            new_samples_fx[M1 + M2 - 2 - 2 * i] = L_sub( rr12_fx, ii12_fx ); // Qx
            move32();
            new_samples_fx[M2 + 1 + 2 * i] = L_sub( ir12_fx, ri12_fx ); // Qx
            move32();
        }

        /* second loop: the M42 FFT pairs taken from the lower end; fills the remaining new_samples_fx entries */
        FOR( i = 0; i < M42; i++ )
        {
            /* post modulation of DST IV */
            rr12_fx = Msub_32_32( Mpy_32_32( rBuffer_fx[2 * i], rot_vctr_re_fx[i] ), rBuffer_fx[2 * i + 1], rot_vctr_im_fx[i] ); // Qx
            ri12_fx = Madd_32_32( Mpy_32_32( rBuffer_fx[2 * i], rot_vctr_im_fx[i] ), rBuffer_fx[2 * i + 1], rot_vctr_re_fx[i] ); // Qx

            /* post modulation of DCT IV */
            ir12_fx = Msub_32_32( Mpy_32_32( iBuffer_fx[2 * i], rot_vctr_re_fx[i] ), iBuffer_fx[2 * i + 1], rot_vctr_im_fx[i] ); // Qx
            ii12_fx = Madd_32_32( Mpy_32_32( iBuffer_fx[2 * i], rot_vctr_im_fx[i] ), iBuffer_fx[2 * i + 1], rot_vctr_re_fx[i] ); // Qx

            new_samples_fx[M1 + M2 + 2 * i] = L_add( ri12_fx, ir12_fx ); // Qx
            move32();
            new_samples_fx[M2 - 1 - 2 * i] = L_add( rr12_fx, ii12_fx ); // Qx
            move32();

            new_samples_fx[M1 + M2 - 1 - 2 * i] = L_sub( ir12_fx, ri12_fx ); // Qx
            move32();
            new_samples_fx[M2 + 2 * i] = L_sub( rr12_fx, ii12_fx ); // Qx
            move32();
        }

        /* synthesis prototype filter */
        /* each new sample (read in reversed order) is scaled by p_filter_sf, shifted by 'shift' with saturation,
           and then combined with five filter taps spaced L2 apart into the synthesis buffer */
        FOR( i = 0; i < L2; i++ )
        {
            Word32 prod = L_shl_sat( Mpy_32_16_1( new_samples_fx[L2 - 1 - i], p_filter_sf ), shift );
            accu0 = Madd_32_16( synthesisBuffer_fx[i], prod, p_filter[i] );                       // Qx - 1
            accu1 = Madd_32_16( synthesisBuffer_fx[1 * L2 + i], prod, p_filter[( 1 * L2 + i )] ); // Qx - 1
            accu2 = Madd_32_16( synthesisBuffer_fx[2 * L2 + i], prod, p_filter[( 2 * L2 + i )] ); // Qx - 1
            accu3 = Madd_32_16( synthesisBuffer_fx[3 * L2 + i], prod, p_filter[( 3 * L2 + i )] ); // Qx - 1
            accu4 = Madd_32_16( synthesisBuffer_fx[4 * L2 + i], prod, p_filter[( 4 * L2 + i )] ); // Qx - 1

            synthesisBuffer_fx[i] = accu0;
            move32();
            synthesisBuffer_fx[1 * L2 + i] = accu1;
            move32();
            synthesisBuffer_fx[2 * L2 + i] = accu2;
            move32();
            synthesisBuffer_fx[3 * L2 + i] = accu3;
            move32();
            synthesisBuffer_fx[4 * L2 + i] = accu4;
            move32();
        }

        /* output: the M1 entries starting at offset 4 * L2 + M1, written in reversed order;
           out_shift is applied only to the output copy, the synthesis buffer itself stays unshifted */
        FOR( i = 0; i < M1; i++ )
        {
            ptr_time_out_fx[( M1 - 1 ) - i] = L_shl_sat( synthesisBuffer_fx[4 * L2 + M1 + i], out_shift );
            move32();
        }

        ptr_time_out_fx += M1;

        /* slide the working window M1 entries towards the start of buffer_fx and clear the new head */
        synthesisBuffer_fx -= M1;

        set32_fx( synthesisBuffer_fx, 0, M1 );
    }

    /* update memory */
    /* after no_col slots the working window is back at the start of buffer_fx */
    Copy32( buffer_fx, h_cldfb->cldfb_state_fx, h_cldfb->p_filter_length );

    return;
}


void configureCldfb_ivas_enc_fx(
    HANDLE_CLDFB_FILTER_BANK h_cldfb, /* i/o: filter bank handle        */
+5 −9

File changed.

Preview size limit exceeded, changes collapsed.

+20 −0

File changed.

Preview size limit exceeded, changes collapsed.

+172 −4

File changed.

Preview size limit exceeded, changes collapsed.

+5 −41

File changed.

Preview size limit exceeded, changes collapsed.

Loading