Commit ae3fc3c5 authored by vaclav
Browse files

Merge branch 'basop-2426-removing-unnecessary-parameters-in-acelp_fast_fx' into 'main'

basop-2426-removing-unnecessary-parameters-in-acelp_fast_fx

Closes #2426

See merge request !2807
parents 792164f0 3c75fc59
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -3031,9 +3031,15 @@ void acelp_fast_fx(
    BSTR_ENC_HANDLE hBstr,                                      /* i/o: encoder bitstream handle                */
    const Word16 cdk_index,                                     /* i  : codebook index                          */
    const Word16 dn_orig[L_SUBFR],                              /* i  : corr. between target and h[].      Q_dn */
#ifdef OPT_2416_ACELP_FAST                                      
    const Word16 Q_dncn,                                        /* i  : scaling factor of dn and cn             */  
#else
    Word16 Q_dn,
#endif
    const Word16 cn[L_SUBFR],                                   /* i  : residual after long term prediction q_cn*/
#ifndef OPT_2416_ACELP_FAST
    const Word16 q_cn,
#endif
    const Word16 H[L_SUBFR],                                    /* i  : impulse response of weighted synthesis filter  e(norm_s(H[0])+1) */
    Word16 code[L_SUBFR],                                       /* o  : algebraic (fixed) codebook excitation   */
    Word16 y[],                                                 /* o  : filtered fixed codebook excitation      */
+1 −0
Original line number Diff line number Diff line
@@ -95,6 +95,7 @@
#define HARMONIZE_ACELP_ENC                             /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */
#define FIX_2392_MSAN_DESTROY_DEC                       /* VA: basop issue 2392: fix MSAN in ivas_destroy_dec_fx() */
#define FIX_FLOAT_1522_LTV_MSAN_QMETADATA_ENC_EC3       /* Nokia: float issue 1522: fix uninit MSAN in EC3 of qmetadata encoding */
#define OPT_2416_ACELP_FAST                             /* VA: basop issue 2426: optimisation of acelp_fast_fx (reduces complexity by 0.35 WMOPS); replaces the Q_dn/q_cn parameters with a single Q_dncn. NOTE(review): macro name says 2416 but the issue is 2426 - confirm */
#define FIX_2410_HARM_MODIF_FS                          /* VA: basop issue 2410: Remove duplicated modif_Fs */
#define HARM_LEV_DURBIN                                 /* VA: basop issue 2423: harmonize levinson-Durbin algorithm */
#define HARMONIZE_TBE2                                  /* VA: basop issue 2399: Remove duplicated code: TBE, step 2 */
+164 −27
Original line number Diff line number Diff line
@@ -42,11 +42,8 @@
 * Local constants
 *-------------------------------------------------------------------*/

#define BETA_BN1 2.0f
#define BETA_BN2 2.25f

#define BETA_BN1_FX 2 // Q0
#define BETA_BN2_FX 9 // Q2
#define BETA_BN1_FX 2 // 2.0f in Q0
#define BETA_BN2_FX 9 // 2.25f in Q2

#define L_SUBFR_MAX       2 * L_SUBFR
#define MAX_NUM_INTER     5
@@ -76,6 +73,8 @@ static Word16 quant_1p_N1_L_subfr_fx(

    return index;
}


static Word16 find_best_pulse_fx(
    const Word16 L_subfr,
    const Word16 nb_tracks,
@@ -111,6 +110,7 @@ static Word16 find_best_pulse_fx(
    return m;
}


/*-------------------------------------------------------------------*
 * Function  acelp_fast_fx()
 *
@@ -121,18 +121,19 @@ static Word16 find_best_pulse_fx(
void acelp_fast_fx(
    BSTR_ENC_HANDLE hBstr,         /* i/o: encoder bitstream handle                                       */
    const Word16 cdk_index,        /* i  : codebook index                                                 */
    const Word16 dn_orig[L_SUBFR],
    /* i  : corr. between target and h[].                 */ // Q_dn
    const Word16 dn_orig[L_SUBFR], /* i  : corr. between target and h[].                           Q_dncn */
#ifdef OPT_2416_ACELP_FAST
    const Word16 Q_dncn, /* i  : scaling factor of dn and cn                                     */
#else
    Word16 Q_dn,
    const Word16 cn[L_SUBFR],
    /* i  : residual after long term prediction           */ // q_cn
#endif
    const Word16 cn[L_SUBFR], /* i  : residual after long term prediction                     Q_dncn */
#ifndef OPT_2416_ACELP_FAST
    const Word16 q_cn,
    const Word16 H[L_SUBFR],
    /* i  : impulse response of weighted synthesis filter */ // e(norm_s(H[0])+1)
    Word16 code[L_SUBFR],
    /* o  : algebraic (fixed) codebook excitation         */ // Q0
    Word16 y[],
    /* o  : filtered fixed codebook excitation            */ // e(norm_s(H[0])+1)
#endif
    const Word16 H[L_SUBFR], /* i  : impulse response of weighted synthesis filter e(norm_s(H[0])+1) */
    Word16 code[L_SUBFR],    /* o  : algebraic (fixed) codebook excitation                        Q0 */
    Word16 y[],              /* o  : filtered fixed codebook excitation            e(norm_s(H[0])+1) */
    const Word16 L_subfr     /* i  : subframe length                                                 */
)
{
@@ -163,8 +164,11 @@ void acelp_fast_fx(
    Word16 flag = 0;
    move16();
    Word32 temp1, temp2, temp3, temp4, temp5, temp6;
#ifndef OPT_2416_ACELP_FAST
    Word16 q_temp1, q_temp2;
#endif
    Word16 scale_temp1, scale_temp2;

    /*-----------------------------------------------------------------*
     * Initialization
     *-----------------------------------------------------------------*/
@@ -223,7 +227,6 @@ void acelp_fast_fx(
        codetrackpos = -1; /* to avoid compilation warnings */
        move16();


        IF( EQ_16( cdk_index, 14 ) )
        {
            /* 14 bits, 2 pulses, 2 tracks: 11 (used all tracks) */
@@ -295,8 +298,9 @@ void acelp_fast_fx(
     * Find signal bn[] and sign pre-selection vector sign[].
     *-----------------------------------------------------------------*/

#ifndef OPT_2416_ACELP_FAST
    exp = sub( Q31, shl( Q_dn, 1 ) );

#endif
    s64 = 0;
    move64();
    FOR( i = 0; i < L_subfr; i++ )
@@ -311,10 +315,13 @@ void acelp_fast_fx(
    {
        Word16 new_exp1 = W_norm( s64 );
        dndn_fx = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31
#ifndef OPT_2416_ACELP_FAST
        dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), new_exp1 ), 32 ) );
#else
        dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) );
#endif
    }


    cncn_fx = 214748365 /* 0.1f in Q31 */;
    move32();
    cncn_e = 0;
@@ -337,7 +344,11 @@ void acelp_fast_fx(
        {
            Word16 new_exp1 = W_norm( s64 );
            cncn_track[q] = W_extract_h( W_shl( s64, new_exp1 ) ); // 2 * Q_dyn + exp1 - 31
#ifndef OPT_2416_ACELP_FAST
            cncn_track_e[q] = sub( 31, sub( add( add( shl( q_cn, 1 ), 1 ), new_exp1 ), 32 ) );
#else
            cncn_track_e[q] = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), new_exp1 ), 32 ) );
#endif
        }
        cncn_fx = BASOP_Util_Add_Mant32Exp( cncn_fx, cncn_e, cncn_track[q], cncn_track_e[q], &cncn_e ); // Q(cncn_e)
    }
@@ -348,8 +359,14 @@ void acelp_fast_fx(
    tmp = add( tmp, sub( dndn_e, cncn_e ) );
    s_coef_fx = Sqrt16( s_coef_fx, &tmp ); // Q(15 - tmp)

#ifdef OPT_2416_ACELP_FAST
    scale_temp1 = sub( Q16, tmp );
    /* dn and cn share the same scaling Q_dncn, so it cancels out: ( Q15 - tmp ) + Q_dncn + Q1 - Q_dncn = Q16 - tmp */
#else
    q_temp1 = add( add( sub( Q15, tmp ), q_cn ), Q1 );
    scale_temp1 = sub( q_temp1, Q_dn );
#endif

    FOR( i = 0; i < L_subfr; i++ )
    {
        temp1 = L_mult( s_coef_fx, cn[i] );     // Q(15 - tmp)+q_cn+1
@@ -360,6 +377,15 @@ void acelp_fast_fx(
        bn_orig_fx[i] = L_add( temp1, temp2 ); // Q_dn
        move32();

#ifdef OPT_2416_ACELP_FAST
        sign_fx[i] = -1;
        move16();
        if ( bn_orig_fx[i] >= 0 )
        {
            sign_fx[i] = 1;
            move16();
        }
#else
        IF( bn_orig_fx[i] >= 0 )
        {
            sign_fx[i] = 1;
@@ -369,6 +395,7 @@ void acelp_fast_fx(
            sign_fx[i] = -1;
        }
        move16();
#endif
    }

    /*-----------------------------------------------------------------*
@@ -397,6 +424,7 @@ void acelp_fast_fx(
    /*-----------------------------------------------------------------*
     * Approximate FI[i][j] by alp[abs(i-j)] and compute buffer alp_buf[].
     *-----------------------------------------------------------------*/

    q_H = sub( 14, norm_s( H[0] ) );
    shift = sub( shl( q_H, 1 ), 6 );

@@ -458,6 +486,29 @@ void acelp_fast_fx(
        test();
        test();
        /* skip certain tracks if number of pulses is lower than number of tracks */
#ifdef OPT_2416_ACELP_FAST /* Just need a negative number, it doesn't need to be scaled */
        IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) )
        {
            max_track[NB_TRACK_FCB_4T - 3] = L_deposit_l( -1 );
            move32();
            max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 );
            move32();
        }
        ELSE IF( EQ_16( nb_pulse, 3 ) && EQ_16( codetrackpos, TRACKPOS_FIXED_FIRST ) )
        {
            max_track[NB_TRACK_FCB_4T - 1] = L_deposit_l( -1 );
            move32();
        }

        FOR( q = 0; q < nb_tracks; q++ )
        {
            i = maximum_32_fx( max_track, nb_tracks, &L_tmp1 );
            track_order[q] = i;
            move16();
            max_track[i] = L_deposit_l( -1 );
            move32();
        }
#else
        IF( EQ_16( nb_pulse, 2 ) && EQ_16( nb_tracks, NB_TRACK_FCB_4T ) )
        {
            max_track[NB_TRACK_FCB_4T - 3] = L_shl( -1, Q_dn ); // Q_dn
@@ -479,6 +530,7 @@ void acelp_fast_fx(
            max_track[i] = L_shl( -1, Q_dn ); // Q_dn
            move32();
        }
#endif

        track_order[4] = track_order[1]; // Q0
        move16();
@@ -617,9 +669,13 @@ void acelp_fast_fx(
            move64();
            FOR( i = track; i < L_subfr; i += nb_tracks )
            {
#ifdef OPT_2416_ACELP_FAST
                temp3 = L_msu0( L_mult0( Gd, dn_orig[i] ), G, *alp_pos0 );
#else
                temp1 = L_mult0( Gd, dn_orig[i] );
                temp2 = L_mult0( G, *alp_pos0 );
                temp3 = L_sub( temp1, temp2 );
#endif
                dn[i] = L_shr( temp3, 6 );
                move32();
                alp_pos0 += nb_tracks;
@@ -627,8 +683,11 @@ void acelp_fast_fx(
            }
            exp1 = W_norm( s64 );
            dndn_fx = W_extract_h( W_shl( s64, exp1 ) ); // 2 * Q_dyn + exp1 - 31
#ifndef OPT_2416_ACELP_FAST
            dndn_e = sub( 31, sub( add( add( shl( Q_dn, 1 ), 1 ), exp1 ), 32 ) );

#else
            dndn_e = sub( 31, sub( add( add( shl( Q_dncn, 1 ), 1 ), exp1 ), 32 ) );
#endif
            IF( dndn_fx == 0 )
            {
                dndn_fx = 214748365 /* 0.1f in Q31 */;
@@ -645,10 +704,16 @@ void acelp_fast_fx(
            move16();
            m[1] = track; // Q0
            move16();
#ifdef OPT_2416_ACELP_FAST
            scale_temp1 = sub( Q16, exp1 );
            scale_temp2 = Q2;
            move16();
#else
            q_temp1 = add( add( sub( Q15, exp1 ), q_cn ), 1 );
            q_temp2 = add( Q_dn, Q2 );
            scale_temp1 = sub( q_temp1, Q_dn );
            scale_temp2 = sub( q_temp2, Q_dn );
#endif
            FOR( i = track; i < L_subfr; i += nb_tracks )
            {
                temp1 = L_mult( s_coef_fx, cn[i] );   // Q(15 - tmp)+q_cn+1
@@ -683,7 +748,11 @@ void acelp_fast_fx(
            Gn = add( Gn, i_mult( s[1], dn_orig[m[1]] ) ); // Q_dn
            Gd32 = Gd;
            move16();
#ifdef OPT_2416_ACELP_FAST
            Gd32 = L_add( Gd32, L_mac0( alp[0], i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ); // Q6
#else
            Gd32 = L_add( Gd32, L_add( alp[0], L_mult0( i_mult( shl( s[0], 1 ), s[1] ), alp[m[0] - m[1]] ) ) ); // Q6
#endif
            G = Gn; // Q_dn
            move16();
            G1 = i_mult( G, s[1] ); // Q_dn
@@ -697,10 +766,15 @@ void acelp_fast_fx(
            FOR( i = track; i < L_subfr; i += nb_tracks )
            {
                temp1 = imult3216( Gd32, dn_orig[i] );
#ifdef OPT_2416_ACELP_FAST
                temp4 = L_msu0( temp1, G, *alp_pos0 );
                temp4 = L_msu0( temp4, G1, *alp_pos1 );
#else
                temp2 = L_mult0( G, *alp_pos0 );
                temp3 = L_mult0( G1, *alp_pos1 );
                temp4 = L_sub( temp1, temp2 );
                temp4 = L_sub( temp4, temp3 );
#endif
                dn[i] = L_shr( temp4, 6 );
                move32();
                alp_pos0 += nb_tracks;
@@ -720,9 +794,15 @@ void acelp_fast_fx(
            Gn = add( Gn, i_mult( s[2], dn_orig[m[2]] ) ); // Q_dn
            temp1 = alp[0];
            move32();
#ifdef OPT_2416_ACELP_FAST
            temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] );
            temp3 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] );
            Gd32 = L_add( Gd32, temp3 ); // Q6
#else
            temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[2] ), alp[m[0] - m[2]] );
            temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[2] ), alp[m[1] - m[2]] );
            Gd32 = L_add( Gd32, L_add( L_add( temp1, temp2 ), temp3 ) ); // Q6
#endif
            G = Gn; // Q_dn
            move16();
            G1 = i_mult( G, s[1] ); // Q_dn
@@ -739,12 +819,18 @@ void acelp_fast_fx(
            {

                temp1 = imult3216( Gd32, dn_orig[i] );
#ifdef OPT_2416_ACELP_FAST
                temp5 = L_msu0( temp1, G, *alp_pos0 );
                temp5 = L_msu0( temp5, G1, *alp_pos1 );
                temp5 = L_msu0( temp5, G2, *alp_pos2 );
#else
                temp2 = L_mult0( G, *alp_pos0 );
                temp3 = L_mult0( G1, *alp_pos1 );
                temp4 = L_mult0( G2, *alp_pos2 );
                temp5 = L_sub( temp1, temp2 );
                temp5 = L_sub( temp5, temp3 );
                temp5 = L_sub( temp5, temp4 );
#endif
                dn[i] = L_shr( temp5, 6 );
                move32();
                alp_pos0 += nb_tracks;
@@ -770,11 +856,19 @@ void acelp_fast_fx(
            Gn = add( Gn, i_mult( s[3], dn_orig[m[3]] ) ); // Q_dn
            temp1 = alp[0];
            move32();
#ifdef OPT_2416_ACELP_FAST
            temp2 = L_mac0( temp1, i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] );
            temp2 = L_mac0( temp2, i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] );
            temp2 = L_mac0( temp2, i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] );

            Gd32 = L_add( Gd32, temp2 ); // Q6
#else
            temp2 = L_mult0( i_mult( shl( s[0], 1 ), s[3] ), alp[m[0] - m[3]] );
            temp3 = L_mult0( i_mult( shl( s[1], 1 ), s[3] ), alp[m[1] - m[3]] );
            temp4 = L_mult0( i_mult( shl( s[2], 1 ), s[3] ), alp[m[2] - m[3]] );

            Gd32 = L_add( Gd32, L_add( L_add( L_add( temp1, temp2 ), temp3 ), temp4 ) ); // Q6
#endif
            G = Gn;
            move16();               // Q_dn
            G1 = i_mult( G, s[1] ); // Q_dn
@@ -795,6 +889,12 @@ void acelp_fast_fx(
                FOR( i = track; i < L_subfr; i += nb_tracks )
                {
                    temp1 = imult3216( Gd32, dn_orig[i] );
#ifdef OPT_2416_ACELP_FAST
                    temp6 = L_msu0( temp1, G, *alp_pos0 );
                    temp6 = L_msu0( temp6, G1, *alp_pos1 );
                    temp6 = L_msu0( temp6, G2, *alp_pos2 );
                    temp6 = L_msu0( temp6, G3, *alp_pos3 );
#else
                    temp2 = L_mult0( G, *alp_pos0 );
                    temp3 = L_mult0( G1, *alp_pos1 );
                    temp4 = L_mult0( G2, *alp_pos2 );
@@ -803,6 +903,7 @@ void acelp_fast_fx(
                    temp6 = L_sub( temp6, temp3 );
                    temp6 = L_sub( temp6, temp4 );
                    temp6 = L_sub( temp6, temp5 );
#endif
                    dn[i] = L_shr( temp6, 6 );
                    move32();
                    alp_pos0 += nb_tracks;
@@ -824,6 +925,12 @@ void acelp_fast_fx(
                FOR( i = 0; i < L_subfr; i++ )
                {
                    temp1 = imult3216( Gd32, dn_orig[i] );
#ifdef OPT_2416_ACELP_FAST
                    temp6 = L_msu0( temp1, G, *alp_pos0 );
                    temp6 = L_msu0( temp6, G1, *alp_pos1 );
                    temp6 = L_msu0( temp6, G2, *alp_pos2 );
                    temp6 = L_msu0( temp6, G3, *alp_pos3 );
#else
                    temp2 = L_mult0( G, *alp_pos0 );
                    temp3 = L_mult0( G1, *alp_pos1 );
                    temp4 = L_mult0( G2, *alp_pos2 );
@@ -832,6 +939,7 @@ void acelp_fast_fx(
                    temp6 = L_sub( temp6, temp3 );
                    temp6 = L_sub( temp6, temp4 );
                    temp6 = L_sub( temp6, temp5 );
#endif
                    dn[i] = L_shr( temp6, 6 );
                    move16();
                    alp_pos0++;
@@ -885,6 +993,13 @@ void acelp_fast_fx(

        FOR( j = 0; j < nb_pulse; j++ )
        {
#ifdef OPT_2416_ACELP_FAST
            p_hn = h_inv - m[j];
            if ( s[j] > 0 )
            {
                p_hn = h - m[j];
            }
#else
            IF( s[j] > 0 )
            {
                p_hn = h - m[j];
@@ -893,7 +1008,7 @@ void acelp_fast_fx(
            {
                p_hn = h_inv - m[j];
            }

#endif
            FOR( i = 0; i < L_subfr; i++ )
            {
                y_tmp[i] = add_sat( y_tmp[i], *p_hn++ ); // q_H
@@ -906,8 +1021,11 @@ void acelp_fast_fx(
        s64 = W_mult0_32_32( crit_num, crit_num ); // 2*Q_dn
        exp = W_norm( s64 );
        crit_num = W_extract_h( W_shl( s64, exp ) ); // 2*Q_dn + exp - 32
#ifndef OPT_2416_ACELP_FAST
        q_crit_num = add( shl( Q_dn, 1 ), sub( exp, 32 ) );

#else
        q_crit_num = add( shl( Q_dncn, 1 ), sub( exp, 32 ) );
#endif
        // crit_den = sum2_fx( y_tmp, L_subfr );                          // 2*q_H
        s64 = 0;
        move64();
@@ -926,6 +1044,15 @@ void acelp_fast_fx(

        IF( GT_16( exp, exp1 ) )
        {
#ifdef OPT_2416_ACELP_FAST
            flag = 0;
            move16();
            if ( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) )
            {
                flag = 1;
                move16();
            }
#else
            IF( GE_32( L_shr( L_tmp1, sub( exp, exp1 ) ), L_tmp2 ) )
            {
                flag = 1;
@@ -936,9 +1063,19 @@ void acelp_fast_fx(
                flag = 0;
                move16();
            }
#endif
        }
        ELSE
        {
#ifdef OPT_2416_ACELP_FAST
            flag = 0;
            move16();
            if ( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) )
            {
                flag = 1;
                move16();
            }
#else
            IF( GE_32( L_tmp1, L_shr( L_tmp2, sub( exp1, exp ) ) ) )
            {
                flag = 1;
@@ -949,9 +1086,9 @@ void acelp_fast_fx(
                flag = 0;
                move16();
            }
#endif
        }


        IF( flag )
        {
            crit_num_max = crit_num;
+8 −0
Original line number Diff line number Diff line
@@ -369,7 +369,11 @@ Word16 inov_encode_fx(
                        }
                        ELSE
                        {
#ifdef OPT_2416_ACELP_FAST
                            acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, h2, code, y2, L_subfr );
#else
                            acelp_fast_fx( hBstr, nBits, dn, Qdn, cn, Qcn, h2, code, y2, L_subfr );
#endif
                        }
                    }
                    ELSE IF( ( EQ_16( st_fx->idchan, 1 ) && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 7 ) ) || ( st_fx->idchan == 0 && LE_16( st_fx->acelp_cfg.fixed_cdk_index[idx2], 3 ) ) )
@@ -380,7 +384,11 @@ Word16 inov_encode_fx(
                        }
                        ELSE
                        {
#ifdef OPT_2416_ACELP_FAST
                            acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, h2, code, y2, L_SUBFR );
#else
                            acelp_fast_fx( hBstr, st_fx->acelp_cfg.fixed_cdk_index[idx2], dn, Qdn, cn, Qcn, h2, code, y2, L_SUBFR );
#endif
                        }
                    }
                    ELSE