Commit f9c4d5e1 authored by Sandesh Venkatesh's avatar Sandesh Venkatesh
Browse files

A few bug fixes and performance optimizations

parent e0d6e5bb
Loading
Loading
Loading
Loading
Loading
+34 −17
Original line number Diff line number Diff line
@@ -966,38 +966,57 @@ void computeDiffuseness_fixed(
        p_tmp_c = buffer_energy + i * num_freq_bands;

        q_tmp = add( q_factor_energy[i], min_q_shift1 );

        IF( LT_16( q_tmp, q_ene ) )
        {
            Word16 shift_q = sub( q_ene, q_tmp );
            FOR( k = 0; k < num_freq_bands; k++ )
            {
                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
            IF( LT_16( q_tmp, q_ene ) )
            {
                energy_slow[k] = L_add( L_shr( energy_slow[k], sub( q_ene, q_tmp ) ), tmp );
                energy_slow[k] = L_add( L_shr( energy_slow[k], shift_q ), tmp );
                move32();
            }
        }
        ELSE
        {
                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, sub( q_tmp, q_ene ) ) );
            Word16 shift_q = sub( q_tmp, q_ene );
            FOR( k = 0; k < num_freq_bands; k++ )
            {
                tmp = L_shl( p_tmp_c[k], min_q_shift1 );
                energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, shift_q ) );
                move32();
            }
        }

        q_ene = s_min( q_ene, q_tmp );

        /* Intensity slow */
        q_tmp = add( q_factor_intensity[i], min_q_shift2 );

        IF( LT_16( q_intensity, q_tmp ) )
        {
            Word16 shift_q = sub( q_tmp, q_intensity );
            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
            {
                p_tmp = buffer_intensity[j][i];
                FOR( k = 0; k < num_freq_bands; k++ )
                {
                    tmp = L_shl( p_tmp[k], min_q_shift2 );
                IF( LT_16( q_intensity, q_tmp ) )
                {
                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, sub( q_tmp, q_intensity ) ) );
                    intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, shift_q ) );
                    move32();
                }
            }
        }
        ELSE
        {
                    intensity_slow[j * num_freq_bands + k] = L_add( L_shr( intensity_slow[j * num_freq_bands + k], sub( q_intensity, q_tmp ) ), tmp );
            Word16 shift_q = sub( q_intensity, q_tmp );
            FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
            {
                p_tmp = buffer_intensity[j][i];
                FOR( k = 0; k < num_freq_bands; k++ )
                {
                    tmp = L_shl( p_tmp[k], min_q_shift2 );
                    intensity_slow[j * num_freq_bands + k] = L_add( L_shr( intensity_slow[j * num_freq_bands + k], shift_q ), tmp );
                    move32();
                }
            }
@@ -1017,9 +1036,7 @@ void computeDiffuseness_fixed(

        FOR( k = 0; k < num_freq_bands; k++ )
        {
            p_tmp[k] = Mpy_32_32( p_tmp[k], p_tmp[k] );
            move32();
            intensity_slow_abs[k] = L_add( intensity_slow_abs[k], p_tmp[k] );
            intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] );
            move32();
        }
    }
+15 −4
Original line number Diff line number Diff line
@@ -209,16 +209,27 @@ ivas_error ivas_cpe_enc_fx(
    Copy32( data_fx_ch0, sts[0]->input32_fx, input_frame ); // Q(q_data_fx)
    sts[0]->q_inp32 = q_data_fx;
    move16();
    Copy_Scale_sig32_16( sts[0]->input32_fx, sts[0]->input_fx, input_frame, sub( Q16, q_data_fx ) ); // Q(q_data_fx) -> Q0
    sts[0]->q_inp = 0;
    Word16 norm = L_norm_arr( sts[0]->input32_fx, input_frame );
    scale_sig32( sts[0]->input32_fx, input_frame, norm );
    sts[0]->q_inp32 = add( sts[0]->q_inp32, norm );
    move16();

    Copy_Scale_sig32_16( sts[0]->input32_fx, sts[0]->input_fx, input_frame, 0 );
    sts[0]->q_inp = sub( sts[0]->q_inp32, Q16 );
    move16();
    IF( data_fx_ch1 != NULL ) /*this may happen for cases with odd number of channels*/
    {
        Copy32( data_fx_ch1, sts[1]->input32_fx, input_frame ); // Q(q_data_fx)
        sts[1]->q_inp32 = q_data_fx;
        move16();
        Copy_Scale_sig32_16( sts[1]->input32_fx, sts[1]->input_fx, input_frame, sub( Q16, q_data_fx ) ); // Q(q_data_fx) -> Q0
        sts[1]->q_inp = 0;

        norm = L_norm_arr( sts[1]->input32_fx, input_frame );
        scale_sig32( sts[1]->input32_fx, input_frame, norm );
        sts[1]->q_inp32 = add( sts[1]->q_inp32, norm );
        move16();

        Copy_Scale_sig32_16( sts[1]->input32_fx, sts[1]->input_fx, input_frame, 0 );
        sts[1]->q_inp = sub( sts[1]->q_inp32, Q16 );
        move16();
    }

+10 −19
Original line number Diff line number Diff line
@@ -67,7 +67,7 @@ static void unclr_calc_corr_features_fx(
#define XH_BIAS_FX_Q15            13107
#define XL_WIDTH_FX_Q15           3932
#define XH_WIDTH_FX_Q15           4915
#define SMOOTH_DIST_FACTOR_FX_Q15 14418
#define SMOOTH_DIST_FACTOR_FX_Q15 13107
#define A_BIAS_FX_Q15             3277
#define B_BIAS_FX_Q15             -16384
#define A_WIDTH_FX_Q31            32212264
@@ -840,23 +840,13 @@ static void corrStatsEst_fx(
    scale_sig32( corrEst_fx, 2 * L_NCSHIFT_DS + 1, temp ); /* Q31-corrEst_exp */
    corrEst_exp = sub( corrEst_exp, temp );

    IF( GT_16( corrEst_exp, hStereoTCA->corrEstPrev_exp ) )
    {
        scale_sig32( hStereoTCA->corrEstPrev_fx[0], 2 * L_NCSHIFT_DS + 1, sub( hStereoTCA->corrEstPrev_exp, corrEst_exp ) ); /* Q31-hStereoTCA->corrEstPrev_exp */
        scale_sig32( hStereoTCA->corrEstPrev_fx[1], 2 * L_NCSHIFT_DS + 1, sub( hStereoTCA->corrEstPrev_exp, corrEst_exp ) ); /* Q31-hStereoTCA->corrEstPrev_exp */
    Copy32( corrEst_fx, hStereoTCA->corrEstPrev_fx[2], tempLen ); /* Q31-corrEst_exp */
    hStereoTCA->corrEstPrev_exp = corrEst_exp;
    move16();
    }
    ELSE
    {
        Copy32( corrEst_fx, hStereoTCA->corrEstPrev_fx[2], tempLen );                                                        /* Q31-corrEst_exp */
        scale_sig32( hStereoTCA->corrEstPrev_fx[2], 2 * L_NCSHIFT_DS + 1, sub( corrEst_exp, hStereoTCA->corrEstPrev_exp ) ); /* Q31-corrEst_exp */
    }
    scale_sig32( hStereoTCA->corrEstPrev_fx[0], 2 * L_NCSHIFT_DS + 1, -1 ); /* Q31-hStereoTCA->corrEstPrev_exp-1 */
    scale_sig32( hStereoTCA->corrEstPrev_fx[1], 2 * L_NCSHIFT_DS + 1, -1 ); /* Q31-hStereoTCA->corrEstPrev_exp-1 */
    scale_sig32( hStereoTCA->corrEstPrev_fx[2], 2 * L_NCSHIFT_DS + 1, -1 ); /* Q31-hStereoTCA->corrEstPrev_exp-1 */
    hStereoTCA->corrEstPrev_exp = add( hStereoTCA->corrEstPrev_exp, 1 );
    Word16 gb = find_guarded_bits_fx( 2 * L_NCSHIFT_DS + 1 );

    scale_sig32( hStereoTCA->corrEstPrev_fx[2], 2 * L_NCSHIFT_DS + 1, -gb ); /* Q31-hStereoTCA->corrEstPrev_exp-gb */
    hStereoTCA->corrEstPrev_exp = add( hStereoTCA->corrEstPrev_exp, gb );
    move16();
    Word32 buf1_fx_temp[L_FRAME_DS];
    Word32 buf2_fx_temp[L_FRAME_DS];
@@ -1032,7 +1022,8 @@ static void corrStatsEst_fx(
        loc_weight_win_fx[i] = win_bias_fx; // Q15
        move16();
    }

    reg_prv_corr_fx = L_shr( reg_prv_corr_fx, 1 );
    reg_prv_corr_exp = add( reg_prv_corr_exp, 1 );
    Word16 x = TRUNC_FX( reg_prv_corr_fx, reg_prv_corr_exp ); /* Q0 */
    move16();
    for ( i = 0, j = ( L_NCSHIFT_DS - x ); i < 2 * L_NCSHIFT_DS + 1; i++, j++ )
+6 −56
Original line number Diff line number Diff line
@@ -2516,64 +2516,14 @@ static Word16 in_tri_fx(
    move32();
    matInv[1][1] = Mpy_32_32( tmpDot1[0], invFactor ); // q=22+invFactor_exp
    move32();

    /* Computing S (Q13 + matInv_exp_final[i] + P_minus_A_exp_final + invFactor_exp - 1 ) =
    matInv (Q22 + matInv_exp_final[i] + invFactor_exp) *(P-A) (Q22 + P_minus_A_exp_final) */
    Word16 matInv_exp[2][2], P_minus_A_exp[2];
    Word16 matInv_exp_final[2], P_minus_A_exp_final;
    FOR( Word32 i = 0; i < 2; i++ )
    {
        FOR( Word32 j = 0; j < 2; j++ )
        {
            matInv_exp[i][j] = 31;
            move16();
            IF( matInv[i][j] != 0 )
            {
                matInv_exp[i][j] = norm_l( matInv[i][j] );
                move16();
            }
        }
        matInv_exp_final[i] = s_min( matInv_exp[i][0], matInv_exp[i][1] );
        move16();
        P_minus_A_exp[i] = 31;
        move16();
        IF( P_minus_A[i] != 0 )
        {
            P_minus_A_exp[i] = norm_l( P_minus_A[i] );
            move16();
        }
    }
    P_minus_A_exp_final = s_min( P_minus_A_exp[0], P_minus_A_exp[1] );

    S[0] = L_add( L_shr( Mpy_32_32( L_shl( matInv[0][0], matInv_exp_final[0] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ),
                  L_shr( Mpy_32_32( L_shl( matInv[0][1], matInv_exp_final[0] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[0]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final
    move64();
    S[1] = L_add( L_shr( Mpy_32_32( L_shl( matInv[1][0], matInv_exp_final[1] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ),
                  L_shr( Mpy_32_32( L_shl( matInv[1][1], matInv_exp_final[1] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[1]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final
    S[0] = W_add( W_mult_32_32( matInv[0][0], P_minus_A[0] ), W_mult_32_32( matInv[0][1], P_minus_A[1] ) ); // Q22+invFactor_exp +Q22
    move64();

    /* Checking if we are in the triangle; For the theory, check Christian Borss article, section 3.2 */
    // Q32 S
    IF( sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) < 0 )
    {
        S[0] = W_shr( S[0], sub( add( add( matInv_exp_final[0], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32
        move64();
    }
    ELSE
    {
        S[0] = W_shl( S[0], sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32
    S[0] = W_shr( S[0], add( 13, invFactor_exp ) ); // q32
    move64();
    }
    IF( sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) < 0 )
    {
        S[1] = W_shr( S[1], sub( add( add( matInv_exp_final[1], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32
    S[1] = W_add( W_mult_32_32( matInv[1][0], P_minus_A[0] ), W_mult_32_32( matInv[1][1], P_minus_A[1] ) );
    move64();
    }
    ELSE
    {
        S[1] = W_shl( S[1], sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32
    S[1] = W_shr( S[1], add( 13, invFactor_exp ) ); // q32
    move64();
    }

    test();
    test();