Commit e957073d authored by Fabian Bauer, committed by Manuel Jander
Browse files

cleanup useless speedup macros

parent cdddf5d2
Loading
Loading
Loading
Loading
+14 −108
Original line number Diff line number Diff line
@@ -47,22 +47,16 @@

// MHZ NUMBERS:
// NULL: 178.407
// ALL: 169.499
// ALL: 169.499 77 (170.650 wo 17)


#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                      --> USE
#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                      --> USE
#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                      --> USE
#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                      --> USE
#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs        --> USE
#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
#define FIX_1326_SPEEDUP_10 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
#define FIX_1326_SPEEDUP_11 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails   --> DONTUSEYET
#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          // 2.9 WMOPS                      --> USE

#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd        //  .1 WMOPS                      --> USE
#define FIX_1326_SPEEDUP_16 // tiny speedup like 04             //  .2 WMOPS                      --> USE
//#define FIX_1326_SPEEDUP_17 // use 1/x                          // 1.25WMOPS                      --> USE
#define FIX_1326_SPEEDUP_18 // structural speedup               // 1   WMOPS                      --> USE

Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };
@@ -1317,7 +1311,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
    }
    pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/

    push_wmops( "IDRBCM target matrix" );
    push_wmops( "IDRBCM target matrix (IDRBCMtm)" );
    /* Determine target covariance matrix containing target binaural properties */
    FOR( bin = 0; bin < nBins; bin++ )
    {
@@ -1343,6 +1337,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
        meanEnePerCh_fx = Mpy_32_32( hDiracDecBin->earlyPartEneCorrection_fx[bin], subFrameTotalEne_fx[bin] ); // Q( q_meanEnePerCh )
        q_meanEnePerCh = add( sub( q_earlyPartEneCorrection, subFrameTotalEne_e[bin] ), 1 );                   // q_earlyPartEneCorrection + 31 - subFrameTotalEne_e[bin] - 31 + Q1(0.5f)
        /* Determine direct part target covariance matrix (for 1 or 2 directions) */
        push_wmops( "IDRBCMtm LOOP1" );
        FOR( dirIndex = 0; dirIndex < hSpatParamRendCom->numSimultaneousDirections; dirIndex++ )
        {
            Word16 aziDeg, eleDeg;
@@ -1421,6 +1416,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                diffuseness_fx = 0;
                move32();
            }

            IF( isIsmDirection )
            {
                /* Objects cause lesser decorrelation reduction, to avoid removing all decorrelation when only objects are present */
@@ -1430,7 +1426,6 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            {
                diffusenessValForDecorrelationReduction_fx = L_sub( diffusenessValForDecorrelationReduction_fx, ratio_fx ); /*Q30*/
            }

            IF( separateCenterChannelRendering )
            {
                /* In masa + mono rendering mode, the center directions originate from phantom sources, so the
@@ -1477,14 +1472,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx;
                Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                Word16 w1_fx, w2_fx, w3_fx, eq_fx;
#ifdef FIX_1326_SPEEDUP_15
                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); // Q25
#else

                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                               // Q25
                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),                        // Q25
                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),                 // Q25
                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) );           // Q25
#endif

                /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                 * The following formulas determine the gains for these sources.
                 * spreadCoh = 0: Only panning
@@ -1513,14 +1506,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

                /* Apply the gain for the left source of the three coherent sources */
                getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );
#ifdef FIX_1326_SPEEDUP_15
                hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
#else

                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                          // Q25
                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),                   // Q25
                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),            // Q25
                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) );      // Q25
#endif

                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) ); // Q25
                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) ); // Q25
                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) ); // Q25
@@ -1608,21 +1599,12 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                move16();
            }

#ifdef FIX_1326_SPEEDUP_15
            hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 )
            hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
            move32();
            move32();
            hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx );  // Q( 2*q_lr - 31 )
            hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
#else
            hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
            hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
            move32();
            move32();
            hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) );  // Q( 2*q_lr - 31 )
            hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
#endif

            /* Add direct part (1 or 2) covariance matrix */
            dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1)
@@ -1639,6 +1621,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            move32();
            move32();
        }
        pop_wmops(); //push_wmops( "IDRBCMtm LOOP1" );

        /* Add diffuse / ambient part covariance matrix */
        diffuseness_fx = L_max( 0, diffuseness_fx );               // Q30
@@ -1697,11 +1680,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            }
            ELSE
            {
#ifdef FIX_1326_SPEEDUP_15
                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
#else
                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
#endif
            }
            move32();
        }
@@ -1717,7 +1696,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
        hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
        move32();
    }
    pop_wmops(); /*push_wmops( "IDRBCM target matrix" );*/
    pop_wmops(); /*push_wmops( "IDRBCM target matrix (IDRBCMtm)" );*/

    test();
    /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -2165,14 +2144,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
        tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) );
        q_tmp2 = add( q_res, exp );
#ifdef FIX_1326_SPEEDUP_11
        {
            Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) );
            Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) );
            realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) );
            q_realizedOutputEne = s_min( q_CrEne, q_tmp2 );
        }
#else

        IF( LT_16( q_CrEne, q_tmp2 ) )
        {
            realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) );
@@ -2185,7 +2157,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            q_realizedOutputEne = q_tmp2;
            move16();
        }
#endif

        exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 );
        targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) );
        q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp );
@@ -3351,21 +3323,7 @@ static void eig2x2_fx(
#endif

    /* Numeric case, when input is near an identity matrix with a gain */
#ifdef FIX_1326_SPEEDUP_03
    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31

    IF( LT_32( pm_fx, L_shl_sat( tmp1, sub( q_tmp1, q_tmp2 ) ) ) )
    {
        Ure_fx[0][0] = ONE_IN_Q30;
        move32();
        Ure_fx[1][1] = ONE_IN_Q30;
        move32();
        *q_U = Q30;
        move16();

        return;
    }
#else
    tmp1 = Mpy_32_32( 2147484, add_fx ); // 2147484 = 1e-3f in Q31

    IF( LT_16( q_tmp1, q_tmp2 ) )
@@ -3396,7 +3354,6 @@ static void eig2x2_fx(
            return;
        }
    }
#endif

    q_U_1 = 0;
    q_U_2 = 0;
@@ -3450,11 +3407,7 @@ static void eig2x2_fx(
#endif

#if 1
#ifdef FIX_1326_SPEEDUP_17
            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
#else
            tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
#endif
            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );
@@ -3542,11 +3495,7 @@ static void eig2x2_fx(
#endif

#if 1
#ifdef FIX_1326_SPEEDUP_17
            tmp2 = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, tmp3, &exp );
#else
            tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );
#endif
            exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) );
            normVal_fx = Sqrt32( tmp2, &exp ); // q_tmp2
            q_tmp2 = sub( 31, exp );
@@ -4214,13 +4163,7 @@ static void formulate2x2MixingMatrix_fx(
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
#ifdef FIX_1326_SPEEDUP_17
        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, maxEne_fx, &exp );
#else
        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
#endif
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
    }
    exp = norm_l( maxEneDiv_fx );
@@ -4298,9 +4241,7 @@ static void formulate2x2MixingMatrix_fx(
    ELSE
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    }
@@ -4335,9 +4276,7 @@ static void formulate2x2MixingMatrix_fx(
            a++;
        }
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
    }
@@ -4466,24 +4405,19 @@ static void formulate2x2MixingMatrix_fx(
#endif
    pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/


    IF( D_fx[0] == 0 )
    {
        temp = ONE_DIV_EPSILON_MANT; /* Result of 1.0/eps with full precision */
        move32();
        exp = ONE_DIV_EPSILON_EXP;
        move16();
#endif
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
#ifdef FIX_1326_SPEEDUP_17
        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[0], &exp );
#else

        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
#endif
        exp = sub( exp, sub( Q30, q_D ) );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
    }
    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    move32();
@@ -4514,13 +4448,7 @@ static void formulate2x2MixingMatrix_fx(
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
#ifdef FIX_1326_SPEEDUP_17
        temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, D_fx[1], &exp1 );
#else
        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
#endif
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp1 = sub( exp1, sub( Q30, q_D ) );
    }
    div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4535,17 +4463,6 @@ static void formulate2x2MixingMatrix_fx(


    // 1310720000 = 10,000.0f in Q17
#ifdef FIX_1326_SPEEDUP_09
    {
        Word16 shift1 = s_max( sub( Q17, q_div ), 0 );
        Word16 shift2 = s_max( sub( q_div, Q17 ), 0 );

        div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div
        move32();
        div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div
        move32();
    }
#else
    IF( LT_16( q_div, Q17 ) )
    {
        div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div
@@ -4562,7 +4479,6 @@ static void formulate2x2MixingMatrix_fx(
        q_div = Q17;
        move16();
    }
#endif

    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );

@@ -4579,11 +4495,7 @@ static void formulate2x2MixingMatrix_fx(
            W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
            IF( W_tmp != 0 )
            {
#ifdef FIX_1326_SPEEDUP_10
                hdrm_re[chA][chB] = W_norm( W_tmp );
#else
                hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 );
#endif
                move16();
                W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] );
                tmpRe_fx[chA][chB] = W_extract_h( W_tmp );
@@ -4710,13 +4622,7 @@ static void formulate2x2MixingMatrix_fx(
        {
            Word16 Pre_shift, Pim_shift;
            temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
            push_wmops( "formulate2x2MixingMatrix Division" );
#ifdef FIX_1326_SPEEDUP_17
            temp = BASOP_Util_Divide3232_Scale_cadence_1( ONE_IN_Q30, temp, &exp );
#else
            temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
#endif
            pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
            q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );

            Pre_shift = norm_l( Pre_fx[0][chB] );