Commit ed23bfb9 authored by Fabian Bauer, committed by Manuel Jander
Browse files

activate speedup 09 10 11 for testing

parent 84ba3767
Loading
Loading
Loading
Loading
+60 −20
Original line number Diff line number Diff line
@@ -45,16 +45,25 @@

#include "wmc_auto.h"

//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams //no occurrence in current bitstream
#define FIX_1326_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx :  .4 WMOPS
#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx //  .3 WMOPS
#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx //  .1 WMOPS
#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx //  .2 WMOPS
// MHZ NUMBERS:
// NULL: 179.292


//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams     //no occurrence       --> DONT USE
//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx                //  .2 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_05 // div->sqrt =>isqrt                // 3.5 WMOPS //Quite bad diffs  --> DONT USE
//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt //  ?  WMOPS //big diffs, no replacement of divSqrt, PIPELINE GREEN --> USE
#define FIX_1326_SPEEDUP_08 // "-"               // 3.0 WMOPS //small diffs, PIPELINE GREEN! -- > USE
#define FIX_1326_SPEEDUP_09 // Relocate matrixMul
//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                    
#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS 
#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS
//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                 --> DONT USE
//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS
//#define FIX_1326_SPEEDUP_14 // 
// Four-entry Word16 lookup table. The values are 32768/k for k = 1..4, truncated
// to an integer, with the k = 1 entry clamped to 32767 (the largest positive
// 16-bit value).
// NOTE(review): presumably 1/k in Q15 for k = 1..4 slots - confirm against the users of slot_fx.
Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };

/*-------------------------------------------------------------------------
@@ -2142,6 +2151,14 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
        tmp2 = L_add( L_shl( resultMtxRe_fx[0][0], exp ), L_shl( resultMtxRe_fx[1][1], exp ) );
        q_tmp2 = add( q_res, exp );
#ifdef FIX_1326_SPEEDUP_11
        {
            Word16 shift1 = s_max( 0, sub( q_tmp2, q_CrEne ) );
            Word16 shift2 = s_max( 0, sub( q_CrEne, q_tmp2 ) );
            realizedOutputEne_fx = L_add( L_shr( tmp1, shift2 ), L_shr( tmp2, shift1 ) );
            q_realizedOutputEne = s_min( q_CrEne, q_tmp2 );
        }
#else
        IF( LT_16( q_CrEne, q_tmp2 ) )
        {
            realizedOutputEne_fx = L_add( tmp1, L_shr( tmp2, sub( q_tmp2, q_CrEne ) ) );
@@ -2154,7 +2171,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            q_realizedOutputEne = q_tmp2;
            move16();
        }

#endif
        exp = sub( get_min_scalefactor( hDiracDecBin->ChEneOut_fx[0][bin], hDiracDecBin->ChEneOut_fx[1][bin] ), 1 );
        targetOutputEne_fx = L_add( L_shl( hDiracDecBin->ChEneOut_fx[0][bin], exp ), L_shl( hDiracDecBin->ChEneOut_fx[1][bin], exp ) );
        q_targetOutputEne = add( hDiracDecBin->q_ChEneOut, exp );
@@ -2177,9 +2194,17 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            q_missingOutputEne = q_targetOutputEne;
            move16();
        }

        tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 );

#ifdef FIX_1326_SPEEDUP_13
        {
            Word16 exp_temp;
            tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
            tmp2 = ISqrt32( tmp2, &exp_temp );
            gain_fx = Mpy_32_32(tmp2, Sqrt32(tmp1, &exp1));
            q_gain = sub( 31, add( exp_temp, exp1 ) );
        }
#else
        {
            Word16 exp_temp;
            tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
@@ -2188,6 +2213,8 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        }
        gain_fx = Sqrt32( tmp2, &exp2 );
        q_gain = sub( 31, exp2 );
#endif
     

        // 1073741824 = 4 in Q28
        IF( LT_16( q_gain, Q28 ) )
@@ -4749,17 +4776,11 @@ static void formulate2x2MixingMatrix_fx(


    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
#ifdef FIX_1326_SPEEDUP_09
    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
#endif
#else
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );

#ifdef FIX_1326_SPEEDUP_09
    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
#endif
#endif
    pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/

@@ -4849,7 +4870,19 @@ static void formulate2x2MixingMatrix_fx(
    div_fx[1] = L_shr( div_fx[1], sub( sub( 31, exp1 ), q_div ) ); // q_div
    move32();


    // 1310720000 = 10,000.0f in Q17
#ifdef FIX_1326_SPEEDUP_09
    {
        Word16 shift1 = s_max( sub( Q17, q_div ), 0 );
        Word16 shift2 = s_max( sub( q_div, Q17 ), 0 );

        div_fx[0] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[0], shift2 ) ); // q_div
        move32();
        div_fx[1] = L_min( L_shr( 1310720000, shift1 ), L_shr( div_fx[1], shift2 ) ); // q_div
        move32();
    }
#else
    IF( LT_16( q_div, Q17 ) )
    {
        div_fx[0] = L_min( L_shr( 1310720000, sub( Q17, q_div ) ), div_fx[0] ); // q_div
@@ -4866,10 +4899,9 @@ static void formulate2x2MixingMatrix_fx(
        q_div = Q17;
        move16();
    }
#endif

#ifndef FIX_1326_SPEEDUP_09
    matrixMul_fx( Are_fx, Aim_fx, &q_A, Ure_fx, Uim_fx, &q_U, tmpRe_fx, tmpIm_fx, &q_temp );
#endif

    exp = L_norm_arr( div_fx, BINAURAL_CHANNELS );
    scale_sig32( div_fx, BINAURAL_CHANNELS, exp );
@@ -4884,7 +4916,11 @@ static void formulate2x2MixingMatrix_fx(
            W_tmp = W_mult0_32_32( tmpRe_fx[chA][chB], div_fx[chB] );
            IF( W_tmp != 0 )
            {
#ifdef FIX_1326_SPEEDUP_10
                hdrm_re[chA][chB] = W_norm( W_tmp );
#else
                hdrm_re[chA][chB] = sub( W_norm( W_tmp ), 0 );
#endif
                move16();
                W_tmp = W_shl( W_tmp, hdrm_re[chA][chB] );
                tmpRe_fx[chA][chB] = W_extract_h( W_tmp );
@@ -4901,7 +4937,11 @@ static void formulate2x2MixingMatrix_fx(
            W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] );
            IF( W_tmp != 0 )
            {
#ifdef FIX_1326_SPEEDUP_10
                hdrm_im[chA][chB] = W_norm( W_tmp );
#else
                hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 );
#endif
                move16();
                W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] );
                tmpIm_fx[chA][chB] = W_extract_h( W_tmp );