Commit 2986c80d authored by Fabian Bauer's avatar Fabian Bauer Committed by Sandesh Venkatesh
Browse files

Deactivate Speedup 14, activate Speedup 13 for testing

parent c6d0d7da
Loading
Loading
Loading
Loading
+61 −14
Original line number Diff line number Diff line
@@ -49,7 +49,6 @@
// NULL: 179.292


//#define FIX_1326_SPEEDUP_00 //make sqrt(1) a const - catch bitstreams     //no occurrence       --> DONT USE
//#define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx  //  .4 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx                //  .3 WMOPS                    --> USE
//#define FIX_1326_SPEEDUP_03 // speedup eig2x2_fx                //  .1 WMOPS                    --> USE
@@ -58,12 +57,18 @@
//#define FIX_1326_SPEEDUP_06 // div->sqrt =>isqrt                // 3.0 WMOPS //Quite bad diffs  --> DONT USE
//#define FIX_1326_SPEEDUP_07 // div->sqrt =>isqrt                //  0  WMOPS                    --> DONT USE
//#define FIX_1326_SPEEDUP_08 // "-"                              // 3.0 WMOPS //small diffs      --> USE
//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS                      --> USE? (pipe 48851 fails --> DONTUSEYET)
//#define FIX_1326_SPEEDUP_09 // tiny speedup                     //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_10 // tiny speedup                    //   .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_11  // tiny speedup                    //  .1 WMOPS   pipe 48851 fails --> DONTUSEYET
//#define FIX_1326_SPEEDUP_12 // tiny speedup                     //  <.1 WMOPS                   --> DONTUSE
//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  -->USE? (pipe coming)
#define FIX_1326_SPEEDUP_14 // test
//#define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt          //   2.9 WMOPS                  --> USE? (pipe tbd)
//#define FIX_1326_SPEEDUP_14 // test whether any of these paths is really necessary, then assert --> DONTUSE (pipes red, asserts!)
//#define FIX_1326_SPEEDUP_15 // replace Ladd(Mpy) -> Madd          // .1 WMOPS                   --> USE? (pipe tbd)
//#define FIX_1326_SPEEDUP_16 // tiny speedup like 04              // .18 WMOPS                     --> USE? (pipe tbd)




/* Four-entry Word16 lookup table. The entries equal 32768/1, 32768/2, 32768/3 and 32768/4
 * (the first clamped to 32767, the third truncated to 10922), i.e. presumably 1/n in Q15
 * for n = 1..4 -- TODO confirm the Q format and the index convention against the users
 * of slot_fx, which are not visible here. */
Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };

/*-------------------------------------------------------------------------
@@ -943,9 +948,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
    }

    test();
    push_wmops( "IDRBI cov matrices" );
    push_wmops( "IDRBI cov matrices (IDRBCM)" );
    ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
    pop_wmops(); /*push_wmops( "IDRBI cov matrices" );*/
    pop_wmops(); /*push_wmops( "IDRBI cov matrices (IDRBCM)" );*/

    IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
    {
@@ -1168,7 +1173,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

    nBins = hSpatParamRendCom->num_freq_bands; /* Actually bins */
    move16();

    push_wmops( "IDRBCM inits" );
    q_earlyPartEneCorrection = s_min( Q31, add( getScaleFactor32( hDiracDecBin->earlyPartEneCorrection_fx, nBins ), hDiracDecBin->q_earlyPartEneCorrection ) );
    scale_sig32( hDiracDecBin->earlyPartEneCorrection_fx, nBins, sub( q_earlyPartEneCorrection, hDiracDecBin->q_earlyPartEneCorrection ) );
    hDiracDecBin->q_earlyPartEneCorrection = q_earlyPartEneCorrection;
@@ -1202,6 +1207,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
        gainCache[idx].azi = -1000; /* Use -1000 as value for uninitialized cache. */
        move16();
    }
    pop_wmops(); /*push_wmops( "IDRBCM inits" );*/

    /* Determine EQ for low bit rates (13.2 and 16.4 kbps) */
    applyLowBitRateEQ = 0;
@@ -1214,11 +1220,13 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
        move16();
        IF( EQ_32( ivas_total_brate, IVAS_16k4 ) )
        {
            push_wmops( "IDRBCM Determine EQ_low_rates" );
            FOR( bin = 0; bin < LOW_BIT_RATE_BINAURAL_EQ_BINS; bin++ )
            {
                lowBitRateEQ_fx[bin + LOW_BIT_RATE_BINAURAL_EQ_OFFSET] = L_add( L_shr( lowBitRateBinauralEQ_fx[bin], 1 ), ONE_IN_Q30 ); // Q31
                move32();
            }
            pop_wmops(); /*push_wmops( "IDRBCM Determine EQ_low_rates" );*/
        }
        ELSE
        {
@@ -1237,6 +1245,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

    exp = sub( 63, shl( q, 1 ) ); // exp for the energy (inRe_fx * inRe_fx + inIm_fx * inIm_fx) computed below

    push_wmops( "IDRBCM input Matrix" );
    /* Calculate input covariance matrix */
    FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
    {
@@ -1271,7 +1280,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            move32();
        }
    }
    pop_wmops(); /*push_wmops( "IDRBCM input Matrix" );*/

    push_wmops( "IDRBCM apply EQ_low" );
    /* Apply EQ at low bit rates */
    IF( applyLowBitRateEQ != 0 )
    {
@@ -1324,7 +1335,9 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            }
        }
    }
    pop_wmops(); /*push_wmops( "IDRBCM apply EQ_low" );*/

    push_wmops( "IDRBCM target matrix" );
    /* Determine target covariance matrix containing target binaural properties */
    FOR( bin = 0; bin < nBins; bin++ )
    {
@@ -1484,12 +1497,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                Word32 hrtfEneCenter_fx, hrtfEneSides_fx, hrtfEneRealized_fx;
                Word16 eneCorrectionFactor_fx, eneCorrectionFactor_e;
                Word16 w1_fx, w2_fx, w3_fx, eq_fx;

#ifdef FIX_1326_SPEEDUP_15
                hrtfEneCenter_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ), lImagp_fx, lImagp_fx ), lRealp_fx, lRealp_fx ); //Q25
#else
                hrtfEneCenter_fx = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ),                     // Q25
                                          L_add( Mpy_32_32( lImagp_fx, lImagp_fx ),              // Q25
                                                 L_add( Mpy_32_32( rRealp_fx, rRealp_fx ),       // Q25
                                                        Mpy_32_32( rImagp_fx, rImagp_fx ) ) ) ); // Q25

#endif
                /* Spread coherence is synthesized as coherent sources at 30 degree horizontal spacing.
                 * The following formulas determine the gains for these sources.
                 * spreadCoh = 0: Only panning
@@ -1518,11 +1533,14 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric

                /* Apply the gain for the left source of the three coherent sources */
                getDirectPartGains_fx( bin, add( aziDeg, 30 ), eleDeg, &lRealpTmp_fx, &lImagpTmp_fx, &rRealpTmp_fx, &rImagpTmp_fx, hDiracDecBin->renderStereoOutputInsteadOfBinaural, Rmat_fx, &gainCache[gainCacheBaseIndex + 1], isHeadtracked );

#ifdef FIX_1326_SPEEDUP_15
                hrtfEneSides_fx = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ), rImagpTmp_fx, rImagpTmp_fx ), lImagpTmp_fx, lImagpTmp_fx ), lRealpTmp_fx, lRealpTmp_fx ); // Q25
#else
                hrtfEneSides_fx = L_add( Mpy_32_32( lRealpTmp_fx, lRealpTmp_fx ),                     // Q25
                                         L_add( Mpy_32_32( lImagpTmp_fx, lImagpTmp_fx ),              // Q25
                                                L_add( Mpy_32_32( rRealpTmp_fx, rRealpTmp_fx ),       // Q25
                                                       Mpy_32_32( rImagpTmp_fx, rImagpTmp_fx ) ) ) ); // Q25
#endif
                lRealp_fx = L_add( lRealp_fx, Mpy_32_32( sidesMul_fx, lRealpTmp_fx ) );               // Q25
                lImagp_fx = L_add( lImagp_fx, Mpy_32_32( sidesMul_fx, lImagpTmp_fx ) );               // Q25
                rRealp_fx = L_add( rRealp_fx, Mpy_32_32( sidesMul_fx, rRealpTmp_fx ) );               // Q25
@@ -1610,12 +1628,21 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
                move16();
            }

#ifdef FIX_1326_SPEEDUP_15
            hrtfEne_fx[0] = Madd_32_32( Mpy_32_32( lRealp_fx, lRealp_fx ), lImagp_fx, lImagp_fx ); // Q( 2*q_lr - 31 )
            hrtfEne_fx[1] = Madd_32_32( Mpy_32_32( rRealp_fx, rRealp_fx ), rImagp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
            move32();
            move32();
            hrtfCrossRe_fx = Madd_32_32( Mpy_32_32( lRealp_fx, rRealp_fx ), lImagp_fx, rImagp_fx );  // Q( 2*q_lr - 31 )
            hrtfCrossIm_fx = Madd_32_32( Mpy_32_32( -lImagp_fx, rRealp_fx ), lRealp_fx, rImagp_fx ); // Q( 2*q_lr - 31 )
#else
            hrtfEne_fx[0] = L_add( Mpy_32_32( lRealp_fx, lRealp_fx ), Mpy_32_32( lImagp_fx, lImagp_fx ) ); // Q( 2*q_lr - 31 )
            hrtfEne_fx[1] = L_add( Mpy_32_32( rRealp_fx, rRealp_fx ), Mpy_32_32( rImagp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
            move32();
            move32();
            hrtfCrossRe_fx = L_add( Mpy_32_32( lRealp_fx, rRealp_fx ), Mpy_32_32( lImagp_fx, rImagp_fx ) );  // Q( 2*q_lr - 31 )
            hrtfCrossIm_fx = L_add( Mpy_32_32( -lImagp_fx, rRealp_fx ), Mpy_32_32( lRealp_fx, rImagp_fx ) ); // Q( 2*q_lr - 31 )
#endif

            /* Add direct part (1 or 2) covariance matrix */
            dirEne_fx = Mpy_32_32( ratio_fx, meanEnePerCh_fx ); // Q(q_meanEnePerCh - 1)
@@ -1690,7 +1717,11 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
            }
            ELSE
            {
#ifdef FIX_1326_SPEEDUP_15
                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( Madd_32_16( L_shl( surCoh_fx, 16 ), hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
#else
                hDiracDecBin->ChCrossReOut_fx[bin] = BASOP_Util_Add_Mant32Exp( hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossReOut_e[bin], Mpy_32_32( L_add( Mpy_32_16_1( hDiracDecBin->diffuseFieldCoherence_fx[bin], sub( 32767, surCoh_fx ) ), L_shl( surCoh_fx, 16 ) ), diffEne_fx ), sub( 31, q_diffEne ), &hDiracDecBin->ChCrossReOut_e[bin] );
#endif
            }
            move32();
        }
@@ -1706,6 +1737,7 @@ static void ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matric
        hDiracDecBin->frameMeanDiffuseness_fx[bin] = L_shl( frameMeanDiffuseness, sub( exp, 2 ) ); // Q29
        move32();
    }
    pop_wmops();/*push_wmops( "IDRBCM target matrix" );*/

    test();
    /* Temporal IIR-type smoothing of covariance matrices. Also apply encoding quality based smoothing factor. */
@@ -3435,10 +3467,25 @@ static void eig2x2_fx(
            tmp2 = Mpy_32_32( s_fx, s_fx );
            q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 );


#ifdef FIX_1326_SPEEDUP_16

            {
                Word16 tmp2_exp;
                Word32 eps_tmp;
                tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp );

                //Add epsilon if relevant
                eps_tmp = L_shl_sat( epsilon_mant, sub(epsilon_exp, tmp2_exp ));
                tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) );

                exp_tmp3 = add( tmp2_exp, 1 );
            } 
#else
            tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 );
            q_tmp2 = sub( 31, q_tmp2 );

            tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 );
#endif

#if 1
            tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp );