Commit f8db9100, authored by Fabian Bauer, committed by Manuel Jander
Browse files

- added some wmops push/pop instrumentation

- added FIX_xxxx_SPEEDUP_00: not implemented, no occurrence in the current bitstreams
- added FIX_xxxx_SPEEDUP_01: not implemented yet
parent 1394fcfe
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -1010,6 +1010,7 @@ Word32 div_w( Word32 L_num, Word32 L_den )
    }
}


Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
{
    Word32 z;
@@ -1017,6 +1018,8 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
    Word16 sy;
    Word32 sign;

    //push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );

    /* assert (x >= (Word32)0); */
    assert( y != (Word32) 0 );

@@ -1038,6 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
    IF( x == (Word32) 0 )
    {
        *s = 0;
        //pop_wmops();
        return ( (Word32) 0 );
    }

@@ -1058,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
    {
        z = L_negate( z );
    }

    //pop_wmops();
    return z;
}

+1 −1
Original line number Diff line number Diff line
@@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx(
    move16();
    SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom;

    push_wmops( "ivas_dec_render" );
    push_wmops( "ivas_dec_render (IDR)" );
    /*----------------------------------------------------------------*
     * Initialization of local vars after struct has been set
     *----------------------------------------------------------------*/
+68 −4
Original line number Diff line number Diff line
@@ -45,6 +45,9 @@

#include "wmc_auto.h"

//#define FIX_xxxx_SPEEDUP_00 // make sqrt(1) a const - catch bitstreams // no occurrence in current bitstream
//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx : unroll loop in mul, only 3 out of 4 results are needed - maybe a=b can also benefit

/* Table of four Word16 constants. The entries equal floor( 32768 / n ) for n = 1..4,
 * with the first entry capped at 32767 (presumably 1/n in Q15 - TODO confirm against
 * the users of slot_fx). */
Word16 slot_fx[4] = {
    32767,
    16384,
    10922,
    8192
};

/*-------------------------------------------------------------------------
@@ -504,8 +507,9 @@ void ivas_dirac_dec_binaural_render_fx(
    FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
    {
        Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
        push_wmops( "IDR binaural internal (IDRBI)" );
        ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );

        pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/
        FOR( ch = 0; ch < nchan_out; ch++ )
        {
            output_fx_local[ch] += n_samples_sf;
@@ -708,6 +712,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
        }
    }
    /* CLDFB Analysis of input */
    push_wmops( "IDRBI CLDFB ANALYSYS" );
    FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
    {
        FOR( ch = 0; ch < numInChannels; ch++ )
@@ -857,6 +862,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
            }
        }
    }
    pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/

    test();
    IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) || EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) )
@@ -921,7 +927,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
    }

    test();
    push_wmops( "IDRBI cov matrices" );
    ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
    pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/

    IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
    {
@@ -959,7 +967,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
        move16();
    }

    push_wmops( "IDRBI proc matrices (IRDBI pm)" );
    ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData );
    pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/

    q_inp = Q6;
    move16();
@@ -1005,8 +1015,10 @@ static void ivas_dirac_dec_binaural_internal_fx(
    hDiracDecBin->q_processMtxDecPrev = q_mat;
    move16();

    push_wmops( "IDRBI processOutput" );
    ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );

    pop_wmops(); /*push_wmops( "IDRBI processOutput" );
    */
    hDiracDecBin->hDiffuseDist = NULL;

    hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] );
@@ -1843,6 +1855,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        move16();
    }

    push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );
    FOR( bin = 0; bin < nBins; bin++ )
    {
        Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx;
@@ -1866,6 +1879,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        q_CrEne = Q31;
        move16();

        push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );
        IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) )
        {
            hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) );
@@ -1935,7 +1949,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                                     hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin],
                                     hDiracDecBin->q_ChCrossOut,
                                     prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx );
        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/

        push_wmops( "IDRBI pm LOOP1 sec B" );
        IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) )
        {
            CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin];
@@ -1989,9 +2005,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
#endif
            resultMtxRe_fx, resultMtxIm_fx, &q_res );

        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/

        /* When below the frequency limit where decorrelation is applied, we inject the decorrelated
         * residual (or missing) signal component. The procedure is active when there is not enough independent
         * signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */

        push_wmops( "IDRBI pm LOOP1 sec C" );
        IF( LT_16( bin, max_band_decorr ) )
        {
            Word32 decorrelationReductionFactor_fx;
@@ -2107,7 +2127,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            q_Mdec = Q31;
            move16();
        }
        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/

        push_wmops( "IDRBI pm LOOP1 sec D" );
        /* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */
        tmp1 = L_add( CrEneL_fx, CrEneR_fx );
        exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
@@ -2198,6 +2220,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        q_processMtxDec_bin = q_processMtxDec[bin];
        move16();
        move16();

        /* Store processing matrices */
        FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
        {
@@ -2232,7 +2255,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
        move16();
        q_processMtxDec[bin] = sub( q_Mdec, 16 );
        move16();
        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/


        push_wmops( "IDRBI pm LOOP1 sec E" );
        IF( separateCenterChannelRendering )
        {
            /* The rendering of the separate center channel in masa + mono mode.
@@ -2322,7 +2348,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
                }
            }
        }
        pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/
    }
    pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/

    /* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
    minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
    minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev );
@@ -2342,6 +2371,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
    minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
    minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );

    push_wmops( "IRDBI pm LOOP2" );
    FOR( bin = 0; bin < nBins; bin++ )
    {
        FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -2381,6 +2411,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
            }
        }
    }
    pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/

    return;
}
@@ -4354,7 +4385,9 @@ static void formulate2x2MixingMatrix_fx(
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
        maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
        q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
    }
    exp = norm_l( maxEneDiv_fx );
@@ -4377,9 +4410,12 @@ static void formulate2x2MixingMatrix_fx(
    Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
    q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );

    push_wmops( "formulate2x2MixingMatrix cholesky" );
    /* Cholesky decomposition of target / output covariance matrix */
    chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/

    push_wmops( "formulate2x2MixingMatrix Eigendecomp" );
    /* Eigendecomposition of input covariance matrix */
    eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );

@@ -4397,7 +4433,9 @@ static void formulate2x2MixingMatrix_fx(
    move32();

    matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx );
    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/

    push_wmops( "formulate2x2MixingMatrix RegSMInv" );
    /* Regularize the diagonal Sx for matrix inversion */
    Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) );
    Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) );
@@ -4432,8 +4470,9 @@ static void formulate2x2MixingMatrix_fx(
    ELSE
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
        Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
@@ -4469,8 +4508,9 @@ static void formulate2x2MixingMatrix_fx(
    ELSE
    {
        temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
        Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4487,7 +4527,9 @@ static void formulate2x2MixingMatrix_fx(
    move32();
    Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
    move32();
    pop_wmops();

    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );
    /* Matrix multiplication, tmp = Ky' * G_hat * Q */
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
    {
@@ -4513,17 +4555,29 @@ static void formulate2x2MixingMatrix_fx(
            move32();
        }
    }
    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q" );*/

    q_temp = sub( add( q_ky, q_GhatQ ), 31 );

    push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );
    /* A = Ky' * G_hat * Q * Kx (see publication) */
    matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
    pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL K*Ghat*Q*Kx" );*/

    push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
    /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
       For matrix A that is P = A(A'A)^(-0.5) */
    push_wmops( "oPtoA MT1M" );
#ifdef FIX_xxxx_SPEEDUP_01
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
#else
    matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

    eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
#endif
    pop_wmops();/*push_wmops( "oPtoA MT1M" );*/

    IF( D_fx[0] == 0 )
    {
@@ -4537,8 +4591,10 @@ static void formulate2x2MixingMatrix_fx(
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
        exp = sub( exp, sub( Q30, q_D ) );
        pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
    }
    div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
    move32();
@@ -4555,7 +4611,9 @@ static void formulate2x2MixingMatrix_fx(
    }
    ELSE
    {
        push_wmops( "formulate2x2MixingMatrix Division" );
        temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
        pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
        exp1 = sub( exp1, sub( Q30, q_D ) );
    }
    div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
@@ -4657,7 +4715,9 @@ static void formulate2x2MixingMatrix_fx(
                         0 /*int Bscale*/,
#endif
                         Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/

    push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
    /* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
#if ( BINAURAL_CHANNELS != 2 )
    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
@@ -4740,7 +4800,9 @@ static void formulate2x2MixingMatrix_fx(
        {
            Word16 Pre_shift, Pim_shift;
            temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
            push_wmops( "formulate2x2MixingMatrix Division" );
            temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
            pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
            q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );

            Pre_shift = norm_l( Pre_fx[0][chB] );
@@ -4811,6 +4873,8 @@ static void formulate2x2MixingMatrix_fx(
                         0 /*int Bscale*/,
#endif
                         Mre_fx, Mim_fx, q_M );
    pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/

    return;
}