Commit dd2dc9c9 authored by thomas dettbarn's avatar thomas dettbarn Committed by Sandesh Venkatesh
Browse files

Replaced the downmix channel floating-point code with 64-bit fixed point. WMOPS...

Replaced the downmix channel floating-point code with 64-bit fixed-point arithmetic. WMOPS went from total 150.00 263.631 365.398 357.230 to total 150.00 244.736 315.064 308.406 for ./IVAS_cod -mc 7_1_4 128000 48 scripts/testv/stv714MC48c.wav out.128
parent f48701fc
Loading
Loading
Loading
Loading
+103 −20
Original line number Diff line number Diff line
@@ -29,7 +29,8 @@
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

#define	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
/*
 * CONVERT64( x_64, y_fx, y_e )
 *
 * Normalizes the 64-bit value x_64 and splits it into a mantissa/exponent pair:
 *   y_fx = W_extract_h( W_shl( x_64, W_norm( x_64 ) ) )
 *   y_e  = 35 - gb - W_norm( x_64 )
 *
 * x_64 : 64-bit input expression; it is evaluated twice, so it must not have side effects.
 * y_fx : assignable lvalue receiving the mantissa.
 * y_e  : assignable lvalue receiving the exponent.
 *
 * `gb` is NOT a macro parameter: it is read from the scope in which the macro is expanded.
 * NOTE(review): `gb` presumably holds the number of guard bits of the filterbank output,
 * and the constant 35 presumably encodes the Q-format of the accumulated products - confirm
 * against the enclosing function.
 *
 * The body is wrapped in do { } while ( 0 ) so that `CONVERT64( ... );` behaves as a single
 * statement (e.g. as the unbraced branch of an if/else). The temporary uses a macro-private
 * name so that an argument expression mentioning a caller variable called `norm` is not captured.
 */
#define CONVERT64( x_64, y_fx, y_e )                                  \
    do                                                                \
    {                                                                 \
        Word16 convert64_norm_;                                       \
        convert64_norm_ = W_norm( ( x_64 ) );                         \
        ( y_fx ) = W_extract_h( W_shl( ( x_64 ), convert64_norm_ ) ); \
        ( y_e ) = sub( sub( 35, gb ), convert64_norm_ );              \
    } while ( 0 )
#include <math.h>
#include <assert.h>
#include "options.h"
@@ -649,10 +650,15 @@ static void ivas_param_mc_param_est_enc_fx(
    Word32 *p_slot_frame_f_real_fx[MAX_CICP_CHANNELS];                     /* Output of the MDFT FB - real part */
    Word32 *p_slot_frame_f_imag_fx[MAX_CICP_CHANNELS];                     /* Output of the MDFT FB - imag part */

#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
    Word64 dmx_real_64[PARAM_MC_MAX_TRANSPORT_CHANS];
    Word64 dmx_imag_64[PARAM_MC_MAX_TRANSPORT_CHANS];
#else
    Word32 dmx_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Real Part */
    Word16 dmx_real_e[PARAM_MC_MAX_TRANSPORT_CHANS];  /* Downmix channel - Real Part */
    Word32 dmx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS]; /* Downmix channel - Imag Part */
    Word16 dmx_imag_e[PARAM_MC_MAX_TRANSPORT_CHANS];  /* Downmix channel - Imag Part */
#endif
    Word32 a_fx, b_fx, c_fx, d_fx;                    /* Tmp complex values */
    Word16 a_e, b_e, c_e, d_e;                        /* Tmp complex values */
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
@@ -774,6 +780,26 @@ static void ivas_param_mc_param_est_enc_fx(

                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                    Word64 real_64;
                    Word64 imag_64;

                    real_64 = 0;
                    imag_64 = 0;
                    move64();
                    move64();
                    FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ )
                    {
                        real_64 = W_add( real_64, W_mult0_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) );
                        imag_64 = W_add( imag_64, W_mult0_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) );
                        p_dmx_fac_fx++;
                    }
                    dmx_real_64[ch_idx1] = real_64;
                    dmx_imag_64[ch_idx1] = imag_64;
                    move64();
                    move64();

#else
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE
                    dmx_real_fx[ch_idx1] = 0;
                    move32();
@@ -814,14 +840,33 @@ static void ivas_param_mc_param_est_enc_fx(
                    move16();
                    move32();
                    move16();
#endif

#endif
                }

                /* Cx for transport channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                    CONVERT64(dmx_real_64[ch_idx1], a_fx, a_e );
                    CONVERT64(dmx_imag_64[ch_idx1], b_fx, b_e );
                    move32();
                    move32();
                    move16();
                    move16();
#endif
                    FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 )
                    {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                        CONVERT64(dmx_real_64[ch_idx2], c_fx, c_e );
                        CONVERT64(dmx_imag_64[ch_idx2], d_fx, d_e );
                        move32();
                        move32();
                        move16();
                        move16();

#else
                        a_fx = dmx_real_fx[ch_idx1];
                        move32();
                        a_e = dmx_real_e[ch_idx1];
@@ -838,6 +883,7 @@ static void ivas_param_mc_param_est_enc_fx(
                        move32();
                        d_e = dmx_imag_e[ch_idx2];
                        move16();
#endif

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
@@ -858,7 +904,6 @@ static void ivas_param_mc_param_est_enc_fx(
                    move32();
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
                        Word16 norm;
                        c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band];
                        d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band];
                        move32();
@@ -870,10 +915,6 @@ static void ivas_param_mc_param_est_enc_fx(
                                                                                  W_sub( W_mult0_32_32( a_fx, d_fx ), W_mult0_32_32( b_fx, c_fx ) ) );
                        move64();

                        // convert the 64 bit fixpoint back into the 48 bit float format
                        norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                        Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                        Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm );
                    }
                }
#else
@@ -953,6 +994,26 @@ static void ivas_param_mc_param_est_enc_fx(

                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                    Word64 real_64;
                    Word64 imag_64;

                    real_64 = 0;
                    imag_64 = 0;
                    move64();
                    move64();

                    FOR( inp_ch = 0; inp_ch < nchan_input; inp_ch++ )
                    {
                        real_64 = W_add( real_64, W_mult0_32_32( slot_frame_f_real_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) );
                        imag_64 = W_add( imag_64, W_mult0_32_32( slot_frame_f_imag_fx[inp_ch][cur_cldfb_band], ( *p_dmx_fac_fx ) ) );
                        p_dmx_fac_fx++;
                    }
                    dmx_real_64[ch_idx1] = real_64;
                    dmx_imag_64[ch_idx1] = imag_64;
                    move64();
                    move64();
#else
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE
                    dmx_real_fx[ch_idx1] = 0;
                    move32();
@@ -997,12 +1058,22 @@ static void ivas_param_mc_param_est_enc_fx(
                    move32();
                    dmx_imag_e[ch_idx1] = imag_e;
                    move16();
#endif

#endif
                }

                /* Cx for transport channels */
                FOR( ch_idx1 = 0; ch_idx1 < nchan_transport; ++ch_idx1 )
                {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                    CONVERT64( dmx_real_64[ch_idx1], a_fx, a_e );
                    CONVERT64( dmx_imag_64[ch_idx1], b_fx, b_e );
                    move32();
                    move32();
                    move16();
                    move16();
#else
#ifdef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE
                    a_fx = dmx_real_fx[ch_idx1];
                    move32();
@@ -1012,9 +1083,24 @@ static void ivas_param_mc_param_est_enc_fx(
                    move32();
                    b_e = dmx_imag_e[ch_idx1];
                    move16();
#endif

#endif
                    FOR( ch_idx2 = 0; ch_idx2 < nchan_transport; ++ch_idx2 )
                    {
#ifdef	MERGE_REQUEST_1472_SPEEDUIP_ivas_mc_param_enc_fx_NONBE
                        CONVERT64( dmx_real_64[ch_idx2], c_fx, c_e );
                        CONVERT64( dmx_imag_64[ch_idx2], d_fx, d_e );
                        move32();
                        move32();
                        move16();
                        move16();

                        /* (a-ib)(c+id) = ac + bd + i(ad-bc) */
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, c_fx ), add( a_e, c_e ), Mpy_32_32( b_fx, d_fx ), add( b_e, d_e ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#else
#ifndef IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_BE
                        a_fx = dmx_real_fx[ch_idx1];
                        move32();
@@ -1042,6 +1128,7 @@ static void ivas_param_mc_param_est_enc_fx(
                        L_tmp = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, dmx_real_fx[ch_idx2] ), add( a_e, dmx_real_e[ch_idx2] ), Mpy_32_32( b_fx, dmx_imag_fx[ch_idx2] ), add( b_e, dmx_imag_e[ch_idx2] ), &tmp_e );
                        Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2] = BASOP_Util_Add_Mant32Exp( Cx_sum_fx[cur_param_band][ch_idx1][ch_idx2], Cx_sum_e[cur_param_band][ch_idx1][ch_idx2], L_tmp, tmp_e,
                                                                                                &Cx_sum_e[cur_param_band][ch_idx1][ch_idx2] );
#endif
#endif
                        move32();
                    }
@@ -1075,7 +1162,6 @@ static void ivas_param_mc_param_est_enc_fx(
                    move32();
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
                    {
                        Word16 norm;
                        c_fx = slot_frame_f_real_fx[ch_idx2][cur_cldfb_band];
                        d_fx = slot_frame_f_imag_fx[ch_idx2][cur_cldfb_band];
                        move32();
@@ -1084,10 +1170,6 @@ static void ivas_param_mc_param_est_enc_fx(
                        Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] = W_add( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2],
                                                                                  W_add( W_mult0_32_32( a_fx, c_fx ), W_mult0_32_32( b_fx, d_fx ) ) );
                        move64();
                        // convert the 64 bit fixpoint back into the 48 bit float format
                        norm = W_norm( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2] );
                        Cy_sum_fx[cur_param_band][ch_idx1][ch_idx2] = W_extract_h( W_shl( Cy_sum_real_64[cur_param_band][ch_idx1][ch_idx2], norm ) );
                        Cy_sum_e[cur_param_band][ch_idx1][ch_idx2] = sub( sub( 62, gb ), norm );
                    }
#else
                    FOR( ch_idx2 = ch_idx1; ch_idx2 < nchan_input; ++ch_idx2 )
@@ -1135,14 +1217,6 @@ static void ivas_param_mc_param_est_enc_fx(
        {
            FOR( ch_idx1 = 0; ch_idx1 < nchan_input; ++ch_idx1 )
            {
                Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move16();
                Cy_sum_fx[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move32();
                Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move16();
#ifdef MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE
                Cy_sum_real_64[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move64();
@@ -1153,6 +1227,14 @@ static void ivas_param_mc_param_est_enc_fx(
                Cy_sum_imag_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move64();
#else
                Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move16();
                Cy_sum_fx[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move32();
                Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move16();
                Cy_sum_imag_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_imag_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
@@ -1174,7 +1256,7 @@ static void ivas_param_mc_param_est_enc_fx(
                move64();
                Cy_sum_real_64[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move64();
#endif
#else
                Cy_sum_fx[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
                move32();
                Cy_sum_e[cur_param_band][hParamMC->lfe_index][ch_idx1] = 0;
@@ -1183,6 +1265,7 @@ static void ivas_param_mc_param_est_enc_fx(
                move32();
                Cy_sum_e[cur_param_band][ch_idx1][hParamMC->lfe_index] = 0;
                move16();
#endif
            }
        }
    }