Commit 4e3b6f90 authored by thomas dettbarn's avatar thomas dettbarn
Browse files

Merge branch 'main' into 1519-improve-wmops-performance-of-ivas_param_mc_param_est_enc_fx-step-2

parents 38547d2e 7225ebdc
Loading
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@
#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 
#define	MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64-bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */
#define MERGE_REQUEST_1472_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of dmx calculation in ivas_param_mc_param_est_enc_fx() by using 64-bit addition. Requires MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE. */
#define	MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE /* FhG: reduce WMOPS by inlining the matrix multiplications for the smoothing operation. */
#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat            /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
+72 −0
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@

*******************************************************************************************************/

/* NOTE(review): file-local definition of this switch, placed ahead of the includes.
 * If the switch is also provided by a shared switch header that this file includes,
 * this copy is redundant and looks like a debugging leftover - confirm and remove. */
#define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE

#include <stdint.h>
#include <string.h>
#include <stdio.h>
@@ -516,10 +518,14 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
    Word16 mixing_matrix_buffer_e;
    Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
    Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
#ifndef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
    /* Output vectors and exponents filled by matrix_product_mant_exp_fx();
     * only needed when the matrix products are not inlined. */
    Word32 output_f_real_fx[MAX_CICP_CHANNELS];
    Word32 output_f_imag_fx[MAX_CICP_CHANNELS];
    Word16 output_f_real_e;
    Word16 output_f_imag_e;
#endif
    Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
    Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];

@@ -527,8 +533,12 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(

    set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
    set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
#ifndef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
    /* Clear the intermediate output vectors of the non-inlined matrix products. */
    set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS );
    set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS );
#endif
    set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
    set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );

@@ -592,6 +602,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                }

                /* apply residual mixing */
#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
                {
                    /* Inlined nY x nY matrix-vector product of the smoothed residual mixing
                     * matrix with the diff signal. It replaces the
                     * matrix_product_mant_exp_fx() + scale_sig32() sequence of the #else branch
                     * and writes the result straight into the CLDFB buffers. */
                    Word16 shifter;

                    /* Right shift applied to the 64-bit accumulators. Assuming the matrix
                     * mantissas are in Q(31 - mixing_matrix_res_smooth_e), this removes the
                     * matrix scaling so the result keeps the Q of diff_f_*_fx (presumably Q6,
                     * matching the exponent 25 and the "Q6" note of the #else branch). */
                    shifter = 31 - mixing_matrix_res_smooth_e;
                    FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
                    {
                        Word16 i;
                        Word16 idx;
                        Word64 temp_real, temp_imag;

                        /* Coefficients belonging to output channel ch_idx start at ch_idx and
                         * are nY entries apart in the matrix buffer. */
                        idx = ch_idx;
                        temp_real = 0;
                        temp_imag = 0;
                        move64();
                        move64();
                        /* Same FOR macro as the enclosing loop, so the loop is visible to the
                         * complexity instrumentation. */
                        FOR( i = 0; i < nY; i++ )
                        {
                            /* Accumulate in 64 bit; rescaling happens once after the loop. */
                            temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
                            temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
                            idx += nY;
                        }
                        /* NOTE(review): W_extract_l() keeps the low 32 bits without saturation -
                         * confirm the shifted sums always fit into 32 bit. */
                        Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
                        move32();
                        Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
                        move32();
                    }
                }


#else

                matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e );
                scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6
@@ -607,6 +647,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                    Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6
                    move32();
                }
#endif
            }
            ELSE
            {
@@ -630,6 +671,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
            }

            /* apply mixing matrix */
#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
            {
                /* Inlined nY x nX matrix-vector product of the smoothed mixing matrix with
                 * the input signal. It replaces the matrix_product_mant_exp_fx() +
                 * scale_sig32() sequence of the #else branch and adds the result to the
                 * values already held in the CLDFB buffers. */
                Word16 shifter;

                /* Right shift applied to the 64-bit accumulators. Assuming the matrix
                 * mantissas are in Q(31 - mixing_matrix_smooth_e), this removes the matrix
                 * scaling so the result keeps the Q of input_f_*_fx (presumably Q6, matching
                 * the exponent 25 and the "Q6" note of the #else branch). */
                shifter = 31 - mixing_matrix_smooth_e;

                FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
                {
                    Word16 i;
                    Word16 idx;
                    Word64 temp_real, temp_imag;

                    /* Coefficients belonging to output channel ch_idx start at ch_idx and
                     * are nY entries apart in the matrix buffer. */
                    idx = ch_idx;
                    temp_real = 0;
                    temp_imag = 0;
                    move64();
                    move64();
                    /* Same FOR macro as the enclosing loop, so the loop is visible to the
                     * complexity instrumentation. */
                    FOR( i = 0; i < nX; i++ )
                    {
                        /* Accumulate in 64 bit; rescaling happens once after the loop. */
                        temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
                        temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
                        idx += nY;
                    }
                    /* NOTE(review): W_extract_l() keeps the low 32 bits without saturation -
                     * confirm the shifted sums always fit into 32 bit before the L_add(). */
                    Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
                    move32();
                    Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
                    move32();
                }
            }
#else

            matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e );
            scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6
@@ -646,6 +717,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] );
                move32();
            }
#endif
        }
    }