Commit 40b6da87 authored by thomas dettbarn's avatar thomas dettbarn Committed by Sandesh Venkatesh
Browse files

inlining the matrix multiplication in...

inlining the matrix multiplication in ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx().
parent f48701fc
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@
//#define HARM_SCE_INIT
#define DIV32_OPT_NEWTON                               /* FhG: faster 32 by 32 bit division */ 
#define	MERGE_REQUEST_1378_SPEEDUP_ivas_mc_param_enc_fx_NONBE /* FhG: reduce WMOPS of Cy calculation in ivas_param_mc_param_est_enc_fx() by using 64 Bit addition. Obsoletes IMPROVE_HIGH_COMPLEXITY_PARAM_MC_PRM_EST_NONBE. */
#define	MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE /* FhG: reduce WMOPS by inlining the matrix multiplications for the smoothing operation. */
#define FIX_1439_SPEEDUP_Copy_Scale_sig_16_32_no_sat            /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_stereo_icBWE_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
#define FIX_1439_SPEEDUP_ivas_swb_tbe_dec_fx                    /*FhG: reduces WMOPS - bit-exact*/
+70 −0
Original line number Diff line number Diff line
@@ -30,6 +30,8 @@

*******************************************************************************************************/

#define	MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE

#include <stdint.h>
#include <string.h>
#include <stdio.h>
@@ -516,10 +518,14 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
    Word16 mixing_matrix_buffer_e;
    Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
    Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
#ifdef	MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE

#else
    Word32 output_f_real_fx[MAX_CICP_CHANNELS];
    Word32 output_f_imag_fx[MAX_CICP_CHANNELS];
    Word16 output_f_real_e;
    Word16 output_f_imag_e;
#endif
    Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
    Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];

@@ -527,8 +533,12 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(

    set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
    set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
#ifdef	MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE

#else
    set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS );
    set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS );
#endif
    set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
    set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );

@@ -592,6 +602,36 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                }

                /* apply residual mixing */
#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
		{	
			Word16 shifter;

			shifter=31-mixing_matrix_res_smooth_e;
			FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
			{
				int i;
				Word16 idx;
				Word64 temp_real, temp_imag;


				idx = ch_idx;
				temp_real = 0;
				temp_imag = 0;
				move64();
				move64();
				for (i=0;i<nY;i++)
				{
					temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) ); 
					temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) ); 
					idx = add(idx, nY );
				}
				Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
				Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
			}
		}


#else

                matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e );
                scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6
@@ -607,6 +647,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                    Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6
                    move32();
                }
#endif
            }
            ELSE
            {
@@ -630,6 +671,34 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
            }

            /* apply mixing matrix */
#ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
	{
            Word16 shifter;
            shifter=31-mixing_matrix_smooth_e;

	    FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
	    {
                int i;
                Word16 idx;
                Word64 temp_real, temp_imag;


                idx = ch_idx;
                temp_real = 0;
                temp_imag = 0;
                move64();
                move64();
                for (i=0;i<nX;i++)
                {
                    temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) ); 
                    temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) ); 
                    idx = add(idx, nY );
                }
                Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
                Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
	    }
	}
#else

            matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e );
            scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6
@@ -646,6 +715,7 @@ void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
                Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] );
                move32();
            }
#endif
        }
    }