Commit f8db9100 authored Feb 27, 2025 by Fabian Bauer Committed by Manuel Jander Mar 24, 2025

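- added some wmops push/pop instrumentation

- added FIX_xxxx_SPEEDUP_00 (make sqrt(1) a constant): not implemented, no current bitstream triggers this case
- added FIX_xxxx_SPEEDUP_01 (optimize matrixTransp1Mul_fx -> eig2x2_fx): not implemented yet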

parent 1394fcfe

lib_com/basop_util.c

+5 −1

Original line number	Diff line number	Diff line
		@@ -1010,6 +1010,7 @@ Word32 div_w( Word32 L_num, Word32 L_den )
		}
		}


		Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
		{
		Word32 z;
		@@ -1017,6 +1018,8 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
		Word16 sy;
		Word32 sign;

		//push_wmops( "BASOP_Util_Divide3232_Scale_cadence" );

		/* assert (x >= (Word32)0); */
		assert( y != (Word32) 0 );

		@@ -1038,6 +1041,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
		IF( x == (Word32) 0 )
		{
		*s = 0;
		//pop_wmops();
		return ( (Word32) 0 );
		}

		@@ -1058,7 +1062,7 @@ Word32 BASOP_Util_Divide3232_Scale_cadence( Word32 x, Word32 y, Word16 *s )
		{
		z = L_negate( z );
		}

		//pop_wmops();
		return z;
		}

lib_dec/ivas_jbm_dec_fx.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -1875,7 +1875,7 @@ ivas_error ivas_jbm_dec_render_fx(
		move16();
		SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom;

		push_wmops( "ivas_dec_render" );
		push_wmops( "ivas_dec_render (IDR)" );
		/*----------------------------------------------------------------*
		 * Initialization of local vars after struct has been set
		 *----------------------------------------------------------------*/

lib_rend/ivas_dirac_dec_binaural_functions_fx.c

+68 −4

Original line number	Diff line number	Diff line
		@@ -45,6 +45,9 @@

		#include "wmc_auto.h"

		//#define FIX_xxxx_SPEEDUP_00 // make sqrt(1) a const - catch bitstreams // no occurrence in current bitstream
		//#define FIX_xxxx_SPEEDUP_01 // optimize matrixTransp1Mul_fx -> eig2x2_fx: unroll the loop in mul, only 3 out of 4 results are needed - maybe the a=b case can also benefit

		Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 };

		/*-------------------------------------------------------------------------
		@@ -504,8 +507,9 @@ void ivas_dirac_dec_binaural_render_fx(
		FOR( subframe_idx = first_sf; subframe_idx < last_sf; subframe_idx++ )
		{
		Word16 n_samples_sf = imult1616( slot_size, hSpatParamRendCom->subframe_nbslots[subframe_idx] );
		push_wmops( "IDR binaural internal (IDRBI)" );
		ivas_dirac_dec_binaural_internal_fx( st_ivas, st_ivas->hCombinedOrientationData, output_fx_local, nchan_transport, subframe_idx );

		pop_wmops();/*push_wmops( "IDR binaural internal (IDRBI)" );*/
		FOR( ch = 0; ch < nchan_out; ch++ )
		{
		output_fx_local[ch] += n_samples_sf;
		@@ -708,6 +712,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}
		}
		/* CLDFB Analysis of input */
		push_wmops( "IDRBI CLDFB ANALYSYS" );
		FOR( slot = 0; slot < hSpatParamRendCom->subframe_nbslots[subframe]; slot++ )
		{
		FOR( ch = 0; ch < numInChannels; ch++ )
		@@ -857,6 +862,7 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}
		}
		}
		pop_wmops(); /*push_wmops( "IDRBI CLDFB ANALYSYS" );*/

		test();
		IF( EQ_32( config_data.ivas_format, SBA_FORMAT ) \|\| EQ_32( config_data.ivas_format, SBA_ISM_FORMAT ) )
		@@ -921,7 +927,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
		}

		test();
		push_wmops( "IDRBI cov matrices" );
		ivas_dirac_dec_binaural_formulate_input_and_target_covariance_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, st_ivas->hMasaIsmData, q_inp );
		pop_wmops();/*push_wmops( "IDRBI cov matrices" );*/

		IF( EQ_32( config_data.ivas_format, ISM_FORMAT ) )
		{
		@@ -959,7 +967,9 @@ static void ivas_dirac_dec_binaural_internal_fx(
		move16();
		}

		push_wmops( "IDRBI proc matrices (IRDBI pm)" );
		ivas_dirac_dec_binaural_determine_processing_matrices_fx( hDiracDecBin, hSpatParamRendCom, &config_data, max_band_decorr, Rmat_fx, subframe, hCombinedOrientationData && hCombinedOrientationData->enableCombinedOrientation[hCombinedOrientationData->subframe_idx] > 0, nchanSeparateChannels, st_ivas->hMasaIsmData );
		pop_wmops(); /*push_wmops( "IDRBI proc matrices (IRDBI pm)" );*/

		q_inp = Q6;
		move16();
		@@ -1005,8 +1015,10 @@ static void ivas_dirac_dec_binaural_internal_fx(
		hDiracDecBin->q_processMtxDecPrev = q_mat;
		move16();

		push_wmops( "IDRBI processOutput" );
		ivas_dirac_dec_binaural_process_output_fx( hDiracDecBin, hSpatParamRendCom, st_ivas->cldfbSynDec, output_fx, &q_out, Cldfb_RealBuffer_in_fx, Cldfb_ImagBuffer_in_fx, q_inp, max_band_decorr, numInChannels, config_data.processReverb, subframe, q_mat );

		pop_wmops(); /*push_wmops( "IDRBI processOutput" );
		*/
		hDiracDecBin->hDiffuseDist = NULL;

		hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe] );
		@@ -1843,6 +1855,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		move16();
		}

		push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );
		FOR( bin = 0; bin < nBins; bin++ )
		{
		Word32 tmpMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], tmpMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], resultMtxIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], gain_fx;
		@@ -1866,6 +1879,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		q_CrEne = Q31;
		move16();

		push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );
		IF( GT_16( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) )
		{
		hDiracDecBin->ChEne_fx[1][bin] = L_shr( hDiracDecBin->ChEne_fx[1][bin], sub( hDiracDecBin->ChEne_e[0][bin], hDiracDecBin->ChEne_e[1][bin] ) );
		@@ -1935,7 +1949,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		hDiracDecBin->ChCrossReOut_fx[bin], hDiracDecBin->ChCrossImOut_fx[bin],
		hDiracDecBin->q_ChCrossOut,
		prototypeMtx_fx, Mre_fx, Mim_fx, &q_M, hDiracDecBin->reqularizationFactor_fx );
		pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec A (formulate2x2MixingMatrix)" );*/

		push_wmops( "IDRBI pm LOOP1 sec B" );
		IF( LT_16( hDiracDecBin->q_ChEne, hDiracDecBin->q_ChCross ) )
		{
		CxRe_fx[0][0] = hDiracDecBin->ChEne_fx[0][bin];
		@@ -1989,9 +2005,13 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		#endif
		resultMtxRe_fx, resultMtxIm_fx, &q_res );

		pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec B" );*/

		/* When below the frequency limit where decorrelation is applied, we inject the decorrelated
		* residual (or missing) signal component. The procedure is active when there are not enough independent
		* signal energy to synthesize a signal with the target covariance matrix from the non-decorrelated signals */

		push_wmops( "IDRBI pm LOOP1 sec C" );
		IF( LT_16( bin, max_band_decorr ) )
		{
		Word32 decorrelationReductionFactor_fx;
		@@ -2107,7 +2127,9 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		q_Mdec = Q31;
		move16();
		}
		pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec C" );*/

		push_wmops( "IDRBI pm LOOP1 sec D" );
		/* The regularizations at determining mixing matrices cause signal energy to be lost to some degree, which is compensated for here */
		tmp1 = L_add( CrEneL_fx, CrEneR_fx );
		exp = sub( get_min_scalefactor( resultMtxRe_fx[0][0], resultMtxRe_fx[1][1] ), 2 );
		@@ -2198,6 +2220,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		q_processMtxDec_bin = q_processMtxDec[bin];
		move16();
		move16();

		/* Store processing matrices */
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		@@ -2232,7 +2255,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		move16();
		q_processMtxDec[bin] = sub( q_Mdec, 16 );
		move16();
		pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec D" );*/


		push_wmops( "IDRBI pm LOOP1 sec E" );
		IF( separateCenterChannelRendering )
		{
		/* The rendering of the separate center channel in masa + mono mode.
		@@ -2322,7 +2348,10 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		}
		}
		}
		pop_wmops(); /*push_wmops( "IDRBI pm LOOP1 sec E" );*/
		}
		pop_wmops(); /*push_wmops( "IRDBI pm LOOP1 (IDRBI pm LOOP1)" );*/

		/* Aligning Q-factors of all bins in the processing matrices to a common Q-factor */
		minimum_s( q_processMtx, nBins, &hDiracDecBin->q_processMtx );
		minimum_s( q_processMtxPrev, nBins, &hDiracDecBin->q_processMtxPrev );
		@@ -2342,6 +2371,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		minimum_s( q_processMtxDec, nBins, &hDiracDecBin->q_processMtxDec );
		minimum_s( q_processMtxDecPrev, nBins, &hDiracDecBin->q_processMtxDecPrev );

		push_wmops( "IRDBI pm LOOP2" );
		FOR( bin = 0; bin < nBins; bin++ )
		{
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		@@ -2381,6 +2411,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx(
		}
		}
		}
		pop_wmops(); /*push_wmops( "IRDBI pm LOOP2" );*/

		return;
		}
		@@ -4354,7 +4385,9 @@ static void formulate2x2MixingMatrix_fx(
		}
		ELSE
		{
		push_wmops( "formulate2x2MixingMatrix Division" );
		maxEneDiv_fx = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, maxEne_fx, &exp );
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
		q_maxEneDiv = add( sub( 31, exp ), sub( Q30, q_maxEne ) );
		}
		exp = norm_l( maxEneDiv_fx );
		@@ -4377,9 +4410,12 @@ static void formulate2x2MixingMatrix_fx(
		Cout_im = Mpy_32_32( Cout_im, maxEneDiv_fx );
		q_cout = sub( add( q_cout, q_maxEneDiv ), 31 );

		push_wmops( "formulate2x2MixingMatrix cholesky" );
		/* Cholesky decomposition of target / output covariance matrix */
		chol2x2_fx( E_out1, E_out2, q_eout, Cout_re, Cout_im, q_cout, KyRe_fx, KyIm_fx, &q_ky );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix cholesky" );*/

		push_wmops( "formulate2x2MixingMatrix Eigendecomp" );
		/* Eigendecomposition of input covariance matrix */
		eig2x2_fx( E_in1, E_in2, q_ein, Cin_re, Cin_im, q_cin, Uxre_fx, Uxim_fx, &q_Ux, Sx_fx, &q_Sx );

		@@ -4397,7 +4433,9 @@ static void formulate2x2MixingMatrix_fx(
		move32();

		matrixDiagMul_fx( Uxre_fx, Uxim_fx, q_Ux, Sx_fx, q_Sx, Kxre_fx, Kxim_fx, &q_Kx );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Eigendecomp" );*/

		push_wmops( "formulate2x2MixingMatrix RegSMInv" );
		/* Regularize the diagonal Sx for matrix inversion */
		Sx_fx[0] = L_max( L_shr( Sx_fx[0], 1 ), Mpy_32_16_1( Sx_fx[1], regularizationFactor_fx ) );
		Sx_fx[1] = L_max( L_shr( Sx_fx[1], 1 ), L_shl( Mpy_32_16_1( Sx_fx[0], regularizationFactor_fx ), 1 ) );
		@@ -4432,8 +4470,9 @@ static void formulate2x2MixingMatrix_fx(
		ELSE
		{
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out1, temp, &exp );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
		exp = sub( exp, sub( q_eout, sub( 31, exp_temp ) ) );
		#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
		Ghat_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		@@ -4469,8 +4508,9 @@ static void formulate2x2MixingMatrix_fx(
		ELSE
		{
		temp = BASOP_Util_Add_Mant32Exp( temp, sub( 31, q_ein ), EPSILON_MANT, EPSILON_EXP, &exp_temp );

		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( E_out2, temp, &exp1 );
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
		exp1 = sub( exp1, sub( q_eout, sub( 31, exp_temp ) ) );
		#ifdef FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC
		Ghat_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		@@ -4487,7 +4527,9 @@ static void formulate2x2MixingMatrix_fx(
		move32();
		Ghat_fx[1] = L_shr( Ghat_fx[1], sub( sub( 31, exp1 ), q_Ghat ) ); // q_Ghat
		move32();
		pop_wmops();

		push_wmops( "formulate2x2MixingMatrix MMUL KGhatQ" );
		/* Matrix multiplication, tmp = Ky' * G_hat * Q */
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		{
		@@ -4513,17 +4555,29 @@ static void formulate2x2MixingMatrix_fx(
		move32();
		}
		}
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL KGhatQ" );*/

		q_temp = sub( add( q_ky, q_GhatQ ), 31 );

		push_wmops( "formulate2x2MixingMatrix MMUL KGhatQ*Kx" );
		/* A = Ky' * G_hat * Q * Kx (see publication) */
		matrixMul_fx( tmpRe_fx, tmpIm_fx, &q_temp, Kxre_fx, Kxim_fx, &q_Kx, Are_fx, Aim_fx, &q_A );
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix MMUL KGhatQ*Kx" );*/

		push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA (oPtoA)" );
		/* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx
		For matrix A that is P = A(A'A)^0.5 */
		push_wmops( "oPtoA MT1M" );
		#ifdef FIX_xxxx_SPEEDUP_01
		matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

		eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
		#else
		matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp );

		eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D );
		#endif
		pop_wmops();/*push_wmops( "oPtoA MT1M" );*/

		IF( D_fx[0] == 0 )
		{
		@@ -4537,8 +4591,10 @@ static void formulate2x2MixingMatrix_fx(
		}
		ELSE
		{
		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[0], &exp );
		exp = sub( exp, sub( Q30, q_D ) );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Division" )*/
		}
		div_fx[0] = Sqrt32( temp, &exp ); // Q = 31 - exp
		move32();
		@@ -4555,7 +4611,9 @@ static void formulate2x2MixingMatrix_fx(
		}
		ELSE
		{
		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, D_fx[1], &exp1 );
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
		exp1 = sub( exp1, sub( Q30, q_D ) );
		}
		div_fx[1] = Sqrt32( temp, &exp1 ); // Q = 31 - exp1
		@@ -4657,7 +4715,9 @@ static void formulate2x2MixingMatrix_fx(
		0 /*int Bscale*/,
		#endif
		Pre_fx, Pim_fx, &q_P ); /* Nearest orthonormal matrix P to matrix A formulated */
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix nrst orthonrm PtoA" );*/

		push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );
		/* These are the final formulas of the JAES publication M = Ky P Kx^(-1) */
		#if ( BINAURAL_CHANNELS != 2 )
		FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
		@@ -4740,7 +4800,9 @@ static void formulate2x2MixingMatrix_fx(
		{
		Word16 Pre_shift, Pim_shift;
		temp = BASOP_Util_Add_Mant32Exp( Sx_fx[chB], sub( 31, q_Sx ), EPSILON_MANT, EPSILON_EXP, &exp_temp );
		push_wmops( "formulate2x2MixingMatrix Division" );
		temp = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, temp, &exp );
		pop_wmops();/*push_wmops( "formulate2x2MixingMatrix Division" )*/
		q_temp = add( sub( sub( q_P, exp ), sub( 31, Q30 ) ), exp_temp );

		Pre_shift = norm_l( Pre_fx[0][chB] );
		@@ -4811,6 +4873,8 @@ static void formulate2x2MixingMatrix_fx(
		0 /*int Bscale*/,
		#endif
		Mre_fx, Mim_fx, q_M );
		pop_wmops(); /*push_wmops( "formulate2x2MixingMatrix Ky P Kx^-1" );*/

		return;
		}