Loading lib_com/options.h +4 −3 Original line number Diff line number Diff line Loading @@ -109,6 +109,7 @@ #define OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ #define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ #define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ #define OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS /* Dolby: Issue 2269, optimize IVAS DIRAC DEC binaural functions. */ /* #################### End BASOP optimization switches ############################ */ Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +308 −8 Original line number Diff line number Diff line Loading @@ -4185,9 +4185,39 @@ static void matrixDiagMul_fx( Word32 imOut_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_Out*/ Word16 *q_Out ) { Word16 chA, chB; Word32 not_zero = 0; #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS reOut_fx[0][0] = Mpy_32_32( reIn_fx[0][0], D_fx[0] ); imOut_fx[0][0] = Mpy_32_32( imIn_fx[0][0], D_fx[0] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[0][0] ); not_zero = L_or( not_zero, imOut_fx[0][0] ); reOut_fx[0][1] = Mpy_32_32( reIn_fx[0][1], D_fx[1] ); imOut_fx[0][1] = Mpy_32_32( imIn_fx[0][1], D_fx[1] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[0][1] ); not_zero = L_or( not_zero, imOut_fx[0][1] ); reOut_fx[1][0] = Mpy_32_32( reIn_fx[1][0], D_fx[0] ); imOut_fx[1][0] = Mpy_32_32( imIn_fx[1][0], D_fx[0] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[1][0] ); not_zero = L_or( not_zero, imOut_fx[1][0] ); reOut_fx[1][1] = Mpy_32_32( reIn_fx[1][1], D_fx[1] ); imOut_fx[1][1] = Mpy_32_32( imIn_fx[1][1], D_fx[1] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[1][1] ); not_zero = L_or( not_zero, imOut_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4200,7 +4230,7 @@ static void matrixDiagMul_fx( not_zero = L_or( not_zero, imOut_fx[chA][chB] ); } } #endif *q_Out = sub( add( q_In, q_D ), 31 ); move16(); if ( !not_zero ) Loading @@ -4217,6 +4247,78 @@ static void matrixScale_fx( Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word16 *q_A ) { #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS Word16 shift_tmp; Word16 shift = 31; move16(); shift_tmp = norm_l( Are_fx[0][0] ); if ( Are_fx[0][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[0][1] ); if ( Are_fx[0][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[1][0] ); if ( Are_fx[1][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[1][1] ); if ( Are_fx[1][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[0][0] ); if ( Aim_fx[0][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[0][1] ); if ( Aim_fx[0][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[1][0] ); if ( Aim_fx[1][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[1][1] ); if ( Aim_fx[1][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift = sub( shift, 1 ); IF( shift != 0 ) { Are_fx[0][0] = L_shl( Are_fx[0][0], shift ); Are_fx[0][1] = L_shl( Are_fx[0][1], shift ); Are_fx[1][0] = L_shl( Are_fx[1][0], shift ); Are_fx[1][1] = L_shl( Are_fx[1][1], shift ); Aim_fx[0][0] = L_shl( Aim_fx[0][0], shift ); Aim_fx[0][1] = L_shl( Aim_fx[0][1], shift ); Aim_fx[1][0] = L_shl( Aim_fx[1][0], shift ); Aim_fx[1][1] = L_shl( Aim_fx[1][1], shift ); move32(); move32(); move32(); move32(); move32(); move32(); move32(); move32(); } *q_A = add( *q_A, shift ); move16(); #else Word16 shift; Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); Loading @@ -4224,6 +4326,7 @@ static void matrixScale_fx( scale_sig32( Aim_fx[0], size, shift ); *q_A = add( *q_A, shift ); move16(); #endif } static void matrixMul_fx( Loading @@ -4237,8 +4340,61 @@ static void matrixMul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; Word32 not_zero = 0; #if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES ) outRe_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ), Are_fx[0][1], Bre_fx[1][0] ), Aim_fx[0][0], Bim_fx[0][0] ), Aim_fx[0][1], Bim_fx[1][0] ); move32(); outIm_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ), Aim_fx[0][1], Bre_fx[1][0] ), Are_fx[0][0], Bim_fx[0][0] ), Are_fx[0][1], Bim_fx[1][0] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][1] ), Are_fx[0][1], Bre_fx[1][1] ), Aim_fx[0][0], Bim_fx[0][1] ), Aim_fx[0][1], Bim_fx[1][1] ); move32(); outIm_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][1] ), Aim_fx[0][1], Bre_fx[1][1] ), Are_fx[0][0], Bim_fx[0][1] ), Are_fx[0][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ), Are_fx[1][1], Bre_fx[1][0] ), Aim_fx[1][0], Bim_fx[0][0] ), Aim_fx[1][1], Bim_fx[1][0] ); move32(); outIm_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ), Aim_fx[1][1], Bre_fx[1][0] ), Are_fx[1][0], Bim_fx[0][0] ), Are_fx[1][1], Bim_fx[1][0] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][1] ), Are_fx[1][1], Bre_fx[1][1] ), Aim_fx[1][0], Bim_fx[0][1] ), Aim_fx[1][1], Bim_fx[1][1] ); move32(); outIm_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][1] ), Aim_fx[1][1], Bre_fx[1][1] ), Are_fx[1][0], Bim_fx[0][1] ), Are_fx[1][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { Loading Loading @@ -4269,6 +4425,7 @@ static void matrixMul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); Loading @@ -4292,8 +4449,6 @@ static void matrixTransp1Mul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64; Loading @@ -4302,6 +4457,96 @@ static void matrixTransp1Mul_fx( common_lsh = sub( 63, q ); move16(); Word32 not_zero = 0; move32(); #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][0] ); tmp_outRe_fx[0][0] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][0] ) ); tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][0] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][0] ); tmp_outIm_fx[0][0] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][0] ) ); tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][1] ); tmp_outRe_fx[0][1] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][1] ) ); tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][1] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][1] ); tmp_outIm_fx[0][1] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][1] ) ); tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][0] ); tmp_outRe_fx[1][0] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][0] ) ); tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][0] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][0] ); tmp_outIm_fx[1][0] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][0] ) ); tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][1] ); tmp_outRe_fx[1][1] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][1] ) ); tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][1] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][1] ); tmp_outIm_fx[1][1] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][1] ) ); outRe_fx[0][0] = W_extract_h( W_shl( tmp_outRe_fx[0][0], common_lsh ) ); move32(); outIm_fx[0][0] = W_extract_h( W_shl( tmp_outIm_fx[0][0], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = W_extract_h( W_shl( tmp_outRe_fx[0][1], common_lsh ) ); move32(); outIm_fx[0][1] = W_extract_h( W_shl( tmp_outIm_fx[0][1], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = W_extract_h( W_shl( tmp_outRe_fx[1][0], common_lsh ) ); move32(); outIm_fx[1][0] = W_extract_h( W_shl( tmp_outIm_fx[1][0], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = W_extract_h( W_shl( tmp_outRe_fx[1][1], common_lsh ) ); move32(); outIm_fx[1][1] = W_extract_h( W_shl( tmp_outIm_fx[1][1], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4321,7 +4566,7 @@ static void matrixTransp1Mul_fx( common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) ); } } Word32 not_zero = 0; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4334,6 +4579,8 @@ static void matrixTransp1Mul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( q, common_lsh ), 32 ); move16(); if ( !not_zero ) Loading @@ -4356,9 +4603,61 @@ static void matrixTransp2Mul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; Word32 not_zero = 0; #if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES ) outRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ), Are_fx[0][1], Bre_fx[0][1] ), Aim_fx[0][0], Bim_fx[0][0] ), Aim_fx[0][1], Bim_fx[0][1] ); move32(); outIm_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ), Aim_fx[0][1], Bre_fx[0][1] ), Are_fx[0][0], Bim_fx[0][0] ), Are_fx[0][1], Bim_fx[0][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[1][0] ), Are_fx[0][1], Bre_fx[1][1] ), Aim_fx[0][0], Bim_fx[1][0] ), Aim_fx[0][1], Bim_fx[1][1] ); move32(); outIm_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[1][0] ), Aim_fx[0][1], Bre_fx[1][1] ), Are_fx[0][0], Bim_fx[1][0] ), Are_fx[0][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ), Are_fx[1][1], Bre_fx[0][1] ), Aim_fx[1][0], Bim_fx[0][0] ), Aim_fx[1][1], Bim_fx[0][1] ); move32(); outIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ), Aim_fx[1][1], Bre_fx[0][1] ), Are_fx[1][0], Bim_fx[0][0] ), Are_fx[1][1], Bim_fx[0][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[1][0] ), Are_fx[1][1], Bre_fx[1][1] ), Aim_fx[1][0], Bim_fx[1][0] ), Aim_fx[1][1], Bim_fx[1][1] ); move32(); outIm_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[1][0] ), Aim_fx[1][1], Bre_fx[1][1] ), Are_fx[1][0], Bim_fx[1][0] ), Are_fx[1][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading Loading @@ -4388,6 +4687,7 @@ static void matrixTransp2Mul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); Loading Loading
lib_com/options.h +4 −3 Original line number Diff line number Diff line Loading @@ -109,6 +109,7 @@ #define OPT_2239_IVAS_FILTER_PROCESS /* Dolby: Issue 2239, optimize ivas_filter_process_fx. */ #define NONBE_OPT_2193_EIG2X2 /* Dolby: Issue 2193, optimize eig2x2_fx. */ #define BE_FIX_2240_COMPUTE_COV_MTC_FX_FAST /* FhG: Speeds up covariance calculation e.g. 60 WMOPS for encoding -mc 7_1_4 24400 48 */ #define OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS /* Dolby: Issue 2269, optimize IVAS DIRAC DEC binaural functions. */ /* #################### End BASOP optimization switches ############################ */ Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +308 −8 Original line number Diff line number Diff line Loading @@ -4185,9 +4185,39 @@ static void matrixDiagMul_fx( Word32 imOut_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_Out*/ Word16 *q_Out ) { Word16 chA, chB; Word32 not_zero = 0; #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS reOut_fx[0][0] = Mpy_32_32( reIn_fx[0][0], D_fx[0] ); imOut_fx[0][0] = Mpy_32_32( imIn_fx[0][0], D_fx[0] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[0][0] ); not_zero = L_or( not_zero, imOut_fx[0][0] ); reOut_fx[0][1] = Mpy_32_32( reIn_fx[0][1], D_fx[1] ); imOut_fx[0][1] = Mpy_32_32( imIn_fx[0][1], D_fx[1] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[0][1] ); not_zero = L_or( not_zero, imOut_fx[0][1] ); reOut_fx[1][0] = Mpy_32_32( reIn_fx[1][0], D_fx[0] ); imOut_fx[1][0] = Mpy_32_32( imIn_fx[1][0], D_fx[0] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[1][0] ); not_zero = L_or( not_zero, imOut_fx[1][0] ); reOut_fx[1][1] = Mpy_32_32( reIn_fx[1][1], D_fx[1] ); imOut_fx[1][1] = Mpy_32_32( imIn_fx[1][1], D_fx[1] ); move32(); move32(); not_zero = L_or( not_zero, reOut_fx[1][1] ); not_zero = L_or( not_zero, imOut_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4200,7 +4230,7 @@ static void matrixDiagMul_fx( not_zero = L_or( not_zero, imOut_fx[chA][chB] ); } } #endif *q_Out = sub( add( q_In, q_D ), 31 ); move16(); if ( !not_zero ) Loading @@ -4217,6 +4247,78 @@ static void matrixScale_fx( Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word16 *q_A ) { #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS Word16 shift_tmp; Word16 shift = 31; move16(); shift_tmp = norm_l( Are_fx[0][0] ); if ( Are_fx[0][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[0][1] ); if ( Are_fx[0][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[1][0] ); if ( Are_fx[1][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Are_fx[1][1] ); if ( Are_fx[1][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[0][0] ); if ( Aim_fx[0][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[0][1] ); if ( Aim_fx[0][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[1][0] ); if ( Aim_fx[1][0] != 0 ) { shift = s_min( shift, shift_tmp ); } shift_tmp = norm_l( Aim_fx[1][1] ); if ( Aim_fx[1][1] != 0 ) { shift = s_min( shift, shift_tmp ); } shift = sub( shift, 1 ); IF( shift != 0 ) { Are_fx[0][0] = L_shl( Are_fx[0][0], shift ); Are_fx[0][1] = L_shl( Are_fx[0][1], shift ); Are_fx[1][0] = L_shl( Are_fx[1][0], shift ); Are_fx[1][1] = L_shl( Are_fx[1][1], shift ); Aim_fx[0][0] = L_shl( Aim_fx[0][0], shift ); Aim_fx[0][1] = L_shl( Aim_fx[0][1], shift ); Aim_fx[1][0] = L_shl( Aim_fx[1][0], shift ); Aim_fx[1][1] = L_shl( Aim_fx[1][1], shift ); move32(); move32(); move32(); move32(); move32(); move32(); move32(); move32(); } *q_A = add( *q_A, shift ); move16(); #else Word16 shift; Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; shift = sub( s_min( L_norm_arr( Are_fx[0], size ), L_norm_arr( Aim_fx[0], size ) ), 1 ); Loading @@ -4224,6 +4326,7 @@ static void matrixScale_fx( scale_sig32( Aim_fx[0], size, shift ); *q_A = add( *q_A, shift ); move16(); #endif } static void matrixMul_fx( Loading @@ -4237,8 +4340,61 @@ static void matrixMul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; Word32 not_zero = 0; #if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES ) outRe_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ), Are_fx[0][1], Bre_fx[1][0] ), Aim_fx[0][0], Bim_fx[0][0] ), Aim_fx[0][1], Bim_fx[1][0] ); move32(); outIm_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ), Aim_fx[0][1], Bre_fx[1][0] ), Are_fx[0][0], Bim_fx[0][0] ), Are_fx[0][1], Bim_fx[1][0] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][1] ), Are_fx[0][1], Bre_fx[1][1] ), Aim_fx[0][0], Bim_fx[0][1] ), Aim_fx[0][1], Bim_fx[1][1] ); move32(); outIm_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][1] ), Aim_fx[0][1], Bre_fx[1][1] ), Are_fx[0][0], Bim_fx[0][1] ), Are_fx[0][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ), Are_fx[1][1], Bre_fx[1][0] ), Aim_fx[1][0], Bim_fx[0][0] ), Aim_fx[1][1], Bim_fx[1][0] ); move32(); outIm_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ), Aim_fx[1][1], Bre_fx[1][0] ), Are_fx[1][0], Bim_fx[0][0] ), Are_fx[1][1], Bim_fx[1][0] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][1] ), Are_fx[1][1], Bre_fx[1][1] ), Aim_fx[1][0], Bim_fx[0][1] ), Aim_fx[1][1], Bim_fx[1][1] ); move32(); outIm_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][1] ), Aim_fx[1][1], Bre_fx[1][1] ), Are_fx[1][0], Bim_fx[0][1] ), Are_fx[1][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { Loading Loading @@ -4269,6 +4425,7 @@ static void matrixMul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); Loading @@ -4292,8 +4449,6 @@ static void matrixTransp1Mul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64; Loading @@ -4302,6 +4457,96 @@ static void matrixTransp1Mul_fx( common_lsh = sub( 63, q ); move16(); Word32 not_zero = 0; move32(); #ifdef OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][0] ); tmp_outRe_fx[0][0] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][0] ) ); tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][0] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][0] ); tmp_outIm_fx[0][0] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][0] ) ); tmp64 = W_mult_32_32( Are_fx[0][0], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][0], Bre_fx[1][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][0], Bim_fx[0][1] ); tmp_outRe_fx[0][1] = W_mac_32_32( tmp64, Aim_fx[1][0], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[0][1] ) ); tmp64 = W_mult_32_32( Aim_fx[0][0], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][0], Bre_fx[1][1] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][0], Bim_fx[0][1] ); tmp_outIm_fx[0][1] = W_mac_32_32( tmp64, Are_fx[1][0], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[0][1] ) ); tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][0] ); tmp_outRe_fx[1][0] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][0] ) ); tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][0] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][0] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][0] ); tmp_outIm_fx[1][0] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][0] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][0] ) ); tmp64 = W_mult_32_32( Are_fx[0][1], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][1], Bre_fx[1][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][1], Bim_fx[0][1] ); tmp_outRe_fx[1][1] = W_mac_32_32( tmp64, Aim_fx[1][1], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outRe_fx[1][1] ) ); tmp64 = W_mult_32_32( Aim_fx[0][1], Bre_fx[0][1] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[1][1], Bre_fx[1][1] ); tmp64 = W_mac_32_32( W_neg( tmp64 ), Are_fx[0][1], Bim_fx[0][1] ); tmp_outIm_fx[1][1] = W_mac_32_32( tmp64, Are_fx[1][1], Bim_fx[1][1] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[1][1] ) ); outRe_fx[0][0] = W_extract_h( W_shl( tmp_outRe_fx[0][0], common_lsh ) ); move32(); outIm_fx[0][0] = W_extract_h( W_shl( tmp_outIm_fx[0][0], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = W_extract_h( W_shl( tmp_outRe_fx[0][1], common_lsh ) ); move32(); outIm_fx[0][1] = W_extract_h( W_shl( tmp_outIm_fx[0][1], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = W_extract_h( W_shl( tmp_outRe_fx[1][0], common_lsh ) ); move32(); outIm_fx[1][0] = W_extract_h( W_shl( tmp_outIm_fx[1][0], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = W_extract_h( W_shl( tmp_outRe_fx[1][1], common_lsh ) ); move32(); outIm_fx[1][1] = W_extract_h( W_shl( tmp_outIm_fx[1][1], common_lsh ) ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4321,7 +4566,7 @@ static void matrixTransp1Mul_fx( common_lsh = s_min( common_lsh, W_norm( tmp_outIm_fx[chA][chB] ) ); } } Word32 not_zero = 0; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4334,6 +4579,8 @@ static void matrixTransp1Mul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( q, common_lsh ), 32 ); move16(); if ( !not_zero ) Loading @@ -4356,9 +4603,61 @@ static void matrixTransp2Mul_fx( Word32 outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_out*/ Word16 *q_out ) { Word16 chA, chB; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; Word32 not_zero = 0; #if defined( OPT_2269_IVAS_DIRAC_DEC_BINAURAL_FUNCTIONS ) && !defined( IVAS_ENH64_CADENCE_CHANGES ) outRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[0][0] ), Are_fx[0][1], Bre_fx[0][1] ), Aim_fx[0][0], Bim_fx[0][0] ), Aim_fx[0][1], Bim_fx[0][1] ); move32(); outIm_fx[0][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[0][0] ), Aim_fx[0][1], Bre_fx[0][1] ), Are_fx[0][0], Bim_fx[0][0] ), Are_fx[0][1], Bim_fx[0][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][0] ); not_zero = L_or( not_zero, outIm_fx[0][0] ); outRe_fx[0][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Bre_fx[1][0] ), Are_fx[0][1], Bre_fx[1][1] ), Aim_fx[0][0], Bim_fx[1][0] ), Aim_fx[0][1], Bim_fx[1][1] ); move32(); outIm_fx[0][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[0][0], Bre_fx[1][0] ), Aim_fx[0][1], Bre_fx[1][1] ), Are_fx[0][0], Bim_fx[1][0] ), Are_fx[0][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[0][1] ); not_zero = L_or( not_zero, outIm_fx[0][1] ); outRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[0][0] ), Are_fx[1][1], Bre_fx[0][1] ), Aim_fx[1][0], Bim_fx[0][0] ), Aim_fx[1][1], Bim_fx[0][1] ); move32(); outIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[0][0] ), Aim_fx[1][1], Bre_fx[0][1] ), Are_fx[1][0], Bim_fx[0][0] ), Are_fx[1][1], Bim_fx[0][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][0] ); not_zero = L_or( not_zero, outIm_fx[1][0] ); outRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[1][0], Bre_fx[1][0] ), Are_fx[1][1], Bre_fx[1][1] ), Aim_fx[1][0], Bim_fx[1][0] ), Aim_fx[1][1], Bim_fx[1][1] ); move32(); outIm_fx[1][1] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Aim_fx[1][0], Bre_fx[1][0] ), Aim_fx[1][1], Bre_fx[1][1] ), Are_fx[1][0], Bim_fx[1][0] ), Are_fx[1][1], Bim_fx[1][1] ); move32(); not_zero = L_or( not_zero, outRe_fx[1][1] ); not_zero = L_or( not_zero, outIm_fx[1][1] ); #else Word16 chA, chB; FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading Loading @@ -4388,6 +4687,7 @@ static void matrixTransp2Mul_fx( not_zero = L_or( not_zero, outIm_fx[chA][chB] ); } } #endif *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); Loading