Loading lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -175,4 +175,7 @@ #define NONBE_FIX_1402_WAVEADJUST /* VA: BASOP iisue 1402: fix waveform adjustment decoder PLC */ #define FIX_ISSUE_1376 /* VA: Fix for issue 1376 (issue with GSC excitation) */ #define OPT_SBA_AVOID_SPAR_RESCALE /* Optimization made to spar decoder and IGF */ #define FIX_1326_SUBSTITUTE_CMPMANT32EXP /* FhG: Minor WMOPS tuning*/ #define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT /* FhG: WMOPS tuning */ #define FIX_1326_SPEEDUP_eig2x2_fx /* FhG: Minor WMOPS tuning*/ #endif lib_rend/ivas_dirac_dec_binaural_functions_fx.c +8 −132 Original line number Diff line number Diff line Loading @@ -46,18 +46,6 @@ #include "wmc_auto.h" // MHZ NUMBERS: // NULL: 178.407 // ALL: 169.499 77 (170.650 wo 17) #define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; Loading Loading @@ -1892,11 +1880,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); Word32 tmp1, tmp2, res1, res2; Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain; #ifdef FIX_1326_SPEEDUP_13 Word16 exp1, q_processMtx_bin, q_processMtxDec_bin; #else Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin; #endif CrEneL_fx = 0; move32(); Loading Loading @@ -2199,13 +2183,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 
); #ifdef FIX_1326_SPEEDUP_13 #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT { Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); tmp2 = ISqrt32( tmp2, &exp_temp ); tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 ); tmp2 = ISqrt32( tmp2, &exp2 ); gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); q_gain = sub( 31, add( exp_temp, exp1 ) ); q_gain = sub( 31, add( exp2, exp1 ) ); } #else { Loading Loading @@ -3317,7 +3300,7 @@ static void eig2x2_fx( /* Numeric case, when input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) #ifdef FIX_1326_SPEEDUP_02 #ifdef FIX_1326_SUBSTITUTE_CMPMANT32EXP IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q31; Loading Loading @@ -3406,27 +3389,10 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); #ifdef FIX_1326_SPEEDUP_16 { Word16 tmp2_exp; Word32 eps_tmp; tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp ); // Add epsilon if relevant eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) ); tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); exp_tmp3 = add( tmp2_exp, 1 ); } #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); Loading Loading @@ -3499,22 +3465,10 @@ static void eig2x2_fx( q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); #ifdef FIX_1326_SPEEDUP_04 Word16 exp_tmp2; Word32 eps_tmp; tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 ); eps_tmp = L_shl_sat( 
epsilon_mant, sub( epsilon_exp, exp_tmp2 ) ); tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant exp_tmp3 = add( exp_tmp2, 1 ); #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); Loading Loading @@ -3815,7 +3769,6 @@ static void matrixMul_fx( return; } #ifndef FIX_1326_SPEEDUP_01 static void matrixTransp1Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -3929,7 +3882,6 @@ static void matrixTransp1Mul_fx( return; } #endif /*FIX_1326_SPEEDUP_01*/ static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -4655,80 +4607,10 @@ static void formulate2x2MixingMatrix_fx( /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ push_wmops( "oPtoA MT1M" ); #ifdef FIX_1326_SPEEDUP_01 // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); { // Word16 chA, chB; { chA = 0, chB = 0; tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), Are_fx[1][0], Are_fx[1][0] ), Aim_fx[0][0], Aim_fx[0][0] ), Aim_fx[1][0], Aim_fx[1][0] ); move32(); } { // chA = 0, chB = 1; tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), Are_fx[1][1], Are_fx[1][0] ), Aim_fx[0][1], Aim_fx[0][0] ), Aim_fx[1][1], Aim_fx[1][0] ); move32(); tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), Are_fx[1][1], Aim_fx[1][0] ), Aim_fx[0][1], Are_fx[0][0] ), Aim_fx[1][1], Are_fx[1][0] ); move32(); } { // chA = 1, chB = 0; tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( 
Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), Are_fx[1][1], Are_fx[1][1] ), Aim_fx[0][1], Aim_fx[0][1] ), Aim_fx[1][1], Aim_fx[1][1] ); move32(); } { // chA = 1, chB = 1; } q_temp = sub( add( q_A, q_A ), 31 ); move16(); Word16 ZeroState = add( 1, 0 ); if ( tmpRe_fx[0][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpRe_fx[1][1] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpRe_fx[1][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpIm_fx[1][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( sub( ZeroState, 1 ) == 0 ) { q_temp = Q31; move16(); } } eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ Loading @@ -4753,11 +4635,9 @@ static void formulate2x2MixingMatrix_fx( move32(); #ifdef FIX_1326_SPEEDUP_08 // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster { //Sqrt(1) div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 exp1 = add( 0, 20 ); // move32(); } exp1 = add( 0, 20 ); IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt { Loading Loading @@ -4841,11 +4721,7 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_im[chA][chB] = W_norm( W_tmp ); #else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); Loading Loading
lib_com/options.h +3 −0 Original line number Diff line number Diff line Loading @@ -175,4 +175,7 @@ #define NONBE_FIX_1402_WAVEADJUST /* VA: BASOP issue 1402: fix waveform adjustment decoder PLC */ #define FIX_ISSUE_1376 /* VA: Fix for issue 1376 (issue with GSC excitation) */ #define OPT_SBA_AVOID_SPAR_RESCALE /* Optimization made to spar decoder and IGF */ #define FIX_1326_SUBSTITUTE_CMPMANT32EXP /* FhG: Minor WMOPS tuning*/ #define FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT /* FhG: WMOPS tuning */ #define FIX_1326_SPEEDUP_eig2x2_fx /* FhG: Minor WMOPS tuning*/ #endif
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +8 −132 Original line number Diff line number Diff line Loading @@ -46,18 +46,6 @@ #include "wmc_auto.h" // MHZ NUMBERS: // NULL: 178.407 // ALL: 169.499 77 (170.650 wo 17) #define FIX_1326_SPEEDUP_01 // optimize matrixT1mul->eig2x2_fx // .4 WMOPS --> USE #define FIX_1326_SPEEDUP_02 // speedup eig2x2_fx // .3 WMOPS --> USE #define FIX_1326_SPEEDUP_04 // speedup eig2x2_fx // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_08 // "-" // 3.0 WMOPS //small diffs --> USE #define FIX_1326_SPEEDUP_13 // replace div/sqrt->isqrt // 2.9 WMOPS --> USE #define FIX_1326_SPEEDUP_16 // tiny speedup like 04 // .2 WMOPS --> USE #define FIX_1326_SPEEDUP_18 // structural speedup // 1 WMOPS --> USE Word16 slot_fx[4] = { 32767, 16384, 10922, 8192 }; Loading Loading @@ -1892,11 +1880,7 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( move16(); Word32 tmp1, tmp2, res1, res2; Word16 q_tmp1, q_tmp2, q_realizedOutputEne, q_targetOutputEne, q_missingOutputEne, q_gain; #ifdef FIX_1326_SPEEDUP_13 Word16 exp1, q_processMtx_bin, q_processMtxDec_bin; #else Word16 exp1, exp2, q_processMtx_bin, q_processMtxDec_bin; #endif CrEneL_fx = 0; move32(); Loading Loading @@ -2199,13 +2183,12 @@ static void ivas_dirac_dec_binaural_determine_processing_matrices_fx( tmp1 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), missingOutputEne_fx, sub( 31, q_missingOutputEne ), &exp1 ); #ifdef FIX_1326_SPEEDUP_13 #ifdef FIX_1326_SUBSTITUTE_DIV_SQRT_IOSQRT { Word16 exp_temp; tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp_temp ); tmp2 = ISqrt32( tmp2, &exp_temp ); tmp2 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), EPSILON_MANT, EPSILON_EXP, &exp2 ); tmp2 = ISqrt32( tmp2, &exp2 ); gain_fx = Mpy_32_32( tmp2, Sqrt32( tmp1, &exp1 ) ); q_gain = sub( 31, add( exp_temp, exp1 ) ); q_gain = sub( 31, add( exp2, exp1 ) ); } #else { Loading Loading @@ -3317,7 +3300,7 @@ static void eig2x2_fx( /* Numeric case, when 
input is practically zeros */ // IF( D_fx[0] < EPSILON_FX ) #ifdef FIX_1326_SPEEDUP_02 #ifdef FIX_1326_SUBSTITUTE_CMPMANT32EXP IF( LT_32( L_shl_sat( D_fx[0], sub( sub( 31, *q_D ), EPSILON_EXP ) ), EPSILON_MANT ) ) { Ure_fx[0][0] = ONE_IN_Q31; Loading Loading @@ -3406,27 +3389,10 @@ static void eig2x2_fx( tmp2 = Mpy_32_32( s_fx, s_fx ); q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); #ifdef FIX_1326_SPEEDUP_16 { Word16 tmp2_exp; Word32 eps_tmp; tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &tmp2_exp ); // Add epsilon if relevant eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, tmp2_exp ) ); tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); exp_tmp3 = add( tmp2_exp, 1 ); } #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); exp = sub( exp, sub( Q30, sub( 31, exp_tmp3 ) ) ); Loading Loading @@ -3499,22 +3465,10 @@ static void eig2x2_fx( q_tmp2 = sub( add( q_tmp1, q_tmp1 ), 31 ); #ifdef FIX_1326_SPEEDUP_04 Word16 exp_tmp2; Word32 eps_tmp; tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &exp_tmp2 ); eps_tmp = L_shl_sat( epsilon_mant, sub( epsilon_exp, exp_tmp2 ) ); tmp3 = L_add( L_shr( tmp2, 1 ), L_shr( eps_tmp, 1 ) ); // Add Epsilon if relevant exp_tmp3 = add( exp_tmp2, 1 ); #else tmp2 = BASOP_Util_Add_Mant32Exp( crossSquare_fx, sub( 31, q_crossSquare ), tmp2, sub( 31, q_tmp2 ), &q_tmp2 ); q_tmp2 = sub( 31, q_tmp2 ); tmp3 = BASOP_Util_Add_Mant32Exp( tmp2, sub( 31, q_tmp2 ), epsilon_mant, epsilon_exp, &exp_tmp3 ); #endif #if 1 tmp2 = BASOP_Util_Divide3232_Scale_cadence( ONE_IN_Q30, tmp3, &exp ); Loading Loading @@ -3815,7 +3769,6 @@ static void matrixMul_fx( return; } #ifndef 
FIX_1326_SPEEDUP_01 static void matrixTransp1Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Word32 Aim_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -3929,7 +3882,6 @@ static void matrixTransp1Mul_fx( return; } #endif /*FIX_1326_SPEEDUP_01*/ static void matrixTransp2Mul_fx( Word32 Are_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS], /*q_A*/ Loading Loading @@ -4655,80 +4607,10 @@ static void formulate2x2MixingMatrix_fx( /* Find nearest orthonormal matrix P to A = Ky' * G_hat * Q * Kx For matrix A that is P = A(A'A)^0.5 */ push_wmops( "oPtoA MT1M" ); #ifdef FIX_1326_SPEEDUP_01 // matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); { // Word16 chA, chB; { chA = 0, chB = 0; tmpRe_fx[0][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][0], Are_fx[0][0] ), Are_fx[1][0], Are_fx[1][0] ), Aim_fx[0][0], Aim_fx[0][0] ), Aim_fx[1][0], Aim_fx[1][0] ); move32(); } { // chA = 0, chB = 1; tmpRe_fx[1][0] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][0] ), Are_fx[1][1], Are_fx[1][0] ), Aim_fx[0][1], Aim_fx[0][0] ), Aim_fx[1][1], Aim_fx[1][0] ); move32(); tmpIm_fx[1][0] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Aim_fx[0][0] ), Are_fx[1][1], Aim_fx[1][0] ), Aim_fx[0][1], Are_fx[0][0] ), Aim_fx[1][1], Are_fx[1][0] ); move32(); } { // chA = 1, chB = 0; tmpRe_fx[1][1] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][1], Are_fx[0][1] ), Are_fx[1][1], Are_fx[1][1] ), Aim_fx[0][1], Aim_fx[0][1] ), Aim_fx[1][1], Aim_fx[1][1] ); move32(); } { // chA = 1, chB = 1; } q_temp = sub( add( q_A, q_A ), 31 ); move16(); Word16 ZeroState = add( 1, 0 ); if ( tmpRe_fx[0][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpRe_fx[1][1] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpRe_fx[1][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( tmpIm_fx[1][0] != 0 ) { ZeroState = add( 0, 0 ); } if ( sub( ZeroState, 1 ) == 0 ) { q_temp = Q31; move16(); } } eig2x2_fx( 
tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #else matrixTransp1Mul_fx( Are_fx, Aim_fx, q_A, Are_fx, Aim_fx, q_A, tmpRe_fx, tmpIm_fx, &q_temp ); eig2x2_fx( tmpRe_fx[0][0], tmpRe_fx[1][1], q_temp, tmpRe_fx[1][0], tmpIm_fx[1][0], q_temp, Ure_fx, Uim_fx, &q_U, D_fx, &q_D ); #endif pop_wmops(); /*push_wmops( "oPtoA MT1M" );*/ Loading @@ -4753,11 +4635,9 @@ static void formulate2x2MixingMatrix_fx( move32(); #ifdef FIX_1326_SPEEDUP_08 // This is just a shortcut to already existing optimizations (FIX_1009_REPLACE_DIV_SQRT_BY_ISQRT_LC) - but makes everything even faster { //Sqrt(1) div_fx[1] = L_add( 0, 2047986068 ); // Q = 31 - exp1 exp1 = add( 0, 20 ); // move32(); } exp1 = add( 0, 20 ); IF( D_fx[1] != 0 ) // This is the new code: replace div sqrt by isqrt { Loading Loading @@ -4841,11 +4721,7 @@ static void formulate2x2MixingMatrix_fx( W_tmp = W_mult0_32_32( tmpIm_fx[chA][chB], div_fx[chB] ); IF( W_tmp != 0 ) { #ifdef FIX_1326_SPEEDUP_10 hdrm_im[chA][chB] = W_norm( W_tmp ); #else hdrm_im[chA][chB] = sub( W_norm( W_tmp ), 0 ); #endif move16(); W_tmp = W_shl( W_tmp, hdrm_im[chA][chB] ); tmpIm_fx[chA][chB] = W_extract_h( W_tmp ); Loading