Loading lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -148,6 +148,7 @@ #define OPT_2181_MATRIX_TRANSP_1_MUL /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */ #define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */ #define OPT_XXXX_MATRIX_OUT_SCALING /* #################### End BASOP optimization switches ############################ */ Loading lib_rend/ivas_dirac_dec_binaural_functions_fx.c +76 −30 Original line number Diff line number Diff line Loading @@ -3986,7 +3986,6 @@ static void matrixMul_fx( { #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); #else Word16 chA, chB; Word16 min_q_shift1, min_q_shift2; Loading @@ -4005,6 +4004,9 @@ static void matrixMul_fx( move16(); move16(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { Loading @@ -4031,16 +4033,31 @@ static void matrixMul_fx( Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); #endif if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading @@ -4057,7 +4074,6 @@ static void matrixTransp1Mul_fx( Word16 *q_out ) { Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Loading @@ -4067,11 +4083,19 @@ static void matrixTransp1Mul_fx( q = add( add( q_A, q_B ), 1 ); common_lsh = sub( 63, q ); move16(); #else Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64_1, tmp64_2; Word16 tmp16, q_common = 63; move16(); #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { #ifdef OPT_2181_MATRIX_TRANSP_1_MUL tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] ); Loading @@ -4085,31 +4109,7 @@ static void matrixTransp1Mul_fx( tmp_outIm_fx[chA][chB] = tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp64 ) ); } } FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); } } *q_out = sub( add( q, common_lsh ), 32 ); move16(); #else Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64_1, tmp64_2; Word16 tmp16, q_common = 63; move16(); FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) Loading @@ -4132,26 +4132,54 @@ static void matrixTransp1Mul_fx( q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); move16(); q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common ); #endif } } #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { #ifdef OPT_2181_MATRIX_TRANSP_1_MUL outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); #else outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) ) ); move32(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } #ifdef OPT_2181_MATRIX_TRANSP_1_MUL *q_out = sub( add( q, common_lsh ), 32 ); move16(); #else *q_out = sub( q_common, 32 ); move16(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading Loading @@ -4184,7 +4212,7 @@ static void matrixTransp2Mul_fx( { #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; #else Word16 chA, chB; Word16 min_q_shift; Loading @@ -4208,7 +4236,9 @@ static void matrixTransp2Mul_fx( move16(); } #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4234,16 +4264,32 @@ static void matrixTransp2Mul_fx( Are_fx[chA][1], Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; #endif if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -148,6 +148,7 @@ #define OPT_2181_MATRIX_TRANSP_1_MUL /* Dolby: Issue 2181, optimize matrixTransp1Mul_fx. */ #define OPT_2182_MATRIX_SCALE_OPS /* Dolby: Issue 2181, move matrix scale operations outside mul operations. */ #define OPT_XXXX_MATRIX_OUT_SCALING /* #################### End BASOP optimization switches ############################ */ Loading
lib_rend/ivas_dirac_dec_binaural_functions_fx.c +76 −30 Original line number Diff line number Diff line Loading @@ -3986,7 +3986,6 @@ static void matrixMul_fx( { #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); #else Word16 chA, chB; Word16 min_q_shift1, min_q_shift2; Loading @@ -4005,6 +4004,9 @@ static void matrixMul_fx( move16(); move16(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { Loading @@ -4031,16 +4033,31 @@ static void matrixMul_fx( Are_fx[chA][1], Bim_fx[1][chB] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); #endif if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading @@ -4057,7 +4074,6 @@ static void matrixTransp1Mul_fx( Word16 *q_out ) { Word16 chA, chB; Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Loading @@ -4067,11 +4083,19 @@ static void matrixTransp1Mul_fx( q = add( add( q_A, q_B ), 1 ); common_lsh = sub( 63, q ); move16(); #else Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64_1, tmp64_2; Word16 tmp16, q_common = 63; move16(); #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { #ifdef OPT_2181_MATRIX_TRANSP_1_MUL tmp64 = W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ); tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bre_fx[1][chB] ); tmp64 = W_mac_32_32( tmp64, Aim_fx[0][chA], Bim_fx[0][chB] ); Loading @@ -4085,31 +4109,7 @@ static void matrixTransp1Mul_fx( tmp_outIm_fx[chA][chB] = tmp64 = W_mac_32_32( tmp64, Are_fx[1][chA], Bim_fx[1][chB] ); move64(); common_lsh = s_min( common_lsh, W_norm( tmp64 ) ); } } FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); } } *q_out = sub( add( q, common_lsh ), 32 ); move16(); #else Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; Word64 tmp64_1, tmp64_2; Word16 tmp16, q_common = 63; move16(); FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 ) tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 ); // Q: add( add( q_A, q_B ), 1 ) Loading @@ -4132,26 +4132,54 @@ static void matrixTransp1Mul_fx( q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) ); move16(); q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common ); #endif } } #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) { #ifdef OPT_2181_MATRIX_TRANSP_1_MUL outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], common_lsh ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], common_lsh ) ); move32(); #else outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) ) ); move32(); outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], s_max( -63, sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) ) ); move32(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } #ifdef OPT_2181_MATRIX_TRANSP_1_MUL *q_out = sub( add( q, common_lsh ), 32 ); move16(); #else *q_out = sub( q_common, 32 ); move16(); #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS ); if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading Loading @@ -4184,7 +4212,7 @@ static void matrixTransp2Mul_fx( { #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 chA, chB; Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; // Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; #else Word16 chA, chB; Word16 min_q_shift; Loading @@ -4208,7 +4236,9 @@ static void matrixTransp2Mul_fx( move16(); } #endif #ifdef OPT_XXXX_MATRIX_OUT_SCALING Word32 not_zero = 0; #endif FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ ) { FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ ) Loading @@ -4234,16 +4264,32 @@ static void matrixTransp2Mul_fx( Are_fx[chA][1], Bim_fx[chB][1] ); move32(); #endif /* #ifdef IVAS_ENH64_CADENCE_CHANGES */ #ifdef OPT_XXXX_MATRIX_OUT_SCALING not_zero = L_or( not_zero, outRe_fx[chA][chB] ); not_zero = L_or( not_zero, outIm_fx[chA][chB] ); #endif } } *q_out = sub( add( *q_A, *q_B ), 31 ); move16(); #ifdef OPT_XXXX_MATRIX_OUT_SCALING if ( !not_zero ) { *q_out = Q31; move16(); } #else #ifdef OPT_2182_MATRIX_SCALE_OPS Word16 size = BINAURAL_CHANNELS * BINAURAL_CHANNELS; #endif if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) ) { *q_out = Q31; move16(); } #endif return; } Loading