Loading .gitlab-ci.yml +2 −2 Original line number Diff line number Diff line Loading @@ -506,7 +506,7 @@ stages: .ivas-pytest-sanitizers-anchor: &ivas-pytest-sanitizers-anchor stage: test needs: ["build-codec-linux-make"] timeout: "300 minutes" timeout: "420 minutes" rules: - if: $CI_PIPELINE_SOURCE == 'push' when: never Loading @@ -522,7 +522,7 @@ stages: - *build-reference-and-dut-binaries - make clean - make -j CLANG=$CLANG_NUM - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan.supp,report_error_type=1"; fi - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan_basop.supp,report_error_type=1"; fi - testcase_timeout=$TESTCASE_TIMEOUT_LTV_SANITIZERS - python3 -m pytest $TEST_SUITE -v --tb=no --update_ref 1 --html=report.html --self-contained-html --junit-xml=report-junit.xml --testcase_timeout $testcase_timeout --ref_encoder_path $DUT_ENCODER_PATH --ref_decoder_path $DUT_DECODER_PATH artifacts: Loading lib_com/cldfb.c +1 −1 Original line number Diff line number Diff line Loading @@ -1213,7 +1213,7 @@ void cldfbSynthesis_ivas_fx( /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/ iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx move32(); iBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_im_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx move32(); } Loading lib_com/ivas_stereo_ica_com_fx.c +24 −38 Original line number Diff line number Diff line Loading @@ -131,24 +131,23 @@ static void interpTargetChannel_fx( Word32 spread_factor2_fx; Word64 tempD1_fx, tempD2_fx; d = negate( sub( currShift, prevShift ) ); IF( d >= 0 ) d = sub( prevShift, currShift ); IF( d == 0 ) { /* this can happen in DFT->TD switching */ return; } signShift = 1; move16(); } ELSE if ( d < 0 ) { signShift = -1; move16(); } IF( d == 0 ) { /* this can happen in DFT->TD switching */ return; } N = L_shift_adapt; move16(); Word32 *table_pointer = NULL; Loading Loading @@ -207,7 +206,7 @@ static void interpTargetChannel_fx( FOR( j = lim1; j <= lim2; j++ ) { ptr2_fx[i] = L_add( Mpy_32_32( win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ), ptr2_fx[i] ); // qsynth ptr2_fx[i] = Madd_32_32( ptr2_fx[i], win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ); // qsynth move32(); } } Loading @@ -225,44 +224,31 @@ static void interpTargetChannel_fx( tempD1_fx = W_deposit32_l( table_D1_pointer[abs( d )] ); // Q35 tempD2_fx = W_mult0_32_32( 3, table_D1_pointer[abs( d )] ); // Q35 IF( EQ_16( signShift, 1 ) ) { tempF1_fx = ONE_IN_Q12; // Q12 move32(); } ELSE { tempF1_fx = -ONE_IN_Q12; // Q12 move32(); } tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12 FOR( k = 0; k < sub( N, 1 ); k++ ) { Word32 local = L_sub( W_extract_l( W_shr( W_mult0_32_32( tempF1_fx, spread_factor2_fx ), 31 ) ), ONE_IN_Q12 ); // Q12 Word32 sign_local; IF( local > 0 ) if ( EQ_16( signShift, 1 ) ) { sign_local = 1; tempF1_fx = ONE_IN_Q12; // Q12 move32(); } ELSE tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12 FOR( k = 0; k < N - 1; k++ ) { sign_local = -1; move32(); } Word32 local_int = W_extract_l( W_shr( W_abs( local ), 12 ) ); // Q0 Word32 local = Madd_32_32( -ONE_IN_Q12, tempF1_fx, spread_factor2_fx ); // Q12 Word32 local_int = L_shr( local, 12 ); // Q0 Word32 res_a1, res_a2, res_a3; Word32 res_b1, res_b2, res_b3; Word32 res_c1, res_c2, res_c3; Word32 res_d1, res_d2, res_d3; Word64 local_int_scaled; Word32 local_int_scaled; Word64 res_a, res_b, res_c, res_d; Word64 tempa, tempb; Word64 mult_a_D1, mult_b_D2; local_int = W_extract_l( W_mult0_32_32( sign_local, local_int ) ); // Q0 local_int_scaled = W_deposit32_l( L_shl( local_int, 12 ) ); // Q12 local_int_scaled = L_shl( local_int, 12 ); // Q12 lim1 = extract_l( local_int ); // Q0 IF( W_sub( local_int_scaled, local ) > 0 ) // Q21 if ( L_sub( local_int_scaled, local ) > 0 ) // Q12 { lim1 = sub( lim1, 1 ); // Q0 } Loading Loading @@ -387,7 +373,7 @@ static void targetCh_AlignStereoDFT_fx( } FOR( i = 0; i < L_shift_adapt; i++ ) { target_fx[i] = L_add( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ) ); // qsynth target_fx[i] = Madd_32_32( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ); // qsynth move32(); alpha_fx = L_add_sat( alpha_fx, winSlope_fx ); // Q31 Loading lib_com/ivas_tools.c +81 −43 Original line number Diff line number Diff line Loading @@ -942,6 +942,12 @@ Word16 matrix_product_mant_exp_fx( Word16 *Zp_fx_e = out_e; Word16 row, col; Word16 x_idx, y_idx; Word64 temp; Word16 temp_e; Word16 prod_e = add( X_fx_e, Y_fx_e ); Word16 max_exp = -31; move16(); /* Processing */ test(); Loading @@ -957,17 +963,28 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < rowsX; ++k ) { x_idx = add( k, imult1616( i, rowsX ) ); y_idx = add( k, imult1616( j, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = k + i * rowsX; y_idx = k + j * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -987,17 +1004,27 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( i, imult1616( k, rowsX ) ); y_idx = add( j, imult1616( k, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = i + k * rowsX; y_idx = j + k * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1017,18 +1044,27 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( k, imult1616( i, rowsX ) ); y_idx = add( j, imult1616( k, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = k + i * rowsX; y_idx = j + k * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1049,17 +1085,26 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( i, imult1616( k, rowsX ) ); y_idx = add( k, imult1616( j, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = i + k * rowsX; y_idx = k + j * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1070,18 +1115,11 @@ Word16 matrix_product_mant_exp_fx( move16(); } Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/ Zp_fx_e = out_e; Word16 max_exp = -31; move16(); FOR( j = 0; j < row; ++j ) { FOR( i = 0; i < col; ++i ) { max_exp = s_max( max_exp, *Zp_fx_e ); Zp_fx_e++; } } Zp_fx_e = out_e; *Z_fx_e = max_exp; move16(); FOR( j = 0; j < row; ++j ) Loading lib_com/options.h +3 −1 Original line number Diff line number Diff line Loading @@ -152,5 +152,7 @@ #define FIX_ISSUE_1237_KEEP_EVS_BE /* VA: Fix to keep EVS bitexactness to 26.444 */ #define FIX_ISSUE_1214 /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/ #define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */ #endif #define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ #endif Loading
.gitlab-ci.yml +2 −2 Original line number Diff line number Diff line Loading @@ -506,7 +506,7 @@ stages: .ivas-pytest-sanitizers-anchor: &ivas-pytest-sanitizers-anchor stage: test needs: ["build-codec-linux-make"] timeout: "300 minutes" timeout: "420 minutes" rules: - if: $CI_PIPELINE_SOURCE == 'push' when: never Loading @@ -522,7 +522,7 @@ stages: - *build-reference-and-dut-binaries - make clean - make -j CLANG=$CLANG_NUM - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan.supp,report_error_type=1"; fi - if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan_basop.supp,report_error_type=1"; fi - testcase_timeout=$TESTCASE_TIMEOUT_LTV_SANITIZERS - python3 -m pytest $TEST_SUITE -v --tb=no --update_ref 1 --html=report.html --self-contained-html --junit-xml=report-junit.xml --testcase_timeout $testcase_timeout --ref_encoder_path $DUT_ENCODER_PATH --ref_decoder_path $DUT_DECODER_PATH artifacts: Loading
lib_com/cldfb.c +1 −1 Original line number Diff line number Diff line Loading @@ -1213,7 +1213,7 @@ void cldfbSynthesis_ivas_fx( /*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/ iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx move32(); iBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_im_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx move32(); } Loading
lib_com/ivas_stereo_ica_com_fx.c +24 −38 Original line number Diff line number Diff line Loading @@ -131,24 +131,23 @@ static void interpTargetChannel_fx( Word32 spread_factor2_fx; Word64 tempD1_fx, tempD2_fx; d = negate( sub( currShift, prevShift ) ); IF( d >= 0 ) d = sub( prevShift, currShift ); IF( d == 0 ) { /* this can happen in DFT->TD switching */ return; } signShift = 1; move16(); } ELSE if ( d < 0 ) { signShift = -1; move16(); } IF( d == 0 ) { /* this can happen in DFT->TD switching */ return; } N = L_shift_adapt; move16(); Word32 *table_pointer = NULL; Loading Loading @@ -207,7 +206,7 @@ static void interpTargetChannel_fx( FOR( j = lim1; j <= lim2; j++ ) { ptr2_fx[i] = L_add( Mpy_32_32( win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ), ptr2_fx[i] ); // qsynth ptr2_fx[i] = Madd_32_32( ptr2_fx[i], win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ); // qsynth move32(); } } Loading @@ -225,44 +224,31 @@ static void interpTargetChannel_fx( tempD1_fx = W_deposit32_l( table_D1_pointer[abs( d )] ); // Q35 tempD2_fx = W_mult0_32_32( 3, table_D1_pointer[abs( d )] ); // Q35 IF( EQ_16( signShift, 1 ) ) { tempF1_fx = ONE_IN_Q12; // Q12 move32(); } ELSE { tempF1_fx = -ONE_IN_Q12; // Q12 move32(); } tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12 FOR( k = 0; k < sub( N, 1 ); k++ ) { Word32 local = L_sub( W_extract_l( W_shr( W_mult0_32_32( tempF1_fx, spread_factor2_fx ), 31 ) ), ONE_IN_Q12 ); // Q12 Word32 sign_local; IF( local > 0 ) if ( EQ_16( signShift, 1 ) ) { sign_local = 1; tempF1_fx = ONE_IN_Q12; // Q12 move32(); } ELSE tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12 FOR( k = 0; k < N - 1; k++ ) { sign_local = -1; move32(); } Word32 local_int = W_extract_l( W_shr( W_abs( local ), 12 ) ); // Q0 Word32 local = Madd_32_32( -ONE_IN_Q12, tempF1_fx, spread_factor2_fx ); // Q12 Word32 local_int = L_shr( local, 12 ); // Q0 Word32 res_a1, res_a2, res_a3; Word32 res_b1, res_b2, res_b3; Word32 res_c1, res_c2, res_c3; Word32 res_d1, res_d2, res_d3; Word64 local_int_scaled; Word32 local_int_scaled; Word64 res_a, res_b, res_c, res_d; Word64 tempa, tempb; Word64 mult_a_D1, mult_b_D2; local_int = W_extract_l( W_mult0_32_32( sign_local, local_int ) ); // Q0 local_int_scaled = W_deposit32_l( L_shl( local_int, 12 ) ); // Q12 local_int_scaled = L_shl( local_int, 12 ); // Q12 lim1 = extract_l( local_int ); // Q0 IF( W_sub( local_int_scaled, local ) > 0 ) // Q21 if ( L_sub( local_int_scaled, local ) > 0 ) // Q12 { lim1 = sub( lim1, 1 ); // Q0 } Loading Loading @@ -387,7 +373,7 @@ static void targetCh_AlignStereoDFT_fx( } FOR( i = 0; i < L_shift_adapt; i++ ) { target_fx[i] = L_add( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ) ); // qsynth target_fx[i] = Madd_32_32( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ); // qsynth move32(); alpha_fx = L_add_sat( alpha_fx, winSlope_fx ); // Q31 Loading
lib_com/ivas_tools.c +81 −43 Original line number Diff line number Diff line Loading @@ -942,6 +942,12 @@ Word16 matrix_product_mant_exp_fx( Word16 *Zp_fx_e = out_e; Word16 row, col; Word16 x_idx, y_idx; Word64 temp; Word16 temp_e; Word16 prod_e = add( X_fx_e, Y_fx_e ); Word16 max_exp = -31; move16(); /* Processing */ test(); Loading @@ -957,17 +963,28 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < rowsX; ++k ) { x_idx = add( k, imult1616( i, rowsX ) ); y_idx = add( k, imult1616( j, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = k + i * rowsX; y_idx = k + j * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -987,17 +1004,27 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( i, imult1616( k, rowsX ) ); y_idx = add( j, imult1616( k, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = i + k * rowsX; y_idx = j + k * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1017,18 +1044,27 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < colsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( k, imult1616( i, rowsX ) ); y_idx = add( j, imult1616( k, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = k + i * rowsX; y_idx = j + k * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; move16(); } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1049,17 +1085,26 @@ Word16 matrix_product_mant_exp_fx( { FOR( i = 0; i < rowsX; ++i ) { ( *Zp_fx ) = 0; move32(); ( *Zp_fx_e ) = 0; move16(); temp = 0; move64(); FOR( k = 0; k < colsX; ++k ) { x_idx = add( i, imult1616( k, rowsX ) ); y_idx = add( k, imult1616( j, rowsY ) ); ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/ move32(); x_idx = i + k * rowsX; y_idx = k + j * rowsY; temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e } /* Maximize accumulated value to 32-bit */ temp_e = W_norm( temp ); temp = W_shl( temp, temp_e ); if ( 0 == temp ) { temp_e = prod_e; } *Zp_fx_e = sub( prod_e, temp_e ); move16(); ( *Zp_fx ) = W_extract_h( temp ); move32(); max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp Zp_fx++; Zp_fx_e++; } Loading @@ -1070,18 +1115,11 @@ Word16 matrix_product_mant_exp_fx( move16(); } Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/ Zp_fx_e = out_e; Word16 max_exp = -31; move16(); FOR( j = 0; j < row; ++j ) { FOR( i = 0; i < col; ++i ) { max_exp = s_max( max_exp, *Zp_fx_e ); Zp_fx_e++; } } Zp_fx_e = out_e; *Z_fx_e = max_exp; move16(); FOR( j = 0; j < row; ++j ) Loading
lib_com/options.h +3 −1 Original line number Diff line number Diff line Loading @@ -152,5 +152,7 @@ #define FIX_ISSUE_1237_KEEP_EVS_BE /* VA: Fix to keep EVS bitexactness to 26.444 */ #define FIX_ISSUE_1214 /* Ittiam: Fix for issue 1214: Energy leakage in IGF tiles for MDCT-stereo @64kbps SWB*/ #define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */ #endif #define FIX_ISSUE_1245 /* Ittiam: Fix for issue 1245: Basop Encoder: Audible noise for silent Stereo input DTX on @24.4 kbps, @32 kbps*/ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define SVD_WMOPS_OPT /* Ittiam : SVD related optimizations */ #endif