Merge branch '1072-complexity-high-complexity-overhead-for-osba-decoding' of... (63b1f65b) · Commits · SA4 / Audio / IVAS BASOP

.gitlab-ci.yml

+21 −4

Original line number	Diff line number	Diff line
		@@ -506,7 +506,7 @@ stages:
		.ivas-pytest-sanitizers-anchor: &ivas-pytest-sanitizers-anchor
		stage: test
		needs: ["build-codec-linux-make"]
		timeout: "300 minutes"
		timeout: "420 minutes"
		rules:
		- if: $CI_PIPELINE_SOURCE == 'push'
		when: never
		@@ -522,7 +522,7 @@ stages:
		- *build-reference-and-dut-binaries
		- make clean
		- make -j CLANG=$CLANG_NUM
		- if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan.supp,report_error_type=1"; fi
		- if [[ $CLANG_NUM == 3 ]]; then export UBSAN_OPTIONS="suppressions=scripts/ubsan_basop.supp,report_error_type=1"; fi
		- testcase_timeout=$TESTCASE_TIMEOUT_LTV_SANITIZERS
		- python3 -m pytest $TEST_SUITE -v --tb=no --update_ref 1 --html=report.html --self-contained-html --junit-xml=report-junit.xml --testcase_timeout $testcase_timeout --ref_encoder_path $DUT_ENCODER_PATH --ref_decoder_path $DUT_DECODER_PATH
		artifacts:
		@@ -1253,8 +1253,25 @@ voip-be-on-merge-request:
		- unzip artifacts.zip || true # this may fail on first run, when there are no artifacts there and the zip file is actually just "404"-html
		- ls
		- public_dir="$CI_JOB_NAME-public"

		# if is needed to catch case when no artifact is there (first run), similarly as above
		- if [[ -d $public_dir ]]; then mv $public_dir/* wmops/; fi
		- if [[ -d $public_dir ]]; then
		- mv $public_dir/* wmops/
		# check here if we have the split-by-levels files present - if not, fake them up with the existing global one
		# this is needed for the first run with split graphs on a branch where the global version did run previously
		# NOTE: checking only for level_1 file here as this should already be sufficient
		# NOTE2: also not checking for RAM for the same reason
		- wmops_all_global="wmops/log_wmops_all.txt"
		- ram_all_global="wmops/log_ram_all.txt"
		- if [ -f "${wmops_all_global}" ] && [ ! -f "wmops/log_wmops_all_level_1.txt" ]; then
		- declare -a suffixes=("level_1" "level_2" "level_3" "rate_sw")
		- for suffix in "${suffixes[@]}"; do
		- cp ${wmops_all_global} wmops/log_wmops_all_${suffix}.txt
		- cp ${ram_all_global} wmops/log_ram_all_${suffix}.txt
		- done
		- fi
		- fi

		- ls wmops
		- rm artifacts.zip
		- rm -rf $public_dir
		@@ -1271,7 +1288,7 @@ voip-be-on-merge-request:
		&complexity-measurements-prepare-artifacts # prepare artifacts -> move to public directory
		- public_dir="$CI_JOB_NAME-public"
		- mkdir $public_dir
		- mv -f wmops/log_*_all.txt ./*.js ${public_dir}/
		- mv -f wmops/log_*_all*.txt ./*.js ${public_dir}/
		# move logfiles for links
		- mkdir $public_dir/logs
		# first move logs

lib_com/cldfb.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -1213,7 +1213,7 @@ void cldfbSynthesis_ivas_fx(
		/*cplxMult(&iBuffer[2*i], &iBuffer[2*i+1],-imagBuffer[k][2*i], imagBuffer[k][M1-1-2*i], rot_vctr_re[i], rot_vctr_im[i]);*/
		iBuffer_fx[2 * i] = Msub_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_re_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_im_fx[i] ); // Qx
		move32();
		iBuffer_fx[2 * i + 1] = Madd_32_32( Mpy_32_32( ( L_negate( imagBuffer_fx[k][2 * i] ) ), rot_vctr_im_fx[i] ), imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ); // Qx
		iBuffer_fx[2 * i + 1] = Msub_32_32( Mpy_32_32( imagBuffer_fx[k][( M1 - 1 ) - ( i * 2 )], rot_vctr_re_fx[i] ), imagBuffer_fx[k][2 * i], rot_vctr_im_fx[i] ); // Qx
		move32();
		}

lib_com/ivas_dirac_com.c

+9 −11

Original line number	Diff line number	Diff line
		@@ -960,7 +960,6 @@ void computeDiffuseness_fixed(
		q_intensity = add( q_factor_intensity[0], min_q_shift2 );
		move16();

		push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-\|" );
		FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i )
		{
		/* Energy slow */
		@@ -976,13 +975,13 @@ void computeDiffuseness_fixed(
		if( shift_q < 0 )
		{
		shiftEquiv = L_lshl( 0x80000000, shift_q );
		shift_qtotal = sub( min_q_shift1, 0 );
		}
		if( shift_q >= 0 )
		{
		shiftEquiv = L_add( 0x7FFFFFFF, 0 );
		shift_qtotal = sub( min_q_shift1, shift_q );
		}
		shift_qtotal = sub( min_q_shift1, s_max( shift_q, 0 ) );

		FOR( k = 0; k < num_freq_bands; k++ )
		{
		tmp = L_shl( p_tmp_c[k], shift_qtotal );
		@@ -1019,17 +1018,17 @@ void computeDiffuseness_fixed(
		q_tmp = add( q_factor_intensity[i], min_q_shift2 );

		shift_q = sub( q_tmp, q_intensity );
		#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB
		if( shift_q >= 0 )
		{
		shiftEquiv = L_lshl( 0x7FFFFFFF, 0 );
		shift_qtotal = sub( min_q_shift2, shift_q );
		}
		#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS
		if ( shift_q < 0 )
		{
		shiftEquiv = L_lshl( 0x80000000, shift_q );
		shift_qtotal = sub( min_q_shift2, 0 );
		}
		if ( shift_q >= 0 )
		{
		shiftEquiv = L_lshl( 0x7FFFFFFF, 0 );
		}
		shift_qtotal = sub( min_q_shift2, s_max( shift_q, 0 ) );

		FOR( j = 0; j < DIRAC_NUM_DIMS; ++j )
		{
		p_tmp = buffer_intensity[j][i];
		@@ -1071,7 +1070,6 @@ void computeDiffuseness_fixed(

		q_intensity = s_min( q_intensity, q_tmp );
		}
		pop_wmops(); /push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-\|" );//

		min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) );
		min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) );

lib_com/ivas_stereo_ica_com_fx.c

+24 −38

Original line number	Diff line number	Diff line
		@@ -131,24 +131,23 @@ static void interpTargetChannel_fx(
		Word32 spread_factor2_fx;
		Word64 tempD1_fx, tempD2_fx;

		d = negate( sub( currShift, prevShift ) );
		IF( d >= 0 )
		d = sub( prevShift, currShift );

		IF( d == 0 )
		{
		/* this can happen in DFT->TD switching */
		return;
		}

		signShift = 1;
		move16();
		}
		ELSE

		if ( d < 0 )
		{
		signShift = -1;
		move16();
		}

		IF( d == 0 )
		{
		/* this can happen in DFT->TD switching */
		return;
		}

		N = L_shift_adapt;
		move16();
		Word32 *table_pointer = NULL;
		@@ -207,7 +206,7 @@ static void interpTargetChannel_fx(

		FOR( j = lim1; j <= lim2; j++ )
		{
		ptr2_fx[i] = L_add( Mpy_32_32( win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ), ptr2_fx[i] ); // qsynth
		ptr2_fx[i] = Madd_32_32( ptr2_fx[i], win_fx[j * INTERP_FACTOR1 - i], ptr1_fx[j] ); // qsynth
		move32();
		}
		}
		@@ -225,44 +224,31 @@ static void interpTargetChannel_fx(
		tempD1_fx = W_deposit32_l( table_D1_pointer[abs( d )] ); // Q35
		tempD2_fx = W_mult0_32_32( 3, table_D1_pointer[abs( d )] ); // Q35

		IF( EQ_16( signShift, 1 ) )
		{
		tempF1_fx = ONE_IN_Q12; // Q12
		move32();
		}
		ELSE
		{
		tempF1_fx = -ONE_IN_Q12; // Q12
		move32();
		}
		tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12
		FOR( k = 0; k < sub( N, 1 ); k++ )
		{
		Word32 local = L_sub( W_extract_l( W_shr( W_mult0_32_32( tempF1_fx, spread_factor2_fx ), 31 ) ), ONE_IN_Q12 ); // Q12
		Word32 sign_local;
		IF( local > 0 )

		if ( EQ_16( signShift, 1 ) )
		{
		sign_local = 1;
		tempF1_fx = ONE_IN_Q12; // Q12
		move32();
		}
		ELSE

		tempF1_fx = L_sub( imult3216( factor_fx, d ), tempF1_fx ); // Q12
		FOR( k = 0; k < N - 1; k++ )
		{
		sign_local = -1;
		move32();
		}
		Word32 local_int = W_extract_l( W_shr( W_abs( local ), 12 ) ); // Q0
		Word32 local = Madd_32_32( -ONE_IN_Q12, tempF1_fx, spread_factor2_fx ); // Q12
		Word32 local_int = L_shr( local, 12 ); // Q0
		Word32 res_a1, res_a2, res_a3;
		Word32 res_b1, res_b2, res_b3;
		Word32 res_c1, res_c2, res_c3;
		Word32 res_d1, res_d2, res_d3;
		Word64 local_int_scaled;
		Word32 local_int_scaled;
		Word64 res_a, res_b, res_c, res_d;
		Word64 tempa, tempb;
		Word64 mult_a_D1, mult_b_D2;
		local_int = W_extract_l( W_mult0_32_32( sign_local, local_int ) ); // Q0
		local_int_scaled = W_deposit32_l( L_shl( local_int, 12 ) ); // Q12
		local_int_scaled = L_shl( local_int, 12 ); // Q12
		lim1 = extract_l( local_int ); // Q0
		IF( W_sub( local_int_scaled, local ) > 0 ) // Q21
		if ( L_sub( local_int_scaled, local ) > 0 ) // Q12
		{
		lim1 = sub( lim1, 1 ); // Q0
		}
		@@ -387,7 +373,7 @@ static void targetCh_AlignStereoDFT_fx(
		}
		FOR( i = 0; i < L_shift_adapt; i++ )
		{
		target_fx[i] = L_add( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), Mpy_32_32( L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ) ); // qsynth
		target_fx[i] = Madd_32_32( Mpy_32_32( alpha_fx, fadeInBuff_fx[i] ), L_sub( ONE_IN_Q31, alpha_fx ), fadeOutBuff_fx[i] ); // qsynth
		move32();

		alpha_fx = L_add_sat( alpha_fx, winSlope_fx ); // Q31

lib_com/ivas_tools.c

+81 −43

Original line number	Diff line number	Diff line
		@@ -942,6 +942,12 @@ Word16 matrix_product_mant_exp_fx(
		Word16 *Zp_fx_e = out_e;
		Word16 row, col;
		Word16 x_idx, y_idx;
		Word64 temp;
		Word16 temp_e;
		Word16 prod_e = add( X_fx_e, Y_fx_e );

		Word16 max_exp = -31;
		move16();

		/* Processing */
		test();
		@@ -957,17 +963,28 @@ Word16 matrix_product_mant_exp_fx(
		{
		FOR( i = 0; i < colsX; ++i )
		{
		( *Zp_fx ) = 0;
		move32();
		( *Zp_fx_e ) = 0;
		move16();
		temp = 0;
		move64();

		FOR( k = 0; k < rowsX; ++k )
		{
		x_idx = add( k, imult1616( i, rowsX ) );
		y_idx = add( k, imult1616( j, rowsY ) );
		( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
		move32();
		x_idx = k + i * rowsX;
		y_idx = k + j * rowsY;
		temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
		}
		/* Maximize accumulated value to 32-bit */
		temp_e = W_norm( temp );
		temp = W_shl( temp, temp_e );
		if ( 0 == temp )
		{
		temp_e = prod_e;
		move16();
		}
		*Zp_fx_e = sub( prod_e, temp_e );
		move16();
		( *Zp_fx ) = W_extract_h( temp );
		move32();
		max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
		Zp_fx++;
		Zp_fx_e++;
		}
		@@ -987,17 +1004,27 @@ Word16 matrix_product_mant_exp_fx(
		{
		FOR( i = 0; i < rowsX; ++i )
		{
		( *Zp_fx ) = 0;
		move32();
		( *Zp_fx_e ) = 0;
		move16();
		temp = 0;
		move64();
		FOR( k = 0; k < colsX; ++k )
		{
		x_idx = add( i, imult1616( k, rowsX ) );
		y_idx = add( j, imult1616( k, rowsY ) );
		( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
		move32();
		x_idx = i + k * rowsX;
		y_idx = j + k * rowsY;
		temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
		}
		/* Maximize accumulated value to 32-bit */
		temp_e = W_norm( temp );
		temp = W_shl( temp, temp_e );
		if ( 0 == temp )
		{
		temp_e = prod_e;
		move16();
		}
		*Zp_fx_e = sub( prod_e, temp_e );
		move16();
		( *Zp_fx ) = W_extract_h( temp );
		move32();
		max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
		Zp_fx++;
		Zp_fx_e++;
		}
		@@ -1017,18 +1044,27 @@ Word16 matrix_product_mant_exp_fx(
		{
		FOR( i = 0; i < colsX; ++i )
		{
		( *Zp_fx ) = 0;
		move32();
		( *Zp_fx_e ) = 0;
		move16();
		temp = 0;
		move64();
		FOR( k = 0; k < colsX; ++k )
		{
		x_idx = add( k, imult1616( i, rowsX ) );
		y_idx = add( j, imult1616( k, rowsY ) );
		( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
		move32();
		x_idx = k + i * rowsX;
		y_idx = j + k * rowsY;
		temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
		}

		/* Maximize accumulated value to 32-bit */
		temp_e = W_norm( temp );
		temp = W_shl( temp, temp_e );
		if ( 0 == temp )
		{
		temp_e = prod_e;
		move16();
		}
		*Zp_fx_e = sub( prod_e, temp_e );
		move16();
		( *Zp_fx ) = W_extract_h( temp );
		move32();
		max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
		Zp_fx++;
		Zp_fx_e++;
		}
		@@ -1049,17 +1085,26 @@ Word16 matrix_product_mant_exp_fx(
		{
		FOR( i = 0; i < rowsX; ++i )
		{
		( *Zp_fx ) = 0;
		move32();
		( *Zp_fx_e ) = 0;
		move16();
		temp = 0;
		move64();
		FOR( k = 0; k < colsX; ++k )
		{
		x_idx = add( i, imult1616( k, rowsX ) );
		y_idx = add( k, imult1616( j, rowsY ) );
		( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); /*Q31 - Zp_fx_e*/
		move32();
		x_idx = i + k * rowsX;
		y_idx = k + j * rowsY;
		temp = W_mac_32_32( temp, X_fx[x_idx], Y_fx[y_idx] ); // X_fx_e + Y_fx_e
		}
		/* Maximize accumulated value to 32-bit */
		temp_e = W_norm( temp );
		temp = W_shl( temp, temp_e );
		if ( 0 == temp )
		{
		temp_e = prod_e;
		}
		*Zp_fx_e = sub( prod_e, temp_e );
		move16();
		( *Zp_fx ) = W_extract_h( temp );
		move32();
		max_exp = s_max( max_exp, *Zp_fx_e ); // Find the max exp
		Zp_fx++;
		Zp_fx_e++;
		}
		@@ -1070,18 +1115,11 @@ Word16 matrix_product_mant_exp_fx(
		move16();
		}
		Zp_fx = Z_fx; /*Q31 - Zp_fx_e*/

		Zp_fx_e = out_e;
		Word16 max_exp = -31;
		move16();
		FOR( j = 0; j < row; ++j )
		{
		FOR( i = 0; i < col; ++i )
		{
		max_exp = s_max( max_exp, *Zp_fx_e );
		Zp_fx_e++;
		}
		}
		Zp_fx_e = out_e;


		*Z_fx_e = max_exp;
		move16();
		FOR( j = 0; j < row; ++j )