From 2ec0301c1268655e48dc99244c19b938a884d389 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Fri, 24 Jan 2025 17:23:26 +0530 Subject: [PATCH] Complexity optimization for SBA path decoding --- lib_com/cnst.h | 1 + lib_com/ivas_cnst.h | 1 + lib_com/ivas_dirac_com.c | 49 ++++--- lib_com/ivas_spar_com.c | 76 +++++------ lib_com/ivas_transient_det.c | 18 +-- lib_com/tools_fx.c | 14 +- lib_dec/igf_dec_fx.c | 13 +- lib_dec/ivas_spar_decoder.c | 19 +-- lib_rend/ivas_dirac_onsets_dec.c | 123 +---------------- lib_rend/ivas_dirac_output_synthesis_dec.c | 146 ++++++++++++--------- lib_rend/ivas_dirac_rend.c | 21 +-- lib_rend/ivas_efap.c | 68 ++-------- 12 files changed, 217 insertions(+), 332 deletions(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index 7a5260a41..66b6dc9a1 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -137,6 +137,7 @@ #define FOUR_IN_Q28 1073741824 #define MAX_WORD16 32767 +#define ONE_IN_Q45 (Word64)0x200000000000 #define ONE_IN_Q62 (Word64)0x4000000000000000 /*----------------------------------------------------------------------------------* * General constants diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index a7a7ce88a..1fe54d164 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -150,6 +150,7 @@ typedef enum RENDERER_OSBA_LS } RENDERER_TYPE; +#define MAX_FREQUENCY_BANDS 64 /*----------------------------------------------------------------------------------* * IVAS general constants diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 6aeff8ec5..ebc874cce 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -966,38 +966,55 @@ void computeDiffuseness_fixed( p_tmp_c = buffer_energy + i * num_freq_bands; q_tmp = add( q_factor_energy[i], min_q_shift1 ); - FOR( k = 0; k < num_freq_bands; k++ ) + + Word16 shift_q = sub( q_tmp, q_ene ); + IF( shift_q < 0 ) { - tmp = L_shl( p_tmp_c[k], min_q_shift1 ); - IF( LT_16( q_tmp, q_ene ) ) + FOR( k = 0; k < num_freq_bands; k++ ) { - energy_slow[k] = L_add( L_shr( energy_slow[k], sub( q_ene, q_tmp ) ), tmp ); + tmp = L_shl( p_tmp_c[k], min_q_shift1 ); + energy_slow[k] = L_add( L_shl( energy_slow[k], shift_q ), tmp ); move32(); } - ELSE + } + ELSE + { + FOR( k = 0; k < num_freq_bands; k++ ) { - energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, sub( q_tmp, q_ene ) ) ); + tmp = L_shl( p_tmp_c[k], min_q_shift1 ); + energy_slow[k] = L_add( energy_slow[k], L_shr( tmp, shift_q ) ); move32(); } } + q_ene = s_min( q_ene, q_tmp ); /* Intensity slow */ q_tmp = add( q_factor_intensity[i], min_q_shift2 ); - FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) + + shift_q = sub( q_tmp, q_intensity ); + IF( shift_q > 0 ) { - p_tmp = buffer_intensity[j][i]; - FOR( k = 0; k < num_freq_bands; k++ ) + FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { - tmp = L_shl( p_tmp[k], min_q_shift2 ); - IF( LT_16( q_intensity, q_tmp ) ) + p_tmp = buffer_intensity[j][i]; + FOR( k = 0; k < num_freq_bands; k++ ) { - intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, sub( q_tmp, q_intensity ) ) ); + tmp = L_shl( p_tmp[k], min_q_shift2 ); + intensity_slow[j * num_freq_bands + k] = L_add( intensity_slow[j * num_freq_bands + k], L_shr( tmp, shift_q ) ); move32(); } - ELSE + } + } + ELSE + { + FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) + { + p_tmp = buffer_intensity[j][i]; + FOR( k = 0; k < num_freq_bands; k++ ) { - intensity_slow[j * num_freq_bands + k] = L_add( L_shr( intensity_slow[j * num_freq_bands + k], sub( q_intensity, q_tmp ) ), tmp ); + tmp = L_shl( p_tmp[k], min_q_shift2 ); + intensity_slow[j * num_freq_bands + k] = L_add( L_shl( intensity_slow[j * num_freq_bands + k], shift_q ), tmp ); move32(); } } @@ -1017,9 +1034,7 @@ void computeDiffuseness_fixed( FOR( k = 0; k < num_freq_bands; k++ ) { - p_tmp[k] = Mpy_32_32( p_tmp[k], p_tmp[k] ); - move32(); - intensity_slow_abs[k] = L_add( intensity_slow_abs[k], p_tmp[k] ); + intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] ); move32(); } } diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index 362c1c9ed..9f5106e0c 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7044,7 +7044,7 @@ void ivas_dirac_dec_get_response_fx( Word16 Q_out ) { Word16 index_azimuth, index_elevation; - Word16 el, e, az; + Word16 el, e, az, q_diff; Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3]; Word32 sin_az_fx[3]; Word32 f_fx; @@ -7055,49 +7055,39 @@ void ivas_dirac_dec_get_response_fx( index_azimuth = add( azimuth, 180 ) % 360; move16(); index_elevation = add( elevation, 90 ); - IF( GT_16( index_elevation, 90 ) ) + + e = 1; + move16(); + + if ( GT_16( index_elevation, 90 ) ) { e = -1; - move16(); - } - ELSE - { - e = 1; - move16(); } - IF( GT_16( index_elevation, 90 ) ) + el = index_elevation; + move16(); + + if ( GT_16( index_elevation, 90 ) ) { el = sub( 180, index_elevation ); - move16(); - } - ELSE - { - el = index_elevation; - move16(); } - IF( GT_16( index_azimuth, 180 ) ) + az = index_azimuth; + move16(); + + if ( GT_16( index_azimuth, 180 ) ) { az = sub( 360, index_azimuth ); - move16(); - } - ELSE - { - az = index_azimuth; - move16(); } - IF( GT_16( index_azimuth, 180 ) ) + f_fx = 1; + move16(); + + if ( GT_16( index_azimuth, 180 ) ) { f_fx = -1; - move16(); - } - ELSE - { - f_fx = 1; - move16(); } + cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 ); // q30 cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30 sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 ); // q30 @@ -7122,54 +7112,54 @@ void ivas_dirac_dec_get_response_fx( response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out move32(); + q_diff = sub( Q_out, 29 ); + FOR( l = 1; l <= ambisonics_order; l++ ) { b_2 = imult1616( l, l ); - b1_2 = add( imult1616( l, l ), shl( l, 1 ) ); + b1_2 = add( b_2, shl( l, 1 ) ); FOR( m = 0; m < l; m += 2 ) { - b = add( b_2, m ); + b = b_2 + m; a = dirac_gains_P_idx[b]; - move16(); + c_fx_better = local_result_table[el][a]; // q30 move32(); - response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out + response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out move32(); - b1 = sub( b1_2, m ); - response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out + b1 = b1_2 - m; + response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out move32(); } FOR( m = 1; m < l; m += 2 ) { - b = add( b_2, m ); + b = b_2 + m; a = dirac_gains_P_idx[b]; - move16(); c_fx_better = local_result_table[el][a]; // q30 move32(); if ( EQ_16( e, -1 ) ) { c_fx_better = L_negate( c_fx_better ); // q30 } - response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out + response_fx[b] = L_shl( Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ), q_diff ); // Q_out move32(); - b1 = sub( b1_2, m ); - response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), sub( Q_out, 29 ) ); // Q_out + + b1 = b1_2 - m; + response_fx[b1] = L_shl( Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ), q_diff ); // Q_out move32(); } b = add( b_2, l ); a = dirac_gains_P_idx[b]; - move16(); c_fx_better = local_result_table_2[el][a]; // q30 move32(); - IF( EQ_16( ( l % 2 ), 1 ) ) + IF( EQ_16( s_and( l, 0x01 ), 1 ) ) { if ( EQ_16( e, -1 ) ) { c_fx_better = L_negate( c_fx_better ); // q30 - move32(); } } response_fx[b] = L_shl( c_fx_better, sub( Q_out, 30 ) ); // Q_out diff --git a/lib_com/ivas_transient_det.c b/lib_com/ivas_transient_det.c index 8f8905fdd..dccf4db02 100644 --- a/lib_com/ivas_transient_det.c +++ b/lib_com/ivas_transient_det.c @@ -344,26 +344,26 @@ static Word32 ivas_calc_duck_gain_fx( Word32 duck_gain_out, L_tmp; Word16 tmp_e; - duck_gain_out = L_add( L_shl( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), Q1 ), ONE_IN_Q30 ); /*Q30*/ + duck_gain_out = L_add( Mpy_32_32( L_sub( duck_gain, ONE_IN_Q30 ), duck_coeff ), ONE_IN_Q29 ); /*Q29*/ - IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Mpy_32_32( duck_mult_fac, env_1 ), add( 2, env1_e ), Mpy_32_32( duck_gain_out, env_2 ), add( 1, env2_e ) ), -1 ) ) + IF( LT_64( W_mult0_32_32( duck_mult_fac, env_1 ), W_shr( W_mult0_32_32( duck_gain_out, env_2 ), sub( env1_e, env2_e ) ) ) ) { + + duck_gain_out = 0; + move32(); + test(); - IF( ( env_1 == 0 ) || ( env_2 == 0 ) ) - { - duck_gain_out = 0; - move32(); - } - ELSE + IF( ( env_1 != 0 ) && ( env_2 != 0 ) ) { L_tmp = BASOP_Util_Divide3232_Scale_cadence( env_1, env_2, &tmp_e ); L_tmp = L_shl( L_tmp, add( sub( env1_e, env2_e ), tmp_e ) ); duck_gain_out = Mpy_32_32( duck_mult_fac, L_tmp ); /*Q29*/ - duck_gain_out = L_shl( duck_gain_out, Q1 ); /*Q30*/ } } + duck_gain_out = L_shl( duck_gain_out, Q1 ); /*Q30*/ + return duck_gain_out; /*Q30*/ } /*-----------------------------------------------------------------------------------------* diff --git a/lib_com/tools_fx.c b/lib_com/tools_fx.c index f6152f265..74a47dde8 100644 --- a/lib_com/tools_fx.c +++ b/lib_com/tools_fx.c @@ -884,6 +884,10 @@ void Scale_sig32( Flag Overflow = 0; move32(); #endif + IF( 0 == exp0 ) + { + return; + } FOR( i = 0; i < lg; i++ ) { @@ -4217,10 +4221,11 @@ UWord32 mvl2s_r( IF( (void *) y <= (const void *) x ) { + Word32 tempd = L_shl( 1, sub( q_x, 1 ) ); FOR( i = 0; i < n; i++ ) { - temp = L_shr( x[i], sub( q_x, 1 ) ); - temp = L_shr( L_add( temp, 1 ), 1 ); + temp = L_add( x[i], tempd ); + temp = L_shr( temp, q_x ); IF( GT_32( temp, MAX16B ) ) { @@ -4241,10 +4246,11 @@ UWord32 mvl2s_r( } ELSE { + Word32 tempd = L_shl( 1, sub( q_x, 1 ) ); FOR( i = n - 1; i >= 0; i-- ) { - temp = L_shr( x[i], sub( q_x, 1 ) ); - temp = L_shr( L_add( temp, 1 ), 1 ); + temp = L_add( x[i], tempd ); + temp = L_shr( temp, q_x ); IF( GT_32( temp, MAX16B ) ) { diff --git a/lib_dec/igf_dec_fx.c b/lib_dec/igf_dec_fx.c index c09ca95ef..4f74c31ca 100644 --- a/lib_dec/igf_dec_fx.c +++ b/lib_dec/igf_dec_fx.c @@ -2904,19 +2904,22 @@ static void IGF_getWhiteSpectralData_ivas( Word16 guard_bits = add( find_guarded_bits_fx( add( i_mult( 2, level ), 1 ) ), 1 ) / 2; s_l = sub( s_l, guard_bits ); + Word16 quo = BASOP_Util_Divide3216_Scale( ONE_IN_Q30, add( shl( level, 1 ), 1 ), &tmp_e ); + tmp_e = add( tmp_e, 1 ); + + ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15 FOR( i = start; i < stop - level; i++ ) { ak = 0; move32(); - ak_e = 0; move32(); FOR( j = i - level; j < i + level + 1; j++ ) { tmp_16 = extract_h( L_shl( in[j], s_l ) ); // e: in_e - s_l ak = L_mac( ak, tmp_16, tmp_16 ); // e: 2 * (in_e - s_l) } - ak = L_deposit_h( BASOP_Util_Divide3216_Scale( ak, add( shl( level, 1 ), 1 ), &tmp_e ) ); - ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15 + ak = Mult_32_16( ak, quo ); // add( shl( level, 1 ), 1 ), &tmp_e ) ); + n = sub( 30, add( norm_l( ak ), sub( 31, ak_e ) ) ); n = shr( n, 1 ); @@ -2929,7 +2932,6 @@ static void IGF_getWhiteSpectralData_ivas( FOR( ; i < stop; i++ ) { ak = 0; - ak_e = 0; move32(); move16(); @@ -2938,8 +2940,7 @@ static void IGF_getWhiteSpectralData_ivas( tmp_16 = extract_h( L_shl( in[j], s_l ) ); // e: in_e - s_l ak = L_mac( ak, tmp_16, tmp_16 ); // e: 2 * (in_e - s_l) } - ak = L_deposit_h( BASOP_Util_Divide3216_Scale( ak, sub( stop, sub( i, level ) ), &tmp_e ) ); - ak_e = add( tmp_e, sub( shl( sub( in_e, s_l ), 1 ), 15 ) ); // tmp_e + 2 * (in_e - s_l) - 15 + ak = Mult_32_16( ak, quo ); n = sub( 30, add( norm_l( ak ), sub( 31, ak_e ) ) ); n = shr( n, 1 ); diff --git a/lib_dec/ivas_spar_decoder.c b/lib_dec/ivas_spar_decoder.c index fdace42d8..f76961ebe 100644 --- a/lib_dec/ivas_spar_decoder.c +++ b/lib_dec/ivas_spar_decoder.c @@ -1687,8 +1687,9 @@ void ivas_spar_dec_upmixer_sf_fx( SPAR_DEC_HANDLE hSpar; Word16 num_md_sub_frames; Word16 q1 = 30; + Word16 prod; move16(); - push_wmops( "ivas_spar_dec_upmixer_sf" ); + push_wmops( "ivas_spar_dec_upmixer_sf_fx" ); hSpar = st_ivas->hSpar; hDecoderConfig = st_ivas->hDecoderConfig; nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; /*Q0*/ @@ -1706,6 +1707,8 @@ void ivas_spar_dec_upmixer_sf_fx( slot_idx_start = hSpar->slots_rendered; /*Q0*/ move16(); + prod = i_mult( slot_idx_start, slot_size ); + test(); IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) { @@ -1716,7 +1719,7 @@ void ivas_spar_dec_upmixer_sf_fx( FOR( i = 0; i < nchan_internal; i++ ) { - p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[( i + nchan_ism )] + i_mult( slot_idx_start, slot_size ); /*Q11*/ + p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[( i + nchan_ism )] + prod; /*Q11*/ } test(); @@ -1725,7 +1728,7 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( i = 0; i < nchan_ism; i++ ) { - p_tc_fx[( i + nchan_internal )] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); /*Q11*/ + p_tc_fx[( i + nchan_internal )] = st_ivas->hTcBuffer->tc_fx[i] + prod; /*Q11*/ } } } @@ -1733,7 +1736,7 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( i = 0; i < nchan_internal; i++ ) { - p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); /*Q11*/ + p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + prod; /*Q11*/ } } @@ -1897,14 +1900,14 @@ void ivas_spar_dec_upmixer_sf_fx( } ELSE { - cldfb_par_fx = 0; - move32(); + Word64 acc = 0; + move64(); FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ ) { /* accumulate contributions from all SPAR bands */ - Word16 tmp = extract_l( L_shr( bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band], 7 ) ); /*Q15*/ - cldfb_par_fx = L_add_sat( cldfb_par_fx, Mpy_32_16_1( mixer_mat_fx[out_ch][in_ch][spar_band], tmp ) ); /*q1*/ + acc = W_mac_32_32( acc, mixer_mat_fx[out_ch][in_ch][spar_band], bin2band->pp_cldfb_weights_per_spar_band_fx[cldfb_band][spar_band] ); // q1+ Q23 } + cldfb_par_fx = W_shl_sat_l( acc, -23 ); // q1 } out_re_fx[out_ch] = Madd_32_32( out_re_fx[out_ch], cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ); /*q1-25*/ diff --git a/lib_rend/ivas_dirac_onsets_dec.c b/lib_rend/ivas_dirac_onsets_dec.c index 9e82fa022..f53f41404 100644 --- a/lib_rend/ivas_dirac_onsets_dec.c +++ b/lib_rend/ivas_dirac_onsets_dec.c @@ -43,110 +43,6 @@ #include "wmc_auto.h" #include "prot_fx.h" -static Word16 BASOP_Util_Cmp_Mant32Exp_sat( Word32 a_m, Word16 a_e, Word32 b_m, Word16 b_e ); -Word16 BASOP_Util_Cmp_Mant32Exp_sat /*!< o: flag: result of comparison */ - /* 0, if a == b */ - /* 1, if a > b */ - /* -1, if a < b */ - ( Word32 a_m, /*!< i: Mantissa of 1st operand a */ - Word16 a_e, /*!< i: Exponent of 1st operand a */ - Word32 b_m, /*!< i: Mantissa of 2nd operand b */ - Word16 b_e ) /*!< i: Exponent of 2nd operand b */ - -{ - Word32 diff_m; - Word16 diff_e, shift, result; - - - /* - This function compares two input parameters, both represented by a 32-bit mantissa and a 16-bit exponent. - If both values are identical, 0 is returned. - If a is greater b, 1 is returned. - If a is less than b, -1 is returned. - */ - - /* Check, if both mantissa and exponents are identical, when normalized: return 0 */ - shift = norm_l( a_m ); - IF( shift ) - { - a_m = L_shl( a_m, shift ); - } - IF( shift ) - { - a_e = sub( a_e, shift ); - } - - shift = norm_l( b_m ); - IF( shift ) - { - b_m = L_shl( b_m, shift ); - } - IF( shift ) - { - b_e = sub( b_e, shift ); - } - - /* align exponent, if any mantissa is zero */ - if ( !a_m ) - { - a_e = b_e; - move16(); - } - if ( !b_m ) - { - b_e = a_e; - move16(); - } - - BASOP_SATURATE_WARNING_OFF_EVS - diff_m = L_sub_sat( a_m, b_m ); - BASOP_SATURATE_WARNING_ON_EVS - diff_e = sub( a_e, b_e ); - - test(); - IF( diff_m == 0 && diff_e == 0 ) - { - return 0; - } - - /* Check sign, exponent and mantissa to identify, whether a is greater b or not */ - result = -1; - move16(); - - IF( a_m >= 0 ) - { - /* a is positive */ - if ( b_m < 0 ) - { - result = 1; - move16(); - } - - test(); - test(); - test(); - if ( ( b_m >= 0 ) && ( ( diff_e > 0 ) || ( diff_e == 0 && diff_m > 0 ) ) ) - { - result = 1; - move16(); - } - } - ELSE - { - /* a is negative */ - test(); - test(); - test(); - if ( ( b_m < 0 ) && ( ( diff_e < 0 ) || ( diff_e == 0 && diff_m > 0 ) ) ) - { - result = 1; - move16(); - } - } - return result; -} - - /*------------------------------------------------------------------------- * ivas_dirac_dec_onset_detection_open() * @@ -247,31 +143,22 @@ void ivas_dirac_dec_onset_detection_process_fx( move32(); } - IF( *p_onset_detector_1_fx == 0 ) + if ( *p_onset_detector_1_fx == 0 ) { - *p_onset_detector_1_fx = L_add( *p_onset_detector_1_fx, EPSILON_FX ); /* Q(q_onset_detector) */ + *p_onset_detector_1_fx = EPSILON_FX; /* Q(q_onset_detector) */ move32(); } /*onset filter limited between 0 and 1*/ tmp_fx = BASOP_Util_Divide3232_Scale( *p_onset_detector_2_fx, *p_onset_detector_1_fx, &e_scale ); - tmp32_fx = L_mult0( tmp_fx, DIRAC_ONSET_GAIN_FX ); + tmp32_fx = L_mult0( tmp_fx, DIRAC_ONSET_GAIN_FX ); // Q= Q12 + (15-e_scale) if ( tmp32_fx < 0 ) { tmp32_fx = 0; move32(); } - Word16 comp_flag = BASOP_Util_Cmp_Mant32Exp_sat( tmp32_fx, add( e_scale, 4 ), ONE_IN_Q30, 1 ); - test(); - IF( EQ_16( comp_flag, 1 ) || comp_flag == 0 ) - { - tmp32_fx = ONE_IN_Q31; - move32(); - } - ELSE - { - tmp32_fx = L_shl( tmp32_fx, add( e_scale, 4 ) ); - } + tmp32_fx = L_shl_sat( tmp32_fx, add( e_scale, 4 ) ); + onset_filter[b] = tmp32_fx; move32(); diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index d9377b5c0..4686ca1e6 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -823,6 +823,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Word32 sqr_inp, sqr; /*Directonal sound gain nrg compensation*/ + c = L_add( ONE_IN_Q29 /*1 Q29*/, Mpy_32_16_1( L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ), 5461 /*1.0 / 6.0 Q15*/ ) ); /*Diffuseness modellling nrg compensation*/ /*Q29*/ FOR( k = 0; k < num_freq_bands_diff; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -838,15 +839,16 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 ) { - b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], 232831 /* EPSILON in exp 63 */, &b_exp ); - b_exp = add( b_exp, sub( sub( 31, q_reference_power ), 63 ) ); + b = MAX_16; + move16(); + b_exp = 0; + move16(); } ELSE { b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*Q(15-b_exp)*/ } } - c = L_add( ONE_IN_Q29 /*1 Q29*/, Mpy_32_16_1( L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ), 5461 /*1.0 / 6.0 Q15*/ ) ); /*Diffuseness modellling nrg compensation*/ /*Q29*/ mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q(q_diff_aab) = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 @@ -903,6 +905,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -918,15 +921,16 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 ) { - b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], 232831 /* EPSILON in exp 63 */, &b_exp ); - b_exp = add( b_exp, sub( sub( 31, q_reference_power ), 63 ) ); + b = MAX_16; + move16(); + b_exp = 0; + move16(); } ELSE { b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } - c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 @@ -1195,7 +1199,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( Word16 buf_idx, ch_idx, i, l; Word16 num_freq_bands, num_freq_bands_diff; Word16 num_channels_dir, num_channels_diff; - Word32 g, g1, g2; + Word32 g, g1[MAX_FREQUENCY_BANDS], g2; Word32 *p_gains_dir, *p_gains_diff; Word32 *p_gains_dir_prev, *p_gains_diff_prev; Word32 *p_cy_cross_dir_smooth; @@ -1209,12 +1213,14 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( DIRAC_OUTPUT_SYNTHESIS_STATE h_dirac_output_synthesis_state; Word16 nchan_transport_foa; Word16 ch_idx_diff; + Word32 cmp1, cmp2; Word32 aux_buf[CLDFB_NO_CHANNELS_MAX]; Word32 ratio_float[DIRAC_HO_NUMSECTORS * CLDFB_NO_CHANNELS_MAX]; Word16 q_com = 0; move16(); Word16 exp = 0; move16(); + Word16 q_shift; /* collect some often used parameters */ h_dirac_output_synthesis_params = hDirACRend->h_output_synthesis_psd_params; @@ -1234,6 +1240,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( nchan_transport_foa = s_min( 4, nchan_transport ); move16(); + Word16 prod = imult1616( num_freq_bands, num_channels_dir ); /*-----------------------------------------------------------------* * comput target Gains *-----------------------------------------------------------------*/ @@ -1401,6 +1408,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( move16(); /*Diffuse gain*/ + q_com = s_min( h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev, Q31 ); FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_diff; ch_idx++ ) { v_multc_fixed_16( h_dirac_output_synthesis_state.diffuse_power_factor_fx, // Q31 @@ -1409,7 +1417,6 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( num_freq_bands_diff ); // Scale to bring in common Q-factor - q_com = s_min( h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev, Q31 ); Scale_sig32( &h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff], num_freq_bands_diff, sub( q_com, Q31 ) ); /*q31->q_com*/ @@ -1431,59 +1438,71 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx; p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx; + FOR( l = 0; l < num_freq_bands; l++ ) + { + g1[l] = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31 + move32(); + } + + q_shift = sub( 26, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ); + move16(); + /* Direct gains */ IF( hodirac_flag ) { + cmp1 = L_shr( 66437775 /* 0.99f in Q26 */, q_shift ); + cmp2 = L_shr( ONE_IN_Q27 /* 2.0f in Q26 */, q_shift ); FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { - g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31 - g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q - g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q - g2 = L_max( g2, Mpy_32_32( 2126008812 /* 0.99f in Q31 */, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ) ); // p_gains_dir_q - g2 = L_min( g2, Mpy_32_32( ONE_IN_Q31 /* 2.0f in Q30 */, L_shl( 1, add( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 1 ) ) ) ); // p_gains_dir_q - *( p_gains_dir++ ) = g2; // p_gains_dir_q + g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q + g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q + g2 = L_max( g2, cmp1 ); // p_gains_dir_q + g2 = L_min( g2, cmp2 ); // p_gains_dir_q + *( p_gains_dir++ ) = g2; // p_gains_dir_q move32(); } } } ELSE { + cmp1 = L_shr( 57042534 /* 0.85f in Q26 */, q_shift ); + cmp2 = L_shr( 77175193 /* 1.15f in Q26 */, q_shift ); FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { - g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31 - g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q - g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q - g2 = L_max( g2, Mpy_32_32( 1825361101 /* 0.85f in Q31 */, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ) ); // p_gains_dir_q - g2 = L_min( g2, Mpy_32_32( 1234803098 /* 1.15f in Q30 */, L_shl( 1, add( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 1 ) ) ) ); // p_gains_dir_q - *( p_gains_dir++ ) = g2; // p_gains_dir_q + g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q + g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q + g2 = L_max( g2, cmp1 ); // p_gains_dir_q + g2 = L_min( g2, cmp2 ); // p_gains_dir_q + *( p_gains_dir++ ) = g2; // p_gains_dir_q move32(); } } } /*Directional gains*/ + cmp1 = L_shr( -DIRAC_GAIN_LIMIT_Q26, q_shift ); + cmp2 = L_negate( cmp1 ); FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_dir; ch_idx++ ) { FOR( l = 0; l < num_freq_bands; l++ ) { - g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31 - g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q - g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q - g2 = L_max( g2, W_extract_h( W_shl( W_mult_32_32( -DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q - g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q - *( p_gains_dir++ ) = g2; // p_gains_dir_q + g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q + g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q + g2 = L_max( g2, cmp1 ); // p_gains_dir_q + g2 = L_min( g2, cmp2 ); // p_gains_dir_q + *( p_gains_dir++ ) = g2; // p_gains_dir_q move32(); } } IF( hodirac_flag ) { - p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx + imult1616( num_freq_bands, num_channels_dir ); - p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx + imult1616( num_freq_bands, num_channels_dir ); + p_cy_cross_dir_smooth = h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx + prod; + p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx + prod; /*Direct gains*/ FOR( ch_idx = 0; ch_idx < nchan_transport_foa; ch_idx++ ) @@ -1500,12 +1519,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( { FOR( l = 0; l < num_freq_bands; l++ ) { - g1 = Madd_32_32( POINT_3679_Q31, onset_filter[l], POINT_1175_Q31 - POINT_3679_Q31 ); // Q31, (Q31, Q31) -> Q31 - g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q - g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_cross_dir_smooth++ ) ) ) ); // (p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q - g2 = L_max( g2, W_extract_h( W_shl( W_mult_32_32( -DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q - g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev ) ), Q5 ) ) ); // p_gains_dir_q - *( p_gains_dir++ ) = g2; // p_gains_dir_q + g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[l] ), *( p_gains_dir ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q + g2 = L_add_sat( g2, Mpy_32_32( g1[l], ( *( p_cy_cross_dir_smooth++ ) ) ) ); // (p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q + g2 = L_max( g2, cmp1 ); // p_gains_dir_q + g2 = L_min( g2, cmp2 ); // p_gains_dir_q + *( p_gains_dir++ ) = g2; // p_gains_dir_q move32(); } } @@ -1514,17 +1532,18 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( /*Diffuse gains*/ p_cy_auto_diff_smooth = h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx + imult1616( nchan_transport_foa, num_freq_bands_diff ); p_gains_diff = h_dirac_output_synthesis_state.cy_auto_diff_smooth_prev_fx + imult1616( nchan_transport_foa, num_freq_bands_diff ); + g1[0] = POINT_1175_Q31; // Q31 + move32(); FOR( ch_idx = nchan_transport_foa; ch_idx < num_channels_diff; ch_idx++ ) { FOR( l = 0; l < num_freq_bands_diff; l++ ) { - g1 = POINT_1175_Q31; // Q31 move32(); - g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1 ), *( p_gains_diff ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q - g2 = L_add_sat( g2, Mpy_32_32( g1, ( *( p_cy_auto_diff_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q - g2 = L_max( g2, 0 ); // p_gains_diff_q - g2 = L_min( g2, W_extract_h( W_shl( W_mult_32_32( DIRAC_GAIN_LIMIT_Q26, L_shl( 1, h_dirac_output_synthesis_state.q_cy_auto_diff_smooth_prev ) ), Q5 ) ) ); // p_gains_diff_q - *( p_gains_diff++ ) = g2; // p_gains_diff_q + g2 = Mpy_32_32( L_sub( ONE_IN_Q31, g1[0] ), *( p_gains_diff ) ); // (Q31, p_gains_dir_q) -> p_gains_dir_q + g2 = L_add_sat( g2, Mpy_32_32( g1[0], ( *( p_cy_auto_diff_smooth++ ) ) ) ); // p_gains_diff_q, (Q31, p_gains_diff_q) -> p_gains_diff_q + g2 = L_max( g2, 0 ); // p_gains_diff_q + g2 = L_min( g2, cmp2 ); // p_gains_diff_q + *( p_gains_diff++ ) = g2; // p_gains_diff_q move32(); } } @@ -1535,8 +1554,9 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( FOR( buf_idx = 0; buf_idx < nbslots; ++buf_idx ) { - g1 = L_deposit_h( h_dirac_output_synthesis_params.interpolator_fx[buf_idx] ); // Q31 - g2 = L_sub( ONE_IN_Q31, g1 ); // Q31 + g1[0] = L_deposit_h( h_dirac_output_synthesis_params.interpolator_fx[buf_idx] ); // Q31 + move32(); + g2 = L_sub( ONE_IN_Q31, g1[0] ); // Q31 /*Direct input->output*/ p_gains_dir = h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx; // (p_gains_dir_q) @@ -1557,7 +1577,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( shl( ch_idx * num_freq_bands, Q1 ); FOR( l = 0; l < num_freq_bands; l++ ) { - g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) + g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) output_real[l * num_channels_dir + ch_idx] = Mpy_32_32( g, ( *( p_proto_diff++ ) ) ); // (p_gains_dir_q, p_proto_diff_q) -> (p_gains_dir_q + p_proto_diff_q - 31) move32(); @@ -1583,25 +1603,31 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( shl( i_mult( proto_direct_index[1], num_freq_bands ), Q1 ); FOR( l = 0; l < num_freq_bands; l++ ) { - gs1 = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir ) ) ), g2, ( *( p_gains_dir_prev ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) - gs2 = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir + imult1616( num_freq_bands, num_channels_dir ) ) ) ), g2, ( *( p_gains_dir_prev + imult1616( num_freq_bands, num_channels_dir ) ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) + + Word32 temp1, temp2; + gs1 = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir ) ) ), g2, ( *( p_gains_dir_prev ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) + gs2 = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir + prod ) ) ), g2, ( *( p_gains_dir_prev + prod ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) p_gains_dir++; p_gains_dir_prev++; + temp1 = Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ); + temp2 = Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ); // ((p_gains_dir_q, p_proto_dir_q) >> 1) -> (p_gains_dir_q + p_proto_dir_q - 31) output_real[l * num_channels_dir + ch_idx] = Madd_32_32( - Mpy_32_32( gs1, ( L_add( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ) ), /* s1 */ - gs2, L_sub( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ); /* s2 */ + Mpy_32_32( gs1, ( L_add( temp1, temp2 ) ) ), /* s1 */ + gs2, L_sub( temp1, temp2 ) ); /* s2 */ move32(); p_proto++; p_proto2++; + temp1 = Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ); + temp2 = Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ); // ((p_gains_dir_q, p_proto_dir_q) >> 1) -> (p_gains_dir_q + p_proto_dir_q - 31) output_imag[l * num_channels_dir + ch_idx] = Madd_32_32( - Mpy_32_32( gs1, ( L_add( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ) ), - gs2, L_sub( Mpy_32_32( 1903158016 /* 1.772454e+00f / 2 in Q31 */, ( *p_proto ) ), Mpy_32_32( 1098788992 /* 1.023327e+00f / 2 in Q31 */, ( *p_proto2 ) ) ) ); + Mpy_32_32( gs1, ( L_add( temp1, temp2 ) ) ), + gs2, L_sub( temp1, temp2 ) ); move32(); p_proto++; p_proto2++; @@ -1612,14 +1638,15 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( p_proto = h_dirac_output_synthesis_state.proto_direct_buffer_f_fx + shl( i_mult( buf_idx, i_mult( num_freq_bands, num_protos_dir ) ), Q1 ) + shl( i_mult( proto_direct_index[ch_idx], num_freq_bands ), Q1 ); + Word16 diff = sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ); FOR( l = 0; l < num_freq_bands; l++ ) { p_gains_dir++; p_gains_dir_prev++; - output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) + output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), diff ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) move32(); - output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) + output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), diff ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) move32(); } } @@ -1633,7 +1660,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( { FOR( l = 0; l < num_freq_bands; l++ ) { - g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) + g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_dir++ ) ) ), g2, ( *( p_gains_dir_prev++ ) ) ); // (Q31, p_gains_dir_q) -> (p_gains_dir_q) output_real[l * num_channels_dir + ch_idx] = Mpy_32_32( g, ( *( p_proto++ ) ) ); // (p_gains_dir_q, p_proto_dir_q) -> (p_gains_dir_q + p_proto_dir_q - 31) move32(); @@ -1643,14 +1670,15 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( } ELSE { + Word16 shift_q = sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ); FOR( l = 0; l < num_freq_bands; l++ ) { p_gains_dir++; p_gains_dir_prev++; - output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) + output_real[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), shift_q ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) move32(); - output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), sub( h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev, 31 ) ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) + output_imag[l * num_channels_dir + ch_idx] = L_shl( *( p_proto++ ), shift_q ); // p_proto_dir_q -> (p_gains_dir_q + p_proto_dir_q - 31) move32(); } } @@ -1681,7 +1709,7 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( ch_idx_diff = add( ch_idx_diff, 1 ); FOR( l = 0; l < num_freq_bands_diff; l++ ) { - g = Madd_32_32( Mpy_32_32( g1, ( *( p_gains_diff++ ) ) ), g2, ( *( p_gains_diff_prev++ ) ) ); // (Q31, p_gains_diff_q) -> p_gains_diff_q + g = Madd_32_32( Mpy_32_32( g1[0], ( *( p_gains_diff++ ) ) ), g2, ( *( p_gains_diff_prev++ ) ) ); // (Q31, p_gains_diff_q) -> p_gains_diff_q // ((p_gains_diff_q, p_proto_diff_q) >> Q1) -> (p_gains_diff_q + p_proto_diff_q - 31) output_real[l * num_channels_dir + hDirACRend->sba_map_tc[ch_idx]] = @@ -1765,11 +1793,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( /* store estimates for next synthesis block */ IF( hodirac_flag ) { - Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, imult1616( num_freq_bands, num_channels_dir ) * DIRAC_HO_NUMSECTORS ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/ + Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, prod * DIRAC_HO_NUMSECTORS ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/ } ELSE { - Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, imult1616( num_freq_bands, num_channels_dir ) ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/ + Copy32( h_dirac_output_synthesis_state.cy_cross_dir_smooth_prev_fx, h_dirac_output_synthesis_state.gains_dir_prev_fx, prod ); /*h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev*/ } *q_cy_cross_dir_smooth_prev = h_dirac_output_synthesis_state.q_cy_cross_dir_smooth_prev; move16(); @@ -1781,11 +1809,11 @@ void ivas_dirac_dec_output_synthesis_process_subframe_gain_shd_fx( /* reset values */ IF( hodirac_flag ) { - set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, imult1616( num_freq_bands, num_channels_dir ) * DIRAC_HO_NUMSECTORS ); + set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, prod * DIRAC_HO_NUMSECTORS ); } ELSE { - set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, imult1616( num_freq_bands, num_channels_dir ) ); + set_zero_fx( h_dirac_output_synthesis_state.cy_cross_dir_smooth_fx, prod ); } set_zero_fx( h_dirac_output_synthesis_state.cy_auto_diff_smooth_fx, imult1616( num_freq_bands_diff, num_channels_diff ) ); diff --git a/lib_rend/ivas_dirac_rend.c b/lib_rend/ivas_dirac_rend.c index 1982862bf..ea507b4c7 100644 --- a/lib_rend/ivas_dirac_rend.c +++ b/lib_rend/ivas_dirac_rend.c @@ -1444,15 +1444,15 @@ void protoSignalComputation_shd_fx( reference_power_fx[l] = L_shr( reference_power_fx[l + num_freq_bands], 1 ); /*2*Q(q_cldfb+min_q_shift)-31-1*/ move32(); + re1 = L_shl( RealBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + re2 = L_shl( RealBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + re3 = L_shl( RealBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + im1 = L_shl( ImagBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + im2 = L_shl( ImagBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + im3 = L_shl( ImagBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ + FOR( k = 1; k < 4; k++ ) { - re1 = L_shl( RealBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - re2 = L_shl( RealBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - re3 = L_shl( RealBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - im1 = L_shl( ImagBuffer_fx[1][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - im2 = L_shl( ImagBuffer_fx[2][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - im3 = L_shl( ImagBuffer_fx[3][0][l], min_q_shift ); /*Q(q_cldfb+min_q_shift)*/ - idx = i_mult( 3, Rmat_k[k] ); idx1 = add( l, i_mult( add( k, 1 ), num_freq_bands ) ); @@ -1477,13 +1477,16 @@ void protoSignalComputation_shd_fx( } *proto_direct_buffer_f_q = add( q_cldfb, min_q_shift ); + move16(); *reference_power_q = sub( add( *proto_direct_buffer_f_q, *proto_direct_buffer_f_q ), 31 ); + move16(); + Word16 shift = sub( *proto_direct_buffer_f_q, q_cldfb ); FOR( k = 1; k < 4; k++ ) { - RealBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l )], sub( *proto_direct_buffer_f_q, q_cldfb ) ); // proto_direct_buffer_f_q -> q_cldfb + RealBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l )], shift ); // proto_direct_buffer_f_q -> q_cldfb move32(); - ImagBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l ) + 1], sub( *proto_direct_buffer_f_q, q_cldfb ) ); // proto_direct_buffer_f_q -> q_cldfb + ImagBuffer_fx[k][0][l] = L_shr( p_proto_direct_buffer_fx[2 * ( k * num_freq_bands + l ) + 1], shift ); // proto_direct_buffer_f_q -> q_cldfb move32(); } } diff --git a/lib_rend/ivas_efap.c b/lib_rend/ivas_efap.c index 7d47f3174..0bf3b85a4 100644 --- a/lib_rend/ivas_efap.c +++ b/lib_rend/ivas_efap.c @@ -2479,7 +2479,7 @@ static Word16 in_tri_fx( Word16 tmp16, tmp_e; Word64 S[2]; /* Threshold adjusted */ - Word64 thresh_int = 4295; // 1e-6f in Q32 + Word64 thresh_int = 35184640; // 1e-6f in Q45 move64(); /* @@ -2490,7 +2490,7 @@ static Word16 in_tri_fx( */ v_sub_fixed( B, A, tmpDot1, 2, 0 ); // tmpDot1 q22 - v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot 2q22 + v_sub_fixed( C, A, tmpDot2, 2, 0 ); // tmpDot2 q22 /* Verification of the non-colinearity : Q22 * Q22 = Q13 */ invFactor = L_sub( Mpy_32_32( tmpDot1[0], tmpDot2[1] ), Mpy_32_32( tmpDot1[1], tmpDot2[0] ) ); /*q22+q22-q31->q13*/ @@ -2516,68 +2516,18 @@ static Word16 in_tri_fx( move32(); matInv[1][1] = Mpy_32_32( tmpDot1[0], invFactor ); // q=22+invFactor_exp move32(); - - /* Computing S (Q13 + matInv_exp_final[i] + P_minus_A_exp_final + invFactor_exp - 1 ) = - matInv (Q22 + matInv_exp_final[i] + invFactor_exp) *(P-A) (Q22 + P_minus_A_exp_final) */ - Word16 matInv_exp[2][2], P_minus_A_exp[2]; - Word16 matInv_exp_final[2], P_minus_A_exp_final; - FOR( Word32 i = 0; i < 2; i++ ) - { - FOR( Word32 j = 0; j < 2; j++ ) - { - matInv_exp[i][j] = 31; - move16(); - IF( matInv[i][j] != 0 ) - { - matInv_exp[i][j] = norm_l( matInv[i][j] ); - move16(); - } - } - matInv_exp_final[i] = s_min( matInv_exp[i][0], matInv_exp[i][1] ); - move16(); - P_minus_A_exp[i] = 31; - move16(); - IF( P_minus_A[i] != 0 ) - { - P_minus_A_exp[i] = norm_l( P_minus_A[i] ); - move16(); - } - } - P_minus_A_exp_final = s_min( P_minus_A_exp[0], P_minus_A_exp[1] ); - - S[0] = L_add( L_shr( Mpy_32_32( L_shl( matInv[0][0], matInv_exp_final[0] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ), - L_shr( Mpy_32_32( L_shl( matInv[0][1], matInv_exp_final[0] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[0]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final + S[0] = W_mac_32_32( W_mult_32_32( matInv[0][0], P_minus_A[0] ), matInv[0][1], P_minus_A[1] ); // Q22+invFactor_exp +Q22 + 1 move64(); - S[1] = L_add( L_shr( Mpy_32_32( L_shl( matInv[1][0], matInv_exp_final[1] ), L_shl( P_minus_A[0], P_minus_A_exp_final ) ), Q1 ), - L_shr( Mpy_32_32( L_shl( matInv[1][1], matInv_exp_final[1] ), L_shl( P_minus_A[1], P_minus_A_exp_final ) ), Q1 ) ); //(22+invFactor_exp+matInv_exp_final[1]+22+P_minus_A_exp_final-1)-31=>12+invFactor_exp+matInv_exp_final[0]+P_minus_A_exp_final + S[0] = W_shr( S[0], invFactor_exp ); // q45 + move64(); + S[1] = W_mac_32_32( W_mult_32_32( matInv[1][0], P_minus_A[0] ), matInv[1][1], P_minus_A[1] ); // Q22+invFactor_exp +Q22 + 1 + move64(); + S[1] = W_shr( S[1], invFactor_exp ); // q45 move64(); - - /* Checking if we are in the triangle; For the theory, check Christian Borss article, section 3.2 */ - // Q32 S - IF( sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) < 0 ) - { - S[0] = W_shr( S[0], sub( add( add( matInv_exp_final[0], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32 - move64(); - } - ELSE - { - S[0] = W_shl( S[0], sub( sub( sub( Q20, matInv_exp_final[0] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32 - move64(); - } - IF( sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) < 0 ) - { - S[1] = W_shr( S[1], sub( add( add( matInv_exp_final[1], P_minus_A_exp_final ), invFactor_exp ), Q20 ) ); // q32 - move64(); - } - ELSE - { - S[1] = W_shl( S[1], sub( sub( sub( Q20, matInv_exp_final[1] ), P_minus_A_exp_final ), invFactor_exp ) ); // q32 - move64(); - } test(); test(); - IF( LT_64( S[0], -thresh_int ) || LT_64( S[1], -thresh_int ) || GT_64( W_add( S[0], S[1] ), W_add( W_shl( 1, 32 ), thresh_int ) ) ) + IF( LT_64( S[0], -thresh_int ) || LT_64( S[1], -thresh_int ) || GT_64( W_add( S[0], S[1] ), W_add( ONE_IN_Q45, thresh_int ) ) ) { return 0; } -- GitLab