diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 80bdda3c2e6fd4ce43204716ae9aa789eded3be5..132562045136e7e75c4090b47b35ba8fbe1876bb 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4995,6 +4995,15 @@ void ivas_dirac_dec_get_response_fx( const Word16 ambisonics_order, Word16 Q_out ); +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx +/* Variant of ivas_dirac_dec_get_response_fx with the output format fixed to Q_out = 29 */ +void ivas_dirac_dec_get_response_fx_29( + const Word16 azimuth, /* i : azimuth */ + const Word16 elevation, /* i : elevation */ + Word32 *response_fx, /* o : response values, Q29 (Q_out = 29) */ + const Word16 ambisonics_order /* i : highest order l that is written */ ); +#endif + void calculate_hodirac_sector_parameters_fx( DIRAC_ENC_HANDLE hDirAC, /* i : DirAC handle */ Word32 RealBuffer_fx[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : signal vector (L+1)^2 x N_bins, real part */ diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index 0ebae0de678aacc93972eab223eefd9cb9908cf3..842a34743cb93762a9e4b776240270124d6f628d 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7173,6 +7173,133 @@ void ivas_dirac_dec_get_response_fx( return; } +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx +/* Variant of ivas_dirac_dec_get_response_fx with the output format fixed to Q_out = 29. Writes response_fx[0] and, for every order l = 1..ambisonics_order, the entries l*l .. l*l + 2*l. NOTE(review): cos_az_fx and sin_az_fx hold 3 terms and are indexed with l - m - 1, so an ambisonics_order above 3 would index past them; nothing here checks that - confirm against the callers. */ +void ivas_dirac_dec_get_response_fx_29( + const Word16 azimuth, /* i : azimuth; 180 is added and the sum is taken modulo 360 below */ + const Word16 elevation, /* i : elevation; 90 is added below */ + Word32 *response_fx, /* o : response values, Q29 (Q_out = 29) */ + const Word16 ambisonics_order /* i : highest order l that is written */ ) +{ + Word16 index_azimuth, index_elevation; + Word16 el, az; /* row indices into the lookup tables used below */ + Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3]; + Word32 sin_az_fx[3]; + Word32 f_fx; /* +1 or -1; -1 makes sin_1_fx get negated */ + Word32 c_fx_better; /* value read from local_result_table or local_result_table_2 */ + Word16 l, m; + Word16 b, b1, b_2, b1_2; /* indices into response_fx */ + // Q_out = 29 is implied throughout; this variant has no Q_out variable + + index_azimuth = add( azimuth, 180 ) % 360; /* add( azimuth, 180 ) taken modulo 360 */ + move16(); + index_elevation = add( elevation, 90 ); + + Word32 e_fac = L_add( 0x7FFFFFFF, 0 ); /* factor multiplied into the odd-m terms and the odd-l middle term below; presumably about +1.0 in Q31 - TODO confirm */ + + if ( GT_16( index_elevation, 90 ) ) + { + e_fac = L_add( 0x80000000, 0 ); /* presumably -1.0 in Q31, i.e. a sign flip of those terms - TODO confirm */ + } + + + el = index_elevation; /* el selects the row of local_result_table and local_result_table_2 */ + move16(); + + if ( GT_16( index_elevation, 90 ) ) + { + el = sub( 180,
index_elevation ); /* index_elevation above 90 is mirrored to 180 - index_elevation */ + } + + az = index_azimuth; /* az selects the row of dirac_gains_trg_term_fx */ + move16(); + + if ( GT_16( index_azimuth, 180 ) ) + { + az = sub( 360, index_azimuth ); /* index_azimuth above 180 is mirrored to 360 - index_azimuth */ + } + + f_fx = 1; /* NOTE(review): f_fx is declared Word32 but this assignment is followed by move16() - confirm the intended counter */ + move16(); + + if ( GT_16( index_azimuth, 180 ) ) + { + f_fx = -1; /* sin_1_fx is negated below in this case */ + } + + cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 ); // q30 after the shift by 1; implies the table entry is Q31 - TODO confirm + cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30: cos_1_fx squared + sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 ); // q30 after the shift by 1 + + if ( EQ_32( f_fx, -1 ) ) + { + sin_1_fx = L_negate( sin_1_fx ); // q30 + } + cos_az_fx[0] = cos_1_fx; // q30 + move32(); + cos_az_fx[1] = L_shl( L_sub( cos_2_fx, ONE_IN_Q29 /*0.5 q30*/ ), 1 ); /*q30: 2 * cos_2_fx - 1*/ + move32(); + cos_az_fx[2] = L_sub( L_shl( Mpy_32_32( cos_1_fx, cos_az_fx[1] ), 2 ), cos_az_fx[0] /* cos_az_fx[0] q30*/ ); /*q30: 2 * cos_1_fx * cos_az_fx[1] - cos_az_fx[0]*/ + move32(); + sin_az_fx[0] = sin_1_fx; /*q30*/ + move32(); + sin_az_fx[1] = L_shl( Mpy_32_32( sin_1_fx, cos_1_fx ), 2 ); /*q30: 2 * sin_1_fx * cos_1_fx*/ + move32(); + sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /*q30: 4 * sin_1_fx * ( cos_2_fx - 1/4 )*/ + move32(); + + response_fx[0] = 0x20000000; /* 2^29, i.e. 1.0 in Q29 */ + move32(); + + FOR( l = 1; l <= ambisonics_order; l++ ) + { + Word16 a; /* column index into the result tables, read from dirac_gains_P_idx */ + b_2 = imult1616( l, l ); /* l * l: first index written for order l */ + b1_2 = add( b_2, shl( l, 1 ) ); /* l * l + 2 * l: last index written for order l */ + FOR( m = 0; m < l; m += 2 ) /* m = 0, 2, ...: table value is used as read */ + { + b = b_2 + m; /* NOTE(review): plain + and - are used for b and b1 in these loops while add() is used for the middle index further down - confirm intent */ + a = dirac_gains_P_idx[b]; + + c_fx_better = local_result_table[el][a]; // q30 + move32(); + response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // Q_out (29): q30 times q30 sine term + move32(); + + b1 = b1_2 - m; + response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // Q_out (29): same table value times the cosine term, mirrored index + move32(); + } + + FOR( m = 1; m < l; m += 2 ) /* m = 1, 3, ...: table value is multiplied by e_fac first */ + { + b = b_2 + m; + a = dirac_gains_P_idx[b]; + c_fx_better = local_result_table[el][a]; // q30 + move32(); + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // Q_out (29) + move32(); + + b1 = b1_2 - m; + response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // Q_out (29) + move32(); + } + + b = add( b_2, l ); /* l * l + l: middle index of order l */ + a = dirac_gains_P_idx[b]; + c_fx_better =
local_result_table_2[el][a]; // q30; note the middle term reads local_result_table_2, not local_result_table + move32(); + if ( s_and( l, 0x01 ) ) /* odd l only */ + { + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + } + response_fx[b] = L_shl( c_fx_better, -1 ); // Q_out (29); negative left-shift count, presumably a right shift by 1 from q30 - TODO confirm + move32(); + } + return; +} +#endif /*FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx*/ /*-----------------------------------------------------------------------------------------* * Function ivas_get_bits_to_encode * diff --git a/lib_com/options.h b/lib_com/options.h index d3570004d0ed950bbe6677bedf3af9376621fafa..e3727ea26c43fce6ad940950c6b9ba0f828f9d4b 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -173,4 +173,6 @@ #define NONBE_FIX_1277_EVS_DTX_HIGH_RATE_THRESHOLD /* VA/Eri: FLP issue 1277: Fix Mismatch in DTX high-rate threshold between EVS float and BASOP */ #define NONBE_FIX_708_OSBA_BR_SWITCHING_CRASH /* FhG: issue 708: fix crash in OSBA BR switching with long test vectors */ //#define OPT_STEREO_32KBPS_V1 /* Optimization made in stereo decoding path for 32kbps decoding */ +#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ +#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ #endif diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index d6c999aa0becbd126f532f07fffd4a1741e3140c..ddd568698d74c004ccd9be83493db83f23fccb4b 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -906,6 +906,74 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot + FOR( ; k < num_freq_bands; k++ ) + { + a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = 
h_dirac_output_synthesis_state->q_direct_responses + move32(); + IF( reference_power[k + num_freq_bands] == 0 ) + { + sqr_inp = Mpy_32_32( diffuseness[k], c ); + sqr_exp = sub( 31 + 4, q_diffuseness ); + } + ELSE + { + Word16 diff_c_exp; + Word16 diff_aab_exp; + IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 ) + { + mpy_a_a_b = Mpy_32_32( a, a ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) ); + diff_c_exp = sub( 31 + 4, q_diffuseness ); + + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ + } + ELSE + { + b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ + + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness ); + diff_c_exp = sub( 31 + 4, q_diffuseness ); + + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, 
diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ + } + } + sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ + sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ + + + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) + { + IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } + ELSE + { + sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + move16(); + } + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ + move32(); + } + ELSE + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } + } +#else FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -936,7 +1004,7 @@ void 
ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_aab = add( add( h_dirac_output_synthesis_state->direct_responses_q, sub( sub( 15, b_exp ), 15 ) ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); test(); @@ -987,6 +1055,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } +#endif } ELSE { @@ -3049,11 +3118,19 @@ void ivas_dirac_dec_compute_directional_responses_fx( } ELSE { +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx + ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order ); +#else ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa ); +#endif IF( hodirac_flag ) { +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx + ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order ); +#else ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 ); +#endif } }