From 8d14bce5907700aa296db9634ad20e093adc309f Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 13:03:40 +0100 Subject: [PATCH 01/13] some push/pop wmops added, also addedd FIX_1072_SPEEDUP_gainpanning and FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB active --- lib_com/ivas_dirac_com.c | 51 +++++++++++++ lib_com/options.h | 6 +- lib_dec/ivas_dirac_dec.c | 23 ++++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 89 +++++++++++++++++++++- 4 files changed, 160 insertions(+), 9 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index ebd958e1b..796fc8960 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -960,6 +960,7 @@ void computeDiffuseness_fixed( q_intensity = add( q_factor_intensity[0], min_q_shift2 ); move16(); + push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ @@ -967,6 +968,28 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); + +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB + Word16 shift_q = sub( q_tmp, q_ene ); + Word32 shiftEquiv; + Word16 shift_qtotal; + if( shift_q < 0 ) + { + shiftEquiv = L_lshl( 0x80000000, shift_q ); + shift_qtotal = sub( min_q_shift1, 0 ); + } + if( shift_q >= 0 ) + { + shiftEquiv = L_add( 0x7FFFFFFF, 0 ); + shift_qtotal = sub( min_q_shift1, shift_q ); + } + FOR( k = 0; k < num_freq_bands; k++ ) + { + tmp = L_shl( p_tmp_c[k], shift_qtotal ); + energy_slow[k] = Madd_32_32_r( tmp, energy_slow[k], shiftEquiv ); + move32(); + } +#else Word16 shift_q = sub( q_tmp, q_ene ); IF( shift_q < 0 ) { @@ -986,6 +1009,9 @@ void computeDiffuseness_fixed( move32(); } } +#endif + + q_ene = s_min( q_ene, q_tmp ); @@ -993,6 +1019,28 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_intensity[i], min_q_shift2 ); shift_q = sub( q_tmp, q_intensity ); +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB + if( shift_q >= 0 ) + { + shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); + shift_qtotal = sub( min_q_shift2, shift_q ); + } + if ( shift_q < 0 ) + { + shiftEquiv = L_lshl( 0x80000000, shift_q ); + shift_qtotal = sub( min_q_shift2, 0 ); + } + FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) + { + p_tmp = buffer_intensity[j][i]; + FOR( k = 0; k < num_freq_bands; k++ ) + { + tmp = L_shl( p_tmp[k], shift_qtotal ); + intensity_slow[j * num_freq_bands + k] = Madd_32_32_r( tmp, intensity_slow[j * num_freq_bands + k], shiftEquiv ); + move32(); + } + } +#else IF( shift_q > 0 ) { FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) @@ -1019,8 +1067,11 @@ void computeDiffuseness_fixed( } } } +#endif + q_intensity = s_min( q_intensity, q_tmp ); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );/*/ min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) ); diff --git a/lib_com/options.h b/lib_com/options.h index fa1fd8f4e..5dfebfaa6 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -/*#define WMOPS*/ /* Activate complexity and memory counters */ +//#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -153,3 +153,7 @@ #define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */ #endif #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ + + +#define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ \ No newline at end of file diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 95cca12a0..318a3ec55 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2231,7 +2231,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); Word16 tmp1; - push_wmops( "ivas_dirac_dec_render" ); + push_wmops( "ivas_dirac_dec_render (IDR)" ); /* Initialize aux buffers */ hDirAC = st_ivas->hDirAC; @@ -2341,6 +2341,7 @@ void ivas_dirac_dec_render_sf_fx( } ELSE IF( !( EQ_16( st_ivas->ivas_format, SBA_FORMAT ) || EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) ) { + push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" ); Word16 outchannels; idx_lfe = 0; move16(); @@ -2409,6 +2410,7 @@ void ivas_dirac_dec_render_sf_fx( } } } + pop_wmops(); /*push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" );*/ } size = imult1616( hDirACRend->num_outputs_dir, hSpatParamRendCom->num_freq_bands ); @@ -2555,7 +2557,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; @@ -2706,6 +2708,7 @@ void ivas_dirac_dec_render_sf_fx( } } + push_wmops( "(IDR) LOOP1" ); FOR( slot_idx = 0; slot_idx < hSpatParamRendCom->subframe_nbslots[subframe_idx]; slot_idx++ ) { index_slot = add( slot_idx_start, slot_idx ); @@ -2923,6 +2926,8 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: + pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } q_proto_direct_buffer[slot_idx] = hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q; @@ -2932,6 +2937,7 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * Compute DirAC parameters at decoder side *-----------------------------------------------------------------*/ + push_wmops( "(IDR) LOOP1 DirACparams |" ); IF( EQ_16( hDirAC->hConfig->dec_param_estim, TRUE ) ) { Copy( &hSpatParamRendCom->azimuth[md_idx][hDirAC->hConfig->enc_param_start_band], &azimuth[hDirAC->hConfig->enc_param_start_band], sub( hSpatParamRendCom->num_freq_bands, hDirAC->hConfig->enc_param_start_band ) ); @@ -2980,8 +2986,11 @@ void ivas_dirac_dec_render_sf_fx( hDirACRend->q_buffer_energy[index - 1] = DirAC_mem.reference_power_q; move16(); + push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" ); computeDiffuseness_fixed( hDirACRend->buffer_intensity_real_fx, hDirACRend->buffer_energy_fx, num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], hDirACRend->q_buffer_intensity_real, hDirACRend->q_buffer_energy, &hSpatParamRendCom->q_diffuseness_vector ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" );/*/ } + pop_wmops(); /* push_wmops( "(IDR) LOOP1 DirACparams |" );*/ /*-----------------------------------------------------------------* * frequency domain decorrelation @@ -3083,6 +3092,7 @@ void ivas_dirac_dec_render_sf_fx( } /*Compute PSDs*/ + push_wmops( "(IDR) LOOP1 PSDs |" ); h_dirac_output_synthesis_params = &( hDirACRend->h_output_synthesis_psd_params ); h_dirac_output_synthesis_state = &( hDirACRend->h_output_synthesis_psd_state ); num_channels_dir = hDirACRend->num_outputs_dir; @@ -3165,6 +3175,7 @@ void ivas_dirac_dec_render_sf_fx( } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" ); ivas_dirac_dec_output_synthesis_process_slot_fx( reference_power_fx, DirAC_mem.reference_power_q, p_onset_filter_fx, @@ -3182,6 +3193,7 @@ void ivas_dirac_dec_render_sf_fx( md_idx, hodirac_flag, hDirAC->hConfig->dec_param_estim ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" );/*/ } IF( hDirAC->hConfig->dec_param_estim ) @@ -3252,7 +3264,9 @@ void ivas_dirac_dec_render_sf_fx( v_add_fixed( reference_power_fx, reference_power_smooth_fx, reference_power_smooth_fx, hSpatParamRendCom->num_freq_bands, 1 ); q_reference_power_smooth = sub( q_reference_power_smooth, 1 ); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs |" );*/ } + pop_wmops(); /*push_wmops( "(IDR) LOOP1" );*/ minimum_s( q_proto_direct_buffer, hSpatParamRendCom->subframe_nbslots[subframe_idx], &hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q ); IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) @@ -3581,7 +3595,6 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * CLDFB synthesis (and binaural rendering) *-----------------------------------------------------------------*/ - index_slot = slot_idx_start_cldfb_synth; move16(); @@ -3963,6 +3976,7 @@ void ivas_dirac_dec_render_sf_fx( } } + hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); move16(); hSpatParamRendCom->subframes_rendered = add( hSpatParamRendCom->subframes_rendered, 1 ); @@ -4077,8 +4091,7 @@ void ivas_dirac_dec_render_sf_fx( } } } - - pop_wmops(); + pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 4686ca1e6..5b37ab7d9 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -710,11 +710,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } + push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" ); test(); IF( dec_param_estim == FALSE && hodirac_flag ) { IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" ); v_multc_fixed( hSpatParamRendCom->energy_ratio1_fx[md_idx], -MAX_32 /*-1 Q31*/, aux_buf, num_freq_bands ); /* 30 + 31 - 31 -> 30 */ v_addc_fixed( aux_buf, ONE_IN_Q30 /*1 Q30*/, aux_buf, num_freq_bands ); /*30*/ Copy32( hSpatParamRendCom->energy_ratio1_fx[md_idx], @@ -737,19 +739,24 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); h_dirac_output_synthesis_state->direct_power_factor_q = 30; move16(); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" );/*/ } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" ); ivas_dirac_dec_compute_gain_factors_fx( num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], h_dirac_output_synthesis_state->direct_power_factor_fx, h_dirac_output_synthesis_state->diffuse_power_factor_fx, &h_dirac_output_synthesis_state->direct_power_factor_q, &h_dirac_output_synthesis_state->diffuse_power_factor_q ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" );*/ } } ELSE IF( EQ_16( dec_param_estim, TRUE ) ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" ); + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" ); /* compute direct responses */ ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom, hDirACRend, @@ -764,7 +771,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); - + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" );*/ { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -811,12 +818,14 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[kk] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; move16(); } - + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" ); FOR( ch_idx = 0; ch_idx < s_min( 4, nchan_transport ); ch_idx++ ) { Word16 k; IF( ch_idx != 0 ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" ); + ; Word32 a, c; Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c; Word32 mpy_a_a_b, mpy_diff_c, mpy_diff_aab; @@ -906,6 +915,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ + + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -987,9 +998,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );*/ } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" ); Word32 sqr_inp, mpy_diff, sqr; Word16 sqr_exp; /*Diffuseness modellling nrg compensation*/ @@ -1027,6 +1041,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1 @@ -1060,8 +1075,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ + pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ + Word16 temp = MAX_16; /*q0*/ move16(); tmp16 = imult1616( num_freq_bands, num_channels_dir ); @@ -1078,9 +1097,21 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } free( Q_temp_cy_cross_dir_smooth_fx ); /*Directional gain (panning)*/ + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" ); + Word16 temp_q = sub( add( h_dirac_output_synthesis_state->direct_power_factor_q, h_dirac_output_synthesis_state->direct_responses_q ), 31 ); IF( LT_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) { +#ifdef FIX_1072_SPEEDUP_gainpanning /*is there any difference in any bitstream?*/ + Word16 temp_q1 = sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ); + FOR( Word16 kk = 0; kk < tmp16; kk++ ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], temp_q1 ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/ + move32(); + } + h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; + move16(); +#else FOR( Word16 kk = 0; kk < tmp16; kk++ ) { h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/ @@ -1088,7 +1119,53 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); +#endif + } +#ifdef FIX_1072_SPEEDUP_gainpanning + Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); + FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ ) + { + IF( NE_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + Word16 i; + Word32 aux; + IF(temp_q1 < 0) + { + Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); + FOR( i = 0; i < num_freq_bands; i++ ) + { + aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); + move32(); + } + } + ELSE + { + FOR( i = 0; i < num_freq_bands; i++ ) + { + aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + aux = L_shl( aux, temp_q1 ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux ); + move32(); + } + } + + } + ELSE + { + Word16 i; + FOR( i = 0; i < num_freq_bands; i++ ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + move32(); + } + } + + + } + +#else FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ ) { v_mult_fixed( h_dirac_output_synthesis_state->direct_power_factor_fx, @@ -1107,6 +1184,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/ } +#endif + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" );*/ + /*Diffuse gain*/ FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_diff; ch_idx++ ) { @@ -1124,7 +1204,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff], num_freq_bands_diff, 0 ); /*h_dirac_output_synthesis_state->q_cy_auto_diff_smooth*/ } - + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3 <<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ return; } ELSE @@ -1143,7 +1224,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ diff_start_band = 0; move16(); -- GitLab From 6a2e3a1e83a250a8e6748af2bf6ad963f403cb30 Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:14:20 +0100 Subject: [PATCH 02/13] some introduction to more tunings :: FIX_1072_SPEEDUP_output_synthesis_procSlot - inactive --- lib_com/options.h | 5 +++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 5dfebfaa6..92144ade1 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -//#define WMOPS /* Activate complexity and memory counters */ +/*#define WMOPS*/ /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -156,4 +156,5 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ \ No newline at end of file +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ +//#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ \ No newline at end of file diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 5b37ab7d9..404f98b1b 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -942,7 +942,18 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } +#ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot + q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_c = sub( q_diffuseness, 4 ); + + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + + /*Todo: simplify so that mpy+add can be merged to madd*/ + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ +#else mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 @@ -970,6 +981,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } +#endif + sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) -- GitLab From b05d69a9edf078616466f69a705ceb5845b831c8 Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:16:18 +0100 Subject: [PATCH 03/13] cleaning up options.h --- lib_com/options.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 92144ade1..7e8912490 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -155,6 +155,5 @@ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ -#define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ -//#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ \ No newline at end of file +#define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, nonbe */ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* FhG: Minor WMOPS tuning, nonbe */ -- GitLab From 0917f3f7b1c455ae3c6fedf4c4e0830415137fbe Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:17:58 +0100 Subject: [PATCH 04/13] some more cleaning --- lib_com/ivas_dirac_com.c | 4 ++-- lib_com/options.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 796fc8960..97c8a88bd 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -969,7 +969,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; @@ -1019,7 +1019,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_intensity[i], min_q_shift2 ); shift_q = sub( q_tmp, q_intensity ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS if( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); diff --git a/lib_com/options.h b/lib_com/options.h index 7e8912490..b47825b32 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -156,4 +156,4 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, nonbe */ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* FhG: Minor WMOPS tuning, nonbe */ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* FhG: Minor WMOPS tuning, nonbe */ -- GitLab From 491400705b0984e8379706dfd004e883507edee1 Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:22:03 +0100 Subject: [PATCH 05/13] apply clang format patch --- lib_com/ivas_dirac_com.c | 7 +++---- lib_dec/ivas_dirac_dec.c | 4 ++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 12 ++++-------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 97c8a88bd..d03191084 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -973,12 +973,12 @@ void computeDiffuseness_fixed( Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; - if( shift_q < 0 ) + if ( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); shift_qtotal = sub( min_q_shift1, 0 ); } - if( shift_q >= 0 ) + if ( shift_q >= 0 ) { shiftEquiv = L_add( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift1, shift_q ); @@ -1012,7 +1012,6 @@ void computeDiffuseness_fixed( #endif - q_ene = s_min( q_ene, q_tmp ); /* Intensity slow */ @@ -1020,7 +1019,7 @@ void computeDiffuseness_fixed( shift_q = sub( q_tmp, q_intensity ); #ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS - if( shift_q >= 0 ) + if ( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift2, shift_q ); diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 318a3ec55..81429927a 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2557,7 +2557,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; @@ -2926,7 +2926,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: - pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 404f98b1b..3f4111253 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1089,7 +1089,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ - pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ @@ -1133,7 +1133,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); #endif - } #ifdef FIX_1072_SPEEDUP_gainpanning Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); @@ -1143,13 +1142,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { Word16 i; Word32 aux; - IF(temp_q1 < 0) + IF( temp_q1 < 0 ) { Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); FOR( i = 0; i < num_freq_bands; i++ ) { aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux, temp_q1_equiv ); move32(); } } @@ -1163,7 +1162,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - } ELSE { @@ -1174,8 +1172,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - - } #else @@ -1237,7 +1233,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ -- GitLab From 59fd769bd56da5f9aec8aad04125283815b1a260 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 6 Feb 2025 14:30:48 +0000 Subject: [PATCH 06/13] Final cleanups before merge --- lib_com/ivas_dirac_com.c | 2 - lib_dec/ivas_dirac_dec.c | 21 ++-------- lib_rend/ivas_dirac_output_synthesis_dec.c | 45 ++-------------------- 3 files changed, 7 insertions(+), 61 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index d03191084..e07d36b1e 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -960,7 +960,6 @@ void computeDiffuseness_fixed( q_intensity = add( q_factor_intensity[0], min_q_shift2 ); move16(); - push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ @@ -1070,7 +1069,6 @@ void computeDiffuseness_fixed( q_intensity = s_min( q_intensity, q_tmp ); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );/*/ min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) ); diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 81429927a..95cca12a0 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2231,7 +2231,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); Word16 tmp1; - push_wmops( "ivas_dirac_dec_render (IDR)" ); + push_wmops( "ivas_dirac_dec_render" ); /* Initialize aux buffers */ hDirAC = st_ivas->hDirAC; @@ -2341,7 +2341,6 @@ void ivas_dirac_dec_render_sf_fx( } ELSE IF( !( EQ_16( st_ivas->ivas_format, SBA_FORMAT ) || EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) ) { - push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" ); Word16 outchannels; idx_lfe = 0; move16(); @@ -2410,7 +2409,6 @@ void ivas_dirac_dec_render_sf_fx( } } } - pop_wmops(); /*push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" );*/ } size = imult1616( hDirACRend->num_outputs_dir, hSpatParamRendCom->num_freq_bands ); @@ -2708,7 +2706,6 @@ void ivas_dirac_dec_render_sf_fx( } } - push_wmops( "(IDR) LOOP1" ); FOR( slot_idx = 0; slot_idx < hSpatParamRendCom->subframe_nbslots[subframe_idx]; slot_idx++ ) { index_slot = add( slot_idx_start, slot_idx ); @@ -2926,8 +2923,6 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: - pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } q_proto_direct_buffer[slot_idx] = hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q; @@ -2937,7 +2932,6 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * Compute DirAC parameters at decoder side *-----------------------------------------------------------------*/ - push_wmops( "(IDR) LOOP1 DirACparams |" ); IF( EQ_16( hDirAC->hConfig->dec_param_estim, TRUE ) ) { Copy( &hSpatParamRendCom->azimuth[md_idx][hDirAC->hConfig->enc_param_start_band], &azimuth[hDirAC->hConfig->enc_param_start_band], sub( hSpatParamRendCom->num_freq_bands, hDirAC->hConfig->enc_param_start_band ) ); @@ -2986,11 +2980,8 @@ void ivas_dirac_dec_render_sf_fx( hDirACRend->q_buffer_energy[index - 1] = DirAC_mem.reference_power_q; move16(); - push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" ); computeDiffuseness_fixed( hDirACRend->buffer_intensity_real_fx, hDirACRend->buffer_energy_fx, num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], hDirACRend->q_buffer_intensity_real, hDirACRend->q_buffer_energy, &hSpatParamRendCom->q_diffuseness_vector ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" );/*/ } - pop_wmops(); /* push_wmops( "(IDR) LOOP1 DirACparams |" );*/ /*-----------------------------------------------------------------* * frequency domain decorrelation @@ -3092,7 +3083,6 @@ void ivas_dirac_dec_render_sf_fx( } /*Compute PSDs*/ - push_wmops( "(IDR) LOOP1 PSDs |" ); h_dirac_output_synthesis_params = &( hDirACRend->h_output_synthesis_psd_params ); h_dirac_output_synthesis_state = &( hDirACRend->h_output_synthesis_psd_state ); num_channels_dir = hDirACRend->num_outputs_dir; @@ -3175,7 +3165,6 @@ void ivas_dirac_dec_render_sf_fx( } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" ); ivas_dirac_dec_output_synthesis_process_slot_fx( reference_power_fx, DirAC_mem.reference_power_q, p_onset_filter_fx, @@ -3193,7 +3182,6 @@ void ivas_dirac_dec_render_sf_fx( md_idx, hodirac_flag, hDirAC->hConfig->dec_param_estim ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" );/*/ } IF( hDirAC->hConfig->dec_param_estim ) @@ -3264,9 +3252,7 @@ void ivas_dirac_dec_render_sf_fx( v_add_fixed( reference_power_fx, reference_power_smooth_fx, reference_power_smooth_fx, hSpatParamRendCom->num_freq_bands, 1 ); q_reference_power_smooth = sub( q_reference_power_smooth, 1 ); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs |" );*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1" );*/ minimum_s( q_proto_direct_buffer, hSpatParamRendCom->subframe_nbslots[subframe_idx], &hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q ); IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) @@ -3595,6 +3581,7 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * CLDFB synthesis (and binaural rendering) *-----------------------------------------------------------------*/ + index_slot = slot_idx_start_cldfb_synth; move16(); @@ -3976,7 +3963,6 @@ void ivas_dirac_dec_render_sf_fx( } } - hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); move16(); hSpatParamRendCom->subframes_rendered = add( hSpatParamRendCom->subframes_rendered, 1 ); @@ -4091,7 +4077,8 @@ void ivas_dirac_dec_render_sf_fx( } } } - pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + + pop_wmops(); return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 3f4111253..866badda0 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -710,13 +710,11 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" ); test(); IF( dec_param_estim == FALSE && hodirac_flag ) { IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" ); v_multc_fixed( hSpatParamRendCom->energy_ratio1_fx[md_idx], -MAX_32 /*-1 Q31*/, aux_buf, num_freq_bands ); /* 30 + 31 - 31 -> 30 */ v_addc_fixed( aux_buf, ONE_IN_Q30 /*1 Q30*/, aux_buf, num_freq_bands ); /*30*/ Copy32( hSpatParamRendCom->energy_ratio1_fx[md_idx], @@ -739,24 +737,19 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); h_dirac_output_synthesis_state->direct_power_factor_q = 30; move16(); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" );/*/ } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" ); ivas_dirac_dec_compute_gain_factors_fx( num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], h_dirac_output_synthesis_state->direct_power_factor_fx, h_dirac_output_synthesis_state->diffuse_power_factor_fx, &h_dirac_output_synthesis_state->direct_power_factor_q, &h_dirac_output_synthesis_state->diffuse_power_factor_q ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" );*/ } } ELSE IF( EQ_16( dec_param_estim, TRUE ) ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" ); - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" ); /* compute direct responses */ ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom, hDirACRend, @@ -771,7 +764,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" );*/ + { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -818,14 +811,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[kk] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; move16(); } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" ); + FOR( ch_idx = 0; ch_idx < s_min( 4, nchan_transport ); ch_idx++ ) { Word16 k; IF( ch_idx != 0 ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" ); - ; Word32 a, c; Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c; Word32 mpy_a_a_b, mpy_diff_c, mpy_diff_aab; @@ -915,8 +906,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ - - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -942,22 +931,11 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } -#ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - q_diff_c = sub( q_diffuseness, 4 ); mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - /*Todo: simplify so that mpy+add can be merged to madd*/ - sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ - -#else - mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 - mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 - mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); @@ -981,8 +959,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } -#endif - sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) @@ -1011,12 +987,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );*/ } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" ); Word32 sqr_inp, mpy_diff, sqr; Word16 sqr_exp; /*Diffuseness modellling nrg compensation*/ @@ -1054,7 +1027,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1 @@ -1088,12 +1060,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ - Word16 temp = MAX_16; /*q0*/ move16(); tmp16 = imult1616( num_freq_bands, num_channels_dir ); @@ -1110,8 +1078,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } free( Q_temp_cy_cross_dir_smooth_fx ); /*Directional gain (panning)*/ - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" ); - Word16 temp_q = sub( add( h_dirac_output_synthesis_state->direct_power_factor_q, h_dirac_output_synthesis_state->direct_responses_q ), 31 ); IF( LT_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) { @@ -1192,9 +1158,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands], num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/ } - #endif - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" );*/ /*Diffuse gain*/ FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_diff; ch_idx++ ) @@ -1213,8 +1177,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff], num_freq_bands_diff, 0 ); /*h_dirac_output_synthesis_state->q_cy_auto_diff_smooth*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3 <<-|" );/*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ + return; } ELSE @@ -1233,9 +1196,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ diff_start_band = 0; move16(); -- GitLab From f5fe2f050dca5c506a610bb38f9290e5e057826f Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:53:56 +0100 Subject: [PATCH 07/13] Revert "apply clang format patch" This reverts commit 491400705b0984e8379706dfd004e883507edee1. --- lib_com/ivas_dirac_com.c | 7 ++++--- lib_dec/ivas_dirac_dec.c | 4 ++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 12 ++++++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index d03191084..97c8a88bd 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -973,12 +973,12 @@ void computeDiffuseness_fixed( Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; - if ( shift_q < 0 ) + if( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); shift_qtotal = sub( min_q_shift1, 0 ); } - if ( shift_q >= 0 ) + if( shift_q >= 0 ) { shiftEquiv = L_add( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift1, shift_q ); @@ -1012,6 +1012,7 @@ void computeDiffuseness_fixed( #endif + q_ene = s_min( q_ene, q_tmp ); /* Intensity slow */ @@ -1019,7 +1020,7 @@ void computeDiffuseness_fixed( shift_q = sub( q_tmp, q_intensity ); #ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS - if ( shift_q >= 0 ) + if( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift2, shift_q ); diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 81429927a..318a3ec55 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2557,7 +2557,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; @@ -2926,7 +2926,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: - pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 3f4111253..404f98b1b 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1089,7 +1089,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ + pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ @@ -1133,6 +1133,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); #endif + } #ifdef FIX_1072_SPEEDUP_gainpanning Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); @@ -1142,13 +1143,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { Word16 i; Word32 aux; - IF( temp_q1 < 0 ) + IF(temp_q1 < 0) { Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); FOR( i = 0; i < num_freq_bands; i++ ) { aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux, temp_q1_equiv ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); move32(); } } @@ -1162,6 +1163,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } + } ELSE { @@ -1172,6 +1174,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } + + } #else @@ -1233,7 +1237,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ -- GitLab From 896f21e54f609aaaab580d12e6056add7a6b5899 Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:54:25 +0100 Subject: [PATCH 08/13] Revert "some more cleaning" This reverts commit 0917f3f7b1c455ae3c6fedf4c4e0830415137fbe. --- lib_com/ivas_dirac_com.c | 4 ++-- lib_com/options.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 97c8a88bd..796fc8960 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -969,7 +969,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; @@ -1019,7 +1019,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_intensity[i], min_q_shift2 ); shift_q = sub( q_tmp, q_intensity ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB if( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); diff --git a/lib_com/options.h b/lib_com/options.h index b47825b32..7e8912490 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -156,4 +156,4 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, nonbe */ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* FhG: Minor WMOPS tuning, nonbe */ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* FhG: Minor WMOPS tuning, nonbe */ -- GitLab From ac90f9d69453b85c66a40220c4a4e6cc9b715d72 Mon Sep 17 00:00:00 2001 From: ber Date: Thu, 6 Feb 2025 15:54:47 +0100 Subject: [PATCH 09/13] Revert "cleaning up options.h" This reverts commit b05d69a9edf078616466f69a705ceb5845b831c8. --- lib_com/options.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 7e8912490..92144ade1 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -155,5 +155,6 @@ #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ -#define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, nonbe */ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* FhG: Minor WMOPS tuning, nonbe */ +#define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ +//#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ \ No newline at end of file -- GitLab From 43633d492c8a1b5d9cb793788e09567f393f896b Mon Sep 17 00:00:00 2001 From: Markus Multrus Date: Sun, 9 Feb 2025 20:46:21 +0100 Subject: [PATCH 10/13] fix instrumentation --- lib_com/ivas_dirac_com.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index e07d36b1e..2704c1f08 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -975,13 +975,13 @@ void computeDiffuseness_fixed( if ( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); - shift_qtotal = sub( min_q_shift1, 0 ); } if ( shift_q >= 0 ) { shiftEquiv = L_add( 0x7FFFFFFF, 0 ); - shift_qtotal = sub( min_q_shift1, shift_q ); } + shift_qtotal = sub( min_q_shift1, s_max( shift_q, 0 ) ); + FOR( k = 0; k < num_freq_bands; k++ ) { tmp = L_shl( p_tmp_c[k], shift_qtotal ); @@ -1018,16 +1018,16 @@ void computeDiffuseness_fixed( shift_q = sub( q_tmp, q_intensity ); #ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS - if ( shift_q >= 0 ) - { - shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); - shift_qtotal = sub( min_q_shift2, shift_q ); - } if ( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); - shift_qtotal = sub( min_q_shift2, 0 ); } + if ( shift_q >= 0 ) + { + shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); + } + shift_qtotal = sub( min_q_shift2, s_max( shift_q, 0 ) ); + FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) { p_tmp = buffer_intensity[j][i]; -- GitLab From 74f66bf8e65f7b3e63d8df19e27d02b07af0feeb Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 09:47:30 +0100 Subject: [PATCH 11/13] further cleanup --- lib_com/options.h | 1 - lib_rend/ivas_dirac_output_synthesis_dec.c | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 92144ade1..2747226b4 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -157,4 +157,3 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ #define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ -//#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ \ No newline at end of file diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 404f98b1b..6a7a73c18 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -942,18 +942,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } -#ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - q_diff_c = sub( q_diffuseness, 4 ); - - mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 - mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 - mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - - /*Todo: simplify so that mpy+add can be merged to madd*/ - sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ - -#else mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 @@ -981,7 +969,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } -#endif sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ -- GitLab From fc11d020838a7da70152594f05efce479afaddcb Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 11:34:22 +0100 Subject: [PATCH 12/13] tiny fix --- lib_com/ivas_dirac_com.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 9fe68f250..38abc76f3 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -968,7 +968,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; -- GitLab From 74d1e1951db936a260f30cd2fefe11c1a41e9cf9 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 11:39:19 +0100 Subject: [PATCH 13/13] apply clang format patch --- lib_com/ivas_dirac_com.c | 5 ++--- lib_dec/ivas_dirac_dec.c | 2 +- lib_rend/ivas_dirac_output_synthesis_dec.c | 8 ++------ 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 38abc76f3..2704c1f08 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -972,11 +972,11 @@ void computeDiffuseness_fixed( Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; - if( shift_q < 0 ) + if ( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); } - if( shift_q >= 0 ) + if ( shift_q >= 0 ) { shiftEquiv = L_add( 0x7FFFFFFF, 0 ); } @@ -1011,7 +1011,6 @@ void computeDiffuseness_fixed( #endif - q_ene = s_min( q_ene, q_tmp ); /* Intensity slow */ diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index ed1f292b2..95cca12a0 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2555,7 +2555,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index ba8048267..866badda0 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1099,7 +1099,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); #endif - } #ifdef FIX_1072_SPEEDUP_gainpanning Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); @@ -1109,13 +1108,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { Word16 i; Word32 aux; - IF(temp_q1 < 0) + IF( temp_q1 < 0 ) { Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); FOR( i = 0; i < num_freq_bands; i++ ) { aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux, temp_q1_equiv ); move32(); } } @@ -1129,7 +1128,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - } ELSE { @@ -1140,8 +1138,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - - } #else -- GitLab