From 8fcadb93851018d4702a386c6dcf388e72810373 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 12:23:28 +0100 Subject: [PATCH 01/18] added some push/pop wmops , introduced FIX_1072_SPEEDUP_output_synthesis_procSlot --- lib_com/ivas_dirac_com.c | 51 ++++++++ lib_com/options.h | 5 + lib_dec/ivas_dirac_dec.c | 23 +++- lib_rend/ivas_dirac_output_synthesis_dec.c | 143 ++++++++++++++++++++- 4 files changed, 214 insertions(+), 8 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index ebd958e1b..796fc8960 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -960,6 +960,7 @@ void computeDiffuseness_fixed( q_intensity = add( q_factor_intensity[0], min_q_shift2 ); move16(); + push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ @@ -967,6 +968,28 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); + +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB + Word16 shift_q = sub( q_tmp, q_ene ); + Word32 shiftEquiv; + Word16 shift_qtotal; + if( shift_q < 0 ) + { + shiftEquiv = L_lshl( 0x80000000, shift_q ); + shift_qtotal = sub( min_q_shift1, 0 ); + } + if( shift_q >= 0 ) + { + shiftEquiv = L_add( 0x7FFFFFFF, 0 ); + shift_qtotal = sub( min_q_shift1, shift_q ); + } + FOR( k = 0; k < num_freq_bands; k++ ) + { + tmp = L_shl( p_tmp_c[k], shift_qtotal ); + energy_slow[k] = Madd_32_32_r( tmp, energy_slow[k], shiftEquiv ); + move32(); + } +#else Word16 shift_q = sub( q_tmp, q_ene ); IF( shift_q < 0 ) { @@ -986,6 +1009,9 @@ void computeDiffuseness_fixed( move32(); } } +#endif + + q_ene = s_min( q_ene, q_tmp ); @@ -993,6 +1019,28 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_intensity[i], min_q_shift2 ); shift_q = sub( q_tmp, q_intensity ); +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB + if( shift_q >= 0 ) + { + shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); + shift_qtotal = sub( min_q_shift2, shift_q ); + } + if ( shift_q < 0 ) + { + shiftEquiv = L_lshl( 0x80000000, shift_q ); + shift_qtotal = sub( min_q_shift2, 0 ); + } + FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) + { + p_tmp = buffer_intensity[j][i]; + FOR( k = 0; k < num_freq_bands; k++ ) + { + tmp = L_shl( p_tmp[k], shift_qtotal ); + intensity_slow[j * num_freq_bands + k] = Madd_32_32_r( tmp, intensity_slow[j * num_freq_bands + k], shiftEquiv ); + move32(); + } + } +#else IF( shift_q > 0 ) { FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) @@ -1019,8 +1067,11 @@ void computeDiffuseness_fixed( } } } +#endif + q_intensity = s_min( q_intensity, q_tmp ); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );/*/ min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) ); diff --git a/lib_com/options.h b/lib_com/options.h index fa1fd8f4e..47a8f8b2c 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -153,3 +153,8 @@ #define FIX_881_HILBERT_FILTER /* VA: improve the precision of the Hilbert filter to remove 2kHz unwanted tone */ #endif #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ + + +#define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ +#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ \ No newline at end of file diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 95cca12a0..318a3ec55 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2231,7 +2231,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); Word16 tmp1; - push_wmops( "ivas_dirac_dec_render" ); + push_wmops( "ivas_dirac_dec_render (IDR)" ); /* Initialize aux buffers */ hDirAC = st_ivas->hDirAC; @@ -2341,6 +2341,7 @@ void ivas_dirac_dec_render_sf_fx( } ELSE IF( !( EQ_16( st_ivas->ivas_format, SBA_FORMAT ) || EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) ) { + push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" ); Word16 outchannels; idx_lfe = 0; move16(); @@ -2409,6 +2410,7 @@ void ivas_dirac_dec_render_sf_fx( } } } + pop_wmops(); /*push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" );*/ } size = imult1616( hDirACRend->num_outputs_dir, hSpatParamRendCom->num_freq_bands ); @@ -2555,7 +2557,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; @@ -2706,6 +2708,7 @@ void ivas_dirac_dec_render_sf_fx( } } + push_wmops( "(IDR) LOOP1" ); FOR( slot_idx = 0; slot_idx < hSpatParamRendCom->subframe_nbslots[subframe_idx]; slot_idx++ ) { index_slot = add( slot_idx_start, slot_idx ); @@ -2923,6 +2926,8 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: + pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } q_proto_direct_buffer[slot_idx] = hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q; @@ -2932,6 +2937,7 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * Compute DirAC parameters at decoder side *-----------------------------------------------------------------*/ + push_wmops( "(IDR) LOOP1 DirACparams |" ); IF( EQ_16( hDirAC->hConfig->dec_param_estim, TRUE ) ) { Copy( &hSpatParamRendCom->azimuth[md_idx][hDirAC->hConfig->enc_param_start_band], &azimuth[hDirAC->hConfig->enc_param_start_band], sub( hSpatParamRendCom->num_freq_bands, hDirAC->hConfig->enc_param_start_band ) ); @@ -2980,8 +2986,11 @@ void ivas_dirac_dec_render_sf_fx( hDirACRend->q_buffer_energy[index - 1] = DirAC_mem.reference_power_q; move16(); + push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" ); computeDiffuseness_fixed( hDirACRend->buffer_intensity_real_fx, hDirACRend->buffer_energy_fx, num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], hDirACRend->q_buffer_intensity_real, hDirACRend->q_buffer_energy, &hSpatParamRendCom->q_diffuseness_vector ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" );/*/ } + pop_wmops(); /* push_wmops( "(IDR) LOOP1 DirACparams |" );*/ /*-----------------------------------------------------------------* * frequency domain decorrelation @@ -3083,6 +3092,7 @@ void ivas_dirac_dec_render_sf_fx( } /*Compute PSDs*/ + push_wmops( "(IDR) LOOP1 PSDs |" ); h_dirac_output_synthesis_params = &( hDirACRend->h_output_synthesis_psd_params ); h_dirac_output_synthesis_state = &( hDirACRend->h_output_synthesis_psd_state ); num_channels_dir = hDirACRend->num_outputs_dir; @@ -3165,6 +3175,7 @@ void ivas_dirac_dec_render_sf_fx( } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" ); ivas_dirac_dec_output_synthesis_process_slot_fx( reference_power_fx, DirAC_mem.reference_power_q, p_onset_filter_fx, @@ -3182,6 +3193,7 @@ void ivas_dirac_dec_render_sf_fx( md_idx, hodirac_flag, hDirAC->hConfig->dec_param_estim ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" );/*/ } IF( hDirAC->hConfig->dec_param_estim ) @@ -3252,7 +3264,9 @@ void ivas_dirac_dec_render_sf_fx( v_add_fixed( reference_power_fx, reference_power_smooth_fx, reference_power_smooth_fx, hSpatParamRendCom->num_freq_bands, 1 ); q_reference_power_smooth = sub( q_reference_power_smooth, 1 ); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs |" );*/ } + pop_wmops(); /*push_wmops( "(IDR) LOOP1" );*/ minimum_s( q_proto_direct_buffer, hSpatParamRendCom->subframe_nbslots[subframe_idx], &hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q ); IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) @@ -3581,7 +3595,6 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * CLDFB synthesis (and binaural rendering) *-----------------------------------------------------------------*/ - index_slot = slot_idx_start_cldfb_synth; move16(); @@ -3963,6 +3976,7 @@ void ivas_dirac_dec_render_sf_fx( } } + hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); move16(); hSpatParamRendCom->subframes_rendered = add( hSpatParamRendCom->subframes_rendered, 1 ); @@ -4077,8 +4091,7 @@ void ivas_dirac_dec_render_sf_fx( } } } - - pop_wmops(); + pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 4686ca1e6..320492c66 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -710,11 +710,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } + push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" ); test(); IF( dec_param_estim == FALSE && hodirac_flag ) { IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" ); v_multc_fixed( hSpatParamRendCom->energy_ratio1_fx[md_idx], -MAX_32 /*-1 Q31*/, aux_buf, num_freq_bands ); /* 30 + 31 - 31 -> 30 */ v_addc_fixed( aux_buf, ONE_IN_Q30 /*1 Q30*/, aux_buf, num_freq_bands ); /*30*/ Copy32( hSpatParamRendCom->energy_ratio1_fx[md_idx], @@ -737,19 +739,24 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); h_dirac_output_synthesis_state->direct_power_factor_q = 30; move16(); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" );/*/ } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" ); ivas_dirac_dec_compute_gain_factors_fx( num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], h_dirac_output_synthesis_state->direct_power_factor_fx, h_dirac_output_synthesis_state->diffuse_power_factor_fx, &h_dirac_output_synthesis_state->direct_power_factor_q, &h_dirac_output_synthesis_state->diffuse_power_factor_q ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" );*/ } } ELSE IF( EQ_16( dec_param_estim, TRUE ) ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" ); + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" ); /* compute direct responses */ ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom, hDirACRend, @@ -764,7 +771,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); - + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" );*/ { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -811,12 +818,14 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[kk] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; move16(); } - + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" ); FOR( ch_idx = 0; ch_idx < s_min( 4, nchan_transport ); ch_idx++ ) { Word16 k; IF( ch_idx != 0 ) { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" ); + ; Word32 a, c; Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c; Word32 mpy_a_a_b, mpy_diff_c, mpy_diff_aab; @@ -906,6 +915,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ + + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -931,11 +942,63 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } +#ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot + q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_c = sub( q_diffuseness, 4 ); mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + /*Todo: simplify so that mpy+add can be merged to madd*/ + + //sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + + { + Word16 mpy_diff_c_exp = sub( 31, q_diff_c ); + Word16 q_diff_aab_exp = sub( 31, q_diff_aab ); + + Word32 L_tmp; + Word16 shift; + + if ( !mpy_diff_c ) + mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); + + if ( !mpy_diff_aab ) + q_diff_aab_exp = add( mpy_diff_c_exp, 0 ); + + shift = sub( mpy_diff_c_exp, q_diff_aab_exp ); + shift = s_max( -31, shift ); + shift = s_min( 31, shift ); + if ( shift < 0 ) + { + /* exponent of b is greater than exponent of a, shr mpy_diff_c */ + mpy_diff_c = L_shl( mpy_diff_c, shift ); + } + if ( shift > 0 ) + { + /* exponent of a is greater than exponent of b */ + mpy_diff_aab = L_shr( mpy_diff_aab, shift ); + } + mpy_diff_c_exp = add( s_max( mpy_diff_c_exp, q_diff_aab_exp ), 1 ); + L_tmp = L_add( L_shr( mpy_diff_c, 1 ), L_shr( mpy_diff_aab, 1 ) ); + shift = norm_l( L_tmp ); + if ( shift ) + L_tmp = L_shl( L_tmp, shift ); + if ( L_tmp == 0 ) + mpy_diff_c_exp = add( 0, 0 ); + if ( L_tmp != 0 ) + mpy_diff_c_exp = sub( mpy_diff_c_exp, shift ); + sqr_exp = mpy_diff_c_exp; + + sqr_inp = L_tmp; + } + +#else + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); @@ -959,6 +1022,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } +#endif + sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) @@ -987,9 +1052,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );*/ } ELSE { + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" ); Word32 sqr_inp, mpy_diff, sqr; Word16 sqr_exp; /*Diffuseness modellling nrg compensation*/ @@ -1027,6 +1095,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1 @@ -1060,8 +1129,12 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ + pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ } } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ + Word16 temp = MAX_16; /*q0*/ move16(); tmp16 = imult1616( num_freq_bands, num_channels_dir ); @@ -1078,9 +1151,21 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } free( Q_temp_cy_cross_dir_smooth_fx ); /*Directional gain (panning)*/ + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" ); + Word16 temp_q = sub( add( h_dirac_output_synthesis_state->direct_power_factor_q, h_dirac_output_synthesis_state->direct_responses_q ), 31 ); IF( LT_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) { +#ifdef FIX_1072_SPEEDUP_gainpanning /*is there any difference in any bitstream?*/ + Word16 temp_q1 = sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ); + FOR( Word16 kk = 0; kk < tmp16; kk++ ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], temp_q1 ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/ + move32(); + } + h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; + move16(); +#else FOR( Word16 kk = 0; kk < tmp16; kk++ ) { h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk] = L_shl( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[kk], sub( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ->temp_q*/ @@ -1088,7 +1173,53 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); +#endif + + } +#ifdef FIX_1072_SPEEDUP_gainpanning + Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); + FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ ) + { + IF( NE_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + Word16 i; + Word32 aux; + IF(temp_q1 < 0) + { + Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); + FOR( i = 0; i < num_freq_bands; i++ ) + { + aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); + move32(); + } + } + ELSE + { + FOR( i = 0; i < num_freq_bands; i++ ) + { + aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + aux = L_shl( aux, temp_q1 ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux ); + move32(); + } + } + + } + ELSE + { + Word16 i; + FOR( i = 0; i < num_freq_bands; i++ ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); + move32(); + } + } + + } + +#else FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_dir; ch_idx++ ) { v_mult_fixed( h_dirac_output_synthesis_state->direct_power_factor_fx, @@ -1107,6 +1238,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/ } +#endif + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" );*/ + /*Diffuse gain*/ FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_diff; ch_idx++ ) { @@ -1124,7 +1258,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff], num_freq_bands_diff, 0 ); /*h_dirac_output_synthesis_state->q_cy_auto_diff_smooth*/ } - + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3 <<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ return; } ELSE @@ -1143,7 +1278,9 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ diff_start_band = 0; move16(); -- GitLab From e195b40a9d3cc49024483b47e5b60932b3a298a2 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 15:21:55 +0100 Subject: [PATCH 02/18] delete FIX_1072_SPEEDUP_output_synthesis_procSlot --- lib_com/options.h | 3 +- lib_rend/ivas_dirac_output_synthesis_dec.c | 33 +++++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 9e9295f22..b3a1e4238 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -/*#define WMOPS*/ /* Activate complexity and memory counters */ +#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -170,6 +170,5 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ #define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ -#define FIX_1072_SPEEDUP_output_synthesis_procSlot /* "-" */ #endif \ No newline at end of file diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 320492c66..39d52be9f 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -942,28 +942,27 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } +#define FIX_1072_SPEEDUP_output_synthesis_procSlot #ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); - mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 - mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 - mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - - /*Todo: simplify so that mpy+add can be merged to madd*/ - - //sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ - { Word16 mpy_diff_c_exp = sub( 31, q_diff_c ); Word16 q_diff_aab_exp = sub( 31, q_diff_aab ); - Word32 L_tmp; + Word32 L_tmp, L_tmp1, L_tmp2; Word16 shift; - if ( !mpy_diff_c ) - mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + + L_tmp = L_add( diffuseness[k], 0 ); + if ( !diffuseness[k] ) + mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); + if ( !c ) + mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); if ( !mpy_diff_aab ) q_diff_aab_exp = add( mpy_diff_c_exp, 0 ); @@ -973,7 +972,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( if ( shift < 0 ) { /* exponent of b is greater than exponent of a, shr mpy_diff_c */ - mpy_diff_c = L_shl( mpy_diff_c, shift ); + L_tmp = L_shl( L_tmp, shift ); } if ( shift > 0 ) { @@ -981,7 +980,15 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_diff_aab = L_shr( mpy_diff_aab, shift ); } mpy_diff_c_exp = add( s_max( mpy_diff_c_exp, q_diff_aab_exp ), 1 ); - L_tmp = L_add( L_shr( mpy_diff_c, 1 ), L_shr( mpy_diff_aab, 1 ) ); + L_tmp = L_shr( L_tmp, 1 ); + L_tmp1 = L_shr( mpy_diff_aab, 1 ); + + L_tmp = Madd_32_32( L_tmp1, L_tmp, c ); // Q = q_diffuseness - 4 + + /*Todo: simplify so that mpy+add can be merged to madd*/ + + // sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + shift = norm_l( L_tmp ); if ( shift ) L_tmp = L_shl( L_tmp, shift ); -- GitLab From 7ffce0ac054813b100e1d6fa03aaf7cc8139c5a8 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 15:22:21 +0100 Subject: [PATCH 03/18] delete FIX_1072_SPEEDUP_output_synthesis_procSlot 2 --- lib_rend/ivas_dirac_output_synthesis_dec.c | 61 ---------------------- 1 file changed, 61 deletions(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 39d52be9f..6a7a73c18 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -942,66 +942,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } -#define FIX_1072_SPEEDUP_output_synthesis_procSlot -#ifdef FIX_1072_SPEEDUP_output_synthesis_procSlot - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - q_diff_c = sub( q_diffuseness, 4 ); - - { - Word16 mpy_diff_c_exp = sub( 31, q_diff_c ); - Word16 q_diff_aab_exp = sub( 31, q_diff_aab ); - - Word32 L_tmp, L_tmp1, L_tmp2; - Word16 shift; - - mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 - mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 - - L_tmp = L_add( diffuseness[k], 0 ); - - if ( !diffuseness[k] ) - mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); - if ( !c ) - mpy_diff_c_exp = add( q_diff_aab_exp, 0 ); - if ( !mpy_diff_aab ) - q_diff_aab_exp = add( mpy_diff_c_exp, 0 ); - - shift = sub( mpy_diff_c_exp, q_diff_aab_exp ); - shift = s_max( -31, shift ); - shift = s_min( 31, shift ); - if ( shift < 0 ) - { - /* exponent of b is greater than exponent of a, shr mpy_diff_c */ - L_tmp = L_shl( L_tmp, shift ); - } - if ( shift > 0 ) - { - /* exponent of a is greater than exponent of b */ - mpy_diff_aab = L_shr( mpy_diff_aab, shift ); - } - mpy_diff_c_exp = add( s_max( mpy_diff_c_exp, q_diff_aab_exp ), 1 ); - L_tmp = L_shr( L_tmp, 1 ); - L_tmp1 = L_shr( mpy_diff_aab, 1 ); - - L_tmp = Madd_32_32( L_tmp1, L_tmp, c ); // Q = q_diffuseness - 4 - - /*Todo: simplify so that mpy+add can be merged to madd*/ - - // sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ - - shift = norm_l( L_tmp ); - if ( shift ) - L_tmp = L_shl( L_tmp, shift ); - if ( L_tmp == 0 ) - mpy_diff_c_exp = add( 0, 0 ); - if ( L_tmp != 0 ) - mpy_diff_c_exp = sub( mpy_diff_c_exp, shift ); - sqr_exp = mpy_diff_c_exp; - - sqr_inp = L_tmp; - } - -#else mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 @@ -1029,7 +969,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } -#endif sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ -- GitLab From 9ab12acf35c129fae7d4bfc27bde0200a2331f59 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 18 Feb 2025 16:01:44 +0100 Subject: [PATCH 04/18] Some more push/pop wmops --- lib_com/ivas_dirac_com.c | 4 ++-- lib_com/options.h | 2 +- lib_rend/ivas_dirac_output_synthesis_dec.c | 16 ++++++++++++---- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 796fc8960..97c8a88bd 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -969,7 +969,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_energy[i], min_q_shift1 ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; @@ -1019,7 +1019,7 @@ void computeDiffuseness_fixed( q_tmp = add( q_factor_intensity[i], min_q_shift2 ); shift_q = sub( q_tmp, q_intensity ); -#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB +#ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS if( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); diff --git a/lib_com/options.h b/lib_com/options.h index b3a1e4238..44e1e3623 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -169,6 +169,6 @@ #define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESSB /* "-" */ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* "-" */ #endif \ No newline at end of file diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 6a7a73c18..540934a63 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -623,7 +623,6 @@ void ivas_dirac_dec_output_synthesis_close_fx( * * *------------------------------------------------------------------------*/ - void ivas_dirac_dec_output_synthesis_process_slot_fx( const Word32 *reference_power, /* i : Estimated power Q(q_reference_power)*/ const Word16 q_reference_power, /* i : Estimated power Q */ @@ -756,7 +755,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( ELSE IF( EQ_16( dec_param_estim, TRUE ) ) { push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" ); - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" ); /* compute direct responses */ ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom, hDirACRend, @@ -771,7 +769,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.1<<<<-|" );*/ { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -945,7 +942,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); @@ -2974,6 +2970,8 @@ void ivas_dirac_dec_compute_directional_responses_fx( Word16 dipole_freq_range[2]; MASA_TRANSPORT_SIGNAL_TYPE transport_signal_type; + push_wmops( "(IDR PATH3 B3.1)" ); + Q_direct_response_ls = Q31; move16(); exp_direct_response_ls = 0; @@ -3035,6 +3033,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( LT_16( k, MASA_band_grouping_24[masa_band_mapping[add( codingBand, 1 )]] ) && NE_16( k, hDirACRend->h_output_synthesis_psd_params.max_band_decorr ) ) { + push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " ); /* Panning gains have to be computed only for the first bin of the coding band in MASA, for other bins the previous values can be used */ IF( NE_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { @@ -3046,12 +3045,15 @@ void ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom->num_freq_bands, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ + pop_wmops();/*push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " );*/ } ELSE { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " ); /* HOA3 PANNING */ IF( EQ_16( hDirACRend->panningConf, DIRAC_PANNING_HOA3 ) ) { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF" ); set32_fx( direct_response_hoa_fx, ONE_IN_Q29, MAX_OUTPUT_CHANNELS ); /*q29*/ set32_fx( direct_response_dir2_fx, ONE_IN_Q29, MAX_OUTPUT_CHANNELS ); /*q29*/ @@ -3350,9 +3352,11 @@ void ivas_dirac_dec_compute_directional_responses_fx( { assert( 0 && "Not supported synthesis method!" ); } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF" );*/ } ELSE IF( EQ_16( hDirACRend->panningConf, DIRAC_PANNING_VBAP ) ) /*VBAP*/ { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" ); /* Synthesize the first direction */ spreadCoherencePanningVbap_fx( azimuth[k], elevation[k], hSpatParamRendCom->spreadCoherence_fx[md_idx][k], direct_response_ls_fx, &Q_direct_response_ls, num_channels_dir, hVBAPdata ); @@ -3604,11 +3608,13 @@ void ivas_dirac_dec_compute_directional_responses_fx( mvr2r_inc_fixed( direct_response_square_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_square_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_square_q*/ mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" );*/ } ELSE { assert( 0 && "Not supported panning method!" ); } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " );*/ } } @@ -3617,6 +3623,8 @@ void ivas_dirac_dec_compute_directional_responses_fx( hDirACRend->h_output_synthesis_psd_state.direct_responses_square_q = direct_response_square_q; move16(); + + pop_wmops(); return; } -- GitLab From 1bb7dd5c70dda96a488f52e4c56dc475ecf35c86 Mon Sep 17 00:00:00 2001 From: Bauer Date: Thu, 20 Feb 2025 10:46:34 +0100 Subject: [PATCH 05/18] added/changed FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot & FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx --- lib_com/ivas_prot_fx.h | 8 ++ lib_com/ivas_spar_com.c | 135 +++++++++++++++++++++ lib_com/options.h | 7 +- lib_rend/ivas_dirac_output_synthesis_dec.c | 117 +++++++++++++----- 4 files changed, 237 insertions(+), 30 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 5ce037b9a..ed72fd178 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4994,6 +4994,14 @@ void ivas_dirac_dec_get_response_fx( const Word16 ambisonics_order, Word16 Q_out ); +#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +void ivas_dirac_dec_get_response_fx_29( + const Word16 azimuth, + const Word16 elevation, + Word32 *response_fx, /*Q_out*/ + const Word16 ambisonics_order); + #endif + void calculate_hodirac_sector_parameters_fx( DIRAC_ENC_HANDLE hDirAC, /* i : DirAC handle */ Word32 RealBuffer_fx[DIRAC_MAX_ANA_CHANS][DIRAC_NO_FB_BANDS_MAX], /* i : signal vector (L+1)^2 x N_bins, real part */ diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index 14de6cc15..cd5bab07c 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7173,6 +7173,141 @@ void ivas_dirac_dec_get_response_fx( return; } +#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +void ivas_dirac_dec_get_response_fx_29( + const Word16 azimuth, + const Word16 elevation, + Word32 *response_fx, /*Q_out*/ + const Word16 ambisonics_order) +{ + Word16 index_azimuth, index_elevation; + Word16 el, az; + Word32 cos_1_fx, cos_2_fx, sin_1_fx, cos_az_fx[3]; + Word32 sin_az_fx[3]; + Word32 f_fx; + Word32 c_fx_better; + Word16 l, m; + Word16 b, b1, b_2, b1_2; + //Word16 Q_out = 29; + + push_wmops( "ivas_dirac_dec_get_response_fx_29" ); + index_azimuth = add( azimuth, 180 ) % 360; + move16(); + index_elevation = add( elevation, 90 ); + + Word32 e_fac = L_add(0x7FFFFFFF, 0); + + if ( GT_16( index_elevation, 90 ) ) + { + e_fac = L_add(0x80000000, 0); + } + + + el = index_elevation; + move16(); + + if ( GT_16( index_elevation, 90 ) ) + { + el = sub( 180, index_elevation ); + } + + az = index_azimuth; + move16(); + + if ( GT_16( index_azimuth, 180 ) ) + { + az = sub( 360, index_azimuth ); + } + + f_fx = 1; + move16(); + + if ( GT_16( index_azimuth, 180 ) ) + { + f_fx = -1; + } + + cos_1_fx = L_shr( dirac_gains_trg_term_fx[az][0], 1 ); // q30 + cos_2_fx = L_shl( Mpy_32_32( cos_1_fx, cos_1_fx ), 1 ); // q30 + sin_1_fx = L_shr( dirac_gains_trg_term_fx[az][1], 1 ); // q30 + + if ( EQ_32( f_fx, -1 ) ) + { + sin_1_fx = L_negate( sin_1_fx ); // q30 + } + cos_az_fx[0] = cos_1_fx; // q30 + move32(); + cos_az_fx[1] = L_shl( L_sub( cos_2_fx, ONE_IN_Q29 /*0.5 q30*/ ), 1 ); /*q30*/ + move32(); + cos_az_fx[2] = L_sub( L_shl( Mpy_32_32( cos_1_fx, cos_az_fx[1] ), 2 ), cos_az_fx[0] /* cos_az_fx[0] q30*/ ); /*q30*/ + move32(); + sin_az_fx[0] = sin_1_fx; /*q30*/ + move32(); + sin_az_fx[1] = L_shl( Mpy_32_32( sin_1_fx, cos_1_fx ), 2 ); /*q30*/ + move32(); + sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /*q30*/ + move32(); + + //response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out + response_fx[0] = 0x20000000; + move32(); + + //q_diff = sub( Q_out, 29 ); + + push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" ); + +FOR( l = 1; l <= ambisonics_order; l++ ) + { + Word16 a; + b_2 = imult1616( l, l ); + b1_2 = add( b_2, shl( l, 1 ) ); + FOR( m = 0; m < l; m += 2 ) + { + b = b_2 + m; + a = dirac_gains_P_idx[b]; + + c_fx_better = local_result_table[el][a]; // q30 + move32(); + response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // Q_out + move32(); + + b1 = b1_2 - m; + response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // Q_out + move32(); + } + + FOR( m = 1; m < l; m += 2 ) + { + b = b_2 + m; + a = dirac_gains_P_idx[b]; + c_fx_better = local_result_table[el][a]; // q30 + move32(); + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // Q_out + move32(); + + b1 = b1_2 - m; + response_fx[b1] = Mpy_32_32( c_fx_better, cos_az_fx[l - m - 1] ); // Q_out + move32(); + } + + b = add( b_2, l ); + a = dirac_gains_P_idx[b]; + c_fx_better = local_result_table_2[el][a]; // q30 + move32(); + if ( s_and( l, 0x01 ) ) + { + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + } + response_fx[b] = L_shl( c_fx_better, -1 ); // Q_out + move32(); + } + + pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" );*/ + pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29" );*/ + return; +} +#endif /*FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx*/ /*-----------------------------------------------------------------------------------------* * Function ivas_get_bits_to_encode * diff --git a/lib_com/options.h b/lib_com/options.h index 44e1e3623..8fe969bb6 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -164,11 +164,12 @@ #define FIX_ISSUE_1247 #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ #define FIX_1285_DECODER_CRASH - #define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ +#define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* "-" */ -#define FIX_1072_SPEEDUP_gainpanning /* FhG: WMOPS tuning, in development*/ -#define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* "-" */ +#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ +#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ #endif \ No newline at end of file diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 540934a63..f7d18b65d 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -821,7 +821,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Word16 k; IF( ch_idx != 0 ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" ); + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" ); ; Word32 a, c; Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c; @@ -912,8 +912,50 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );*/ + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" ); +#ifdef FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot + FOR( ; k < num_freq_bands; k++ ) + { + a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses + move32(); + IF( reference_power[k + num_freq_bands] == 0 ) + { + sqr_inp = Mpy_32_32( diffuseness[k], c ); + sqr_exp = sub( 31 - 4, q_diffuseness ); + } + ELSE + { + IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 ) + { + mpy_a_a_b = Mpy_32_32( a, a ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + //q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); + q_diff_c = sub( q_diffuseness, 4 ); + + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + } + ELSE + { + b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ + + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 + mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 + mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + //q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); + q_diff_c = sub( q_diffuseness, 4 ); - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" ); + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + + } + } + sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ + sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ + } +#else FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -942,7 +984,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q + sub( sub( 15, b_exp ), 15 ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + q_diff_aab = add( add( h_dirac_output_synthesis_state->direct_responses_q, sub( sub( 15, b_exp ), 15 ) ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); test(); @@ -968,38 +1010,40 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ - IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) + } +#endif + + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) + { + IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) { - IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) - { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ - move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); - move16(); - } - ELSE - { - sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; - move16(); - } - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); } ELSE { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ - move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; move16(); } + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ + move32(); + } + ELSE + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF nfreqbds <<<<<<-|" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop IF <<<<<-|" );*/ + + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );*/ } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" ); + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<-|" ); Word32 sqr_inp, mpy_diff, sqr; Word16 sqr_exp; /*Diffuseness modellling nrg compensation*/ @@ -1037,7 +1081,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" ); + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<<-|" );*/ + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1 @@ -1071,8 +1116,8 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE nfreqbds <<<<<<-|" );*/ - pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop ELSE <<<<<-|" );/*/ + + pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<-|" );/*/ } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ @@ -3066,24 +3111,38 @@ void ivas_dirac_dec_compute_directional_responses_fx( exp_direct_response_dir2 = 0; move16(); + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" ); IF( p_Rmat != 0 ) { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" ); ivas_dirac_dec_get_response_split_order_fx( azimuth[k], elevation[k], direct_response_hoa_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_hoa ); IF( hodirac_flag ) { ivas_dirac_dec_get_response_split_order_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_dir2 ); } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" );*/ } ELSE { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" ); +#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx + ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order); +#else ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa ); +#endif IF( hodirac_flag ) { +#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx + ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order); +#else ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 ); +#endif } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" );*/ } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" );*/ test(); test(); @@ -3091,16 +3150,19 @@ void ivas_dirac_dec_compute_directional_responses_fx( test(); IF( masa_band_mapping == NULL && EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" ); mvr2r_inc_fixed( direct_response_hoa_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_hoa*/ IF( hodirac_flag ) { mvr2r_inc_fixed( direct_response_dir2_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k + hSpatParamRendCom->num_freq_bands * num_channels_dir], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_dir2*/ } + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" );*/ } ELSE IF( ( ( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) && ( masa_band_mapping != NULL ) ) || EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_PSD_SHD ) || EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_MONO ) ) { + push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" ); /* Synthesize the first direction */ IF( GT_16( Q_direct_response_hoa, Q29 ) ) { @@ -3347,6 +3409,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( } mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*q29*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );*/ } ELSE { -- GitLab From abc297aae68e774f96cdb5ac108997b539f289a4 Mon Sep 17 00:00:00 2001 From: Bauer Date: Thu, 20 Feb 2025 13:57:31 +0100 Subject: [PATCH 06/18] some more mods to FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot --- lib_rend/ivas_dirac_output_synthesis_dec.c | 56 ++++++++++++++++------ 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index f7d18b65d..d96165ba4 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -821,8 +821,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Word16 k; IF( ch_idx != 0 ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" ); - ; Word32 a, c; Word16 b, b_exp, sqr_exp, q_diff_aab, q_diff_c; Word32 mpy_a_a_b, mpy_diff_c, mpy_diff_aab; @@ -912,7 +910,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop A <<<<<-|" );*/ + push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" ); #ifdef FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot FOR( ; k < num_freq_bands; k++ ) @@ -926,16 +924,20 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } ELSE { + Word16 diff_c_exp; + Word16 diff_aab_exp; IF( reference_power[k + ( ch_idx + 1 ) * num_freq_bands] == 0 ) { mpy_a_a_b = Mpy_32_32( a, a ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 //q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); - q_diff_c = sub( q_diffuseness, 4 ); + //q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); + diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) ); + //q_diff_c = sub( q_diffuseness, 4 ); + diff_c_exp = sub( 31 + 4, q_diffuseness ); - sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ } ELSE { @@ -945,16 +947,44 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 //q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); - q_diff_c = sub( q_diffuseness, 4 ); + //q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); + diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness ); + //q_diff_c = sub( q_diffuseness, 4 ); + diff_c_exp = sub( 31 + 4, q_diffuseness ); - sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, sub( 31, q_diff_c ), mpy_diff_aab, sub( 31, q_diff_aab ), &sqr_exp ); /*q(31-sqr_exp)*/ + sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ } } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ } + + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) + { + IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } + ELSE + { + sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + move16(); + } + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ + move32(); + } + ELSE + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } #else FOR( ; k < num_freq_bands; k++ ) { @@ -1011,7 +1041,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ } -#endif IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) { @@ -1038,12 +1067,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); move16(); } +#endif + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );*/ } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<-|" ); Word32 sqr_inp, mpy_diff, sqr; Word16 sqr_exp; /*Diffuseness modellling nrg compensation*/ @@ -1081,8 +1111,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop C <<<<<<-|" );*/ - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<<-|" ); FOR( ; k < num_freq_bands; k++ ) { mpy_diff = Mpy_32_32( diffuseness[k], L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_decorr_fx, ONE_IN_Q29 /*1 Q29*/ ) ); // Q = q_diffuseness - 1 @@ -1116,8 +1144,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } - - pop_wmops();/*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop D <<<<<-|" );/*/ } } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ -- GitLab From 592784dde6f72cd513196a8066255e1b0a1d4be6 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Thu, 20 Feb 2025 14:52:16 +0000 Subject: [PATCH 07/18] Revert some unwanted changes --- lib_com/ivas_prot_fx.h | 2 +- lib_com/options.h | 1 - lib_dec/ivas_dirac_dec.c | 3 ++- lib_rend/ivas_dirac_output_synthesis_dec.c | 5 ++++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 21583afe5..96a628408 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -5001,7 +5001,7 @@ void ivas_dirac_dec_get_response_fx_29( const Word16 elevation, Word32 *response_fx, /*Q_out*/ const Word16 ambisonics_order); - #endif +#endif void calculate_hodirac_sector_parameters_fx( DIRAC_ENC_HANDLE hDirAC, /* i : DirAC handle */ diff --git a/lib_com/options.h b/lib_com/options.h index 8fe969bb6..88bb82c2e 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -164,7 +164,6 @@ #define FIX_ISSUE_1247 #define NONBE_FIX_1087_OOB_SBA_DTX_RS /* VA: issue 1087: Extend the length of the buffer for MCT decoding to avoid out-of-bound writing in SBA SID bitrate switching decoding */ #define FIX_1285_DECODER_CRASH -#define FIX_MINOR_SVD_WMOPS_MR1010X /* FhG: Minor WMOPS tuning, bit-exact to previous version, saves about 8.2 WMOPS for MR1010 */ #define FIX_1072_SPEEDUP_gainpanning /* FhG: Minor WMOPS tuning, in development*/ #define FIX_1072_SPEEDUP_COMPUTEDIFUSENESS /* "-" */ diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 318a3ec55..13ba3479b 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -3595,6 +3595,7 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * CLDFB synthesis (and binaural rendering) *-----------------------------------------------------------------*/ + index_slot = slot_idx_start_cldfb_synth; move16(); @@ -3976,7 +3977,6 @@ void ivas_dirac_dec_render_sf_fx( } } - hSpatParamRendCom->slots_rendered = add( hSpatParamRendCom->slots_rendered, hSpatParamRendCom->subframe_nbslots[subframe_idx] ); move16(); hSpatParamRendCom->subframes_rendered = add( hSpatParamRendCom->subframes_rendered, 1 ); @@ -4091,6 +4091,7 @@ void ivas_dirac_dec_render_sf_fx( } } } + pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ return; diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index d96165ba4..a620095a0 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -623,6 +623,7 @@ void ivas_dirac_dec_output_synthesis_close_fx( * * *------------------------------------------------------------------------*/ + void ivas_dirac_dec_output_synthesis_process_slot_fx( const Word32 *reference_power, /* i : Estimated power Q(q_reference_power)*/ const Word16 q_reference_power, /* i : Estimated power Q */ @@ -769,6 +770,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); + { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -1011,9 +1013,11 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( b = BASOP_Util_Divide3232_Scale( reference_power[k + num_freq_bands], reference_power[k + ( ch_idx + 1 ) * num_freq_bands], &b_exp ); /*q(15-b_exp)*/ } } + mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 + q_diff_aab = add( add( h_dirac_output_synthesis_state->direct_responses_q, sub( sub( 15, b_exp ), 15 ) ), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); q_diff_c = sub( q_diffuseness, 4 ); @@ -1037,7 +1041,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sqr_exp = sub( 31, q_diff_c ); /*q_diff_c*/ } } - sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ } -- GitLab From 9c1092282d7f2d8635aec151f01ee76ff1640463 Mon Sep 17 00:00:00 2001 From: Bauer Date: Thu, 20 Feb 2025 15:57:09 +0100 Subject: [PATCH 08/18] applied clang patch --- lib_com/ivas_dirac_com.c | 7 ++-- lib_com/ivas_prot_fx.h | 2 +- lib_com/ivas_spar_com.c | 18 +++++------ lib_com/options.h | 4 +-- lib_dec/ivas_dirac_dec.c | 6 ++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 37 ++++++++++------------ 6 files changed, 34 insertions(+), 40 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 97c8a88bd..d03191084 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -973,12 +973,12 @@ void computeDiffuseness_fixed( Word16 shift_q = sub( q_tmp, q_ene ); Word32 shiftEquiv; Word16 shift_qtotal; - if( shift_q < 0 ) + if ( shift_q < 0 ) { shiftEquiv = L_lshl( 0x80000000, shift_q ); shift_qtotal = sub( min_q_shift1, 0 ); } - if( shift_q >= 0 ) + if ( shift_q >= 0 ) { shiftEquiv = L_add( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift1, shift_q ); @@ -1012,7 +1012,6 @@ void computeDiffuseness_fixed( #endif - q_ene = s_min( q_ene, q_tmp ); /* Intensity slow */ @@ -1020,7 +1019,7 @@ void computeDiffuseness_fixed( shift_q = sub( q_tmp, q_intensity ); #ifdef FIX_1072_SPEEDUP_COMPUTEDIFUSENESS - if( shift_q >= 0 ) + if ( shift_q >= 0 ) { shiftEquiv = L_lshl( 0x7FFFFFFF, 0 ); shift_qtotal = sub( min_q_shift2, shift_q ); diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 96a628408..95dde1650 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -5000,7 +5000,7 @@ void ivas_dirac_dec_get_response_fx_29( const Word16 azimuth, const Word16 elevation, Word32 *response_fx, /*Q_out*/ - const Word16 ambisonics_order); + const Word16 ambisonics_order ); #endif void calculate_hodirac_sector_parameters_fx( diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index cd5bab07c..c6194b8de 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7178,7 +7178,7 @@ void ivas_dirac_dec_get_response_fx_29( const Word16 azimuth, const Word16 elevation, Word32 *response_fx, /*Q_out*/ - const Word16 ambisonics_order) + const Word16 ambisonics_order ) { Word16 index_azimuth, index_elevation; Word16 el, az; @@ -7188,18 +7188,18 @@ void ivas_dirac_dec_get_response_fx_29( Word32 c_fx_better; Word16 l, m; Word16 b, b1, b_2, b1_2; - //Word16 Q_out = 29; + // Word16 Q_out = 29; push_wmops( "ivas_dirac_dec_get_response_fx_29" ); index_azimuth = add( azimuth, 180 ) % 360; move16(); index_elevation = add( elevation, 90 ); - Word32 e_fac = L_add(0x7FFFFFFF, 0); + Word32 e_fac = L_add( 0x7FFFFFFF, 0 ); if ( GT_16( index_elevation, 90 ) ) { - e_fac = L_add(0x80000000, 0); + e_fac = L_add( 0x80000000, 0 ); } @@ -7248,15 +7248,15 @@ void ivas_dirac_dec_get_response_fx_29( sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /*q30*/ move32(); - //response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out + // response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out response_fx[0] = 0x20000000; move32(); - //q_diff = sub( Q_out, 29 ); + // q_diff = sub( Q_out, 29 ); push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" ); -FOR( l = 1; l <= ambisonics_order; l++ ) + FOR( l = 1; l <= ambisonics_order; l++ ) { Word16 a; b_2 = imult1616( l, l ); @@ -7282,7 +7282,7 @@ FOR( l = 1; l <= ambisonics_order; l++ ) a = dirac_gains_P_idx[b]; c_fx_better = local_result_table[el][a]; // q30 move32(); - c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 response_fx[b] = Mpy_32_32( c_fx_better, sin_az_fx[l - m - 1] ); // Q_out move32(); @@ -7297,7 +7297,7 @@ FOR( l = 1; l <= ambisonics_order; l++ ) move32(); if ( s_and( l, 0x01 ) ) { - c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 + c_fx_better = Mpy_32_32( c_fx_better, e_fac ); // q30 } response_fx[b] = L_shl( c_fx_better, -1 ); // Q_out move32(); diff --git a/lib_com/options.h b/lib_com/options.h index 88bb82c2e..4ef49845c 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -#define WMOPS /* Activate complexity and memory counters */ +//#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -171,4 +171,4 @@ #define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ #define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ -#endif \ No newline at end of file +#endif diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 13ba3479b..77287dc76 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2557,7 +2557,7 @@ void ivas_dirac_dec_render_sf_fx( p_Rmat_fx = 0; move32(); } - + IF( ( hDirAC->hConfig->dec_param_estim == FALSE ) ) { Word16 *masa_band_mapping; @@ -2926,7 +2926,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: - pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } @@ -4091,7 +4091,7 @@ void ivas_dirac_dec_render_sf_fx( } } } - + pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ return; diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index a620095a0..bc4970bcc 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -770,7 +770,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( sh_rot_max_order, p_Rmat, hodirac_flag ); - + { IF( h_dirac_output_synthesis_state->direct_responses_square_fx ) { @@ -933,10 +933,10 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, a ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - //q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - //q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); + // q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + // q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) ); - //q_diff_c = sub( q_diffuseness, 4 ); + // q_diff_c = sub( q_diffuseness, 4 ); diff_c_exp = sub( 31 + 4, q_diffuseness ); sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ @@ -948,14 +948,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - //q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - //q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); + // q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); + // q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness ); - //q_diff_c = sub( q_diffuseness, 4 ); + // q_diff_c = sub( q_diffuseness, 4 ); diff_c_exp = sub( 31 + 4, q_diffuseness ); sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ - } } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ @@ -1190,7 +1189,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth = temp_q; move16(); #endif - } #ifdef FIX_1072_SPEEDUP_gainpanning Word16 temp_q1 = sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, temp_q ); @@ -1200,13 +1198,13 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( { Word16 i; Word32 aux; - IF(temp_q1 < 0) + IF( temp_q1 < 0 ) { Word32 temp_q1_equiv = L_lshl( 0x80000000, temp_q1 ); FOR( i = 0; i < num_freq_bands; i++ ) { aux = Mpy_32_32( h_dirac_output_synthesis_state->direct_power_factor_fx[i], h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + i] ); - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] , aux, temp_q1_equiv ); + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i] = Madd_32_32( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + i], aux, temp_q1_equiv ); move32(); } } @@ -1220,7 +1218,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - } ELSE { @@ -1231,8 +1228,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move32(); } } - - } #else @@ -1294,7 +1289,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ + pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ @@ -3119,7 +3114,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom->num_freq_bands, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ - pop_wmops();/*push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " );*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " );*/ } ELSE { @@ -3156,7 +3151,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( { push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" ); #ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx - ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order); + ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order ); #else ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa ); #endif @@ -3164,7 +3159,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( IF( hodirac_flag ) { #ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx - ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order); + ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order ); #else ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 ); #endif @@ -3438,7 +3433,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( } mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*q29*/ - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );*/ } ELSE { @@ -3700,13 +3695,13 @@ void ivas_dirac_dec_compute_directional_responses_fx( mvr2r_inc_fixed( direct_response_square_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_square_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_square_q*/ mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" );*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" );*/ } ELSE { assert( 0 && "Not supported panning method!" ); } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " );*/ + pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " );*/ } } -- GitLab From d4452776da4bc08bbf1c56271369f398d11fb963 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 25 Feb 2025 10:41:19 +0000 Subject: [PATCH 09/18] undo some unintended changes - deactivate inDev-defines --- lib_com/options.h | 4 ++-- lib_rend/ivas_dirac_output_synthesis_dec.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index be20cf739..788eb80b9 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -172,7 +172,7 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ -#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ -#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ +//#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ +//#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ #endif diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index b55a50afb..3556575fa 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -959,7 +959,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ - } + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) { @@ -986,6 +986,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); move16(); } + } #else FOR( ; k < num_freq_bands; k++ ) { @@ -1042,8 +1043,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ - } - IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) { IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) @@ -1069,6 +1068,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); move16(); } + } #endif -- GitLab From bad5dd7ac52dbe6723eae97978a6f12d9e75757f Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 25 Feb 2025 11:44:59 +0100 Subject: [PATCH 10/18] applied clang format patch --- lib_rend/ivas_dirac_output_synthesis_dec.c | 74 +++++++++++----------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 3556575fa..5ac5b676b 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -959,33 +959,33 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ - - IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) - { - IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } + ELSE + { + sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + move16(); + } + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); - move16(); } ELSE { - sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); move16(); } - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ - move32(); - } - ELSE - { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ - move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); - move16(); - } } #else FOR( ; k < num_freq_bands; k++ ) @@ -1043,31 +1043,31 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } sqr = Sqrt32( sqr_inp, &sqr_exp ); /*Q(31-sqr_exp)*/ sqr = L_shr( sqr, 2 ); /*Q(31-sqr_exp)*/ - IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) - { - IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + IF( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] != 0 ) { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + IF( LT_16( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) + { + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_shr( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sub( h_dirac_output_synthesis_state->q_cy_cross_dir_smooth, sub( 31, sqr_exp ) ) ); /*h_dirac_output_synthesis_state->q_cy_cross_dir_smooth->Q( 31- sqr_exp )*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); + move16(); + } + ELSE + { + sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + move16(); + } + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); - move16(); } ELSE { - sqr = L_shr( sqr, sub( sub( 31, sqr_exp ), h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ); /*Q(31- sqr_exp)->h_dirac_output_synthesis_state->q_cy_cross_dir_smooth*/ - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; + h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ + move32(); + Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); move16(); } - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k]*/ - move32(); - } - ELSE - { - h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = L_add( h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k], sqr ); /*Q(31- sqr_exp)*/ - move32(); - Q_temp_cy_cross_dir_smooth_fx[ch_idx * num_freq_bands + k] = sub( 31, sqr_exp ); - move16(); - } } #endif -- GitLab From 5fd61388a3eef1e1de4b9e97f0f2b3fe1e02a017 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 25 Feb 2025 12:01:48 +0100 Subject: [PATCH 11/18] fixed builderror --- lib_rend/ivas_dirac_output_synthesis_dec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 5ac5b676b..1bbecc555 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1248,8 +1248,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_cross_dir_smooth_fx[ch_idx * num_freq_bands], num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/ } -#endif - #endif pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" );*/ -- GitLab From f854c6a7ad1d22159d570a7a0761bae6461a2c41 Mon Sep 17 00:00:00 2001 From: ber Date: Tue, 25 Feb 2025 13:06:40 +0100 Subject: [PATCH 12/18] activate inDev macros --- lib_com/options.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index 788eb80b9..be20cf739 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -172,7 +172,7 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ -//#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ -//#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ +#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ #endif -- GitLab From 2b317eb444c38f6676598ef15e37554784883a2f Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 25 Feb 2025 15:20:53 +0100 Subject: [PATCH 13/18] activated inDev macros --- lib_com/options.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib_com/options.h b/lib_com/options.h index be20cf739..8d464ccef 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -//#define WMOPS /* Activate complexity and memory counters */ +#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -172,7 +172,8 @@ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ -#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ -#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ +#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ // -1.5 WMOPS +#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ // -1 WMOPS + #endif -- GitLab From da62e16d5fe1e91d6895d3e1d99b694e3a63c4bb Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 26 Feb 2025 13:25:08 +0100 Subject: [PATCH 14/18] some cleaning --- lib_com/ivas_prot_fx.h | 5 +++-- lib_com/ivas_spar_com.c | 5 +++-- lib_com/options.h | 7 +++---- lib_rend/ivas_dirac_output_synthesis_dec.c | 12 +++--------- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 95dde1650..debac9a73 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -4995,11 +4995,12 @@ void ivas_dirac_dec_get_response_fx( const Word16 ambisonics_order, Word16 Q_out ); -#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx +/*This is a derivate to ivas_dirac_dec_get_response_fx with fixed Q_out=29*/ void ivas_dirac_dec_get_response_fx_29( const Word16 azimuth, const Word16 elevation, - Word32 *response_fx, /*Q_out*/ + Word32 *response_fx, /*Q_out=29*/ const Word16 ambisonics_order ); #endif diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index c6194b8de..9ef094977 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7173,7 +7173,8 @@ void ivas_dirac_dec_get_response_fx( return; } -#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx +/*This is a derivate to ivas_dirac_dec_get_response_fx with fixed Q_out=29*/ void ivas_dirac_dec_get_response_fx_29( const Word16 azimuth, const Word16 elevation, @@ -7307,7 +7308,7 @@ void ivas_dirac_dec_get_response_fx_29( pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29" );*/ return; } -#endif /*FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx*/ +#endif /*FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx*/ /*-----------------------------------------------------------------------------------------* * Function ivas_get_bits_to_encode * diff --git a/lib_com/options.h b/lib_com/options.h index 8d464ccef..68914fa45 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -#define WMOPS /* Activate complexity and memory counters */ +//#define WMOPS /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -171,9 +171,8 @@ #define FIX_1297_OVERFLOW /* VA: fixes issue with overflows in pre-processing */ #define FIX_1298 /* VA: fix possible assert in gaus_enc */ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ - -#define FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, in development*/ // -1.5 WMOPS -#define FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, in development*/ // -1 WMOPS +#define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ +#define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ #endif diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 1bbecc555..68ca49ad9 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -914,7 +914,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" ); -#ifdef FIX1072_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot FOR( ; k < num_freq_bands; k++ ) { a = h_dirac_output_synthesis_state->direct_responses_fx[ch_idx * num_freq_bands + k]; // Q = h_dirac_output_synthesis_state->q_direct_responses @@ -933,10 +933,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, a ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - // q_diff_aab = add( h_dirac_output_synthesis_state->direct_responses_q, add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - // q_diff_aab = sub( add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ), 62 ); diff_aab_exp = sub( 31 + 62, add( h_dirac_output_synthesis_state->direct_responses_q, add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ) ) ); - // q_diff_c = sub( q_diffuseness, 4 ); diff_c_exp = sub( 31 + 4, q_diffuseness ); sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ @@ -948,10 +945,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( mpy_a_a_b = Mpy_32_32( a, Mpy_32_16_1( a, b ) ); // Q = (h_dirac_output_synthesis_state->q_direct_responses + (15 - b_exp) - 15) + (h_dirac_output_synthesis_state->q_direct_responses) - 31 mpy_diff_aab = Mpy_32_32( L_sub( L_shl( 1, q_diffuseness ), diffuseness[k] ), mpy_a_a_b ); // Q = 2*(h_dirac_output_synthesis_state->q_direct_responses) - b_exp - 31 + q_diffuseness -31 mpy_diff_c = Mpy_32_32( diffuseness[k], c ); // Q = q_diffuseness - 4 - // q_diff_aab = add( add(h_dirac_output_synthesis_state->direct_responses_q , sub( sub( 15, b_exp ), 15 )), add( sub( h_dirac_output_synthesis_state->direct_responses_q, 31 ), sub( q_diffuseness, 31 ) ) ); - // q_diff_aab = add( sub( h_dirac_output_synthesis_state->direct_responses_q, b_exp ), ( sub( add( h_dirac_output_synthesis_state->direct_responses_q, q_diffuseness ), 62 ) ) ); diff_aab_exp = sub( sub( add( sub( 31 + 62, h_dirac_output_synthesis_state->direct_responses_q ), b_exp ), h_dirac_output_synthesis_state->direct_responses_q ), q_diffuseness ); - // q_diff_c = sub( q_diffuseness, 4 ); diff_c_exp = sub( 31 + 4, q_diffuseness ); sqr_inp = BASOP_Util_Add_Mant32Exp( mpy_diff_c, diff_c_exp, mpy_diff_aab, diff_aab_exp, &sqr_exp ); /*q(31-sqr_exp)*/ @@ -3149,7 +3143,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( ELSE { push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" ); -#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order ); #else ivas_dirac_dec_get_response_fx( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_hoa ); @@ -3157,7 +3151,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( IF( hodirac_flag ) { -#ifdef FIX_1072_SPEEDUP_ivas_dirac_dec_get_response_fx +#ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx ivas_dirac_dec_get_response_fx_29( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order ); #else ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 ); -- GitLab From 67e59adf10ae3f19b1a1e4426bef5462236d91ee Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Wed, 26 Feb 2025 12:29:47 +0000 Subject: [PATCH 15/18] Del push/pop wmops --- lib_com/ivas_dirac_com.c | 2 - lib_com/ivas_spar_com.c | 9 ----- lib_com/options.h | 4 +- lib_dec/ivas_dirac_dec.c | 18 +-------- lib_rend/ivas_dirac_output_synthesis_dec.c | 45 +--------------------- 5 files changed, 5 insertions(+), 73 deletions(-) diff --git a/lib_com/ivas_dirac_com.c b/lib_com/ivas_dirac_com.c index 43cffea7c..2704c1f08 100644 --- a/lib_com/ivas_dirac_com.c +++ b/lib_com/ivas_dirac_com.c @@ -960,7 +960,6 @@ void computeDiffuseness_fixed( q_intensity = add( q_factor_intensity[0], min_q_shift2 ); move16(); - push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" ); FOR( i = 0; i < DIRAC_NO_COL_AVG_DIFF; ++i ) { /* Energy slow */ @@ -1070,7 +1069,6 @@ void computeDiffuseness_fixed( q_intensity = s_min( q_intensity, q_tmp ); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness B <<-|" );/*/ min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) ); diff --git a/lib_com/ivas_spar_com.c b/lib_com/ivas_spar_com.c index 9ef094977..dc23ad8a0 100644 --- a/lib_com/ivas_spar_com.c +++ b/lib_com/ivas_spar_com.c @@ -7191,7 +7191,6 @@ void ivas_dirac_dec_get_response_fx_29( Word16 b, b1, b_2, b1_2; // Word16 Q_out = 29; - push_wmops( "ivas_dirac_dec_get_response_fx_29" ); index_azimuth = add( azimuth, 180 ) % 360; move16(); index_elevation = add( elevation, 90 ); @@ -7249,14 +7248,9 @@ void ivas_dirac_dec_get_response_fx_29( sin_az_fx[2] = L_shl( Mpy_32_32( sin_1_fx, L_sub( cos_2_fx, ONE_IN_Q28 /*1/4 q30*/ ) ), 3 ); /*q30*/ move32(); - // response_fx[0] = L_shl_sat( 1, Q_out ); // Q_out response_fx[0] = 0x20000000; move32(); - // q_diff = sub( Q_out, 29 ); - - push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" ); - FOR( l = 1; l <= ambisonics_order; l++ ) { Word16 a; @@ -7303,9 +7297,6 @@ void ivas_dirac_dec_get_response_fx_29( response_fx[b] = L_shl( c_fx_better, -1 ); // Q_out move32(); } - - pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29_LOOPS" );*/ - pop_wmops(); /*push_wmops( "ivas_dirac_dec_get_response_fx_29" );*/ return; } #endif /*FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx*/ diff --git a/lib_com/options.h b/lib_com/options.h index 68914fa45..19b588da3 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -56,7 +56,7 @@ #define SUPPORT_JBM_TRACEFILE /* Support for JBM tracefile, which is needed for 3GPP objective/subjective testing, but not relevant for real-world implementations */ -//#define WMOPS /* Activate complexity and memory counters */ +/*#define WMOPS*/ /* Activate complexity and memory counters */ #ifdef WMOPS /*#define WMOPS_PER_FRAME*/ /* Output per-frame complexity (writes one float value per frame to the file "wmops_analysis") */ /*#define WMOPS_DETAIL*/ /* Output detailed complexity printout for every function. Increases runtime overhead */ @@ -173,6 +173,4 @@ #define FIX_1300_ICA_SHIFT_QUANT_IMPROV /* VA: Fix to 1300 to improve precision of the lag quantizer */ #define FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx /*FhG: WMOPS tuning, nonbe*/ #define FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot /*FhG: WMOPS tuning, nonbe*/ - - #endif diff --git a/lib_dec/ivas_dirac_dec.c b/lib_dec/ivas_dirac_dec.c index 77287dc76..95cca12a0 100644 --- a/lib_dec/ivas_dirac_dec.c +++ b/lib_dec/ivas_dirac_dec.c @@ -2231,7 +2231,7 @@ void ivas_dirac_dec_render_sf_fx( move16(); Word16 tmp1; - push_wmops( "ivas_dirac_dec_render (IDR)" ); + push_wmops( "ivas_dirac_dec_render" ); /* Initialize aux buffers */ hDirAC = st_ivas->hDirAC; @@ -2341,7 +2341,6 @@ void ivas_dirac_dec_render_sf_fx( } ELSE IF( !( EQ_16( st_ivas->ivas_format, SBA_FORMAT ) || EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) ) { - push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" ); Word16 outchannels; idx_lfe = 0; move16(); @@ -2410,7 +2409,6 @@ void ivas_dirac_dec_render_sf_fx( } } } - pop_wmops(); /*push_wmops( "(IDR) SBA_FORMAT | SBA_ISM_FORMAT" );*/ } size = imult1616( hDirACRend->num_outputs_dir, hSpatParamRendCom->num_freq_bands ); @@ -2708,7 +2706,6 @@ void ivas_dirac_dec_render_sf_fx( } } - push_wmops( "(IDR) LOOP1" ); FOR( slot_idx = 0; slot_idx < hSpatParamRendCom->subframe_nbslots[subframe_idx]; slot_idx++ ) { index_slot = add( slot_idx_start, slot_idx ); @@ -2926,8 +2923,6 @@ void ivas_dirac_dec_render_sf_fx( move16(); BREAK; default: - pop_wmops(); /* push_wmops( "ivas_dirac_dec_render (IDR)" );*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1");/*/ return; } q_proto_direct_buffer[slot_idx] = hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q; @@ -2937,7 +2932,6 @@ void ivas_dirac_dec_render_sf_fx( /*-----------------------------------------------------------------* * Compute DirAC parameters at decoder side *-----------------------------------------------------------------*/ - push_wmops( "(IDR) LOOP1 DirACparams |" ); IF( EQ_16( hDirAC->hConfig->dec_param_estim, TRUE ) ) { Copy( &hSpatParamRendCom->azimuth[md_idx][hDirAC->hConfig->enc_param_start_band], &azimuth[hDirAC->hConfig->enc_param_start_band], sub( hSpatParamRendCom->num_freq_bands, hDirAC->hConfig->enc_param_start_band ) ); @@ -2986,11 +2980,8 @@ void ivas_dirac_dec_render_sf_fx( hDirACRend->q_buffer_energy[index - 1] = DirAC_mem.reference_power_q; move16(); - push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" ); computeDiffuseness_fixed( hDirACRend->buffer_intensity_real_fx, hDirACRend->buffer_energy_fx, num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], hDirACRend->q_buffer_intensity_real, hDirACRend->q_buffer_energy, &hSpatParamRendCom->q_diffuseness_vector ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 DirACparams computeDiffuseness <-|" );/*/ } - pop_wmops(); /* push_wmops( "(IDR) LOOP1 DirACparams |" );*/ /*-----------------------------------------------------------------* * frequency domain decorrelation @@ -3092,7 +3083,6 @@ void ivas_dirac_dec_render_sf_fx( } /*Compute PSDs*/ - push_wmops( "(IDR) LOOP1 PSDs |" ); h_dirac_output_synthesis_params = &( hDirACRend->h_output_synthesis_psd_params ); h_dirac_output_synthesis_state = &( hDirACRend->h_output_synthesis_psd_state ); num_channels_dir = hDirACRend->num_outputs_dir; @@ -3175,7 +3165,6 @@ void ivas_dirac_dec_render_sf_fx( } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" ); ivas_dirac_dec_output_synthesis_process_slot_fx( reference_power_fx, DirAC_mem.reference_power_q, p_onset_filter_fx, @@ -3193,7 +3182,6 @@ void ivas_dirac_dec_render_sf_fx( md_idx, hodirac_flag, hDirAC->hConfig->dec_param_estim ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 <-|" );/*/ } IF( hDirAC->hConfig->dec_param_estim ) @@ -3264,9 +3252,7 @@ void ivas_dirac_dec_render_sf_fx( v_add_fixed( reference_power_fx, reference_power_smooth_fx, reference_power_smooth_fx, hSpatParamRendCom->num_freq_bands, 1 ); q_reference_power_smooth = sub( q_reference_power_smooth, 1 ); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs |" );*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1" );*/ minimum_s( q_proto_direct_buffer, hSpatParamRendCom->subframe_nbslots[subframe_idx], &hDirACRend->h_output_synthesis_psd_state.proto_direct_buffer_f_q ); IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) @@ -4092,7 +4078,7 @@ void ivas_dirac_dec_render_sf_fx( } } - pop_wmops(); /*push_wmops( "ivas_dirac_dec_render (IDR)" );*/ + pop_wmops(); return; } diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 68ca49ad9..c0327f3af 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -710,13 +710,11 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" ); test(); IF( dec_param_estim == FALSE && hodirac_flag ) { IF( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" ); v_multc_fixed( hSpatParamRendCom->energy_ratio1_fx[md_idx], -MAX_32 /*-1 Q31*/, aux_buf, num_freq_bands ); /* 30 + 31 - 31 -> 30 */ v_addc_fixed( aux_buf, ONE_IN_Q30 /*1 Q30*/, aux_buf, num_freq_bands ); /*30*/ Copy32( hSpatParamRendCom->energy_ratio1_fx[md_idx], @@ -739,23 +737,19 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); h_dirac_output_synthesis_state->direct_power_factor_q = 30; move16(); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B1<<<-|" );/*/ } ELSE { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" ); ivas_dirac_dec_compute_gain_factors_fx( num_freq_bands, hSpatParamRendCom->diffuseness_vector_fx[md_idx], h_dirac_output_synthesis_state->direct_power_factor_fx, h_dirac_output_synthesis_state->diffuse_power_factor_fx, &h_dirac_output_synthesis_state->direct_power_factor_q, &h_dirac_output_synthesis_state->diffuse_power_factor_q ); - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B2<<<-|" );*/ } } ELSE IF( EQ_16( dec_param_estim, TRUE ) ) { - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" ); /* compute direct responses */ ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom, hDirACRend, @@ -817,7 +811,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( Q_temp_cy_cross_dir_smooth_fx[kk] = h_dirac_output_synthesis_state->q_cy_cross_dir_smooth; move16(); } - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" ); + FOR( ch_idx = 0; ch_idx < s_min( 4, nchan_transport ); ch_idx++ ) { Word16 k; @@ -912,8 +906,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } c = Madd_32_16( ONE_IN_Q27 /*1 Q27*/, L_sub( h_dirac_output_synthesis_params->diffuse_compensation_factor_fx, ONE_IN_Q27 /*1 Q27*/ ), 5461 ); /*Diffuseness modellling nrg compensation*/ /* 1.0 / 6.0 = 5461 in Q15*/ /*Q27*/ - - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" ); #ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_output_synthesis_process_slot FOR( ; k < num_freq_bands; k++ ) { @@ -1064,9 +1056,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } #endif - - - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop B <<<<<-|" );*/ } ELSE { @@ -1142,8 +1131,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } } } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 bigloop <<<<-|" );/*/ - Word16 temp = MAX_16; /*q0*/ move16(); tmp16 = imult1616( num_freq_bands, num_channels_dir ); @@ -1160,8 +1147,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( } free( Q_temp_cy_cross_dir_smooth_fx ); /*Directional gain (panning)*/ - push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" ); - Word16 temp_q = sub( add( h_dirac_output_synthesis_state->direct_power_factor_q, h_dirac_output_synthesis_state->direct_responses_q ), 31 ); IF( LT_16( temp_q, h_dirac_output_synthesis_state->q_cy_cross_dir_smooth ) ) { @@ -1243,7 +1228,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( num_freq_bands, 0 ); /*Q(h_dirac_output_synthesis_state->q_cy_cross_dir_smooth)*/ } #endif - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3.3 gainpanning <<<<-|" );*/ /*Diffuse gain*/ FOR( ch_idx = s_min( 4, nchan_transport ); ch_idx < num_channels_diff; ch_idx++ ) @@ -1262,8 +1246,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( &h_dirac_output_synthesis_state->cy_auto_diff_smooth_fx[ch_idx * num_freq_bands_diff], num_freq_bands_diff, 0 ); /*h_dirac_output_synthesis_state->q_cy_auto_diff_smooth*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3 <<-|" );/*/ - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ + return; } ELSE @@ -1282,9 +1265,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( h_dirac_output_synthesis_state->direct_power_factor_q = 31; move16(); } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B3<<<-|" );/*/ } - pop_wmops(); /*push_wmops( "(IDR) LOOP1 PSDs PATH3 B <<-|" );/*/ diff_start_band = 0; move16(); @@ -3032,8 +3013,6 @@ void ivas_dirac_dec_compute_directional_responses_fx( Word16 dipole_freq_range[2]; MASA_TRANSPORT_SIGNAL_TYPE transport_signal_type; - push_wmops( "(IDR PATH3 B3.1)" ); - Q_direct_response_ls = Q31; move16(); exp_direct_response_ls = 0; @@ -3095,7 +3074,6 @@ void ivas_dirac_dec_compute_directional_responses_fx( LT_16( k, MASA_band_grouping_24[masa_band_mapping[add( codingBand, 1 )]] ) && NE_16( k, hDirACRend->h_output_synthesis_psd_params.max_band_decorr ) ) { - push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " ); /* Panning gains have to be computed only for the first bin of the coding band in MASA, for other bins the previous values can be used */ IF( NE_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { @@ -3107,15 +3085,12 @@ void ivas_dirac_dec_compute_directional_responses_fx( hSpatParamRendCom->num_freq_bands, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) mvr2r_inc_fixed " );*/ } ELSE { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " ); /* HOA3 PANNING */ IF( EQ_16( hDirACRend->panningConf, DIRAC_PANNING_HOA3 ) ) { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF" ); set32_fx( direct_response_hoa_fx, ONE_IN_Q29, MAX_OUTPUT_CHANNELS ); /*q29*/ set32_fx( direct_response_dir2_fx, ONE_IN_Q29, MAX_OUTPUT_CHANNELS ); /*q29*/ @@ -3128,21 +3103,17 @@ void ivas_dirac_dec_compute_directional_responses_fx( exp_direct_response_dir2 = 0; move16(); - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" ); IF( p_Rmat != 0 ) { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" ); ivas_dirac_dec_get_response_split_order_fx( azimuth[k], elevation[k], direct_response_hoa_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_hoa ); IF( hodirac_flag ) { ivas_dirac_dec_get_response_split_order_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, shd_rot_max_order, p_Rmat, &Q_direct_response_dir2 ); } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse_SPLIT_FX" );*/ } ELSE { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" ); #ifdef FIX_1310_SPEEDUP_ivas_dirac_dec_get_response_fx ivas_dirac_dec_get_response_fx_29( azimuth[k], elevation[k], direct_response_hoa_fx, hDirACRend->hOutSetup.ambisonics_order ); #else @@ -3157,9 +3128,7 @@ void ivas_dirac_dec_compute_directional_responses_fx( ivas_dirac_dec_get_response_fx( azimuth2[k], elevation2[k], direct_response_dir2_fx, hDirACRend->hOutSetup.ambisonics_order, Q_direct_response_dir2 ); #endif } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse__FX" );*/ } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- getResponse" );*/ test(); test(); @@ -3167,19 +3136,16 @@ void ivas_dirac_dec_compute_directional_responses_fx( test(); IF( masa_band_mapping == NULL && EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" ); mvr2r_inc_fixed( direct_response_hoa_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_hoa*/ IF( hodirac_flag ) { mvr2r_inc_fixed( direct_response_dir2_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k + hSpatParamRendCom->num_freq_bands * num_channels_dir], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*Q_direct_response_dir2*/ } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH1" );*/ } ELSE IF( ( ( EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_GAIN_SHD ) ) && ( masa_band_mapping != NULL ) ) || EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_PSD_SHD ) || EQ_16( hDirACRend->synthesisConf, DIRAC_SYNTHESIS_MONO ) ) { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" ); /* Synthesize the first direction */ IF( GT_16( Q_direct_response_hoa, Q29 ) ) { @@ -3426,17 +3392,14 @@ void ivas_dirac_dec_compute_directional_responses_fx( } mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*q29*/ - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF -- PATH2" );*/ } ELSE { assert( 0 && "Not supported synthesis method!" ); } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING IF" );*/ } ELSE IF( EQ_16( hDirACRend->panningConf, DIRAC_PANNING_VBAP ) ) /*VBAP*/ { - push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" ); /* Synthesize the first direction */ spreadCoherencePanningVbap_fx( azimuth[k], elevation[k], hSpatParamRendCom->spreadCoherence_fx[md_idx][k], direct_response_ls_fx, &Q_direct_response_ls, num_channels_dir, hVBAPdata ); @@ -3688,13 +3651,11 @@ void ivas_dirac_dec_compute_directional_responses_fx( mvr2r_inc_fixed( direct_response_square_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_square_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_square_q*/ mvr2r_inc_fixed( direct_response_fx, 1, &hDirACRend->h_output_synthesis_psd_state.direct_responses_fx[k], hSpatParamRendCom->num_freq_bands, num_channels_dir ); /*direct_response_q*/ - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING ELSE" );*/ } ELSE { assert( 0 && "Not supported panning method!" ); } - pop_wmops(); /*push_wmops( "(IDR PATH3 B3.1) HOA3 PANNING " );*/ } } @@ -3703,8 +3664,6 @@ void ivas_dirac_dec_compute_directional_responses_fx( hDirACRend->h_output_synthesis_psd_state.direct_responses_square_q = direct_response_square_q; move16(); - - pop_wmops(); return; } -- GitLab From af3a0391da504aecba2648bf91eb6104199e5a9e Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 14:54:26 +0100 Subject: [PATCH 16/18] fixed arithmetic symbol --- lib_rend/ivas_dirac_output_synthesis_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index 1e79224f6..ddd568698 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -914,7 +914,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( IF( reference_power[k + num_freq_bands] == 0 ) { sqr_inp = Mpy_32_32( diffuseness[k], c ); - sqr_exp = sub( 31 - 4, q_diffuseness ); + sqr_exp = sub( 31 + 4, q_diffuseness ); } ELSE { -- GitLab From 1aa2c023baeaa1ee514e8d84709c04ed4dcf9315 Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 14:34:08 +0000 Subject: [PATCH 17/18] Last cleanup --- lib_rend/ivas_dirac_output_synthesis_dec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index ddd568698..eabdf9389 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1055,7 +1055,6 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } -#endif } ELSE { -- GitLab From 12d2affe11c67abbdc3fbbc96f2f71be593c80bc Mon Sep 17 00:00:00 2001 From: Fabian Bauer Date: Tue, 4 Mar 2025 15:41:07 +0100 Subject: [PATCH 18/18] revert last commit --- lib_rend/ivas_dirac_output_synthesis_dec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib_rend/ivas_dirac_output_synthesis_dec.c b/lib_rend/ivas_dirac_output_synthesis_dec.c index eabdf9389..ddd568698 100644 --- a/lib_rend/ivas_dirac_output_synthesis_dec.c +++ b/lib_rend/ivas_dirac_output_synthesis_dec.c @@ -1055,6 +1055,7 @@ void ivas_dirac_dec_output_synthesis_process_slot_fx( move16(); } } +#endif } ELSE { -- GitLab