From 9230c2956c4af8efefae97b3780b37203b808811 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 9 Jun 2025 19:33:07 +0530 Subject: [PATCH] Fix for 3GPP issue 1732: Accuracy of spatial metadata in ivas_omasa_enc_fx compared to the floating point equivalent Link #1732 --- lib_com/ivas_dirac_com_fx.c | 33 ++++++++---- lib_enc/ivas_omasa_enc_fx.c | 101 ++++++++++++++++++++++++++---------- 2 files changed, 96 insertions(+), 38 deletions(-) diff --git a/lib_com/ivas_dirac_com_fx.c b/lib_com/ivas_dirac_com_fx.c index 2be3f8ace..0922e0289 100644 --- a/lib_com/ivas_dirac_com_fx.c +++ b/lib_com/ivas_dirac_com_fx.c @@ -796,19 +796,23 @@ void computeDiffuseness_fixed( { Word32 intensity_slow[DIRAC_NUM_DIMS * CLDFB_NO_CHANNELS_MAX]; Word32 intensity_slow_abs[CLDFB_NO_CHANNELS_MAX]; + Word64 tmp_intensity_slow_abs[CLDFB_NO_CHANNELS_MAX]; + Word16 intensity_slow_abs_exp[CLDFB_NO_CHANNELS_MAX]; Word32 energy_slow[CLDFB_NO_CHANNELS_MAX]; - Word16 i, j, k; + Word16 i, j, k, tmp16; Word32 tmp = 0; move32(); Word32 *p_tmp; const Word32 *p_tmp_c; Word16 min_q_shift1, min_q_shift2, exp1, exp2, q_tmp; - Word16 q_ene, q_intensity, q_intensity_slow; + Word16 q_ene, q_intensity; /* Compute Intensity slow and energy slow buffer_intensity and buffer_energy */ - set_zero_fx( intensity_slow, i_mult( DIRAC_NUM_DIMS, CLDFB_NO_CHANNELS_MAX ) ); + set_zero_fx( intensity_slow, DIRAC_NUM_DIMS * CLDFB_NO_CHANNELS_MAX ); set_zero_fx( intensity_slow_abs, CLDFB_NO_CHANNELS_MAX ); + set16_fx( intensity_slow_abs_exp, 0, CLDFB_NO_CHANNELS_MAX ); + set64_fx( tmp_intensity_slow_abs, 0, CLDFB_NO_CHANNELS_MAX ); set_zero_fx( energy_slow, CLDFB_NO_CHANNELS_MAX ); /* Calculate max possible shift for the buffer buffer_energy and buffer_intensity */ @@ -892,10 +896,8 @@ void computeDiffuseness_fixed( q_intensity = s_min( q_intensity, q_tmp ); } - min_q_shift1 = getScaleFactor32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ) ); - min_q_shift1 = sub( min_q_shift1, idiv1616( add( find_guarded_bits_fx( DIRAC_NUM_DIMS ), 1 ), 2 ) ); - scale_sig32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ), min_q_shift1 ); - q_intensity = add( q_intensity, min_q_shift1 ); + scale_sig32( intensity_slow, i_mult( DIRAC_NUM_DIMS, num_freq_bands ), -2 ); + q_intensity = sub( q_intensity, 2 ); /* intensity_slow.^2 + intensity_slow_abs*/ FOR( j = 0; j < DIRAC_NUM_DIMS; ++j ) @@ -904,11 +906,19 @@ void computeDiffuseness_fixed( FOR( k = 0; k < num_freq_bands; k++ ) { - intensity_slow_abs[k] = Madd_32_32( intensity_slow_abs[k], p_tmp[k], p_tmp[k] ); - move32(); + tmp_intensity_slow_abs[k] = W_mac_32_32( tmp_intensity_slow_abs[k], p_tmp[k], p_tmp[k] ); + move64(); } } - q_intensity_slow = sub( add( q_intensity, q_intensity ), 31 ); + + FOR( k = 0; k < num_freq_bands; k++ ) + { + tmp16 = W_norm( tmp_intensity_slow_abs[k] ); + intensity_slow_abs[k] = W_extract_h( W_shl( tmp_intensity_slow_abs[k], tmp16 ) ); + move32(); + intensity_slow_abs_exp[k] = sub( 31, sub( add( add( shl( q_intensity, 1 ), 1 ), tmp16 ), 32 ) ); + move16(); + } /* Compute Diffuseness */ p_tmp = intensity_slow_abs; @@ -916,7 +926,8 @@ void computeDiffuseness_fixed( move16(); FOR( i = 0; i < num_freq_bands; ++i ) { - exp1 = sub( 31, q_intensity_slow ); + exp1 = intensity_slow_abs_exp[i]; + move16(); tmp = Sqrt32( p_tmp[i], &exp1 ); tmp = BASOP_Util_Divide3232_Scale_newton( tmp, L_add( energy_slow[i], EPSILLON_FX ), &exp2 ); diff --git a/lib_enc/ivas_omasa_enc_fx.c b/lib_enc/ivas_omasa_enc_fx.c index b88bb69a1..58ba680d6 100644 --- a/lib_enc/ivas_omasa_enc_fx.c +++ b/lib_enc/ivas_omasa_enc_fx.c @@ -82,13 +82,14 @@ static void ivas_omasa_dmx_fx( Word16 prev_gains[][MASA_MAX_TRANSPORT_CHANNELS], /*o:q15*/ const Word16 interpolator[L_FRAME48k] /*i:q15*/ ); -static void computeIntensityVector_enc_fx( - const Word16 *band_grouping, /* i : Band grouping for estimation */ - Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ - Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */ - const Word16 num_frequency_bands, /* i : Number of frequency bands */ - Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS], /* o : Intensity vector */ - Word16 guard_bits ); +void computeIntensityVector_enc_fx( + const Word16 *band_grouping, + Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /*inp_q*/ + Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /*inp_q*/ + const Word16 num_frequency_bands, + Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS], /*exp: exp_intensity_real*/ + Word16 *exp_intensity_real, + Word16 inp_q ); static void computeReferencePower_omasa_ivas_fx( const Word16 *band_grouping, Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], Word32 *reference_power, const Word16 enc_param_start_band, const Word16 num_freq_bands, Word16 q_Cldfb, Word16 *ref_exp ); /*--------------------------------------------------------------------------* * ivas_omasa_enc_open() @@ -1191,8 +1192,13 @@ static void ivas_omasa_param_est_enc_fx( norm_buff = s_min( norm_buff, L_norm_arr( &Foa_ImagBuffer_fx[0][0], FOA_CHANNELS * CLDFB_NO_CHANNELS_MAX ) ); guard_bits = find_guarded_bits_fx( max_band_grouping_diff ); guard_bits = add( guard_bits, sub( 1, norm_buff ) ); - computeIntensityVector_enc_fx( hOMasa->band_grouping, Foa_RealBuffer_fx, Foa_ImagBuffer_fx, num_freq_bands, intensity_real_fx, guard_bits ); - intensity_real_e = sub( add( 62, guard_bits ), shl( q, 1 ) ); + + scale_sig32( &Foa_RealBuffer_fx[0][0], FOA_CHANNELS * CLDFB_NO_CHANNELS_MAX, negate( guard_bits ) ); + scale_sig32( &Foa_ImagBuffer_fx[0][0], FOA_CHANNELS * CLDFB_NO_CHANNELS_MAX, negate( guard_bits ) ); + + q = sub( q, guard_bits ); + + computeIntensityVector_enc_fx( hOMasa->band_grouping, Foa_RealBuffer_fx, Foa_ImagBuffer_fx, num_freq_bands, intensity_real_fx, &intensity_real_e, q ); computeDirectionVectors_fixed( intensity_real_fx[0], intensity_real_fx[1], intensity_real_fx[2], 0, num_freq_bands, direction_vector_fx[0], direction_vector_fx[1], direction_vector_fx[2], intensity_real_e, NULL ); @@ -1478,20 +1484,27 @@ static void ivas_omasa_dmx_fx( } void computeIntensityVector_enc_fx( - const Word16 *band_grouping, /* i : Band grouping for estimation */ - Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal */ - Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Imag part of input signal */ - const Word16 num_frequency_bands, /* i : Number of frequency bands */ - Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS], /* o : Intensity vector */ - Word16 guard_bits ) + const Word16 *band_grouping, + Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /*inp_q*/ + Word32 Cldfb_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /*inp_q*/ + const Word16 num_frequency_bands, + Word32 intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS], /*exp: exp_intensity_real*/ + Word16 *exp_intensity_real, + Word16 inp_q ) { - /* Reminder - * X = a + ib; Y = c + id - * X*Y = ac - bd + i(ad +bc) - */ Word16 i, j; Word32 real, img; - Word16 brange[2]; + Word16 brange[2], shift = 63; + move16(); + Flag is_zero = 0; + move16(); + + Word64 intensity_real64[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS]; + + FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + set64_fx( intensity_real64[i], 0, MASA_FREQUENCY_BANDS ); + } FOR( i = 0; i < num_frequency_bands; i++ ) { @@ -1510,21 +1523,55 @@ void computeIntensityVector_enc_fx( FOR( j = brange[0]; j < brange[1]; j++ ) { real = Cldfb_RealBuffer[0][j]; - img = Cldfb_ImagBuffer[0][j]; - /* Intensity is XYZ order, audio is WYZX order. */ - intensity_real[0][i] = L_add( intensity_real[0][i], L_add( L_shr( Mpy_32_32( Cldfb_RealBuffer[3][j], real ), guard_bits ), L_shr( Mpy_32_32( Cldfb_ImagBuffer[3][j], img ), guard_bits ) ) ); // output Q= 2* input_q -31-guard_bits - move32(); - intensity_real[1][i] = L_add( intensity_real[1][i], L_add( L_shr( Mpy_32_32( Cldfb_RealBuffer[1][j], real ), guard_bits ), L_shr( Mpy_32_32( Cldfb_ImagBuffer[1][j], img ), guard_bits ) ) ); // output Q= 2* input_q -31-guard_bits move32(); - intensity_real[2][i] = L_add( intensity_real[2][i], L_add( L_shr( Mpy_32_32( Cldfb_RealBuffer[2][j], real ), guard_bits ), L_shr( Mpy_32_32( Cldfb_ImagBuffer[2][j], img ), guard_bits ) ) ); // output Q= 2* input_q -31-guard_bits + img = Cldfb_ImagBuffer[0][j]; move32(); + /* Intensity is XYZ order, audio is WYZX order. */ + intensity_real64[0][i] = W_add( intensity_real64[0][i], W_add( W_mult0_32_32( Cldfb_RealBuffer[3][j], real ), W_mult0_32_32( Cldfb_ImagBuffer[3][j], img ) ) ); // output Q= 2* input_q + move64(); + intensity_real64[1][i] = W_add( intensity_real64[1][i], W_add( W_mult0_32_32( Cldfb_RealBuffer[1][j], real ), W_mult0_32_32( Cldfb_ImagBuffer[1][j], img ) ) ); // output Q= 2* input_q + move64(); + intensity_real64[2][i] = W_add( intensity_real64[2][i], W_add( W_mult0_32_32( Cldfb_RealBuffer[2][j], real ), W_mult0_32_32( Cldfb_ImagBuffer[2][j], img ) ) ); // output Q= 2* input_q + move64(); + } + } + + FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + is_zero = is_zero_arr64( intensity_real64[i], MASA_FREQUENCY_BANDS ); + IF( is_zero == 0 ) + { + BREAK; } } + IF( is_zero == 0 ) + { + FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + shift = s_min( shift, W_norm_arr( intensity_real64[i], MASA_FREQUENCY_BANDS ) ); + } + + FOR( i = 0; i < DIRAC_NUM_DIMS; i++ ) + { + FOR( j = 0; j < MASA_FREQUENCY_BANDS; j++ ) + { + intensity_real[i][j] = W_extract_h( W_shl( intensity_real64[i][j], shift ) ); + move32(); + } + } + + *exp_intensity_real = sub( 31, sub( add( shift, shl( inp_q, 1 ) ), 32 ) ); + move16(); + } + ELSE + { + *exp_intensity_real = 0; + move16(); + } return; } - static void computeReferencePower_omasa_ivas_fx( const Word16 *band_grouping, /* i : Band grouping for estimation */ Word32 Cldfb_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX], /* i : Real part of input signal Q6*/ -- GitLab