From 273a7210137ba2b3865ab2e1dfde9279f841adf3 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 28 Jan 2025 14:19:18 +0530 Subject: [PATCH] Fix for 3GPP issue 1078: MDCT-stereo at 48kbps for STV produces severely distorted output with BASOP encoder Link #1078 --- lib_com/wtda_fx.c | 44 ++++++++++++++++++------------------ lib_enc/ivas_mdct_core_enc.c | 12 +++++----- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lib_com/wtda_fx.c b/lib_com/wtda_fx.c index 5f35a3c29..e060984df 100644 --- a/lib_com/wtda_fx.c +++ b/lib_com/wtda_fx.c @@ -572,7 +572,7 @@ void wtda_ivas_fx( void wtda_ext_fx( const Word16 *new_audio, /* i : input audio (Q_in) */ - Word16 *wtda_audio, /* o : windowed audio (Q_in) */ + Word16 *wtda_audio, /* o : windowed audio (Q_in - 1) */ const Word16 left_mode, /* i : window overlap of previous frame (0: full, 2: none, or 3: half) */ const Word16 right_mode, /* i : window overlap of current frame (0: full, 2: none, or 3: half) */ const Word16 L, /* i : length */ @@ -628,10 +628,10 @@ void wtda_ext_fx( FOR( i = 0; i < L / 2 - n; i += 2 ) { wtda_audio[i] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_int_right[3 * L_FRAME16k / 2 - i / 2 - 1 - windecay16] ), - allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in + allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in - 1 move16(); wtda_audio[i + 1] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), win_right[( 3 * L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay - windecay48] ), - allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in + allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in - 1 move16(); } } @@ -640,19 +640,19 @@ void wtda_ext_fx( FOR( i = 0; i < L / 2 - n; i += 2 ) { wtda_audio[i] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_int_right[3 * L_FRAME16k / 2 - i / 2 - 1 - windecay16] ), - allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in + allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in - 1 move16(); wtda_audio[i + 1] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), win_right[( 3 * L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay - windecay48] ), - allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in + allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in - 1 move16(); } } FOR( i = L / 2 - n; i < L / 2; i += 2 ) { - wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in + wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in - 1 move16(); - wtda_audio[i + 1] = shr( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), 1 ); // q_in + wtda_audio[i + 1] = shr( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), 1 ); // q_in - 1 move16(); } @@ -661,20 +661,20 @@ void wtda_ext_fx( FOR( i = 0; i < n; i += 2 ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[( i / 2 ) * decimate + decay] ), - new_audio[n - i - 1], MAX16B ) ); // q_in + new_audio[n - i - 1], MAX16B ) ); // q_in - 1 move16(); wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( negate( allsig_l[i + 1] ), win_int_left[i / 2] ), - new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in + new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in - 1 move16(); } FOR( i = n; i < L / 2; i += 2 ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[( i / 2 ) * decimate + decay] ), - allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in + allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in - 1 move16(); wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( negate( allsig_l[i + 1] ), win_int_left[i / 2] ), - allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in + allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in - 1 move16(); } } @@ -683,20 +683,20 @@ void wtda_ext_fx( FOR( i = 0; i < n; i += 2 ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[( i / 2 ) * decimate + decay] ), - new_audio[n - i - 1], MAX16B ) ); // q_in + new_audio[n - i - 1], MAX16B ) ); // q_in - 1 move16(); wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( allsig_l[i + 1], win_int_left[i / 2] ), - new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in + new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in - 1 move16(); } FOR( i = n; i < L / 2; i += 2 ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[( i / 2 ) * decimate + decay] ), - allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in + allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in - 1 move16(); wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( allsig_l[i + 1], win_int_left[i / 2] ), - allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in + allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in - 1 move16(); } } @@ -708,7 +708,7 @@ void wtda_ext_fx( FOR( i = 0; i < L / 2 - n; i++ ) { wtda_audio[i] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_right[3 * L / 2 * decimate - ( i + 1 ) * decimate + decay - windecay48] ), - allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in + allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in - 1 move16(); } } @@ -717,14 +717,14 @@ void wtda_ext_fx( FOR( i = 0; i < L / 2 - n; i++ ) { wtda_audio[i] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_right[3 * L / 2 * decimate - ( i + 1 ) * decimate + decay - windecay48] ), - allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in + allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in - 1 move16(); } } FOR( i = L / 2 - n; i < L / 2; i++ ) { - wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in + wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in - 1 move16(); } @@ -733,14 +733,14 @@ void wtda_ext_fx( FOR( i = 0; i < n; i++ ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[i * decimate + decay] ), - new_audio[n - i - 1], MAX16B ) ); // q_in + new_audio[n - i - 1], MAX16B ) ); // q_in - 1 move16(); } FOR( i = n; i < L / 2; i++ ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[i * decimate + decay] ), - allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in + allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in - 1 move16(); } } @@ -749,14 +749,14 @@ void wtda_ext_fx( FOR( i = 0; i < n; i++ ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[i * decimate + decay] ), - new_audio[n - i - 1], MAX16B ) ); // q_in + new_audio[n - i - 1], MAX16B ) ); // q_in - 1 move16(); } FOR( i = n; i < L / 2; i++ ) { wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[i * decimate + decay] ), - allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in + allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in - 1 move16(); } } diff --git a/lib_enc/ivas_mdct_core_enc.c b/lib_enc/ivas_mdct_core_enc.c index 4fb6108e1..734fbcf9d 100644 --- a/lib_enc/ivas_mdct_core_enc.c +++ b/lib_enc/ivas_mdct_core_enc.c @@ -436,24 +436,24 @@ static void kernel_switch_update_transforms_fx( #else Scale_sig( &tcxTimeSignal[n - s], shl( s, 1 ), -Q1 ); // Q0 -> Q-1 #endif - wtda_ext_fx( tcxTimeSignal, windowedTimeSignal_16, extract_l( windowedTimeSignal[0] ), extract_l( windowedTimeSignal[1] ), s, kernelType ); // Q-1 + wtda_ext_fx( tcxTimeSignal, windowedTimeSignal_16, extract_l( windowedTimeSignal[0] ), extract_l( windowedTimeSignal[1] ), s, kernelType ); // Q-2 #ifdef MSAN_FIX Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), Q1 ); // Q-1 -> Q0 #else Scale_sig( &tcxTimeSignal[n - s], shl( s, 1 ), Q1 ); // Q-1 -> Q0 #endif - Copy_Scale_sig_16_32_no_sat( windowedTimeSignal_16, windowedTimeSignal, s, Q16 ); // Q15 - scale_sig32( windowedTimeSignal, s, -Q8 /* guard bits */ ); // Q7 + Copy_Scale_sig_16_32_no_sat( windowedTimeSignal_16 /* Q(-2) */, windowedTimeSignal, s, Q16 ); // Q14 + scale_sig32( windowedTimeSignal, s, -Q8 /* guard bits */ ); // Q6 edxt_fx( windowedTimeSignal, sigR, s, kernelType, FALSE ); tmp = BASOP_Util_Divide1616_Scale( NORM_MDCT_FACTOR, s, &exp_tmp ); tmp = Sqrt16( tmp, &exp_tmp ); factor = L_shl( L_deposit_h( tmp ), sub( exp_tmp, Q1 ) ); // Q30 - v_multc_fixed( sigR, factor, sigR, s ); // (Q7, Q30) -> Q6 + v_multc_fixed( sigR, factor, sigR, s ); // (Q6, Q30) -> Q5 q_com = L_norm_arr( sigR, s ); - q_com = s_min( add( q_com, Q6 ), *q_sig ); - scale_sig32( sigR, s, sub( q_com, Q6 ) ); // q_com + q_com = s_min( add( q_com, Q5 ), *q_sig ); + scale_sig32( sigR, s, sub( q_com, Q5 ) ); // q_com scale_sig32( sigI, s, sub( q_com, *q_sig ) ); // q_com *q_sig = q_com; move16(); -- GitLab