From 273a7210137ba2b3865ab2e1dfde9279f841adf3 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Tue, 28 Jan 2025 14:19:18 +0530
Subject: [PATCH] Fix for 3GPP issue 1078: MDCT-stereo at 48kbps for STV
 produces severely distorted output with BASOP encoder

Link #1078
---
 lib_com/wtda_fx.c            | 44 ++++++++++++++++++------------------
 lib_enc/ivas_mdct_core_enc.c | 12 +++++-----
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/lib_com/wtda_fx.c b/lib_com/wtda_fx.c
index 5f35a3c29..e060984df 100644
--- a/lib_com/wtda_fx.c
+++ b/lib_com/wtda_fx.c
@@ -572,7 +572,7 @@ void wtda_ivas_fx(
 
 void wtda_ext_fx(
     const Word16 *new_audio,  /* i  : input audio (Q_in)                  */
-    Word16 *wtda_audio,       /* o  : windowed audio (Q_in)               */
+    Word16 *wtda_audio,       /* o  : windowed audio (Q_in - 1)           */
     const Word16 left_mode,   /* i  : window overlap of previous frame (0: full, 2: none, or 3: half) */
     const Word16 right_mode,  /* i  : window overlap of current frame (0: full, 2: none, or 3: half) */
     const Word16 L,           /* i  : length                              */
@@ -628,10 +628,10 @@ void wtda_ext_fx(
             FOR( i = 0; i < L / 2 - n; i += 2 )
             {
                 wtda_audio[i] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_int_right[3 * L_FRAME16k / 2 - i / 2 - 1 - windecay16] ),
-                                                  allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in
+                                                  allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in - 1
                 move16();
                 wtda_audio[i + 1] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), win_right[( 3 * L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay - windecay48] ),
-                                                      allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in
+                                                      allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in - 1
                 move16();
             }
         }
@@ -640,19 +640,19 @@ void wtda_ext_fx(
             FOR( i = 0; i < L / 2 - n; i += 2 )
             {
                 wtda_audio[i] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_int_right[3 * L_FRAME16k / 2 - i / 2 - 1 - windecay16] ),
-                                                  allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in
+                                                  allsig_r[L / 2 + i], win_int_right[3 * L_FRAME16k / 2 + i / 2 - windecay16] ) ); // q_in - 1
                 move16();
                 wtda_audio[i + 1] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), win_right[( 3 * L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay - windecay48] ),
-                                                      allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in
+                                                      allsig_r[L / 2 + i + 1], win_right[( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48] ) ); // q_in - 1
                 move16();
             }
         }
 
         FOR( i = L / 2 - n; i < L / 2; i += 2 )
         {
-            wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in
+            wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in - 1
             move16();
-            wtda_audio[i + 1] = shr( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), 1 ); // q_in
+            wtda_audio[i + 1] = shr( negate( allsig_r[L / 2 - ( i + 1 ) - 1] ), 1 ); // q_in - 1
             move16();
         }
 
@@ -661,20 +661,20 @@ void wtda_ext_fx(
             FOR( i = 0; i < n; i += 2 )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[( i / 2 ) * decimate + decay] ),
-                                                          new_audio[n - i - 1], MAX16B ) ); // q_in
+                                                          new_audio[n - i - 1], MAX16B ) ); // q_in - 1
                 move16();
                 wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( negate( allsig_l[i + 1] ), win_int_left[i / 2] ),
-                                                              new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in
+                                                              new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in - 1
                 move16();
             }
 
             FOR( i = n; i < L / 2; i += 2 )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[( i / 2 ) * decimate + decay] ),
-                                                          allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in
+                                                          allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in - 1
                 move16();
                 wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( negate( allsig_l[i + 1] ), win_int_left[i / 2] ),
-                                                              allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in
+                                                              allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in - 1
                 move16();
             }
         }
@@ -683,20 +683,20 @@ void wtda_ext_fx(
             FOR( i = 0; i < n; i += 2 )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[( i / 2 ) * decimate + decay] ),
-                                                          new_audio[n - i - 1], MAX16B ) ); // q_in
+                                                          new_audio[n - i - 1], MAX16B ) ); // q_in - 1
                 move16();
                 wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( allsig_l[i + 1], win_int_left[i / 2] ),
-                                                              new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in
+                                                              new_audio[n - ( i + 1 ) - 1], MAX16B ) ); // q_in - 1
                 move16();
             }
 
             FOR( i = n; i < L / 2; i += 2 )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[( i / 2 ) * decimate + decay] ),
-                                                          allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in
+                                                          allsig_l[L - i - 1], win_left[( L / 2 - i / 2 ) * decimate - 1 - decay] ) ); // q_in - 1
                 move16();
                 wtda_audio[i + L / 2 + 1] = round_fx( L_msu0( L_mult0( allsig_l[i + 1], win_int_left[i / 2] ),
-                                                              allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in
+                                                              allsig_l[L - ( i + 1 ) - 1], win_int_left[L / 2 - i / 2 - 1] ) ); // q_in - 1
                 move16();
             }
         }
@@ -708,7 +708,7 @@ void wtda_ext_fx(
             FOR( i = 0; i < L / 2 - n; i++ )
             {
                 wtda_audio[i] = round_fx( L_mac0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_right[3 * L / 2 * decimate - ( i + 1 ) * decimate + decay - windecay48] ),
-                                                  allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in
+                                                  allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in - 1
                 move16();
             }
         }
@@ -717,14 +717,14 @@ void wtda_ext_fx(
             FOR( i = 0; i < L / 2 - n; i++ )
             {
                 wtda_audio[i] = round_fx( L_msu0( L_mult0( negate( allsig_r[L / 2 - i - 1] ), win_right[3 * L / 2 * decimate - ( i + 1 ) * decimate + decay - windecay48] ),
-                                                  allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in
+                                                  allsig_r[L / 2 + i], win_right[3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48] ) ); // q_in - 1
                 move16();
             }
         }
 
         FOR( i = L / 2 - n; i < L / 2; i++ )
         {
-            wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in
+            wtda_audio[i] = shr( negate( allsig_r[L / 2 - i - 1] ), 1 ); // q_in - 1
             move16();
         }
 
@@ -733,14 +733,14 @@ void wtda_ext_fx(
             FOR( i = 0; i < n; i++ )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[i * decimate + decay] ),
-                                                          new_audio[n - i - 1], MAX16B ) ); // q_in
+                                                          new_audio[n - i - 1], MAX16B ) ); // q_in - 1
                 move16();
             }
 
             FOR( i = n; i < L / 2; i++ )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( negate( allsig_l[i] ), win_left[i * decimate + decay] ),
-                                                          allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in
+                                                          allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in - 1
                 move16();
             }
         }
@@ -749,14 +749,14 @@ void wtda_ext_fx(
             FOR( i = 0; i < n; i++ )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[i * decimate + decay] ),
-                                                          new_audio[n - i - 1], MAX16B ) ); // q_in
+                                                          new_audio[n - i - 1], MAX16B ) ); // q_in - 1
                 move16();
             }
 
             FOR( i = n; i < L / 2; i++ )
             {
                 wtda_audio[i + L / 2] = round_fx( L_msu0( L_mult0( allsig_l[i], win_left[i * decimate + decay] ),
-                                                          allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in
+                                                          allsig_l[L - i - 1], win_left[L * decimate - i * decimate - 1 - decay] ) ); // q_in - 1
                 move16();
             }
         }
diff --git a/lib_enc/ivas_mdct_core_enc.c b/lib_enc/ivas_mdct_core_enc.c
index 4fb6108e1..734fbcf9d 100644
--- a/lib_enc/ivas_mdct_core_enc.c
+++ b/lib_enc/ivas_mdct_core_enc.c
@@ -436,24 +436,24 @@ static void kernel_switch_update_transforms_fx(
 #else
         Scale_sig( &tcxTimeSignal[n - s], shl( s, 1 ), -Q1 ); // Q0 -> Q-1
 #endif
-        wtda_ext_fx( tcxTimeSignal, windowedTimeSignal_16, extract_l( windowedTimeSignal[0] ), extract_l( windowedTimeSignal[1] ), s, kernelType ); // Q-1
+        wtda_ext_fx( tcxTimeSignal, windowedTimeSignal_16, extract_l( windowedTimeSignal[0] ), extract_l( windowedTimeSignal[1] ), s, kernelType ); // Q-2
 #ifdef MSAN_FIX
         Scale_sig( &tcxTimeSignal[n - s], add( sub( shl( s, 1 ), n ), 1 ), Q1 ); // Q-1 -> Q0
 #else
         Scale_sig( &tcxTimeSignal[n - s], shl( s, 1 ), Q1 );  // Q-1 -> Q0
 #endif
-        Copy_Scale_sig_16_32_no_sat( windowedTimeSignal_16, windowedTimeSignal, s, Q16 ); // Q15
-        scale_sig32( windowedTimeSignal, s, -Q8 /* guard bits */ );                       // Q7
+        Copy_Scale_sig_16_32_no_sat( windowedTimeSignal_16 /* Q-2 */, windowedTimeSignal, s, Q16 ); // Q14
+        scale_sig32( windowedTimeSignal, s, -Q8 /* guard bits */ );                                 // Q6
         edxt_fx( windowedTimeSignal, sigR, s, kernelType, FALSE );
 
         tmp = BASOP_Util_Divide1616_Scale( NORM_MDCT_FACTOR, s, &exp_tmp );
         tmp = Sqrt16( tmp, &exp_tmp );
         factor = L_shl( L_deposit_h( tmp ), sub( exp_tmp, Q1 ) ); // Q30
-        v_multc_fixed( sigR, factor, sigR, s );                   // (Q7, Q30) -> Q6
+        v_multc_fixed( sigR, factor, sigR, s );                   // (Q6, Q30) -> Q5
 
         q_com = L_norm_arr( sigR, s );
-        q_com = s_min( add( q_com, Q6 ), *q_sig );
-        scale_sig32( sigR, s, sub( q_com, Q6 ) );     // q_com
+        q_com = s_min( add( q_com, Q5 ), *q_sig );
+        scale_sig32( sigR, s, sub( q_com, Q5 ) );     // q_com
         scale_sig32( sigI, s, sub( q_com, *q_sig ) ); // q_com
         *q_sig = q_com;
         move16();
-- 
GitLab