From 67f74ae7d4354cfb68eac5ca5b4746d60694c5ed Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Wed, 28 May 2025 11:36:09 +0530
Subject: [PATCH] Fix for 3GPP issue 1504: BASOP decoder to MONO output for
 MASA 2TC LTV input exhibits higher MLD

Link #1504
---
 lib_rend/ivas_dirac_rend_fx.c | 122 ++++++++++++----------------------
 lib_rend/ivas_stat_rend.h     |   4 +-
 2 files changed, 44 insertions(+), 82 deletions(-)

diff --git a/lib_rend/ivas_dirac_rend_fx.c b/lib_rend/ivas_dirac_rend_fx.c
index 063c70f66..99a84bf52 100644
--- a/lib_rend/ivas_dirac_rend_fx.c
+++ b/lib_rend/ivas_dirac_rend_fx.c
@@ -1759,17 +1759,19 @@ void protoSignalComputation2_fx(
     Word32 lr_total_bb_ratio_fx, lr_total_hi_ratio_fx;
     Word32 min_sum_total_ratio_fx, min_sum_total_ratio_db_fx;
     Word32 sum_total_ratio_fx[MASA_SUM_FREQ_RANGE_BINS];
-    Word16 q_sum_total_ratio;
+    Word16 exp_sum_total_ratio_fx[MASA_SUM_FREQ_RANGE_BINS];
+    Word16 q_sum_total_ratio = 0;
+    move16();
     Word32 a_fx, b_fx, a2_fx, b2_fx;
     Word16 interpolatorSpaced_fx, interpolatorDmx_fx;
     Word32 tempSpaced_fx, tempDmx_fx;
 #ifdef FIX_867_CLDFB_NRG_SCALE
-    Word16 q_shift, min_q_shift[2], exp, q_temp[2], temp_q_shift, q_temp2;
+    Word16 q_shift, min_q_shift[2], exp, q_temp[2];
 #else
     Word16 q_shift, min_q_shift, exp, q_temp, temp_q_shift, q_temp2;
 #endif
     Word32 temp;
-    Word64 W_tmp1, W_tmp2;
+    Word64 W_tmp1, W_tmp2, W_tmp3;
     Word64 reference_power_64fx[CLDFB_NO_CHANNELS_MAX];
     Word16 q_reference_power_64fx;
     Word16 head_room, q_Left_Right_power;
@@ -1786,9 +1788,7 @@ void protoSignalComputation2_fx(
     min_q_shift = Q31;
     move16();
 #endif
-    temp_q_shift = Q31;
-    move16();
-    q_sum_total_ratio = Q31;
+    q_sum_total_ratio = 0;
     move16();
     exp = 0;
     move16();
@@ -1813,7 +1813,6 @@ void protoSignalComputation2_fx(
         min_q_shift = s_min( min_q_shift, q_shift );
 #endif
         q_shift = s_min( L_norm_arr( RealBuffer_fx[l][0], s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ) ), L_norm_arr( ImagBuffer_fx[l][0], s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ) ) );
-        temp_q_shift = s_min( temp_q_shift, q_shift );
     }
 
 #ifdef FIX_867_CLDFB_NRG_SCALE
@@ -1822,7 +1821,6 @@ void protoSignalComputation2_fx(
 #else
     min_q_shift = sub( min_q_shift, 2 ); // guard bits
 #endif
-    temp_q_shift = sub( temp_q_shift, 2 ); // guard bits
 
     /* Upscaling of the buffer proto_power_smooth_fx */
 #ifdef FIX_867_CLDFB_NRG_SCALE
@@ -2072,7 +2070,7 @@ void protoSignalComputation2_fx(
         }
 
 #ifdef FIX_867_CLDFB_NRG_SCALE
-        Word16 total_shift[2], q_temp_total;
+        Word16 total_shift[2];
         /* total_shift shift required to get common Q of sum power values */
         total_shift[0] = shl( s_max( 0, sub( min_q_shift[0], min_q_shift[1] ) ), 1 );
         total_shift[1] = shl( s_max( 0, sub( min_q_shift[1], min_q_shift[0] ) ), 1 );
@@ -2080,13 +2078,11 @@ void protoSignalComputation2_fx(
         min_q_shift[1] = sub( min_q_shift[1], idiv1616( find_guarded_bits_fx( num_freq_bands ), 2 ) );
         q_temp[0] = sub( add( add( q_cldfb, min_q_shift[0] ), add( q_cldfb, min_q_shift[0] ) ), 31 );
         q_temp[1] = sub( add( add( q_cldfb, min_q_shift[1] ), add( q_cldfb, min_q_shift[1] ) ), 31 );
-        q_temp_total = s_min( q_temp[0], q_temp[1] );
 #else
         min_q_shift = sub( min_q_shift, idiv1616( find_guarded_bits_fx( num_freq_bands ), 2 ) );
 
         q_temp = sub( add( add( q_cldfb, min_q_shift ), add( q_cldfb, min_q_shift ) ), 31 );
 #endif
-        q_temp2 = sub( add( add( q_cldfb, temp_q_shift ), add( q_cldfb, temp_q_shift ) ), 31 );
 
         head_room = 63;
         move16();
@@ -2195,77 +2191,50 @@ void protoSignalComputation2_fx(
 
             IF( LT_16( l, s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ) ) )
             {
-#ifdef FIX_867_CLDFB_NRG_SCALE
-                re_aux = L_shl( Real_aux_fx, sub( temp_q_shift, min_q_shift[0] ) ); // q_cldfb+temp_q_shift
-                im_aux = L_shl( Imag_aux_fx, sub( temp_q_shift, min_q_shift[0] ) ); // q_cldfb+temp_q_shift
-#else
-                re_aux = L_shl( Real_aux_fx, sub( temp_q_shift, min_q_shift ) );                                        // q_cldfb+temp_q_shift
-                im_aux = L_shl( Imag_aux_fx, sub( temp_q_shift, min_q_shift ) );                                        // q_cldfb+temp_q_shift
-#endif
+                re_aux = L_add( L_shr( RealBuffer_fx[0][0][l], 1 ), L_shr( RealBuffer_fx[1][0][l], 1 ) );
+                im_aux = L_add( L_shr( ImagBuffer_fx[0][0][l], 1 ), L_shr( ImagBuffer_fx[1][0][l], 1 ) );
 
-                sum_power_fx = Madd_32_32( Mpy_32_32( re_aux, re_aux ), im_aux, im_aux ); // 2*(q_cldfb+temp_q_shift)-31
-                temp = Mpy_32_32( a_fx, sum_power_fx );                                   // 2*(q_cldfb+temp_q_shift)-31
+                W_tmp3 = W_add( W_mult0_32_32( re_aux, re_aux ), W_mult0_32_32( im_aux, im_aux ) );
+                q_shift = W_norm( W_tmp3 );
+                sum_power_fx = W_extract_h( W_shl( W_tmp3, q_shift ) );
+                exp_temppp = sub( 31, sub( add( shl( sub( q_cldfb, 1 ), 1 ), q_shift ), 32 ) );
 
-                IF( LT_16( q_temp2, stereo_type_detect->q_sum_power ) )
-                {
-                    stereo_type_detect->sum_power_fx[l] = L_add( temp, L_shr( Mpy_32_32( b_fx, stereo_type_detect->sum_power_fx[l] ), sub( stereo_type_detect->q_sum_power, q_temp2 ) ) ); // q_temp2
-                    move32();
-                }
-                ELSE
-                {
-                    stereo_type_detect->sum_power_fx[l] = L_add( L_shr( temp, sub( q_temp2, stereo_type_detect->q_sum_power ) ), Mpy_32_32( b_fx, stereo_type_detect->sum_power_fx[l] ) ); // stereo_type_detect->q_sum_power
-                    move32();
-                }
+                stereo_type_detect->sum_power_fx[l] = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, sum_power_fx ), exp_temppp, Mpy_32_32( b_fx, stereo_type_detect->sum_power_fx[l] ), stereo_type_detect->exp_sum_power[l], &stereo_type_detect->exp_sum_power[l] );
+                move32();
+                W_tmp1 = W_add( W_mult0_32_32( RealBuffer_fx[0][0][l], RealBuffer_fx[0][0][l] ), W_mult0_32_32( ImagBuffer_fx[0][0][l], ImagBuffer_fx[0][0][l] ) );
+                W_tmp2 = W_add( W_mult0_32_32( RealBuffer_fx[1][0][l], RealBuffer_fx[1][0][l] ), W_mult0_32_32( ImagBuffer_fx[1][0][l], ImagBuffer_fx[1][0][l] ) );
 
-#ifdef FIX_867_CLDFB_NRG_SCALE
-                temp = Mpy_32_32( a_fx, W_extract_l( W_shr( reference_power_64fx[l], add( 31, total_shift[qidx] ) ) ) ); // 2*(q_cldfb+min_q_shift) -31
-                IF( LT_16( q_temp_total, stereo_type_detect->q_total_power ) )
-                {
-                    stereo_type_detect->total_power_fx[l] = L_add( temp, L_shr( Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ), sub( stereo_type_detect->q_total_power, q_temp_total ) ) ); // q_temp
-                    move32();
-                }
-                ELSE
-                {
-                    stereo_type_detect->total_power_fx[l] = L_add( L_shr( temp, sub( q_temp_total, stereo_type_detect->q_total_power ) ), Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ) ); // stereo_type_detect->q_total_power
-                    move32();
-                }
-#else
-                temp = Mpy_32_32( a_fx, W_extract_l( W_shr( reference_power_64fx[l], 31 ) ) );                          // 2*(q_cldfb+min_q_shift) -31
-                IF( LT_16( q_temp, stereo_type_detect->q_total_power ) )
-                {
-                    stereo_type_detect->total_power_fx[l] = L_add( temp, L_shr( Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ), sub( stereo_type_detect->q_total_power, q_temp ) ) ); // q_temp
-                    move32();
-                }
-                ELSE
-                {
-                    stereo_type_detect->total_power_fx[l] = L_add( L_shr( temp, sub( q_temp, stereo_type_detect->q_total_power ) ), Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ) ); // stereo_type_detect->q_total_power
-                    move32();
-                }
-#endif
+                W_tmp2 = W_add( W_tmp1, W_tmp2 );
+                q_shift = W_norm( W_tmp2 );
+                Total_power_fx = W_extract_h( W_shl( W_tmp2, q_shift ) );
+                exp_temppp = sub( 31, sub( add( shl( q_cldfb, 1 ), q_shift ), 32 ) );
+
+                stereo_type_detect->total_power_fx[l] = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a_fx, Total_power_fx ), exp_temppp, Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ), stereo_type_detect->exp_total_power[l], &stereo_type_detect->exp_total_power[l] );
+                move32();
 
                 test();
                 IF( ( stereo_type_detect->sum_power_fx[l] == 0 ) && ( stereo_type_detect->total_power_fx[l] == 0 ) )
                 {
                     sum_total_ratio_fx[l] = MAX_32; // q15
                     move32();
+                    exp_sum_total_ratio_fx[l] = 16;
+                    move16();
                 }
                 ELSE IF( stereo_type_detect->total_power_fx[l] == 0 )
                 {
                     sum_total_ratio_fx[l] = MAX_32; // q15
                     move32();
+                    exp_sum_total_ratio_fx[l] = 16;
+                    move16();
                 }
                 ELSE
                 {
-                    sum_total_ratio_fx[l] = BASOP_Util_Divide3232_Scale( stereo_type_detect->sum_power_fx[l], stereo_type_detect->total_power_fx[l], &exp ); // 15-(exp+s_min( stereo_type_detect->q_total_power, q_temp )-s_min( stereo_type_detect->q_sum_power, q_temp2 ))
-                    move32();
-#ifdef FIX_867_CLDFB_NRG_SCALE
-                    q_sum_total_ratio = add( sub( 15, exp ), sub( s_min( stereo_type_detect->q_sum_power, q_temp2 ), s_min( stereo_type_detect->q_total_power, q_temp_total ) ) );
-#else
-                    q_sum_total_ratio = add( sub( 15, exp ), sub( s_min( stereo_type_detect->q_sum_power, q_temp2 ), s_min( stereo_type_detect->q_total_power, q_temp ) ) );
-#endif
-                    sum_total_ratio_fx[l] = L_shl( sum_total_ratio_fx[l], sub( Q15, q_sum_total_ratio ) ); // q15
+                    sum_total_ratio_fx[l] = BASOP_Util_Divide3232_Scale_newton( stereo_type_detect->sum_power_fx[l], stereo_type_detect->total_power_fx[l], &exp ); // mantissa; its exponent is stored in exp_sum_total_ratio_fx[l] below
                     move32();
+                    exp_sum_total_ratio_fx[l] = add( exp, sub( stereo_type_detect->exp_sum_power[l], stereo_type_detect->exp_total_power[l] ) );
+                    move16();
                 }
+                q_sum_total_ratio = s_max( q_sum_total_ratio, exp_sum_total_ratio_fx[l] );
             }
 
             IF( l == 0 )
@@ -2626,18 +2595,6 @@ void protoSignalComputation2_fx(
             move32();
         }
 
-        stereo_type_detect->q_sum_power = s_min( stereo_type_detect->q_sum_power, q_temp2 );
-        move16();
-#ifdef FIX_867_CLDFB_NRG_SCALE
-        stereo_type_detect->q_total_power = s_min( stereo_type_detect->q_total_power, q_temp_total );
-        move16();
-#else
-        stereo_type_detect->q_total_power = s_min( stereo_type_detect->q_total_power, q_temp );
-        move16();
-#endif
-        q_sum_total_ratio = Q15;
-        move16();
-
         IF( stereo_type_detect->interpolator > 0 )
         {
             stereo_type_detect->interpolator++;
@@ -2767,8 +2724,15 @@ void protoSignalComputation2_fx(
         // 20480 = 10 in Q11
         lr_total_hi_ratio_fx = Mpy_32_16_1( temp, 20480 ); // Q21
 
+        FOR( Word16 i = 0; i < s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ); i++ )
+        {
+            sum_total_ratio_fx[i] = L_shl( sum_total_ratio_fx[i], sub( exp_sum_total_ratio_fx[i], q_sum_total_ratio ) );
+            move32();
+        }
+
         minimum_l( sum_total_ratio_fx, s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ), &min_sum_total_ratio_fx ); // q_sum_total_ratio
-        exp = sub( 31, q_sum_total_ratio );
+        exp = q_sum_total_ratio;
+        move16();
         temp = BASOP_Util_Log2( min_sum_total_ratio_fx ); // q25
         IF( NE_32( temp, MIN_32 ) )
         {
@@ -3537,10 +3501,8 @@ void ivas_masa_init_stereotype_detection_fx(
     set32_fx( stereo_type_detect->sum_power_fx, 0, MASA_SUM_FREQ_RANGE_BINS );
     set32_fx( stereo_type_detect->total_power_fx, 0, MASA_SUM_FREQ_RANGE_BINS );
 
-    stereo_type_detect->q_sum_power = Q31;
-    move16();
-    stereo_type_detect->q_total_power = Q31;
-    move16();
+    set16_fx( stereo_type_detect->exp_sum_power, 0, MASA_SUM_FREQ_RANGE_BINS );
+    set16_fx( stereo_type_detect->exp_total_power, 0, MASA_SUM_FREQ_RANGE_BINS );
 
     stereo_type_detect->subtract_power_y_fx = 0;
     move32();
diff --git a/lib_rend/ivas_stat_rend.h b/lib_rend/ivas_stat_rend.h
index 90e4f5d69..a13ee27e7 100644
--- a/lib_rend/ivas_stat_rend.h
+++ b/lib_rend/ivas_stat_rend.h
@@ -395,9 +395,9 @@ typedef struct
     Word16 q_total_hi_power;
 
     Word32 sum_power_fx[MASA_SUM_FREQ_RANGE_BINS]; /* Q(q_sum_power) */
-    Word16 q_sum_power;
+    Word16 exp_sum_power[MASA_SUM_FREQ_RANGE_BINS];
     Word32 total_power_fx[MASA_SUM_FREQ_RANGE_BINS]; /* Q(q_total_power) */
-    Word16 q_total_power;
+    Word16 exp_total_power[MASA_SUM_FREQ_RANGE_BINS];
 
     Word32 subtract_power_y_fx; /* Q(q_subtract_power_y) */
     Word16 q_subtract_power_y;
-- 
GitLab