From 7e2672a529a79cc7e8a5f1037fb6b8295926e431 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 15 May 2025 14:23:40 +0530
Subject: [PATCH] Fix for 3GPP issue 1531: Low-bitrate OMASA shows a somewhat
 high MLD (over 10) in the BASOP decoder when rendering to binaural

Link #1531
---
 lib_dec/FEC_clas_estim_fx.c                   | 16 ++++--
 .../ivas_dirac_dec_binaural_functions_fx.c    | 49 ++++++++++++++----
 lib_rend/ivas_dirac_rend_fx.c                 | 51 ++++++++++++-------
 3 files changed, 85 insertions(+), 31 deletions(-)

diff --git a/lib_dec/FEC_clas_estim_fx.c b/lib_dec/FEC_clas_estim_fx.c
index e56a77be1..2bd38153a 100644
--- a/lib_dec/FEC_clas_estim_fx.c
+++ b/lib_dec/FEC_clas_estim_fx.c
@@ -285,7 +285,17 @@ void FEC_clas_estim_fx(
 
 
             Corre( &pt1[pos], &pt1[pos - T0], T0, &cor_max[0] );
-            T0 = mult_r_sat( add_sat( pitch[2], pitch[3] ), 256 );
+            IF( NE_16( st_fx->element_mode, EVS_MONO ) )
+            {
+                IF( LT_16( sub( pos, T0 ), sub( L_frame, L_SUBFR ) ) )
+                {
+                    T0 = mult_r_sat( add_sat( pitch[2], pitch[3] ), 256 );
+                }
+            }
+            ELSE
+            {
+                T0 = mult_r_sat( add_sat( pitch[2], pitch[3] ), 256 );
+            }
             pos_limit = sub( L_frame, L_SUBFR );
             j = s_min( 1, s_max( 0, sub( pos, pos_limit ) ) );
             Ltmp = L_deposit_l( cor_max[0] );
@@ -725,8 +735,8 @@ void FEC_clas_estim_fx(
 
 
     } /* Do the classification only
- - MODE1: when the class is not transmitted in the bitstream
- - MODE2: on good frames (classifier is also called for bfi=1) */
+    - MODE1: when the class is not transmitted in the bitstream
+    - MODE2: on good frames (classifier is also called for bfi=1) */
 
 
     /* update the memory of synthesis for frame class estimation */
diff --git a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
index 16fefa3bb..c47052d5f 100644
--- a/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
+++ b/lib_rend/ivas_dirac_dec_binaural_functions_fx.c
@@ -3584,24 +3584,53 @@ static void matrixTransp1Mul_fx(
     Word16 chA, chB;
     Word16 size = i_mult( BINAURAL_CHANNELS, BINAURAL_CHANNELS );
 
+    Word64 tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word64 tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 q_tmp_outRe_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word16 q_tmp_outIm_fx[BINAURAL_CHANNELS][BINAURAL_CHANNELS];
+    Word64 tmp64_1, tmp64_2;
+    Word16 tmp16, q_common = 63;
+    move16();
+
     FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
     {
         FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
         {
-            outRe_fx[chA][chB] = Madd_32_32( Madd_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bre_fx[0][chB] ),
-                                                                     Are_fx[1][chA], Bre_fx[1][chB] ),
-                                                         Aim_fx[0][chA], Bim_fx[0][chB] ),
-                                             Aim_fx[1][chA], Bim_fx[1][chB] );
+            tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bre_fx[0][chB] ), Are_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
+            tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bim_fx[0][chB] ), Aim_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
+            tmp_outRe_fx[chA][chB] = W_add( tmp64_1, tmp64_2 );                                                      // Q: add( add( q_A, q_B ), 1 )
+            move64();
+            tmp16 = W_norm( tmp_outRe_fx[chA][chB] );
+            tmp_outRe_fx[chA][chB] = W_shl( tmp_outRe_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) )
+            move64();
+            q_tmp_outRe_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) );
+            move16();
+            q_common = s_min( q_tmp_outRe_fx[chA][chB], q_common );
+
+
+            tmp64_1 = W_mac_32_32( W_mult_32_32( Are_fx[0][chA], Bim_fx[0][chB] ), Are_fx[1][chA], Bim_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
+            tmp64_2 = W_mac_32_32( W_mult_32_32( Aim_fx[0][chA], Bre_fx[0][chB] ), Aim_fx[1][chA], Bre_fx[1][chB] ); // Q: add( add( q_A, q_B ), 1 )
+            tmp_outIm_fx[chA][chB] = W_sub( tmp64_1, tmp64_2 );                                                      // Q: add( add( q_A, q_B ), 1 )
+            move64();
+            tmp16 = W_norm( tmp_outIm_fx[chA][chB] );
+            tmp_outIm_fx[chA][chB] = W_shl( tmp_outIm_fx[chA][chB], tmp16 ); // Q:add( tmp16, add( add( q_A, q_B ), 1 ) )
+            move64();
+            q_tmp_outIm_fx[chA][chB] = add( tmp16, add( add( q_A, q_B ), 1 ) );
+            move16();
+            q_common = s_min( q_tmp_outIm_fx[chA][chB], q_common );
+        }
+    }
+    FOR( chA = 0; chA < BINAURAL_CHANNELS; chA++ )
+    {
+        FOR( chB = 0; chB < BINAURAL_CHANNELS; chB++ )
+        {
+            outRe_fx[chA][chB] = W_extract_h( W_shl( tmp_outRe_fx[chA][chB], sub( q_common, q_tmp_outRe_fx[chA][chB] ) ) );
             move32();
-            outIm_fx[chA][chB] = Msub_32_32( Msub_32_32( Madd_32_32( Mpy_32_32( Are_fx[0][chA], Bim_fx[0][chB] ),
-                                                                     Are_fx[1][chA], Bim_fx[1][chB] ),
-                                                         Aim_fx[0][chA], Bre_fx[0][chB] ),
-                                             Aim_fx[1][chA], Bre_fx[1][chB] );
+            outIm_fx[chA][chB] = W_extract_h( W_shl( tmp_outIm_fx[chA][chB], sub( q_common, q_tmp_outIm_fx[chA][chB] ) ) );
             move32();
         }
     }
-    *q_out = sub( add( q_A, q_B ), 31 );
-
+    *q_out = sub( q_common, 32 );
     move16();
     if ( L_and( is_zero_arr( outRe_fx[0], size ), is_zero_arr( outIm_fx[0], size ) ) )
     {
diff --git a/lib_rend/ivas_dirac_rend_fx.c b/lib_rend/ivas_dirac_rend_fx.c
index 06b37ff5a..063c70f66 100644
--- a/lib_rend/ivas_dirac_rend_fx.c
+++ b/lib_rend/ivas_dirac_rend_fx.c
@@ -1754,7 +1754,7 @@ void protoSignalComputation2_fx(
     Word32 RealSubtract_fx, ImagSubtract_fx;
     Word32 left_bb_power_fx, right_bb_power_fx, total_bb_power_fx, lr_bb_power_fx;
     Word32 left_hi_power_fx, right_hi_power_fx, total_hi_power_fx, lr_hi_power_fx;
-    Word32 sum_power_fx, Left_power_fx, Right_power_fx;
+    Word32 sum_power_fx, Left_power_fx, Right_power_fx, Total_power_fx;
     Word16 q_lr_bb_power, q_lr_hi_power;
     Word32 lr_total_bb_ratio_fx, lr_total_hi_ratio_fx;
     Word32 min_sum_total_ratio_fx, min_sum_total_ratio_db_fx;
@@ -2120,7 +2120,10 @@ void protoSignalComputation2_fx(
 #else
         q_Left_Right_power = add( shl( add( q_cldfb, min_q_shift ), 1 ), sub( head_room, 32 ) );
 #endif
-
+        Word16 exp_left_hi_power = 0, exp_right_hi_power = 0, exp_total_hi_power = 0, exp_temppp;
+        move16();
+        move16();
+        move16();
         FOR( l = 0; l < num_freq_bands; l++ )
         {
 #ifdef FIX_867_CLDFB_NRG_SCALE
@@ -2164,19 +2167,30 @@ void protoSignalComputation2_fx(
             left_bb_power_fx = L_add( left_bb_power_fx, Left_power_fx );    // q_Left_Right_power
             right_bb_power_fx = L_add( right_bb_power_fx, Right_power_fx ); // q_Left_Right_power
             // total_bb_power_fx = L_add( total_bb_power_fx, reference_power_fx[l] );
-            total_bb_power_fx = L_add( total_bb_power_fx, W_extract_h( W_shl( reference_power_64fx[l], head_room ) ) );     // q_Left_Right_power
+            total_bb_power_fx = L_add( total_bb_power_fx, W_extract_h( W_shl( reference_power_64fx[l], head_room ) ) ); // q_Left_Right_power
 #endif
-
             IF( GT_16( l, MASA_HI_FREQ_START_BIN ) )
             {
-                left_hi_power_fx = L_add( left_hi_power_fx, Left_power_fx );    // q_Left_Right_power
-                right_hi_power_fx = L_add( right_hi_power_fx, Right_power_fx ); // q_Left_Right_power
-                                                                                // total_hi_power_fx = L_add( total_hi_power_fx, reference_power_fx[l] );
-#ifdef FIX_867_CLDFB_NRG_SCALE
-                total_hi_power_fx = L_add( total_hi_power_fx, W_extract_h( W_shl( reference_power_64fx[l], sub( head_room, total_shift[qidx] ) ) ) ); // q_Left_Right_power
-#else
-                total_hi_power_fx = L_add( total_hi_power_fx, W_extract_h( W_shl( reference_power_64fx[l], head_room ) ) ); // q_Left_Right_power
-#endif
+                W_tmp1 = W_add( W_mult0_32_32( RealBuffer_fx[0][0][l], RealBuffer_fx[0][0][l] ), W_mult0_32_32( ImagBuffer_fx[0][0][l], ImagBuffer_fx[0][0][l] ) );
+                q_shift = W_norm( W_tmp1 );
+                Left_power_fx = W_extract_h( W_shl( W_tmp1, q_shift ) );
+                exp_temppp = sub( 31, sub( add( shl( q_cldfb, 1 ), q_shift ), 32 ) );
+
+                left_hi_power_fx = BASOP_Util_Add_Mant32Exp( left_hi_power_fx, exp_left_hi_power, Left_power_fx, exp_temppp, &exp_left_hi_power ); // exp:exp_left_hi_power
+
+                W_tmp2 = W_add( W_mult0_32_32( RealBuffer_fx[1][0][l], RealBuffer_fx[1][0][l] ), W_mult0_32_32( ImagBuffer_fx[1][0][l], ImagBuffer_fx[1][0][l] ) );
+                q_shift = W_norm( W_tmp2 );
+                Right_power_fx = W_extract_h( W_shl( W_tmp2, q_shift ) );
+                exp_temppp = sub( 31, sub( add( shl( q_cldfb, 1 ), q_shift ), 32 ) );
+
+                right_hi_power_fx = BASOP_Util_Add_Mant32Exp( right_hi_power_fx, exp_right_hi_power, Right_power_fx, exp_temppp, &exp_right_hi_power ); // exp:exp_right_hi_power
+
+                W_tmp2 = W_add( W_tmp1, W_tmp2 );
+                q_shift = W_norm( W_tmp2 );
+                Total_power_fx = W_extract_h( W_shl( W_tmp2, q_shift ) );
+                exp_temppp = sub( 31, sub( add( shl( q_cldfb, 1 ), q_shift ), 32 ) );
+
+                total_hi_power_fx = BASOP_Util_Add_Mant32Exp( total_hi_power_fx, exp_total_hi_power, Total_power_fx, exp_temppp, &exp_total_hi_power ); // exp:exp_total_hi_power
             }
 
             IF( LT_16( l, s_min( num_freq_bands, MASA_SUM_FREQ_RANGE_BINS ) ) )
@@ -2185,8 +2199,8 @@ void protoSignalComputation2_fx(
                 re_aux = L_shl( Real_aux_fx, sub( temp_q_shift, min_q_shift[0] ) ); // q_cldfb+temp_q_shift
                 im_aux = L_shl( Imag_aux_fx, sub( temp_q_shift, min_q_shift[0] ) ); // q_cldfb+temp_q_shift
 #else
-                re_aux = L_shl( Real_aux_fx, sub( temp_q_shift, min_q_shift ) );                                            // q_cldfb+temp_q_shift
-                im_aux = L_shl( Imag_aux_fx, sub( temp_q_shift, min_q_shift ) );                                            // q_cldfb+temp_q_shift
+                re_aux = L_shl( Real_aux_fx, sub( temp_q_shift, min_q_shift ) );                                        // q_cldfb+temp_q_shift
+                im_aux = L_shl( Imag_aux_fx, sub( temp_q_shift, min_q_shift ) );                                        // q_cldfb+temp_q_shift
 #endif
 
                 sum_power_fx = Madd_32_32( Mpy_32_32( re_aux, re_aux ), im_aux, im_aux ); // 2*(q_cldfb+temp_q_shift)-31
@@ -2216,7 +2230,7 @@ void protoSignalComputation2_fx(
                     move32();
                 }
 #else
-                temp = Mpy_32_32( a_fx, W_extract_l( W_shr( reference_power_64fx[l], 31 ) ) );                              // 2*(q_cldfb+min_q_shift) -31
+                temp = Mpy_32_32( a_fx, W_extract_l( W_shr( reference_power_64fx[l], 31 ) ) );                          // 2*(q_cldfb+min_q_shift) -31
                 IF( LT_16( q_temp, stereo_type_detect->q_total_power ) )
                 {
                     stereo_type_detect->total_power_fx[l] = L_add( temp, L_shr( Mpy_32_32( b_fx, stereo_type_detect->total_power_fx[l] ), sub( stereo_type_detect->q_total_power, q_temp ) ) ); // q_temp
@@ -2704,15 +2718,16 @@ void protoSignalComputation2_fx(
         lr_total_bb_ratio_fx = Mpy_32_16_1( temp, 20480 ); // Q21
 
 #ifdef FIX_867_CLDFB_NRG_SCALE
-        stereo_type_detect->left_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, left_hi_power_fx ), sub( 31, q_temp_total ), Mpy_32_32( b2_fx, stereo_type_detect->left_hi_power_fx ), sub( 31, stereo_type_detect->q_left_hi_power ), &stereo_type_detect->q_left_hi_power );
+        stereo_type_detect->left_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, left_hi_power_fx ), exp_left_hi_power, Mpy_32_32( b2_fx, stereo_type_detect->left_hi_power_fx ), sub( 31, stereo_type_detect->q_left_hi_power ), &stereo_type_detect->q_left_hi_power );
         move32();
         stereo_type_detect->q_left_hi_power = sub( 31, stereo_type_detect->q_left_hi_power );
         move16();
-        stereo_type_detect->right_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, right_hi_power_fx ), sub( 31, q_temp_total ), Mpy_32_32( b2_fx, stereo_type_detect->right_hi_power_fx ), sub( 31, stereo_type_detect->q_right_hi_power ), &stereo_type_detect->q_right_hi_power );
+
+        stereo_type_detect->right_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, right_hi_power_fx ), exp_right_hi_power, Mpy_32_32( b2_fx, stereo_type_detect->right_hi_power_fx ), sub( 31, stereo_type_detect->q_right_hi_power ), &stereo_type_detect->q_right_hi_power );
         move32();
         stereo_type_detect->q_right_hi_power = sub( 31, stereo_type_detect->q_right_hi_power );
         move16();
-        stereo_type_detect->total_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, total_hi_power_fx ), sub( 31, q_temp_total ), Mpy_32_32( b2_fx, stereo_type_detect->total_hi_power_fx ), sub( 31, stereo_type_detect->q_total_hi_power ), &stereo_type_detect->q_total_hi_power );
+        stereo_type_detect->total_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, total_hi_power_fx ), exp_total_hi_power, Mpy_32_32( b2_fx, stereo_type_detect->total_hi_power_fx ), sub( 31, stereo_type_detect->q_total_hi_power ), &stereo_type_detect->q_total_hi_power );
         move32();
 #else
         stereo_type_detect->left_hi_power_fx = BASOP_Util_Add_Mant32Exp( Mpy_32_32( a2_fx, left_hi_power_fx ), sub( 31, q_temp ), Mpy_32_32( b2_fx, stereo_type_detect->left_hi_power_fx ), sub( 31, stereo_type_detect->q_left_hi_power ), &stereo_type_detect->q_left_hi_power );
-- 
GitLab