From 5d560b63e6fb85b8051a6b1eb452f2e34a37654e Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Tue, 13 Aug 2024 10:30:30 +0530
Subject: [PATCH] MLD improvement changes

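[x] Fixes in PCA tools (lib_com/ivas_pca_tools.c)
[x] Bug fixes in protoSignalComputation2_fx and ivas_masa_stereotype_detection_fx (lib_rend/ivas_dirac_rend.c)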
---
 lib_com/ivas_pca_tools.c   | 142 +++++++++++++++++++++++++------------
 lib_rend/ivas_dirac_rend.c |  27 ++++---
 2 files changed, 110 insertions(+), 59 deletions(-)

diff --git a/lib_com/ivas_pca_tools.c b/lib_com/ivas_pca_tools.c
index 9ee54da4f..5966d9dc5 100644
--- a/lib_com/ivas_pca_tools.c
+++ b/lib_com/ivas_pca_tools.c
@@ -560,6 +560,22 @@ void dquat2mat(
     return;
 }
 
+static Word16 check_bound( Word32 tmp )
+{
+    IF( GT_32( tmp, MAX16B ) )
+    {
+        return MAX16B;
+    }
+    ELSE IF( LT_32( tmp, MIN16B ) )
+    {
+        return MIN16B;
+    }
+    ELSE
+    {
+        return extract_l( tmp );
+    }
+}
+
 #ifdef IVAS_FLOAT_FIXED
 void dquat2mat_fx(
     const Word16 *ql,
@@ -605,37 +621,37 @@ void dquat2mat_fx(
     dx = mult( d, x );
     dy = mult( d, y );
     dz = mult( d, z );
-    m[0] = sub( sub( aw, bx ), add( cy, dz ) );
+    m[0] = check_bound( L_sub( L_sub( aw, bx ), L_add( cy, dz ) ) );
     move16();
-    m[1] = sub( sub( cz, dy ), add( ax, bw ) );
+    m[1] = check_bound( L_sub( L_sub( cz, dy ), L_add( ax, bw ) ) );
     move16();
-    m[2] = add( sub( sub( negate( ay ), bz ), cw ), dx );
+    m[2] = check_bound( L_add( L_sub( L_sub( negate( ay ), bz ), cw ), dx ) );
     move16();
-    m[3] = sub( sub( by, az ), add( cx, dw ) );
+    m[3] = check_bound( L_sub( L_sub( by, az ), L_add( cx, dw ) ) );
     move16();
-    m[4] = add( sub( add( bw, ax ), dy ), cz );
+    m[4] = check_bound( L_add( L_sub( L_add( bw, ax ), dy ), cz ) );
     move16();
-    m[5] = add( add( add( negate( bx ), aw ), dz ), cy );
+    m[5] = check_bound( L_add( L_add( L_add( negate( bx ), aw ), dz ), cy ) );
     move16();
-    m[6] = sub( sub( add( negate( by ), az ), dw ), cx );
+    m[6] = check_bound( L_sub( L_sub( L_add( negate( by ), az ), dw ), cx ) );
     move16();
-    m[7] = add( sub( sub( negate( bz ), ay ), dx ), cw );
+    m[7] = check_bound( L_add( L_sub( L_sub( negate( bz ), ay ), dx ), cw ) );
     move16();
-    m[8] = sub( add( add( cw, dx ), ay ), bz );
+    m[8] = check_bound( L_sub( L_add( L_add( cw, dx ), ay ), bz ) );
     move16();
-    m[9] = sub( sub( add( negate( cx ), dw ), az ), by );
+    m[9] = check_bound( L_sub( L_sub( L_add( negate( cx ), dw ), az ), by ) );
     move16();
-    m[10] = add( add( add( negate( cy ), dz ), aw ), bx );
+    m[10] = check_bound( L_add( L_add( L_add( negate( cy ), dz ), aw ), bx ) );
     move16();
-    m[11] = sub( add( sub( negate( cz ), dy ), ax ), bw );
+    m[11] = check_bound( L_sub( L_add( L_sub( negate( cz ), dy ), ax ), bw ) );
     move16();
-    m[12] = add( add( sub( dw, cx ), by ), az );
+    m[12] = check_bound( L_add( L_add( L_sub( dw, cx ), by ), az ) );
     move16();
-    m[13] = add( sub( sub( negate( dx ), cw ), bz ), ay );
+    m[13] = check_bound( L_add( L_sub( L_sub( negate( dx ), cw ), bz ), ay ) );
     move16();
-    m[14] = sub( add( sub( negate( dy ), cz ), bw ), ax );
+    m[14] = check_bound( L_sub( L_add( L_sub( negate( dy ), cz ), bw ), ax ) );
     move16();
-    m[15] = add( add( add( negate( dz ), cy ), bx ), aw );
+    m[15] = check_bound( L_add( L_add( L_add( negate( dz ), cy ), bx ), aw ) );
     move16();
 
     return;
@@ -819,6 +835,24 @@ static void norm_quat(
 }
 
 #ifdef IVAS_FLOAT_FIXED
+
+static Word32 dotp16_fixed_guarded( const Word16 x[], /* i  : first input vector            */
+                                    const Word16 y[], /* i  : second input vector           */
+                                    const Word16 n /* i  : number of elements            */ )
+{
+    const Word16 shift = find_guarded_bits_fx( n );    /* right shift applied to every product before it is summed */
+    Word32 acc = L_shr( L_mult( x[0], y[0] ), shift ); /* seed the sum with element 0 */
+    Word16 k;
+
+    FOR( k = 1; k < n; k++ )
+    {
+        Word32 prod = L_mult( x[k], y[k] );
+        acc = L_add( acc, L_shr( prod, shift ) );
+    }
+
+    return acc; /* o  : sum of the shifted products */
+}
+
 static void norm_quat_fx(
     Word16 *q )
 {
@@ -826,13 +860,13 @@ static void norm_quat_fx(
     Word16 i, exp1;
     exp1 = 0;
     move16();
-    norm_q = dotp_fx( q, q, IVAS_PCA_INTERP, &exp1 );
-    exp1 = sub( 31, add( exp1, 2 ) );
-    norm_q = ISqrt32( norm_q, &exp1 ); /*q(15 - exp)*/
+    norm_q = dotp16_fixed_guarded( q, q, IVAS_PCA_INTERP );
+    exp1 = find_guarded_bits_fx( IVAS_PCA_INTERP );
+    norm_q = ISqrt32( norm_q, &exp1 ); /*Q(31 - exp)*/
 
     FOR( i = 0; i < IVAS_PCA_INTERP; i++ )
     {
-        q[i] = mult( q[i], extract_l( L_shr( norm_q, 15 ) ) ); // todo : recheck
+        q[i] = round_fx( L_shl( Mpy_32_16_1( norm_q, q[i] ), exp1 ) ); /* Q(15) */
         move16();
     }
 
@@ -922,7 +956,7 @@ void pca_interp_preproc_fx(
 {
     Word16 alpha;
     Word16 j;
-    Word16 tmp, tmp2, tmp3;
+    Word16 tmp, tmp2, tmp3, tmp_e;
     FOR( j = 0; j < len; j++ )
     {
         tmp = sub( len, 1 );
@@ -933,11 +967,12 @@ void pca_interp_preproc_fx(
         }
         ELSE
         {
-            alpha = idiv1616( j, tmp ); // the increment can be updated by simple delta //q15
+            alpha = BASOP_Util_Divide1616_Scale( j, tmp, &tmp_e ); // the increment can be updated by simple delta
+            alpha = shl_sat( alpha, tmp_e );                       /* Q15 */
         }
-        tmp2 = mult( EVS_PI_FX, alpha );           // q13
-        tmp3 = getCosWord16( tmp2 );               // q14
-        alpha = shr( sub( ONE_IN_Q14, tmp3 ), 1 ); // q15
+        tmp2 = mult( EVS_PI_FX, alpha );     /* Q13 */
+        tmp3 = getCosWord16( tmp2 );         /* Q14 */
+        alpha = sub_sat( ONE_IN_Q14, tmp3 ); /* Q15 */
         alpha = sub( MAX_16, alpha );
         quat_nlerp_preproc_fx( prev_ql, ql, alpha, &ql_interp[j * IVAS_PCA_INTERP] );
         quat_nlerp_preproc_fx( prev_qr, qr, alpha, &qr_interp[j * IVAS_PCA_INTERP] );
@@ -986,16 +1021,30 @@ static void sp2cart_fx(
     Word16 *q )
 {
     Word16 s1, s2, s1s2;
-    s1 = getSinWord16( ph1 );                 // q15
-    s2 = getSinWord16( ph2 );                 // q15
-    s1s2 = mult( s1, s2 );                    // q15
-    q[3] = mult( getSinWord16( ph3 ), s1s2 ); // q15
+    Word16 sin_ph3, cos_ph3;
+
+    sin_ph3 = cos_ph3 = ph3;
     move16();
-    q[2] = mult( getCosWord16( ph3 ), s1s2 ); // q15
     move16();
-    q[1] = mult( getCosWord16( ph2 ), s1 ); // q15
+
+    IF( GT_16( ph3, 12868 /* PI in Q12 */ ) )
+    {
+        sin_ph3 = sub( 12868, ph3 ); /* sin(x) = sin(PI - x) */
+        cos_ph3 = sub( 25736, ph3 ); /* cos(x) = cos(2*PI - x) */
+    }
+    sin_ph3 = shl( sin_ph3, 1 ); /* Q12 -> Q13 */
+    cos_ph3 = shl( cos_ph3, 1 ); /* Q12 -> Q13 */
+
+    s1 = getSinWord16( ph1 );                     /* Q15 */
+    s2 = getSinWord16( ph2 );                     /* Q15 */
+    s1s2 = mult( s1, s2 );                        /* Q15 */
+    q[3] = mult( getSinWord16( sin_ph3 ), s1s2 ); /* Q15 */
     move16();
-    q[0] = shl_sat( getCosWord16( ph1 ), 1 ); // q15
+    q[2] = shl_sat( mult( getCosWord16( cos_ph3 ), s1s2 ), 1 ); /* Q15 */
+    move16();
+    q[1] = shl_sat( mult( getCosWord16( ph2 ), s1 ), 1 ); /* Q15 */
+    move16();
+    q[0] = shl_sat( getCosWord16( ph1 ), 1 ); /* Q15 */
     move16();
 
     return;
@@ -1021,9 +1070,9 @@ static Word16 calc_n2_fx(
 {
     Word16 n2;
     Word32 temp;
-    temp = L_mult( 23040, getSinWord16( ph1 ) ); // q8
-    n2 = round_fx( temp );
-    n2 = shr( n2, 7 );
+    temp = L_mult( 23040, getSinWord16( ph1 ) ); /* Q8 + Q15 + Q1 = Q24 */
+    n2 = round_fx( temp );                       /* Q24 -> Q8 */
+    n2 = shr( n2, 8 );
     IF( s_and( n2, 1 ) == 0 )
     {
         n2 = add( n2, 1 );
@@ -1061,10 +1110,10 @@ static Word16 calc_n3_fx(
     const Word16 ph2 )
 {
     Word16 n3;
-    Word16 temp1 = mult( getSinWord16( ph2 ), getSinWord16( ph1 ) ); // q7 + q15 - q15
-    n3 = round_fx( L_mult( temp1, getSinWord16( ph2 ) ) );           // q7 + q15 - q15
+    Word16 temp1 = mult( getSinWord16( ph2 ), getSinWord16( ph1 ) ); /* Q15 */
+    n3 = round_fx( L_mult( temp1, 23040 /* 180.0f in Q7 */ ) );      /* Q15 + Q7 + Q1 - Q16 -> Q7*/
 
-    n3 = shr( n3, 8 );
+    n3 = shr( n3, 7 );
 
     IF( n3 == 0 )
     {
@@ -1489,8 +1538,10 @@ void pca_dec_s3_fx(
     num_fx = 12868;
     move16();
 
-    d_fx = idiv1616( num_fx, n1 );     // Q12
-    ph1_q_fx = i_mult( index1, d_fx ); // Q12
+    d_fx = idiv1616( num_fx, sub( n1, 1 ) ); // Q12
+    ph1_q_fx = i_mult( index1, d_fx );       // Q12
+
+    ph1_q_fx = shl( ph1_q_fx, 1 ); /* Q12 -> Q13 */
 
     n2 = calc_n2_fx( ph1_q_fx );
 
@@ -1521,6 +1572,7 @@ void pca_dec_s3_fx(
         move16();
         d_fx = idiv1616( num_fx, sub( n2, 1 ) ); // Q12
         ph2_q_fx = i_mult( index2, d_fx );       // Q12
+        ph2_q_fx = shl( ph2_q_fx, 1 );           /* Q12 -> Q13 */
     }
 
     j = L_sub( j, ivas_pca_offset_index2[add( index2, get_pca_offset_n2_fx( index1 ) )] );
@@ -1536,11 +1588,13 @@ void pca_dec_s3_fx(
     }
     ELSE
     {
-        num_fx = 6434;
+        Word16 exp;
+        num_fx = 25736; /* PI2 in Q12 */
         move16();
-        d_fx = idiv1616( num_fx, n3 );                 // Q10
-        ph3_q_fx = round_fx( L_mult( index3, d_fx ) ); // Q10
-        // ph3_q_fx = shl(ph3_q_fx, 2);
+        d_fx = BASOP_Util_Divide1616_Scale( num_fx, n3, &exp ); /* Q12 */
+        exp = add( 3 - 15, exp );
+        d_fx = shl( d_fx, sub( exp, 3 ) ); /* Q12 */
+        ph3_q_fx = i_mult( index3, d_fx ); /* Q12 */
     }
     sp2cart_fx( ph1_q_fx, ph2_q_fx, ph3_q_fx, q_fx );
 
diff --git a/lib_rend/ivas_dirac_rend.c b/lib_rend/ivas_dirac_rend.c
index 59a8ef6b6..c6ef71a46 100644
--- a/lib_rend/ivas_dirac_rend.c
+++ b/lib_rend/ivas_dirac_rend.c
@@ -2857,7 +2857,6 @@ void protoSignalComputation2_fx(
     Word32 tempSpaced_fx, tempDmx_fx;
     Word16 q_shift, min_q_shift, exp, q_temp, temp_q_shift, q_temp2;
     Word32 temp;
-
     /* Calculate maximum possible shift for the buffers RealBuffer_fx and ImagBuffer_fx */
     min_q_shift = Q31;
     move16();
@@ -3187,7 +3186,6 @@ void protoSignalComputation2_fx(
                 {
                     Real_aux_fx = L_shr( Real_aux_fx, 1 );
                     Imag_aux_fx = L_shr( Imag_aux_fx, 1 );
-
                     temp = Madd_32_32( Mpy_32_32( Real_aux_fx, Real_aux_fx ), Imag_aux_fx, Imag_aux_fx );
                     IF( LT_16( q_temp, *q_proto_power_smooth ) )
                     {
@@ -3434,8 +3432,8 @@ void protoSignalComputation2_fx(
         }
         q_lr_bb_power = sub( q_lr_bb_power, 1 ); /* = (lr_bb_power_fx * 2) */
 
-        temp = BASOP_Util_Divide3232_Scale( lr_bb_power_fx, L_add( stereo_type_detect->total_bb_power_fx, EPSILON_FX ), &exp );
-        exp = sub( 31, add( sub( 15, exp ), sub( q_lr_bb_power, stereo_type_detect->q_total_bb_power ) ) );
+        temp = BASOP_Util_Divide3232_Scale_cadence( lr_bb_power_fx, L_add( stereo_type_detect->total_bb_power_fx, EPSILON_FX ), &exp );
+        exp = sub( 31, add( sub( 31, exp ), sub( q_lr_bb_power, stereo_type_detect->q_total_bb_power ) ) );
         temp = BASOP_Util_Log2( temp );
         IF( NE_32( temp, MIN_32 ) )
         {
@@ -3500,8 +3498,8 @@ void protoSignalComputation2_fx(
         }
         q_lr_hi_power = sub( q_lr_hi_power, 1 ); /* = (q_lr_hi_power * 2) */
 
-        temp = BASOP_Util_Divide3232_Scale( lr_hi_power_fx, L_add( stereo_type_detect->total_hi_power_fx, EPSILON_FX ), &exp );
-        exp = sub( 31, add( sub( 15, exp ), sub( q_lr_hi_power, stereo_type_detect->q_total_hi_power ) ) );
+        temp = BASOP_Util_Divide3232_Scale_cadence( lr_hi_power_fx, L_add( stereo_type_detect->total_hi_power_fx, EPSILON_FX ), &exp );
+        exp = sub( 31, add( sub( 31, exp ), sub( q_lr_hi_power, stereo_type_detect->q_total_hi_power ) ) );
         temp = BASOP_Util_Log2( temp );
         IF( NE_32( temp, MIN_32 ) )
         {
@@ -3596,7 +3594,6 @@ void protoSignalComputation2_fx(
             move32();
         }
     }
-
     *q_proto_frame_f = add( q_cldfb, min_q_shift );
     move16();
     *q_proto_direct_buffer_f = add( q_cldfb, min_q_shift );
@@ -4698,7 +4695,7 @@ void ivas_masa_stereotype_detection_fx(
     {
         /* subtract_temp = ( -subtract_target_ratio_db - 3.0f ) / 3.0f; */
         temp = L_sub( L_shr( -subtract_target_ratio_db_fx, 1 ), L_shr( THREE_Q21, 1 ) );
-        subtract_temp_fx = Mpy_32_32( temp, 715827883 /* 1 / 3.0f in Q31 */ );
+        subtract_temp_fx = Mpy_32_32( temp, 708669604 /* 0.33f in Q31, approximating 1 / 3.0f */ );
         subtract_temp_fx = L_shl( subtract_temp_fx, 1 ); /* Q21 */
 
         /* min_sum_temp = max( -min_sum_total_ratio_db / 6.0f, 0.0f ); */
@@ -4710,7 +4707,7 @@ void ivas_masa_stereotype_detection_fx(
 
         change_to_spaced_fx = L_add( L_add( subtract_temp_fx, min_sum_temp_fx ), lr_total_bb_temp_fx ); /* Q21 */
 
-        IF( GE_32( change_to_spaced_fx, ONE_IN_Q15 ) )
+        IF( GE_32( change_to_spaced_fx, ONE_IN_Q21 ) )
         {
             change_to_spaced_selection = 1;
             move16();
@@ -4723,7 +4720,7 @@ void ivas_masa_stereotype_detection_fx(
     IF( subtract_target_ratio_db_fx > 0 )
     {
         /* subtract_temp = subtract_target_ratio_db / 3.0f; */
-        subtract_temp_fx = Mpy_32_32( subtract_target_ratio_db_fx, 715827883 /* 1 / 3.0f in Q31 */ ); /* Q21 */
+        subtract_temp_fx = Mpy_32_32( subtract_target_ratio_db_fx, 708669604 /* 0.33f in Q31, approximating 1 / 3.0f */ ); /* Q21 */
 
         /* min_sum_temp = ( min_sum_total_ratio_db + 1.0f ) / 6.0f; */
         min_sum_temp_fx = Mpy_32_32_r( L_add( min_sum_total_ratio_db_fx, ONE_IN_Q21 ), 357913941 /* 1 / 6.0f in Q31 */ ); /* Q21 */
@@ -4733,7 +4730,7 @@ void ivas_masa_stereotype_detection_fx(
 
         change_to_downmix_fx = L_add( L_add( subtract_temp_fx, min_sum_temp_fx ), lr_total_bb_temp_fx ); /* Q21 */
 
-        IF( GE_32( change_to_downmix_fx, ONE_IN_Q15 ) )
+        IF( GE_32( change_to_downmix_fx, ONE_IN_Q21 ) )
         {
             change_to_downmix_selection = 1;
             move16();
@@ -4744,17 +4741,17 @@ void ivas_masa_stereotype_detection_fx(
     IF( LT_32( lr_total_hi_ratio_db_fx, -25165824 ) ) // 25165824 = 12.0 in Q21
     {
         /* subtract_temp = ( subtract_target_ratio_db + 4.0f ) / 3.0f; */
-        subtract_temp_fx = Mpy_32_32( L_add( subtract_target_ratio_db_fx, 8388608 /* 4.0 in Q21 */ ), 715827883 /* 1 / 3.0f in Q31 */ );
+        subtract_temp_fx = Mpy_32_32( L_add( subtract_target_ratio_db_fx, 8388608 /* 4.0 in Q21 */ ), 708669604 /* 0.33f in Q31, approximating 1 / 3.0f */ );
 
         /* min_sum_temp = min_sum_total_ratio_db / 6.0f; */
         min_sum_temp_fx = Mpy_32_32_r( min_sum_total_ratio_db_fx, 357913941 /* 1 / 6.0f in Q31 */ );
 
         /* lr_total_hi_temp = ( -lr_total_hi_ratio_db - 12.0f ) / 3.0f; */
-        lr_total_hi_temp_fx = Mpy_32_32( L_sub( -lr_total_hi_ratio_db_fx, 25165824 /* 12.0 in Q21 */ ), 715827883 /* 1 / 3.0f in Q31 */ );
+        lr_total_hi_temp_fx = Mpy_32_32( L_sub( -lr_total_hi_ratio_db_fx, 25165824 /* 12.0 in Q21 */ ), 708669604 /* 0.33f in Q31, approximating 1 / 3.0f */ );
 
-        change_to_downmix2_fx = L_add( L_add( subtract_temp_fx, min_sum_temp_fx ), lr_total_hi_temp_fx ); // Q15
+        change_to_downmix2_fx = L_add( L_add( subtract_temp_fx, min_sum_temp_fx ), lr_total_hi_temp_fx ); // Q21
 
-        IF( GE_32( change_to_downmix2_fx, ONE_IN_Q15 ) )
+        IF( GE_32( change_to_downmix2_fx, ONE_IN_Q21 ) )
         {
             change_to_downmix_selection = 1;
             move16();
-- 
GitLab