From ae58d3f013076fb98533baf5334fd3b024def33c Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Wed, 17 Aug 2022 13:09:44 +0900
Subject: [PATCH 1/6] Contribution: Reduction of ROM size and Update of ITD
 switch in stereo downmix for EVS.

---
 lib_com/options.h             |  13 +
 lib_enc/ivas_rom_enc.c        |   2 +
 lib_enc/ivas_rom_enc.h        |   2 +
 lib_enc/ivas_stereo_dmx_evs.c | 619 +++++++++++++++++++++++++++++++++-
 4 files changed, 632 insertions(+), 4 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index da537889ed..bbc2b52469 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -149,6 +149,19 @@
 
 #define DIRAC_DRCT_GAIN_TUNING                          /* issue 64: tuning of DirAC energy-compensation gains */
 
+/* NTT switches */
+///////#define DEBUG_STEREO_DMX
+//#define STEREO_AVE_DOWNMIX
+#ifndef STEREO_AVE_DOWNMIX
+
+//#define NTT_UPDATE_ITD_SW
+//#define NTT_REMOVE_EPS_ROM
+//#define REDUCED_POC
+//#define DELAYED_SUM2
+
+//#define REDUCED_POC5A
+///////////#define REDUCED_POC6
+#endif
 
 /* ################## End DEVELOPMENT switches ######################### */
 /* clang-format on */
diff --git a/lib_enc/ivas_rom_enc.c b/lib_enc/ivas_rom_enc.c
index 783085fb86..e8048b8f1a 100644
--- a/lib_enc/ivas_rom_enc.c
+++ b/lib_enc/ivas_rom_enc.c
@@ -534,6 +534,7 @@ const float ari_bit_estimate_s17_LC[RANGE_N_CONTEXT][RANGE_N_SYMBOLS] =
  * Stereo downmix to EVS ROM tables
  *----------------------------------------------------------------------------------*/
 
+#ifndef NTT_REMOVE_EPS_ROM
 const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4] = {
     0.00000000f, 0.000385506690f, 0.000770864717f, 0.00115592557f, 0.00154054083f, 0.00192456215f, 0.00230784155f, 0.00269023119f, 0.00307158381f, 0.00345175178f,
     0.00383058959f, 0.00420795102f, 0.00458368938f, 0.00495766103f, 0.00532972161f, 0.00569972629f, 0.00606753491f, 0.00643300405f, 0.00679599261f, 0.00715636183f,
@@ -686,6 +687,7 @@ const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4] = {
     -0.00648899190f, -0.00649444433f, -0.00649961829f, -0.00650451379f, -0.00650913082f, -0.00651346892f, -0.00651752809f, -0.00652130833f, -0.00652480870f, -0.00652803015f,
     -0.00653097173f, -0.00653363345f, -0.00653601484f, -0.00653811684f, -0.00653993897f, -0.00654148031f, -0.00654274225f, -0.00654372340f, -0.00654442422f, -0.00654484471f
 };
+#endif
 
 const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4] = {
     0.00154133327f, 0.0138150426f, 0.0380602330f, 0.0736799166f, 0.119797014f, 0.175276011f, 0.238750681f, 0.308658302f, 0.383277327f, 0.460770488f,
diff --git a/lib_enc/ivas_rom_enc.h b/lib_enc/ivas_rom_enc.h
index 7194264225..2a4f71b8b9 100644
--- a/lib_enc/ivas_rom_enc.h
+++ b/lib_enc/ivas_rom_enc.h
@@ -120,9 +120,11 @@ extern const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE
  * Stereo downmix to EVS ROM tables
  *----------------------------------------------------------------------------------*/
 
+#ifndef NTT_REMOVE_EPS_ROM
 extern const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4];
 extern const float Stereo_dmx_s_wnd_coef_eps_32k[L_FRAME32k * 3 / 4];
 extern const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4];
+#endif
 extern const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4];
 extern const float Stereo_dmx_s_wnd_coef_32k[L_FRAME32k >> 4];
 extern const float Stereo_dmx_s_wnd_coef_48k[L_FRAME48k >> 4];
diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c
index af3430766a..ae83b53ff1 100644
--- a/lib_enc/ivas_stereo_dmx_evs.c
+++ b/lib_enc/ivas_stereo_dmx_evs.c
@@ -63,6 +63,8 @@
 #define STEREO_DMX_EVS_DMX_EGY_FORGETTING 0.25f
 #define STEREO_DMX_EVS_CORR_FORGETTING    0.78f
 
+#define Q_BAND 0.25f
+
 /*-----------------------------------------------------------------------*
  * Local function prototypes
  *-----------------------------------------------------------------------*/
@@ -76,6 +78,10 @@ static void create_M_signal( const float srcL[], const float srcR[], float dmx[]
 static float find_poc_peak( STEREO_DMX_EVS_POC_HANDLE hPOC, float itd[], const int16_t input_frame, const float ratio );
 static void calc_energy( const float src1[], const float src2[], float energy[], const int16_t input_frame, const float ratio );
 
+#ifdef DEBUG_STEREO_DMX
+FILE *fp;
+#endif
+
 /*-------------------------------------------------------------------*
  * estimate_itd_wnd_fft()
  *
@@ -153,10 +159,28 @@ static void calc_poc(
     const float *c, *s;
     float *P;
     float tmp1, tmp2, Lr, Li, Rr, Ri, gamma, igamma, iN;
+#ifdef REDUCED_POC
+    float specPOr[L_FRAME48k / 2 + 1], specPOi[L_FRAME48k / 2];
+    float aR, aI;
+#else
     float specPOr[L_FRAME48k], specPOi[L_FRAME48k];
+#endif
     float tmpPOC1[L_FRAME48k], tmpPOC2[L_FRAME48k];
     float rfft_buf[L_FRAME48k];
     int16_t step, bias;
+#ifdef REDUCED_POC6
+    float add_pow, prev_pow = 0.0f;
+#endif
+#ifdef NTT_REMOVE_EPS_ROM
+    int16_t i_for;
+    int16_t cos_step, cos_max;
+    float eps_cos, eps_sin, EPS;
+#endif
+#ifdef REDUCED_POC
+#ifdef NTT_REMOVE_EPS_ROM
+    int16_t j;
+#endif
+#endif
 
     /* Initialization */
     iN = 1.0f / (float) input_frame;
@@ -168,8 +192,10 @@ static void calc_poc(
     itdLR = hPOC->itdLR;
     igamma = STEREO_DMX_EVS_POC_GAMMA * iN;
     gamma = 1.0f - igamma;
+#ifndef REDUCED_POC
     set_zero( tmpPOC1, L_FRAME48k );
     set_zero( tmpPOC2, L_FRAME48k );
+#endif
 
     if ( input_frame == L_FRAME16k )
     {
@@ -185,6 +211,459 @@ static void calc_poc(
     specPOr[0] = sign( specLr[0] ) * sign( specRr[0] ) * wnd[bias];
     specPOi[0] = 0.0f;
 
+#ifdef REDUCED_POC
+#ifdef NTT_REMOVE_EPS_ROM
+    EPS = hPOC->eps;
+
+    if ( input_frame == L_FRAME16k )
+    {
+        cos_step = 4;
+        cos_max = input_frame;
+    }
+    else // for 32 kHz & 48 kHz
+    {
+        cos_step = 2;
+        cos_max = n0;
+    }
+
+    for ( i = 1; i < n0 / 2; i++ )
+    {
+        Lr = specLr[i];
+        Li = specLi[i];
+        Rr = specRr[i];
+        Ri = specRi[i];
+        // i_for = i * 4;
+        i_for = i * cos_step;
+        eps_cos = s[cos_max - i_for] * EPS;
+        eps_sin = s[i_for] * EPS;
+        Lr += ( specRr[i] * eps_cos + specRi[i] * eps_sin );
+        Li += ( -specRr[i] * eps_sin + specRi[i] * eps_cos );
+        Rr += ( specLr[i] * eps_cos + specLi[i] * eps_sin );
+        Ri += ( -specLr[i] * eps_sin + specLi[i] * eps_cos );
+
+        specPOr[i] = ( Lr * Rr + Li * Ri );
+        specPOi[i] = ( Lr * Ri - Li * Rr );
+
+        j = n0 - i;
+        Lr = specLr[j];
+        Li = specLi[j];
+        Rr = specRr[j];
+        Ri = specRi[j];
+        Lr += ( -specRr[j] * eps_cos + specRi[j] * eps_sin );
+        Li += ( -specRr[j] * eps_sin - specRi[j] * eps_cos );
+        Rr += ( -specLr[j] * eps_cos + specLi[j] * eps_sin );
+        Ri += ( -specLr[j] * eps_sin - specLi[j] * eps_cos );
+
+        specPOr[j] = ( Lr * Rr + Li * Ri );
+        specPOi[j] = ( Lr * Ri - Li * Rr );
+    }
+    {
+        i = n0 / 2;
+        Lr = specLr[i] + specRi[i] * EPS;
+        Li = specLi[i] - specRr[i] * EPS;
+        Rr = specRr[i] + specLi[i] * EPS;
+        Ri = specRi[i] - specLr[i] * EPS;
+
+        specPOr[i] = ( Lr * Rr + Li * Ri );
+        specPOi[i] = ( Lr * Ri - Li * Rr );
+    }
+
+
+#else
+    for ( i = 1; i < n0; i++ )
+    {
+        Lr = specLr[i];
+        Li = specLi[i];
+        Rr = specRr[i];
+        Ri = specRi[i];
+
+        Lr += ( specRr[i] * c[i] + specRi[i] * s[i] );
+        Li += ( -specRr[i] * s[i] + specRi[i] * c[i] );
+        Rr += ( specLr[i] * c[i] + specLi[i] * s[i] );
+        Ri += ( -specLr[i] * s[i] + specLi[i] * c[i] );
+
+        specPOr[i] = ( Lr * Rr + Li * Ri );
+        specPOi[i] = ( Lr * Ri - Li * Rr );
+    }
+#endif //end NTT_REMOVE_EPS_ROM
+    for ( i = 1; i < 10; i++ )
+    {
+        specPOr[i] = sign( specPOr[i] ) * 0.866f; // low angles are more frequent
+        specPOi[i] = sign( specPOi[i] ) * 0.5f;
+    }
+    for ( i = 10; i<n0>> 4; i++ )
+    {
+        specPOr[i] = sign( specPOr[i] ) * 0.7071f;
+        specPOi[i] = sign( specPOi[i] ) * 0.7071f;
+    }
+    for ( i = n0 >> 4; i<n0>> 3; i++ )
+    {
+        aR = fabsf( specPOr[i] );
+        aI = fabsf( specPOi[i] );
+        if ( aR > aI )
+        {
+            specPOr[i] = sign( specPOr[i] ) * 0.92388f;  //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
+            specPOi[i] = sign( specPOi[i] ) * 0.382683f; //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
+                                                         //if (i==2)
+        }
+        else
+        {
+            specPOr[i] = sign( specPOr[i] ) * 0.382683f; //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
+            specPOi[i] = sign( specPOi[i] ) * 0.92388f;  //(wnd[(n0>>2)*3]+wnd[((n0>>2)*3)-1])*0.5f)
+                                                         //if (i==2)
+        }
+    } //// 4level
+    for ( i = n0 >> 3; i<n0>> 2; i++ )
+    {
+        aR = fabsf( specPOr[i] );
+        aI = fabsf( specPOi[i] );
+        if ( aR > aI )
+        {
+            if ( aR * 0.414213f /*tanf(EVS_PI*0.125f)*/ > aI )
+            {
+                specPOr[i] = sign( specPOr[i] ) * 0.980785f; //(wnd[(n0>>3)*7]+wnd[((n0>>3)*7)-1])*0.5f
+                specPOi[i] = sign( specPOi[i] ) * 0.19509f;  //(wnd[n0>>3]+wnd[(n0>>3)-1])*0.5f)
+            }
+            else
+            {
+                specPOr[i] = sign( specPOr[i] ) * 0.83147f; //(wnd[(n0>>3)*5]+wnd[((n0>>3)*5)-1])*0.5f
+                specPOi[i] = sign( specPOi[i] ) * 0.55557f; //(wnd[(n0>>3)*3]+wnd[(n0>>3)*3-1])*0.5f
+            }
+        }
+        else
+        {
+            if ( aR /** 2.414213f*/ /*tanf(EVS_PI*6/16)*/ > aI * 0.41421356f /*cot(PI*3/8)*/ )
+            {
+                specPOr[i] = sign( specPOr[i] ) * 0.55557f;
+                specPOi[i] = sign( specPOi[i] ) * 0.83147f;
+            }
+            else
+            {
+                specPOr[i] = sign( specPOr[i] ) * 0.19509f;
+                specPOi[i] = sign( specPOi[i] ) * 0.980785f;
+            }
+        }
+    }
+    //// 8 level
+    for ( i = 1; i<n0>> 2; i++ )
+    {
+        tmp1 = wnd[i * step + bias] * gamma;
+        specPOr[i] *= tmp1;
+        specPOi[i] *= tmp1;
+        gamma -= igamma;
+    }
+
+#ifdef REDUCED_POC5A
+    for ( i = n0 >> 2; i<n0>> 1 /*min((n0>>1), 320)*/; i++ )
+#else
+    for ( i = n0 >> 2; i<n0>> 1; i++ )
+#endif
+    {
+        aR = fabsf( specPOr[i] );
+        aI = fabsf( specPOi[i] );
+
+#ifdef REDUCED_POC6
+        add_pow = aR + aI;
+        prev_pow = 0.2f * prev_pow + 0.8f * add_pow;
+        if ( prev_pow * 0.2f > add_pow )
+        {
+            //fprintf(stderr, "%d skip_bin %f %f \n", i, prev_pow, aR+aI);
+            specPOr[i] = 0.f;
+            specPOi[i] = 0.f;
+            gamma -= igamma;
+            continue;
+        }
+#endif
+
+        if ( aR > aI )
+        {
+            if ( aR * 0.4142136f /*tanf(EVS_PI*0.125f)*/ > aI )
+            {
+                if ( aR * 0.19891f /*tanf(EVS_PI/16)*/ > aI )
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.995185f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.098017f;
+                }
+                else
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.95694f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.290285f;
+                }
+            }
+            else
+            {
+                if ( aR * 0.66818f /*tanf(EVS_PI*3/16)*/ > aI )
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.881921f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.471397f;
+                }
+                else
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.77301f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.634393f;
+                }
+            }
+        }
+        else
+        {
+            if ( aR /** 2.414213f*/ /*tanf(EVS_PI*6/16)*/ > aI * 0.4142136f /*cot(PI*3/8)*/ )
+            {
+                if ( aR /**1.49661f*/ /*tanf(EVS_PI*5/16)*/ > aI * 0.668179f /*cot(PI*5/16)*/ )
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.634393f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.77301f;
+                }
+                else
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.471397f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.881921f;
+                }
+            }
+            else
+            {
+                if ( aR /**5.027339f*/ /*tanf(EVS_PI*7/16)*/ > aI * 0.198912f /*cot(PI*7/16)*/ )
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.290285f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.95694f;
+                }
+                else
+                {
+                    specPOr[i] = sign( specPOr[i] ) * 0.098017f;
+                    specPOi[i] = sign( specPOi[i] ) * 0.995158f;
+                }
+            }
+        }
+
+        tmp1 = wnd[i * step + bias] * gamma;
+        specPOr[i] *= tmp1;
+        specPOi[i] *= tmp1;
+        gamma -= igamma;
+    }
+
+#ifdef REDUCED_POC5A
+    for ( i = n0 >> 1; i < min( n0, 320 /*240*/ ); i++ )
+#else
+    for ( i = n0 >> 1; i < n0; i++ )
+#endif ////// 16 level
+    {
+        aR = fabsf( specPOr[i] );
+        aI = fabsf( specPOi[i] );
+
+#ifdef REDUCED_POC6
+        add_pow = aR + aI;
+        prev_pow = 0.2f * prev_pow + 0.8f * add_pow;
+        if ( prev_pow * 0.2f > add_pow )
+        {
+            //fprintf(stderr, "%d skip_bin %f %f \n", i, prev_pow, aR+aI);
+            specPOr[i] = 0.f;
+            specPOi[i] = 0.f;
+            gamma -= igamma;
+            continue;
+        }
+#endif
+
+        if ( aR > aI )
+        {
+            if ( aR * 0.414213f /*tanf(EVS_PI*0.125f)*/ > aI )
+            {
+                if ( aR * 0.19891f /*tanf(EVS_PI/16)*/ > aI )
+                {
+                    if ( aR * 0.0984914f /*tanf(EVS_PI/32)*/ > aI )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.99879f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.04907f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.98918f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.14763f;
+                    }
+                }
+                else
+                {
+                    if ( aR * 0.303347f /*tanf(EVS_PI*3/32)*/ > aI )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.970031f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.24298f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.941544f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.33689f;
+                    }
+                }
+            }
+            else
+            {
+                if ( aR * 0.66818f /*tanf(EVS_PI*3/16)*/ > aI )
+                {
+                    if ( aR * 0.534511f /*tanf(EVS_PI*5/32)*/ > aI )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.903989f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.427555f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.857729f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.514103f;
+                    }
+                }
+                else
+                {
+                    if ( aR * 0.8206788f /*tanf(EVS_PI*7/32)*/ > aI )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.803208f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.595699f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.740951f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.671559f;
+                    }
+                }
+            }
+        }
+        else
+        {
+            if ( aR /** 2.414213*/ /*tanf(EVS_PI*0.375f)*/ > aI * 0.4142136f )
+            {
+                if ( aR /**1.49661f*/ /*tanf(EVS_PI*5/16)*/ > aI * 0.6681767f )
+                {
+                    if ( aR /**1.21850f*/ /*tanf(EVS_PI*9/32)*/ > aI * 0.820681f )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.671559f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.740951f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.595699f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.803208f;
+                    }
+                }
+                else
+                {
+                    if ( aR /**1.8708684f*/ /*tanf(EVS_PI*11/32)*/ > aI * 0.5345111f )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.514103f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.857729f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.427555f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.903989f;
+                    }
+                }
+            }
+            else
+            {
+                if ( aR /**5.027339f*/ /*tanf(EVS_PI*7/16)*/ > aI * 0.1989124f )
+                {
+                    if ( aR /**3.296558f*/ /*tanf(EVS_PI*13/32)*/ > aI * 0.3033467f )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.33689f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.941544f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.24298f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.970031f;
+                    }
+                }
+                else
+                {
+                    if ( aR /**10.15317f*/ /*tanf(EVS_PI*15/32)*/ > aI * 0.098491f )
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.14673f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.989177f;
+                    }
+                    else
+                    {
+                        specPOr[i] = sign( specPOr[i] ) * 0.049068f;
+                        specPOi[i] = sign( specPOi[i] ) * 0.998795f;
+                    }
+                }
+            }
+        }
+        tmp1 = wnd[i * step + bias] * gamma;
+        specPOr[i] *= tmp1;
+        specPOi[i] *= tmp1;
+        gamma -= igamma;
+    }
+    if ( i < n0 )
+    {
+        gamma -= igamma * ( n0 - 320 );
+    }
+    for ( /* i = min(n0, 320) */; i < n0; i++ )
+    {
+        specPOr[i] = 0.f;
+        specPOi[i] = 0.f;
+    }
+    specPOr[n0] = sign( specLr[n0] ) * sign( specRr[n0] ) * wnd[i * step + bias] * gamma;
+
+    //end REDUCED_POC
+#else
+#ifdef NTT_REMOVE_EPS_ROM
+    EPS = hPOC->eps;
+    //EPS=0.009817f;
+    //fprintf(stderr, "%f \n", EPS);
+
+    if ( input_frame == L_FRAME16k )
+    {
+        cos_step = 4;
+        cos_max = input_frame;
+    }
+    else // for 32 kHz & 48 kHz
+    {
+        cos_step = 2;
+        cos_max = n0;
+    }
+
+    for ( i = 1; i < n0 / 2; i++ )
+    {
+        Lr = specLr[i];
+        Li = specLi[i];
+        Rr = specRr[i];
+        Ri = specRi[i];
+        // i_for = i * 4;
+        i_for = i * cos_step;
+        eps_cos = s[cos_max - i_for] * EPS;
+        eps_sin = s[i_for] * EPS;
+        Lr += ( specRr[i] * eps_cos + specRi[i] * eps_sin );
+        Li += ( -specRr[i] * eps_sin + specRi[i] * eps_cos );
+        Rr += ( specLr[i] * eps_cos + specLi[i] * eps_sin );
+        Ri += ( -specLr[i] * eps_sin + specLi[i] * eps_cos );
+        tmp1 = wnd[i * step + bias] * gamma / ( sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS );
+
+        specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1;
+        specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1;
+
+        gamma -= igamma;
+    }
+
+    for ( i = n0 >> 1; i < n0; i++ )
+    {
+        Lr = specLr[i];
+        Li = specLi[i];
+        Rr = specRr[i];
+        Ri = specRi[i];
+
+        i_for = ( n0 - i ) * cos_step;
+        eps_cos = s[cos_max - i_for] * EPS;
+        eps_sin = s[i_for] * EPS;
+
+        Lr += ( -specRr[i] * eps_cos + specRi[i] * eps_sin );
+        Li += ( -specRr[i] * eps_sin - specRi[i] * eps_cos );
+        Rr += ( -specLr[i] * eps_cos + specLi[i] * eps_sin );
+        Ri += ( -specLr[i] * eps_sin - specLi[i] * eps_cos );
+
+        tmp1 = wnd[i * step + bias] * gamma / ( sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS );
+
+        specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1;
+        specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1;
+        gamma -= igamma;
+    }
+    //end NTT_REMOVE_EPS_ROM
+#else
     for ( i = 1; i < n0; i++ )
     {
         Lr = specLr[i];
@@ -204,7 +683,9 @@ static void calc_poc(
 
         gamma -= igamma;
     }
+#endif //end !NTT_REMOVE_EPS_ROM
     specPOr[n0] = sign( specLr[i] ) * sign( specRr[i] ) * wnd[i * step + bias] * gamma;
+#endif
 
     rfft_buf[0] = specPOr[0];
     rfft_buf[1] = specPOr[n0];
@@ -327,7 +808,7 @@ static float find_poc_peak(
         cnt[n] = 0;
         cQ[n] = P[Lh - itd_cand[n]];
 
-        peak_range = (int16_t) ( abs( itd_cand[n] ) + hPOC->shift_limit / STEREO_DMX_EVS_FIND_POC_PEAK_TAU ) / STEREO_DMX_EVS_FIND_POC_PEAK_TAU2;
+        peak_range = ( int16_t )( abs( itd_cand[n] ) + hPOC->shift_limit / STEREO_DMX_EVS_FIND_POC_PEAK_TAU ) / STEREO_DMX_EVS_FIND_POC_PEAK_TAU2;
 
         for ( i = 1; i <= peak_range; i++ )
         {
@@ -374,6 +855,7 @@ static float find_poc_peak(
         }
     }
 
+#ifndef NTT_UPDATE_ITD_SW
     if ( on[0] && prev_off[0] )
     {
         *itd = (float) itdLR[0];
@@ -386,6 +868,36 @@ static float find_poc_peak(
     {
         *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
     }
+#else
+    if ( ( on[0] && prev_off[0] ) && ( on[1] && prev_off[1] ) )
+    {
+        *itd = ( Q[0] > Q[1] ) ? (float) itdLR[0] : (float) itdLR[1];
+    }
+    else if ( ( on[0] && prev_off[0] ) && ( Q[0] > ( Q[1] - 0.1 ) ) )
+    {
+        *itd = (float) itdLR[0];
+    }
+    else if ( ( on[1] && prev_off[1] ) && ( Q[1] > ( Q[0] - 0.1 ) ) )
+    {
+        *itd = (float) itdLR[1];
+    }
+    else if ( Q[0] > ( Q[1] + Q_BAND ) )
+    {
+        *itd = (float) itdLR[0];
+    }
+    else if ( Q[1] > ( Q[0] + Q_BAND ) )
+    {
+        *itd = (float) itdLR[1];
+    }
+    else if ( *itd == 0.0 )
+    {
+        *itd = 0;
+    }
+    else
+    {
+        *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
+    }
+#endif
 
     cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) );
 
@@ -648,8 +1160,21 @@ void stereo_dmx_evs_enc(
     float data_f[CPE_CHANNELS][L_FRAME48k];
     float dmx_data[L_FRAME48k];
     int16_t input_frame;
+#ifdef DEBUG_STEREO_DMX
+    static int16_t tlen = -1;
 
-    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );
+    if ( tlen == -1 )
+    {
+        tlen = NS2SA( input_Fs, get_delay( ENC, input_Fs, MONO_FORMAT, NULL, RENDERER_DISABLE, 0 ) + 0.5f );
+    }
+#endif
+
+#ifdef DELAYED_SUM2
+    float wt_delay, wt1, wt2;
+    int16_t itd;
+    itd = min( (int16_t) hStereoDmxEVS->itd, 640 );
+#endif
+    input_frame = ( int16_t )( input_Fs / FRAMES_PER_SEC );
 
     for ( n = 0; n < input_frame; n++ )
     {
@@ -662,8 +1187,55 @@ void stereo_dmx_evs_enc(
         set_f( data_f[1] + n_samples, 0.0f, input_frame - n_samples );
     }
 
+#ifdef STEREO_AVE_DOWNMIX
+    for ( n = 0; n < input_frame; n++ )
+    {
+        dmx_data[n] = ( data_f[0][n] + data_f[1][n] ) / 2.f;
+    }
+#else
     estimate_itd( &corr, hStereoDmxEVS->hPOC, data_f[0], data_f[1], &hStereoDmxEVS->itd, input_frame );
 
+#ifdef DELAYED_SUM2
+    wt_delay = corr /*max(0.0f, (corr-0.3f)*1.5f)*/;
+
+    if ( itd > 1 && itd < 200 )
+    {
+        if ( corr > 0.5f /*0.8f*/ )
+        {
+            wt1 = wt_delay * 2.0f - 1.0f;
+            wt2 = 1.0f - wt1;
+            for ( n = input_frame - 1; n >= itd; n-- )
+            {
+                data_f[0][n] = wt2 * data_f[0][n] + wt1 * data_f[0][n - itd];
+            }
+        }
+    }
+    else if ( itd < -1 && itd > -200 )
+    {
+        if ( corr > 0.5f /*0.8f*/ )
+        {
+            wt1 = wt_delay * 2.0f - 1.0f;
+            wt2 = 1.0f - wt1;
+            for ( n = input_frame - 1; n >= -itd; n-- )
+            {
+                data_f[1][n] = wt2 * data_f[1][n] + wt1 * data_f[1][n + itd];
+            }
+        }
+    }
+#endif
+
+#ifdef DEBUG_STEREO_DMX
+    ///* itd */
+    //fp = fopen("itd.csv", "a");
+    //fprintf(fp, "%f\n", hStereoDmxEVS->itd);
+    //fclose(fp);
+
+    ///* confidence */
+    //fp = fopen("conf.csv", "a");
+    //fprintf(fp, "%f\n", corr);
+    //fclose(fp);
+#endif
+
     if ( hStereoDmxEVS->itd )
     {
         dmx_weight = ( ( hStereoDmxEVS->itd > 0 ) ? ( -1 ) : 1 ) * 0.5f * corr + 0.5f;
@@ -673,11 +1245,30 @@ void stereo_dmx_evs_enc(
         dmx_weight = 0.5f;
     }
 
+#ifdef DEBUG_STEREO_DMX
+    //fp = fopen("weight.csv", "a");
+    //fprintf(fp, "%f\n", dmx_weight);
+    //fclose(fp);
+#endif
+
     create_M_signal( data_f[0], data_f[1], dmx_data, dmx_weight, input_frame, hStereoDmxEVS->s_wnd,
                      hStereoDmxEVS->dmx_weight, hStereoDmxEVS->pre_dmx_energy, hStereoDmxEVS->aux_dmx_energy );
+#endif
 
     mvr2s( dmx_data, data, n_samples );
 
+#ifdef DEBUG_STEREO_DMX
+    fp = fopen( "debug_mono.pcm", "ab" );
+    if ( tlen != 0 )
+    {
+        short buf[L_FRAME48k] = { 0 };
+        fwrite( buf, sizeof( short ), tlen, fp );
+        tlen = 0;
+    }
+    fwrite( data, sizeof( short ), n_samples, fp );
+    fclose( fp );
+#endif
+
     return;
 }
 
@@ -696,7 +1287,7 @@ ivas_error stereo_dmx_evs_init_encoder(
     STEREO_DMX_EVS_ENC_HANDLE hStereoDmxEVS;
     int16_t n, input_frame;
 
-    input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC );
+    input_frame = ( int16_t )( input_Fs / FRAMES_PER_SEC );
 
     hStereoDmxEVS = NULL;
     if ( ( hStereoDmxEVS = (STEREO_DMX_EVS_ENC_HANDLE) count_malloc( sizeof( STEREO_DMX_EVS_ENC_DATA ) ) ) == NULL )
@@ -738,7 +1329,7 @@ ivas_error stereo_dmx_evs_init_encoder(
         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for STEREO_DMX_EVS_POC_DATA\n" ) );
     }
 
-    hStereoDmxEVS->hPOC->shift_limit = (int16_t) ( STEREO_DMX_EVS_SHIFT_LIMIT * input_Fs / 1000 );
+    hStereoDmxEVS->hPOC->shift_limit = ( int16_t )( STEREO_DMX_EVS_SHIFT_LIMIT * input_Fs / 1000 );
     for ( n = 0; n < CPE_CHANNELS; n++ )
     {
         hStereoDmxEVS->hPOC->peakQ[n] = 0.0f;
@@ -766,6 +1357,24 @@ ivas_error stereo_dmx_evs_init_encoder(
     }
     hStereoDmxEVS->hPOC->eps = 2.0f * EVS_PI / ( (float) input_frame );
 
+#ifdef NTT_REMOVE_EPS_ROM
+    if ( input_frame == L_FRAME16k )
+    {
+        hStereoDmxEVS->hPOC->sin = dft_trigo_32k;
+    }
+    else if ( input_frame == L_FRAME32k )
+    {
+        hStereoDmxEVS->hPOC->sin = dft_trigo_32k;
+    }
+    else if ( input_frame == L_FRAME48k )
+    {
+        hStereoDmxEVS->hPOC->sin = dft_trigo_48k;
+    }
+    else
+    {
+        return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" );
+    }
+#else
     if ( input_frame == L_FRAME16k )
     {
         hStereoDmxEVS->hPOC->sin = Stereo_dmx_s_wnd_coef_eps_16k;
@@ -782,6 +1391,8 @@ ivas_error stereo_dmx_evs_init_encoder(
     {
         return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" );
     }
+#endif
+
     hStereoDmxEVS->hPOC->confidence = 0.0f;
 
     *hStereoDmxEVS_out = hStereoDmxEVS;
-- 
GitLab


From e9701ce1b9f9c88b2c30e100658a7381b9ae4833 Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Wed, 17 Aug 2022 15:17:17 +0900
Subject: [PATCH 2/6] Remove unnecessary code.

---
 lib_com/options.h             |  14 +-
 lib_enc/ivas_stereo_dmx_evs.c | 496 +---------------------------------
 2 files changed, 3 insertions(+), 507 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index bbc2b52469..967381aa24 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -150,18 +150,8 @@
 #define DIRAC_DRCT_GAIN_TUNING                          /* issue 64: tuning of DirAC energy-compensation gains */
 
 /* NTT switches */
-///////#define DEBUG_STEREO_DMX
-//#define STEREO_AVE_DOWNMIX
-#ifndef STEREO_AVE_DOWNMIX
-
-//#define NTT_UPDATE_ITD_SW
-//#define NTT_REMOVE_EPS_ROM
-//#define REDUCED_POC
-//#define DELAYED_SUM2
-
-//#define REDUCED_POC5A
-///////////#define REDUCED_POC6
-#endif
+#define NTT_UPDATE_ITD_SW
+#define NTT_REMOVE_EPS_ROM
 
 /* ################## End DEVELOPMENT switches ######################### */
 /* clang-format on */
diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c
index ae83b53ff1..1ce97c39f0 100644
--- a/lib_enc/ivas_stereo_dmx_evs.c
+++ b/lib_enc/ivas_stereo_dmx_evs.c
@@ -78,10 +78,6 @@ static void create_M_signal( const float srcL[], const float srcR[], float dmx[]
 static float find_poc_peak( STEREO_DMX_EVS_POC_HANDLE hPOC, float itd[], const int16_t input_frame, const float ratio );
 static void calc_energy( const float src1[], const float src2[], float energy[], const int16_t input_frame, const float ratio );
 
-#ifdef DEBUG_STEREO_DMX
-FILE *fp;
-#endif
-
 /*-------------------------------------------------------------------*
  * estimate_itd_wnd_fft()
  *
@@ -159,27 +155,14 @@ static void calc_poc(
     const float *c, *s;
     float *P;
     float tmp1, tmp2, Lr, Li, Rr, Ri, gamma, igamma, iN;
-#ifdef REDUCED_POC
-    float specPOr[L_FRAME48k / 2 + 1], specPOi[L_FRAME48k / 2];
-    float aR, aI;
-#else
     float specPOr[L_FRAME48k], specPOi[L_FRAME48k];
-#endif
     float tmpPOC1[L_FRAME48k], tmpPOC2[L_FRAME48k];
     float rfft_buf[L_FRAME48k];
     int16_t step, bias;
-#ifdef REDUCED_POC6
-    float add_pow, prev_pow = 0.0f;
-#endif
 #ifdef NTT_REMOVE_EPS_ROM
     int16_t i_for;
     int16_t cos_step, cos_max;
     float eps_cos, eps_sin, EPS;
-#endif
-#ifdef REDUCED_POC
-#ifdef NTT_REMOVE_EPS_ROM
-    int16_t j;
-#endif
 #endif
 
     /* Initialization */
@@ -192,10 +175,6 @@ static void calc_poc(
     itdLR = hPOC->itdLR;
     igamma = STEREO_DMX_EVS_POC_GAMMA * iN;
     gamma = 1.0f - igamma;
-#ifndef REDUCED_POC
-    set_zero( tmpPOC1, L_FRAME48k );
-    set_zero( tmpPOC2, L_FRAME48k );
-#endif
 
     if ( input_frame == L_FRAME16k )
     {
@@ -211,401 +190,8 @@ static void calc_poc(
     specPOr[0] = sign( specLr[0] ) * sign( specRr[0] ) * wnd[bias];
     specPOi[0] = 0.0f;
 
-#ifdef REDUCED_POC
-#ifdef NTT_REMOVE_EPS_ROM
-    EPS = hPOC->eps;
-
-    if ( input_frame == L_FRAME16k )
-    {
-        cos_step = 4;
-        cos_max = input_frame;
-    }
-    else // for 32 kHz & 48 kHz
-    {
-        cos_step = 2;
-        cos_max = n0;
-    }
-
-    for ( i = 1; i < n0 / 2; i++ )
-    {
-        Lr = specLr[i];
-        Li = specLi[i];
-        Rr = specRr[i];
-        Ri = specRi[i];
-        // i_for = i * 4;
-        i_for = i * cos_step;
-        eps_cos = s[cos_max - i_for] * EPS;
-        eps_sin = s[i_for] * EPS;
-        Lr += ( specRr[i] * eps_cos + specRi[i] * eps_sin );
-        Li += ( -specRr[i] * eps_sin + specRi[i] * eps_cos );
-        Rr += ( specLr[i] * eps_cos + specLi[i] * eps_sin );
-        Ri += ( -specLr[i] * eps_sin + specLi[i] * eps_cos );
-
-        specPOr[i] = ( Lr * Rr + Li * Ri );
-        specPOi[i] = ( Lr * Ri - Li * Rr );
-
-        j = n0 - i;
-        Lr = specLr[j];
-        Li = specLi[j];
-        Rr = specRr[j];
-        Ri = specRi[j];
-        Lr += ( -specRr[j] * eps_cos + specRi[j] * eps_sin );
-        Li += ( -specRr[j] * eps_sin - specRi[j] * eps_cos );
-        Rr += ( -specLr[j] * eps_cos + specLi[j] * eps_sin );
-        Ri += ( -specLr[j] * eps_sin - specLi[j] * eps_cos );
-
-        specPOr[j] = ( Lr * Rr + Li * Ri );
-        specPOi[j] = ( Lr * Ri - Li * Rr );
-    }
-    {
-        i = n0 / 2;
-        Lr = specLr[i] + specRi[i] * EPS;
-        Li = specLi[i] - specRr[i] * EPS;
-        Rr = specRr[i] + specLi[i] * EPS;
-        Ri = specRi[i] - specLr[i] * EPS;
-
-        specPOr[i] = ( Lr * Rr + Li * Ri );
-        specPOi[i] = ( Lr * Ri - Li * Rr );
-    }
-
-
-#else
-    for ( i = 1; i < n0; i++ )
-    {
-        Lr = specLr[i];
-        Li = specLi[i];
-        Rr = specRr[i];
-        Ri = specRi[i];
-
-        Lr += ( specRr[i] * c[i] + specRi[i] * s[i] );
-        Li += ( -specRr[i] * s[i] + specRi[i] * c[i] );
-        Rr += ( specLr[i] * c[i] + specLi[i] * s[i] );
-        Ri += ( -specLr[i] * s[i] + specLi[i] * c[i] );
-
-        specPOr[i] = ( Lr * Rr + Li * Ri );
-        specPOi[i] = ( Lr * Ri - Li * Rr );
-    }
-#endif //end NTT_REMOVE_EPS_ROM
-    for ( i = 1; i < 10; i++ )
-    {
-        specPOr[i] = sign( specPOr[i] ) * 0.866f; // low angles are more frequent
-        specPOi[i] = sign( specPOi[i] ) * 0.5f;
-    }
-    for ( i = 10; i<n0>> 4; i++ )
-    {
-        specPOr[i] = sign( specPOr[i] ) * 0.7071f;
-        specPOi[i] = sign( specPOi[i] ) * 0.7071f;
-    }
-    for ( i = n0 >> 4; i<n0>> 3; i++ )
-    {
-        aR = fabsf( specPOr[i] );
-        aI = fabsf( specPOi[i] );
-        if ( aR > aI )
-        {
-            specPOr[i] = sign( specPOr[i] ) * 0.92388f;  //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
-            specPOi[i] = sign( specPOi[i] ) * 0.382683f; //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
-                                                         //if (i==2)
-        }
-        else
-        {
-            specPOr[i] = sign( specPOr[i] ) * 0.382683f; //(wnd[n0>>2]+wnd[(n0>>2)-1])*0.5f)
-            specPOi[i] = sign( specPOi[i] ) * 0.92388f;  //(wnd[(n0>>2)*3]+wnd[((n0>>2)*3)-1])*0.5f)
-                                                         //if (i==2)
-        }
-    } //// 4level
-    for ( i = n0 >> 3; i<n0>> 2; i++ )
-    {
-        aR = fabsf( specPOr[i] );
-        aI = fabsf( specPOi[i] );
-        if ( aR > aI )
-        {
-            if ( aR * 0.414213f /*tanf(EVS_PI*0.125f)*/ > aI )
-            {
-                specPOr[i] = sign( specPOr[i] ) * 0.980785f; //(wnd[(n0>>3)*7]+wnd[((n0>>3)*7)-1])*0.5f
-                specPOi[i] = sign( specPOi[i] ) * 0.19509f;  //(wnd[n0>>3]+wnd[(n0>>3)-1])*0.5f)
-            }
-            else
-            {
-                specPOr[i] = sign( specPOr[i] ) * 0.83147f; //(wnd[(n0>>3)*5]+wnd[((n0>>3)*5)-1])*0.5f
-                specPOi[i] = sign( specPOi[i] ) * 0.55557f; //(wnd[(n0>>3)*3]+wnd[(n0>>3)*3-1])*0.5f
-            }
-        }
-        else
-        {
-            if ( aR /** 2.414213f*/ /*tanf(EVS_PI*6/16)*/ > aI * 0.41421356f /*cot(PI*3/8)*/ )
-            {
-                specPOr[i] = sign( specPOr[i] ) * 0.55557f;
-                specPOi[i] = sign( specPOi[i] ) * 0.83147f;
-            }
-            else
-            {
-                specPOr[i] = sign( specPOr[i] ) * 0.19509f;
-                specPOi[i] = sign( specPOi[i] ) * 0.980785f;
-            }
-        }
-    }
-    //// 8 level
-    for ( i = 1; i<n0>> 2; i++ )
-    {
-        tmp1 = wnd[i * step + bias] * gamma;
-        specPOr[i] *= tmp1;
-        specPOi[i] *= tmp1;
-        gamma -= igamma;
-    }
-
-#ifdef REDUCED_POC5A
-    for ( i = n0 >> 2; i<n0>> 1 /*min((n0>>1), 320)*/; i++ )
-#else
-    for ( i = n0 >> 2; i<n0>> 1; i++ )
-#endif
-    {
-        aR = fabsf( specPOr[i] );
-        aI = fabsf( specPOi[i] );
-
-#ifdef REDUCED_POC6
-        add_pow = aR + aI;
-        prev_pow = 0.2f * prev_pow + 0.8f * add_pow;
-        if ( prev_pow * 0.2f > add_pow )
-        {
-            //fprintf(stderr, "%d skip_bin %f %f \n", i, prev_pow, aR+aI);
-            specPOr[i] = 0.f;
-            specPOi[i] = 0.f;
-            gamma -= igamma;
-            continue;
-        }
-#endif
-
-        if ( aR > aI )
-        {
-            if ( aR * 0.4142136f /*tanf(EVS_PI*0.125f)*/ > aI )
-            {
-                if ( aR * 0.19891f /*tanf(EVS_PI/16)*/ > aI )
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.995185f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.098017f;
-                }
-                else
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.95694f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.290285f;
-                }
-            }
-            else
-            {
-                if ( aR * 0.66818f /*tanf(EVS_PI*3/16)*/ > aI )
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.881921f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.471397f;
-                }
-                else
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.77301f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.634393f;
-                }
-            }
-        }
-        else
-        {
-            if ( aR /** 2.414213f*/ /*tanf(EVS_PI*6/16)*/ > aI * 0.4142136f /*cot(PI*3/8)*/ )
-            {
-                if ( aR /**1.49661f*/ /*tanf(EVS_PI*5/16)*/ > aI * 0.668179f /*cot(PI*5/16)*/ )
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.634393f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.77301f;
-                }
-                else
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.471397f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.881921f;
-                }
-            }
-            else
-            {
-                if ( aR /**5.027339f*/ /*tanf(EVS_PI*7/16)*/ > aI * 0.198912f /*cot(PI*7/16)*/ )
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.290285f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.95694f;
-                }
-                else
-                {
-                    specPOr[i] = sign( specPOr[i] ) * 0.098017f;
-                    specPOi[i] = sign( specPOi[i] ) * 0.995158f;
-                }
-            }
-        }
-
-        tmp1 = wnd[i * step + bias] * gamma;
-        specPOr[i] *= tmp1;
-        specPOi[i] *= tmp1;
-        gamma -= igamma;
-    }
-
-#ifdef REDUCED_POC5A
-    for ( i = n0 >> 1; i < min( n0, 320 /*240*/ ); i++ )
-#else
-    for ( i = n0 >> 1; i < n0; i++ )
-#endif ////// 16 level
-    {
-        aR = fabsf( specPOr[i] );
-        aI = fabsf( specPOi[i] );
-
-#ifdef REDUCED_POC6
-        add_pow = aR + aI;
-        prev_pow = 0.2f * prev_pow + 0.8f * add_pow;
-        if ( prev_pow * 0.2f > add_pow )
-        {
-            //fprintf(stderr, "%d skip_bin %f %f \n", i, prev_pow, aR+aI);
-            specPOr[i] = 0.f;
-            specPOi[i] = 0.f;
-            gamma -= igamma;
-            continue;
-        }
-#endif
-
-        if ( aR > aI )
-        {
-            if ( aR * 0.414213f /*tanf(EVS_PI*0.125f)*/ > aI )
-            {
-                if ( aR * 0.19891f /*tanf(EVS_PI/16)*/ > aI )
-                {
-                    if ( aR * 0.0984914f /*tanf(EVS_PI/32)*/ > aI )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.99879f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.04907f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.98918f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.14763f;
-                    }
-                }
-                else
-                {
-                    if ( aR * 0.303347f /*tanf(EVS_PI*3/32)*/ > aI )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.970031f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.24298f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.941544f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.33689f;
-                    }
-                }
-            }
-            else
-            {
-                if ( aR * 0.66818f /*tanf(EVS_PI*3/16)*/ > aI )
-                {
-                    if ( aR * 0.534511f /*tanf(EVS_PI*5/32)*/ > aI )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.903989f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.427555f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.857729f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.514103f;
-                    }
-                }
-                else
-                {
-                    if ( aR * 0.8206788f /*tanf(EVS_PI*7/32)*/ > aI )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.803208f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.595699f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.740951f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.671559f;
-                    }
-                }
-            }
-        }
-        else
-        {
-            if ( aR /** 2.414213*/ /*tanf(EVS_PI*0.375f)*/ > aI * 0.4142136f )
-            {
-                if ( aR /**1.49661f*/ /*tanf(EVS_PI*5/16)*/ > aI * 0.6681767f )
-                {
-                    if ( aR /**1.21850f*/ /*tanf(EVS_PI*9/32)*/ > aI * 0.820681f )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.671559f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.740951f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.595699f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.803208f;
-                    }
-                }
-                else
-                {
-                    if ( aR /**1.8708684f*/ /*tanf(EVS_PI*11/32)*/ > aI * 0.5345111f )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.514103f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.857729f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.427555f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.903989f;
-                    }
-                }
-            }
-            else
-            {
-                if ( aR /**5.027339f*/ /*tanf(EVS_PI*7/16)*/ > aI * 0.1989124f )
-                {
-                    if ( aR /**3.296558f*/ /*tanf(EVS_PI*13/32)*/ > aI * 0.3033467f )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.33689f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.941544f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.24298f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.970031f;
-                    }
-                }
-                else
-                {
-                    if ( aR /**10.15317f*/ /*tanf(EVS_PI*15/32)*/ > aI * 0.098491f )
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.14673f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.989177f;
-                    }
-                    else
-                    {
-                        specPOr[i] = sign( specPOr[i] ) * 0.049068f;
-                        specPOi[i] = sign( specPOi[i] ) * 0.998795f;
-                    }
-                }
-            }
-        }
-        tmp1 = wnd[i * step + bias] * gamma;
-        specPOr[i] *= tmp1;
-        specPOi[i] *= tmp1;
-        gamma -= igamma;
-    }
-    if ( i < n0 )
-    {
-        gamma -= igamma * ( n0 - 320 );
-    }
-    for ( /* i = min(n0, 320) */; i < n0; i++ )
-    {
-        specPOr[i] = 0.f;
-        specPOi[i] = 0.f;
-    }
-    specPOr[n0] = sign( specLr[n0] ) * sign( specRr[n0] ) * wnd[i * step + bias] * gamma;
-
-    //end REDUCED_POC
-#else
 #ifdef NTT_REMOVE_EPS_ROM
     EPS = hPOC->eps;
-    //EPS=0.009817f;
-    //fprintf(stderr, "%f \n", EPS);
 
     if ( input_frame == L_FRAME16k )
     {
@@ -624,7 +210,6 @@ static void calc_poc(
         Li = specLi[i];
         Rr = specRr[i];
         Ri = specRi[i];
-        // i_for = i * 4;
         i_for = i * cos_step;
         eps_cos = s[cos_max - i_for] * EPS;
         eps_sin = s[i_for] * EPS;
@@ -684,8 +269,8 @@ static void calc_poc(
         gamma -= igamma;
     }
 #endif //end !NTT_REMOVE_EPS_ROM
+
     specPOr[n0] = sign( specLr[i] ) * sign( specRr[i] ) * wnd[i * step + bias] * gamma;
-#endif
 
     rfft_buf[0] = specPOr[0];
     rfft_buf[1] = specPOr[n0];
@@ -1160,20 +745,7 @@ void stereo_dmx_evs_enc(
     float data_f[CPE_CHANNELS][L_FRAME48k];
     float dmx_data[L_FRAME48k];
     int16_t input_frame;
-#ifdef DEBUG_STEREO_DMX
-    static int16_t tlen = -1;
 
-    if ( tlen == -1 )
-    {
-        tlen = NS2SA( input_Fs, get_delay( ENC, input_Fs, MONO_FORMAT, NULL, RENDERER_DISABLE, 0 ) + 0.5f );
-    }
-#endif
-
-#ifdef DELAYED_SUM2
-    float wt_delay, wt1, wt2;
-    int16_t itd;
-    itd = min( (int16_t) hStereoDmxEVS->itd, 640 );
-#endif
     input_frame = ( int16_t )( input_Fs / FRAMES_PER_SEC );
 
     for ( n = 0; n < input_frame; n++ )
@@ -1187,55 +759,8 @@ void stereo_dmx_evs_enc(
         set_f( data_f[1] + n_samples, 0.0f, input_frame - n_samples );
     }
 
-#ifdef STEREO_AVE_DOWNMIX
-    for ( n = 0; n < input_frame; n++ )
-    {
-        dmx_data[n] = ( data_f[0][n] + data_f[1][n] ) / 2.f;
-    }
-#else
     estimate_itd( &corr, hStereoDmxEVS->hPOC, data_f[0], data_f[1], &hStereoDmxEVS->itd, input_frame );
 
-#ifdef DELAYED_SUM2
-    wt_delay = corr /*max(0.0f, (corr-0.3f)*1.5f)*/;
-
-    if ( itd > 1 && itd < 200 )
-    {
-        if ( corr > 0.5f /*0.8f*/ )
-        {
-            wt1 = wt_delay * 2.0f - 1.0f;
-            wt2 = 1.0f - wt1;
-            for ( n = input_frame - 1; n >= itd; n-- )
-            {
-                data_f[0][n] = wt2 * data_f[0][n] + wt1 * data_f[0][n - itd];
-            }
-        }
-    }
-    else if ( itd < -1 && itd > -200 )
-    {
-        if ( corr > 0.5f /*0.8f*/ )
-        {
-            wt1 = wt_delay * 2.0f - 1.0f;
-            wt2 = 1.0f - wt1;
-            for ( n = input_frame - 1; n >= -itd; n-- )
-            {
-                data_f[1][n] = wt2 * data_f[1][n] + wt1 * data_f[1][n + itd];
-            }
-        }
-    }
-#endif
-
-#ifdef DEBUG_STEREO_DMX
-    ///* itd */
-    //fp = fopen("itd.csv", "a");
-    //fprintf(fp, "%f\n", hStereoDmxEVS->itd);
-    //fclose(fp);
-
-    ///* confidence */
-    //fp = fopen("conf.csv", "a");
-    //fprintf(fp, "%f\n", corr);
-    //fclose(fp);
-#endif
-
     if ( hStereoDmxEVS->itd )
     {
         dmx_weight = ( ( hStereoDmxEVS->itd > 0 ) ? ( -1 ) : 1 ) * 0.5f * corr + 0.5f;
@@ -1245,30 +770,11 @@ void stereo_dmx_evs_enc(
         dmx_weight = 0.5f;
     }
 
-#ifdef DEBUG_STEREO_DMX
-    //fp = fopen("weight.csv", "a");
-    //fprintf(fp, "%f\n", dmx_weight);
-    //fclose(fp);
-#endif
-
     create_M_signal( data_f[0], data_f[1], dmx_data, dmx_weight, input_frame, hStereoDmxEVS->s_wnd,
                      hStereoDmxEVS->dmx_weight, hStereoDmxEVS->pre_dmx_energy, hStereoDmxEVS->aux_dmx_energy );
-#endif
 
     mvr2s( dmx_data, data, n_samples );
 
-#ifdef DEBUG_STEREO_DMX
-    fp = fopen( "debug_mono.pcm", "ab" );
-    if ( tlen != 0 )
-    {
-        short buf[L_FRAME48k] = { 0 };
-        fwrite( buf, sizeof( short ), tlen, fp );
-        tlen = 0;
-    }
-    fwrite( data, sizeof( short ), n_samples, fp );
-    fclose( fp );
-#endif
-
     return;
 }
 
-- 
GitLab


From 5c62f4b74347affe4809046be738028c9a10e4af Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Wed, 17 Aug 2022 16:33:04 +0900
Subject: [PATCH 3/6] Fix to avoid warning in Linux.

---
 lib_enc/ivas_stereo_dmx_evs.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c
index 1ce97c39f0..5c382887c7 100644
--- a/lib_enc/ivas_stereo_dmx_evs.c
+++ b/lib_enc/ivas_stereo_dmx_evs.c
@@ -152,7 +152,10 @@ static void calc_poc(
 {
     int16_t i, n1, n2;
     int16_t n0, *itdLR;
-    const float *c, *s;
+#ifndef NTT_REMOVE_EPS_ROM
+	const float *c;
+#endif
+	const float *s;
     float *P;
     float tmp1, tmp2, Lr, Li, Rr, Ri, gamma, igamma, iN;
     float specPOr[L_FRAME48k], specPOi[L_FRAME48k];
@@ -168,7 +171,9 @@ static void calc_poc(
     /* Initialization */
     iN = 1.0f / (float) input_frame;
 
+#ifndef NTT_REMOVE_EPS_ROM
     c = hPOC->sin + ( input_frame >> 2 );
+#endif
     s = hPOC->sin;
     P = hPOC->P;
     n0 = input_frame / 2;
-- 
GitLab


From 8b75b675d199fee6895e469865847bbe1b2634f3 Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Thu, 25 Aug 2022 14:43:28 +0900
Subject: [PATCH 4/6] Added  inline comments for explanations of
 NTT_UPDATE_ITD_SW codes.

---
 lib_enc/ivas_stereo_dmx_evs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c
index 5c382887c7..afff95b83a 100644
--- a/lib_enc/ivas_stereo_dmx_evs.c
+++ b/lib_enc/ivas_stereo_dmx_evs.c
@@ -459,31 +459,31 @@ static float find_poc_peak(
         *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
     }
 #else
-    if ( ( on[0] && prev_off[0] ) && ( on[1] && prev_off[1] ) )
+    if ( ( on[0] && prev_off[0] ) && ( on[1] && prev_off[1] ) ) /* if both channels have been newly detected as active (possibility of preceding), select channel by peakness Q[] of POC */
     {
         *itd = ( Q[0] > Q[1] ) ? (float) itdLR[0] : (float) itdLR[1];
     }
-    else if ( ( on[0] && prev_off[0] ) && ( Q[0] > ( Q[1] - 0.1 ) ) )
+    else if ( ( on[0] && prev_off[0] ) && ( Q[0] > ( Q[1] - 0.1 ) ) ) /* if channel 0 becomes active, select channel 0*/
     {
         *itd = (float) itdLR[0];
     }
-    else if ( ( on[1] && prev_off[1] ) && ( Q[1] > ( Q[0] - 0.1 ) ) )
+    else if ( ( on[1] && prev_off[1] ) && ( Q[1] > ( Q[0] - 0.1 ) ) ) /* if channel 1 becomes active, select channel 1 */
     {
         *itd = (float) itdLR[1];
     }
-    else if ( Q[0] > ( Q[1] + Q_BAND ) )
+    else if ( Q[0] > ( Q[1] + Q_BAND ) ) /* if no status change, use Q[]*/
     {
         *itd = (float) itdLR[0];
     }
-    else if ( Q[1] > ( Q[0] + Q_BAND ) )
+    else if ( Q[1] > ( Q[0] + Q_BAND ) ) /* if no status change, use Q[]*/
     {
         *itd = (float) itdLR[1];
     }
-    else if ( *itd == 0.0 )
+    else if ( *itd == 0.0 ) /*if no channels are likely to be preceding, follow the status of the previous frame*/
     {
         *itd = 0;
     }
-    else
+    else /*follow the status of the previous frame*/
     {
         *itd = ( *itd > 0 ) ? (float) itdLR[0] : (float) itdLR[1];
     }
-- 
GitLab


From 1e772aee5951f72f99b2d860530286b72f9db813 Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Fri, 26 Aug 2022 11:16:05 +0900
Subject: [PATCH 5/6] Added some more comments for explanations.

---
 lib_enc/ivas_stereo_dmx_evs.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c
index afff95b83a..3daa28aa96 100644
--- a/lib_enc/ivas_stereo_dmx_evs.c
+++ b/lib_enc/ivas_stereo_dmx_evs.c
@@ -203,7 +203,7 @@ static void calc_poc(
         cos_step = 4;
         cos_max = input_frame;
     }
-    else // for 32 kHz & 48 kHz
+    else /* for 32 kHz & 48 kHz */
     {
         cos_step = 2;
         cos_max = n0;
@@ -376,7 +376,7 @@ static float find_poc_peak(
     itd_cand[0] = itd_cand[1] = 0;
     P = hPOC->P;
 
-    for ( i = 1; i < hPOC->shift_limit; i++ )
+    for ( i = 1; i < hPOC->shift_limit; i++ ) /*find peaks of POC P[] with positive and negative ITD */
     {
         if ( P[Lh - i] > Q[0] )
         {
@@ -411,7 +411,7 @@ static float find_poc_peak(
         Q[n] = ( 1.0f - ( cQ[n] / ( peak_range * 2 + 1 ) + eps2 ) / ( Q[n] + eps2 ) );
         Q[n] = max( Q[n], 0.0f );
 
-        if ( on[n] )
+        if ( on[n] ) /*if channel n was active (likely to be preceding) in the previous frame*/
         {
             tmpf = ( 0.3f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit ) * peakQ[n];
             if ( Q[n] < tmpf )
@@ -428,7 +428,7 @@ static float find_poc_peak(
 
             peakQ[n] = max( peakQ[n], Q[n] );
         }
-        else
+        else /*if channel n was not active (not likely to be preceding) in the previous frame*/
         {
             tmpf = ( 0.75f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit );
 
@@ -489,7 +489,7 @@ static float find_poc_peak(
     }
 #endif
 
-    cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) );
+    cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) ); /*higher value indicates higher confidence for one preceding channel*/
 
     return hPOC->confidence = hPOC->confidence * STEREO_DMX_EVS_CORR_FORGETTING + cconfidence * ( 1.0f - STEREO_DMX_EVS_CORR_FORGETTING );
 }
-- 
GitLab


From 9c1913e2b7672483b5176f20b2e7803c1987f7d6 Mon Sep 17 00:00:00 2001
From: Hiromi Sekine <sekine.hiromi@ntt-tx.co.jp>
Date: Tue, 30 Aug 2022 14:25:10 +0900
Subject: [PATCH 6/6] Added some more comments for switches.

---
 lib_com/options.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib_com/options.h b/lib_com/options.h
index 967381aa24..53805eda48 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -150,8 +150,8 @@
 #define DIRAC_DRCT_GAIN_TUNING                          /* issue 64: tuning of DirAC energy-compensation gains */
 
 /* NTT switches */
-#define NTT_UPDATE_ITD_SW
-#define NTT_REMOVE_EPS_ROM
+#define NTT_UPDATE_ITD_SW                               /* contribution 4: Update of ITD switch in stereo downmix for EVS */
+#define NTT_REMOVE_EPS_ROM                              /* contribution 4: Reduction of ROM size in stereo downmix for EVS */
 
 /* ################## End DEVELOPMENT switches ######################### */
 /* clang-format on */
-- 
GitLab