From ac9660dbc389dada7215ad5c0ba6fbf695cc1c94 Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 29 Aug 2024 17:49:09 +0530
Subject: [PATCH 1/2] High MLD fixes for LTV, HP20 filter optimization, float
 code cleanup

[x] HP20 filter recoding for WMOPS optimization
[x] Fix for LTV High MLD due to old_out in ivas_core_dec, decoder_tcx_ivas_fx
[x] Apa-Exec float code cleanup
[x] Fix for high MLD issue for the case - [ltv-stereo at 13.2 kbps, 32kHz in, 32kHz out, DTX on]
---
 lib_com/hp50.c              | 176 ++++++++++
 lib_com/ivas_prot.h         |   7 +
 lib_com/options.h           |   1 +
 lib_dec/core_dec_init_fx.c  |   8 -
 lib_dec/dec_tcx_fx.c        |  13 +-
 lib_dec/fd_cng_dec_fx.c     |  21 +-
 lib_dec/hq_core_dec_fx.c    |   2 -
 lib_dec/ivas_core_dec.c     |   4 +
 lib_dec/ivas_mct_dec.c      |   2 +-
 lib_dec/ivas_tcx_core_dec.c |   8 +-
 lib_dec/jbm_pcmdsp_apa.c    | 645 +++++++++++++++++++++++-------------
 lib_dec/jbm_pcmdsp_apa.h    |   3 +-
 lib_dec/lib_dec_fx.c        |  36 +-
 lib_dec/stat_dec.h          |   2 -
 14 files changed, 649 insertions(+), 279 deletions(-)

diff --git a/lib_com/hp50.c b/lib_com/hp50.c
index 64501b178..0f7931604 100644
--- a/lib_com/hp50.c
+++ b/lib_com/hp50.c
@@ -436,6 +436,181 @@ void hp20( Word16 signal[],     /* i/o: signal to filter                   any *
 }
 
 
+#ifdef HP20_FIX32_RECODING
+void hp20_fix32(
+    Word32 signal_fx[], /* i/o: signal to filter, overwritten in place; samples 0 and 1 are always accessed */
+    const Word16 lg,    /* i  : number of samples in signal_fx[]                                            */
+    Word32 mem_fx[],    /* i/o: filter state: [0]=y2, [1]=y1, [2]=x2, [3]=x1, [4]=prescale of last frame    */
+    const Word32 Fs )   /* i  : sampling rate in Hz; values other than 8000/16000/32000 use the 48000 set   */
+{
+    Word32 i;
+    Word32 a1_fx, a2_fx, b1_fx, b2_fx;
+    Word16 diff_pos, diff_neg; /* norm_l() results, kept 16-bit like the other shift counts */
+#ifdef BASOP_NOGLOB
+    Flag Overflow = 0;
+#endif
+    Word16 prescale, prescaleOld, prescale_current_frame, diff;
+
+    prescale = getScaleFactor32( signal_fx, lg );
+    prescale_current_frame = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) );
+
+
+    prescaleOld = extract_l( mem_fx[4] );
+    /* headroom of the stored input samples x2/x1 once the previous frame's prescale is undone */
+#ifdef BASOP_NOGLOB
+    diff_pos = norm_l( L_shl_o( L_max( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) );
+    diff_neg = norm_l( L_shl_o( L_min( mem_fx[2], mem_fx[3] ), prescaleOld, &Overflow ) );
+#else
+    diff_pos = norm_l( L_shl( L_max( mem_fx[2], mem_fx[3] ), prescaleOld ) );
+    diff_neg = norm_l( L_shl( L_min( mem_fx[2], mem_fx[3] ), prescaleOld ) );
+#endif
+
+    diff = s_max( diff_pos, diff_neg );
+
+    IF( NE_16( diff, 0 ) )
+    {
+        prescale = s_min( prescale, diff );
+    }
+
+    prescale = s_min( 3, sub( 1 + HP20_COEFF_SCALE, prescale ) );
+
+    diff = sub( prescale, prescaleOld );
+#ifdef BASOP_NOGLOB
+    mem_fx[0] = L_shr_o( mem_fx[0], diff, &Overflow );
+    move32();
+    mem_fx[1] = L_shr_o( mem_fx[1], diff, &Overflow );
+    move32();
+    mem_fx[2] = L_shr_o( mem_fx[2], diff, &Overflow );
+    move32();
+    mem_fx[3] = L_shr_o( mem_fx[3], diff, &Overflow );
+    move32();
+#else
+    mem_fx[0] = L_shr( mem_fx[0], diff );
+    move32();
+    mem_fx[1] = L_shr( mem_fx[1], diff );
+    move32();
+    mem_fx[2] = L_shr( mem_fx[2], diff );
+    move32();
+    mem_fx[3] = L_shr( mem_fx[3], diff );
+    move32();
+#endif
+    mem_fx[4] = L_deposit_l( prescale_current_frame );
+    move32();
+
+    IF( EQ_32( Fs, 8000 ) )
+    {
+        /* hp filter 20Hz at 3dB for 8000 Hz input sampling rate
+           [b,a] = butter(2, 20.0/4000.0, 'high');
+           b = [0.988954248067140  -1.977908496134280   0.988954248067140]
+           a =[1.000000000000000  -1.977786483776764   0.978030508491796]*/
+        a1_fx = 1061816033l /* 1.977786483776764 Q29*/;
+        a2_fx = -525076131l /*-0.978030508491796 Q29*/;
+        b1_fx = -1061881538l /*-1.977908496134280 Q29*/;
+        b2_fx = 530940769l /* 0.988954248067140 Q29*/;
+    }
+    ELSE IF( EQ_32( Fs, 16000 ) )
+    {
+        /* hp filter 20Hz at 3dB for 16000 Hz sampling rate
+           [b,a] = butter(2, 20.0/8000.0, 'high');
+           b =[ 0.994461788958195  -1.988923577916390   0.994461788958195]
+           a =[1.000000000000000  -1.988892905899653   0.988954249933127] */
+        a1_fx = 1067778748l /* 1.988892905899653 Q29*/;
+        a2_fx = -530940770l /*-0.988954249933127 Q29*/;
+        b1_fx = -1067795215l /*-1.988923577916390 Q29*/;
+        b2_fx = 533897608l /* 0.994461788958195 Q29*/;
+    }
+    ELSE IF( EQ_32( Fs, 32000 ) )
+    {
+        /* hp filter 20Hz at 3dB for 32000 Hz sampling rate
+           [b,a] = butter(2, 20.0/16000.0, 'high');
+           b =[0.997227049904470  -1.994454099808940   0.997227049904470]
+           a =[1.000000000000000  -1.994446410541927   0.994461789075954]*/
+        a1_fx = 1070760263l /* 1.994446410541927 Q29*/;
+        a2_fx = -533897608l /*-0.994461789075954 Q29*/;
+        b1_fx = -1070764392l /*-1.994454099808940 Q29*/;
+        b2_fx = 535382196l /* 0.997227049904470 Q29*/;
+    }
+    ELSE
+    {
+        /* hp filter 20Hz at 3dB for 48000 Hz sampling rate
+           [b,a] = butter(2, 20.0/24000.0, 'high');
+           b =[ 0.998150511190452  -1.996301022380904   0.998150511190452]
+           a =[1.000000000000000  -1.996297601769122   0.996304442992686]*/
+        a1_fx = 1071754114l /* 1.996297601769122 Q29*/;
+        a2_fx = -534886875l /*-0.996304442992686 Q29*/;
+        b1_fx = -1071755951l /*-1.996301022380904 Q29*/;
+        b2_fx = 535877975l /* 0.998150511190452 Q29*/;
+    }
+    move32();
+    move32();
+    move32();
+    move32();
+    Word64 W_sum, W_y0, W_y1, W_y2;
+    Word32 x0, x1, x2;
+    /* samples 0 and 1: history is still read from mem_fx[] and inputs are shifted by 'prescale' */
+    W_sum = W_mult_32_32( b2_fx, mem_fx[2] );       /* b2*x2 */
+    W_sum = W_mac_32_32( W_sum, b1_fx, mem_fx[3] ); /* b1*x1 */
+    x2 = L_shr( signal_fx[0], prescale );
+    W_sum = W_mac_32_32( W_sum, b2_fx, x2 );        /* b2*x0 */
+    W_sum = W_mac_32_32( W_sum, mem_fx[0], a2_fx ); /* y2*a2 */
+    W_sum = W_mac_32_32( W_sum, mem_fx[1], a1_fx ); /* y1*a1 */
+    W_y2 = W_shl( W_sum, HP20_COEFF_SCALE );
+    signal_fx[0] = W_extract_h( W_shl( W_y2, prescale ) );
+    move32();
+
+    W_sum = W_mult_32_32( b2_fx, mem_fx[3] ); /* b2*x2 */
+    W_sum = W_mac_32_32( W_sum, b1_fx, x2 );  /* b1*x1 */
+    x1 = L_shr( signal_fx[1], prescale );
+    W_sum = W_mac_32_32( W_sum, b2_fx, x1 );                  /* b2*x0 */
+    W_sum = W_mac_32_32( W_sum, mem_fx[1], a2_fx );           /* y2*a2 */
+    W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a1_fx ); /* y1*a1 */
+    W_y1 = W_shl( W_sum, HP20_COEFF_SCALE );
+    signal_fx[1] = W_extract_h( W_shl( W_y1, prescale ) );
+    move32();
+    /* re-scale the running state from 'prescale' to 'prescale_current_frame' for the remaining samples */
+    diff = sub( prescale_current_frame, prescale );
+    W_y1 = W_shr( W_y1, diff );
+    W_y2 = W_shr( W_y2, diff );
+    x2 = L_shr( x2, diff );
+    x1 = L_shr( x1, diff );
+
+    FOR( i = 2; i < lg; i++ )
+    {
+        W_sum = W_mult_32_32( b2_fx, x2 );       /* b2*x2 */
+        W_sum = W_mac_32_32( W_sum, b1_fx, x1 ); /* b1*x1 */
+        x0 = L_shr( signal_fx[i], prescale_current_frame );
+        W_sum = W_mac_32_32( W_sum, b2_fx, x0 );                  /* b2*x0 */
+        W_sum = W_mac_32_32( W_sum, W_extract_h( W_y2 ), a2_fx ); /* y2*a2 */
+        W_sum = W_mac_32_32( W_sum, W_extract_h( W_y1 ), a1_fx ); /* y1*a1 */
+        W_y0 = W_shl( W_sum, HP20_COEFF_SCALE );
+
+        signal_fx[i] = W_extract_h( W_shl( W_y0, prescale_current_frame ) );
+        move32();
+
+        x2 = x1;
+        x1 = x0;
+        W_y2 = W_y1;
+        W_y1 = W_y0;
+
+        move32();
+        move32();
+        move64();
+        move64();
+    }
+
+    mem_fx[0] = W_extract_h( W_y2 );
+    mem_fx[1] = W_extract_h( W_y1 );
+    mem_fx[2] = x2;
+    mem_fx[3] = x1;
+
+    move32();
+    move32();
+    move32();
+    move32();
+
+    return;
+}
+#else
 void hp20_fix32(
     Word32 signal_fx[],
     const Word16 lg,
@@ -620,3 +795,4 @@ void hp20_fix32(
     return;
 }
 #endif
+#endif
diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h
index 501b4816c..5340eaeb3 100644
--- a/lib_com/ivas_prot.h
+++ b/lib_com/ivas_prot.h
@@ -1085,6 +1085,13 @@ void ivas_jbm_dec_feed_tc_to_renderer_fx(
     Word32 *data                                                 /* i/o: transport channels/output synthesis signal                  */
 );
 
+void ivas_jbm_dec_feed_tc_to_renderer_fx(
+    Decoder_Struct *st_ivas,                                    /* i/o: IVAS decoder structure                                      */
+    const int16_t nSamplesForRendering,                         /* i  : number of TC samples available for rendering                */
+    int16_t *nSamplesResidual,                                  /* o  : number of samples not fitting into the renderer grid and buffer for the next call*/
+    Word32 *data                                                 /* i/o: transport channels/output synthesis signal                  */
+);
+
 ivas_error ivas_jbm_dec_set_discard_samples(
     Decoder_Struct *st_ivas                                     /* i/o: main IVAS decoder structre                                  */
 );
diff --git a/lib_com/options.h b/lib_com/options.h
index 31ba46021..aa53a1e54 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -111,6 +111,7 @@
 #define FIX_818_COMPLEXITY_IMPROV /* Fix for issue 818 */
 #define FIX_805_VERIFICATION /* Fix for issue 805 */
 #define FIX_827_HIGH_MLD /* Fix for issue 827 */
+#define HP20_FIX32_RECODING /* Improved hp20 filtering code */
 #endif
 #define ISM_DISABLE
 #define FIX_TMP_714
diff --git a/lib_dec/core_dec_init_fx.c b/lib_dec/core_dec_init_fx.c
index d85ad5481..6ba494a9b 100644
--- a/lib_dec/core_dec_init_fx.c
+++ b/lib_dec/core_dec_init_fx.c
@@ -1454,12 +1454,6 @@ void open_decoder_LPD_ivas_fx(
     IF( st->hTECDec != NULL )
     {
         /*constraint for adaptive bpf, otherwise parameter estimation and post-processing not time aligned*/
-        if ( ( st->tcxonly == 0 ) )
-        {
-            // To be added later
-            // assert(0 == (st->hTcxCfg->lfacnext > 0 ? st->hTcxCfg->lfacnext : 0));
-        }
-
         resetTecDec_Fx( st->hTECDec );
     }
 
@@ -1624,8 +1618,6 @@ void open_decoder_LPD_ivas_fx(
 
             IF( st->hTcxCfg != NULL )
             {
-                hHQ_core->Q_old_wtda = -1; /*To be removed later when hHQ_core->Q_old_wtda vaue is updated*/
-                move16();
                 Copy_Scale_sig( hHQ_core->old_out_fx + NS2SA_FX2( st->output_Fs, N_ZERO_MDCT_NS ), hTcxDec->syn_OverlFB, st->hTcxCfg->tcx_mdct_window_lengthFB, negate( add( hHQ_core->Q_old_wtda, TCX_IMDCT_HEADROOM ) ) );
                 *Q_syn_OverlFB = *Q_old_out;
                 move16();
diff --git a/lib_dec/dec_tcx_fx.c b/lib_dec/dec_tcx_fx.c
index 89f70f960..3db2e2c2d 100644
--- a/lib_dec/dec_tcx_fx.c
+++ b/lib_dec/dec_tcx_fx.c
@@ -3824,11 +3824,14 @@ void decoder_tcx_ivas_fx(
     }
 
     /* Scaling down buffers for decoder_tcx_imdct_fx*/
-    Scale_sig( st->hTcxDec->syn_Overl_TDACFB, L_FRAME_MAX / 2, 1 );                          // Scaling to Q_syn
-    Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, 1 );                             // Scaling to Q_syn
-    Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, negate( st->hHQ_core->Q_old_out ) );    // Scaling to Q_syn
-    Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, negate( st->hHQ_core->Q_old_out ) ); // Scaling to Q_syn
-    st->hHQ_core->Q_old_out = 0;
+    Scale_sig( st->hTcxDec->syn_Overl_TDACFB, L_FRAME_MAX / 2, 1 );                              // Scaling to Q_syn
+    Scale_sig( st->hTcxDec->syn_Overl_TDAC, L_FRAME32k / 2, 1 );                                 // Scaling to Q_syn
+    Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, negate( st->hHQ_core->Q_old_wtda ) );       // Scaling to Q_syn
+    Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, negate( st->hHQ_core->Q_old_wtda_LB ) ); // Scaling to Q_syn
+    st->hHQ_core->Q_old_wtda = 0;
+    move16();
+    st->hHQ_core->Q_old_wtda_LB = 0;
+    move16();
 
     Scale_sig( st->hTcxDec->old_syn_Overl, 320, 1 ); // Scaling to Q_syn
     Copy_Scale_sig_16_32_no_sat( st->old_Aq_12_8_fx, st->old_Aq_12_8_fx_32, M + 1, ( sub( 28, ( sub( 15, norm_s( sub( st->old_Aq_12_8_fx[0], 1 ) ) ) ) ) ) );
diff --git a/lib_dec/fd_cng_dec_fx.c b/lib_dec/fd_cng_dec_fx.c
index caebd763c..219919f6b 100644
--- a/lib_dec/fd_cng_dec_fx.c
+++ b/lib_dec/fd_cng_dec_fx.c
@@ -1236,6 +1236,8 @@ Word16 ApplyFdCng_ivas_fx(
     hFdCngDec = st->hFdCngDec;
     hFdCngCom = hFdCngDec->hFdCngCom;
 
+    Word32 *sidNoiseEst = hFdCngCom->sidNoiseEst;
+
     /* limit L_frame and core fs values for MDCT-Stereo modes which can have higher core sampling than 16kHz, but use a downsampled buffer */
     L_frame = s_min( st->L_frame, L_FRAME16k );
     last_L_frame = s_min( st->last_L_frame, L_FRAME16k );
@@ -1751,10 +1753,14 @@ Word16 ApplyFdCng_ivas_fx(
                 }
                 ELSE
                 {
+                    if ( EQ_16( st->element_mode, IVAS_CPE_DFT ) )
+                    {
+                        sidNoiseEst = hFdCngCom->sidNoiseEstLp;
+                    }
                     /* Interpolate the CLDFB band levels from the SID (partition) levels */
                     IF( GT_16( hFdCngCom->regularStopBand, hFdCngCom->numCoreBands ) )
                     {
-                        scalebands_fx( hFdCngCom->sidNoiseEst, hFdCngCom->part, hFdCngCom->npart, hFdCngCom->midband,
+                        scalebands_fx( sidNoiseEst, hFdCngCom->part, hFdCngCom->npart, hFdCngCom->midband,
                                        hFdCngCom->nFFTpart, sub( hFdCngCom->stopBand, hFdCngCom->startBand ), cngNoiseLevel, 0 );
 
                         *cngNoiseLevel_exp = hFdCngCom->sidNoiseEstExp;
@@ -1775,10 +1781,10 @@ Word16 ApplyFdCng_ivas_fx(
                         move16();
                         IF( hFdCngDec->partNoiseShape[k] != 0 )
                         {
-                            s1 = norm_l( hFdCngCom->sidNoiseEst[k] );
-                            L_tmp = L_shl( hFdCngCom->sidNoiseEst[k], s1 );
+                            s1 = norm_l( sidNoiseEst[k] );
+                            L_tmp = L_shl( sidNoiseEst[k], s1 );
                             L_tmp_exp = sub( hFdCngCom->sidNoiseEstExp, s1 );
-                            L_tmp = BASOP_Util_Add_Mant32Exp( hFdCngCom->sidNoiseEst[k], hFdCngCom->sidNoiseEstExp, DELTA_MANTISSA_W32, DELTA_EXPONENT, &L_tmp_exp );
+                            L_tmp = BASOP_Util_Add_Mant32Exp( sidNoiseEst[k], hFdCngCom->sidNoiseEstExp, DELTA_MANTISSA_W32, DELTA_EXPONENT, &L_tmp_exp );
                             L_tmp = L_shr( L_tmp, 1 );
                             s = add( L_tmp_exp, 1 );
                             num = extract_h( L_tmp );
@@ -1836,7 +1842,6 @@ Word16 ApplyFdCng_ivas_fx(
             {
                 IF( !( LT_16( hFdCngCom->msFrCnt_init_counter, hFdCngCom->msFrCnt_init_thresh ) ) )
                 {
-
                     s2 = negate( sub( WORD32_BITS, 1 ) );
                     /* Shape the SID noise levels in each FFT bin */
                     j = 0;
@@ -1850,10 +1855,10 @@ Word16 ApplyFdCng_ivas_fx(
                         move16();
                         IF( hFdCngDec->partNoiseShape[k] != 0 )
                         {
-                            s1 = norm_l( hFdCngCom->sidNoiseEst[k] );
-                            L_tmp = L_shl( hFdCngCom->sidNoiseEst[k], s1 );
+                            s1 = norm_l( hFdCngCom->sidNoiseEstLp[k] );
+                            L_tmp = L_shl( hFdCngCom->sidNoiseEstLp[k], s1 );
                             L_tmp_exp = sub( hFdCngCom->sidNoiseEstExp, s1 );
-                            L_tmp = BASOP_Util_Add_Mant32Exp( hFdCngCom->sidNoiseEst[k], hFdCngCom->sidNoiseEstExp, DELTA_MANTISSA_W32, DELTA_EXPONENT, &L_tmp_exp );
+                            L_tmp = BASOP_Util_Add_Mant32Exp( hFdCngCom->sidNoiseEstLp[k], hFdCngCom->sidNoiseEstExp, DELTA_MANTISSA_W32, DELTA_EXPONENT, &L_tmp_exp );
                             L_tmp = L_shr( L_tmp, 1 );
                             s = add( L_tmp_exp, 1 );
                             num = extract_h( L_tmp );
diff --git a/lib_dec/hq_core_dec_fx.c b/lib_dec/hq_core_dec_fx.c
index d25aaf286..cd5833bb7 100644
--- a/lib_dec/hq_core_dec_fx.c
+++ b/lib_dec/hq_core_dec_fx.c
@@ -1230,8 +1230,6 @@ void HQ_core_dec_init_fx(
     hHQ_core->Q_old_wtda = 15;
     hHQ_core->Q_old_postdec = 0;
     hHQ_core->Q_old_wtda_LB = 0;
-    hHQ_core->Q_old_out = 0;
-    hHQ_core->Q_old_outLB = 0;
     move16();
     move16();
     move16();
diff --git a/lib_dec/ivas_core_dec.c b/lib_dec/ivas_core_dec.c
index 0a3da565c..c67519b02 100644
--- a/lib_dec/ivas_core_dec.c
+++ b/lib_dec/ivas_core_dec.c
@@ -611,6 +611,10 @@ ivas_error ivas_core_dec_fx(
             Scale_sig( st->hPFstat->mem_pf_in, L_SUBFR, -Qsyn_temp );
             Scale_sig( st->hHQ_core->old_out_LB_fx, L_FRAME32k, -st->hHQ_core->Q_old_wtda_LB );
             Scale_sig( st->hHQ_core->old_out_fx, L_FRAME48k, -st->hHQ_core->Q_old_wtda );
+            st->hHQ_core->Q_old_wtda_LB = 0;
+            move16();
+            st->hHQ_core->Q_old_wtda = 0;
+            move16();
 
             IF( hCPE == NULL )
             {
diff --git a/lib_dec/ivas_mct_dec.c b/lib_dec/ivas_mct_dec.c
index d0526e869..2e2cf4410 100644
--- a/lib_dec/ivas_mct_dec.c
+++ b/lib_dec/ivas_mct_dec.c
@@ -625,7 +625,7 @@ ivas_error ivas_mct_dec_fx(
             {
 
                 Copy_Scale_sig_16_32( synth_fx[n], synth_fx_32[n], L_FRAME48k, Q11 - 0 );
-                Copy_Scale_sig_16_32( hCPE->hCoreCoder[n]->hHQ_core->old_out_fx, hCPE->hCoreCoder[n]->hHQ_core->oldOut_fx, output_frame, Q11 );
+                Copy_Scale_sig_16_32( hCPE->hCoreCoder[n]->hHQ_core->old_out_fx, hCPE->hCoreCoder[n]->hHQ_core->oldOut_fx, output_frame, sub( Q11, hCPE->hCoreCoder[n]->hHQ_core->Q_old_wtda ) );
                 ivas_post_proc_fx( NULL, hCPE, n, synth_fx_32[n], NULL, output_frame, 1, Q11 );
 #ifdef MSAN_FIX
                 Copy_Scale_sig_32_16( synth_fx_32[n], synth_fx[n], output_frame, 0 - Q11 );
diff --git a/lib_dec/ivas_tcx_core_dec.c b/lib_dec/ivas_tcx_core_dec.c
index 7b8e6c48b..c5ccdfba2 100644
--- a/lib_dec/ivas_tcx_core_dec.c
+++ b/lib_dec/ivas_tcx_core_dec.c
@@ -185,8 +185,8 @@ void stereo_tcx_init_dec_fx(
                                                    &st->hTcxDec->Q_syn_Overl,
                                                    &st->hTcxDec->Q_syn_Overl_TDACFB,
                                                    &st->hTcxDec->Q_syn_OverlFB,
-                                                   &st->hHQ_core->Q_old_out,
-                                                   &st->hHQ_core->Q_old_outLB,
+                                                   &st->hHQ_core->Q_old_wtda,
+                                                   &st->hHQ_core->Q_old_wtda_LB,
                                                    &Q_old_Aq_12_8 );
     }
 
@@ -1873,8 +1873,8 @@ static void dec_prm_tcx_ivas_fx(
                                                        &st->hTcxDec->Q_syn_Overl,
                                                        &st->hTcxDec->Q_syn_Overl_TDACFB,
                                                        &st->hTcxDec->Q_syn_OverlFB,
-                                                       &st->hHQ_core->Q_old_out,
-                                                       &st->hHQ_core->Q_old_outLB, &Q_old_Aq_12_8 );
+                                                       &st->hHQ_core->Q_old_wtda,
+                                                       &st->hHQ_core->Q_old_wtda_LB, &Q_old_Aq_12_8 );
         }
 
         st->last_core = st->last_core_from_bs;
diff --git a/lib_dec/jbm_pcmdsp_apa.c b/lib_dec/jbm_pcmdsp_apa.c
index b3a7886ea..d6098aac0 100644
--- a/lib_dec/jbm_pcmdsp_apa.c
+++ b/lib_dec/jbm_pcmdsp_apa.c
@@ -127,9 +127,9 @@ struct apa_state_t
     UWord16 css; /* correlation subsampling per channel */
 
     float targetQuality;
-    Word32 targetQualityQ16;
-    UWord16 qualityred;  /* quality reduction threshold */
-    UWord16 qualityrise; /* quality rising for adaptive quality thresholds */
+    Word32 targetQuality_fx; /* Q16 */
+    UWord16 qualityred;      /* quality reduction threshold */
+    UWord16 qualityrise;     /* quality rising for adaptive quality thresholds */
 
     UWord16 last_pitch;       /* last pitch/sync position */
     UWord16 bad_frame_count;  /* # frames before quality threshold is lowered */
@@ -145,53 +145,33 @@ struct apa_state_t
 
 #ifndef IVAS_FLOAT_FIXED
 static float apa_corrEnergy2dB( float energy, uint16_t corr_len );
-#endif
-Word16 apa_corrEnergy2dB_fx( Word32 energy, Word16 energyExp, Word16 corr_len );
-Word16 apa_getQualityIncreaseForLowEnergy_fx( Word16 energydB );
 
-#ifndef IVAS_FLOAT_FIXED
 static float apa_getQualityIncreaseForLowEnergy( float energydB );
-#endif
-#ifdef IVAS_FLOAT_FIXED
-
-static Word8 logarithmic_search_fx( const apa_state_t *ps,
-                                    const Word16 *signal,
-                                    Word16 s_start,
-                                    Word16 inlen,
-                                    Word16 offset,
-                                    Word16 fixed_pos,
-                                    Word16 corr_len,
-                                    Word16 wss,
-                                    Word16 css,
-                                    Word16 *synchpos );
 
-#else
 static bool logarithmic_search( const apa_state_t *ps, const float *signal, int16_t s_start, uint16_t inlen, uint16_t offset, uint16_t fixed_pos, uint16_t corr_len, uint16_t wss, uint16_t css, int16_t *synchpos );
-#endif
 
-#ifndef IVAS_FLOAT_FIXED
 static bool find_synch( apa_state_t *ps, const float *in, uint16_t l_in, int16_t s_start, uint16_t s_len, int16_t fixed_pos, uint16_t corr_len, uint16_t offset, float *energy, float *quality, int16_t *synch_pos );
-#endif
 
 static bool copy_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
 
 static bool shrink_frm( apa_state_t *ps, const float frm_in[], uint16_t maxScaling, float frm_out[], uint16_t *l_frm_out );
 
 static bool extend_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
+#else
+Word16 apa_corrEnergy2dB_fx( Word32 energy, Word16 energyExp, Word16 corr_len );
 
-#ifdef IVAS_FLOAT_FIXED
-static Word16 find_synch_fx( apa_state_t *ps,
-                             const Word16 *in,
-                             Word16 l_in,
-                             Word16 s_start,
-                             Word16 s_len,
-                             Word16 fixed_pos,
-                             Word16 corr_len,
-                             Word16 offset,
-                             Word16 *energydBQ8,
-                             Word32 *qualityQ16,
-                             Word16 *synch_pos );
-#endif // IVAS_FLOAT_FIXED
+Word16 apa_getQualityIncreaseForLowEnergy_fx( Word16 energydB );
+
+static Word8 logarithmic_search_fx( const apa_state_t *ps, const Word16 *signal, Word16 s_start, Word16 inlen, Word16 offset, Word16 fixed_pos, Word16 corr_len, Word16 wss, Word16 css, Word16 *synchpos );
+
+static Word16 find_synch_fx( apa_state_t *ps, const Word16 *in, Word16 l_in, Word16 s_start, Word16 s_len, Word16 fixed_pos, Word16 corr_len, Word16 offset, Word16 *energydBQ8, Word32 *qualityQ16, Word16 *synch_pos );
+
+static bool copy_frm_fx( apa_state_t *ps, const Word16 frm_in[], Word16 frm_out[], UWord16 *l_frm_out );
+
+static bool shrink_frm_fx( apa_state_t *ps, const Word16 frm_in[], UWord16 maxScaling, Word16 frm_out[], UWord16 *l_frm_out );
+
+static bool extend_frm_fx( apa_state_t *ps, const Word16 frm_in[], Word16 frm_out[], UWord16 *l_frm_out );
+#endif
 
 /*---------------------------------------------------------------------*
  * Public functions
@@ -314,7 +294,7 @@ void apa_reset(
     ps->css = 1;
     move16();
     ps->targetQuality = 0.0f;
-    ps->targetQualityQ16 = 0;
+    ps->targetQuality_fx = 0;
     move32();
 
     ps->qualityred = 0;
@@ -663,7 +643,7 @@ bool apa_set_rate(
 
 /* Set scaling. */
 #ifdef IVAS_FLOAT_FIXED
-bool apa_set_scale(
+bool apa_set_scale_fx(
     apa_state_t *ps,
     UWord16 scale )
 {
@@ -873,6 +853,9 @@ bool apa_set_quality(
     assert( qualityrise > 0 && qualityrise <= 20 );
 
     ps->targetQuality = quality;
+#ifdef IVAS_FLOAT_FIXED
+    ps->targetQuality_fx = float_to_fix( quality, Q16 );
+#endif
     ps->qualityred = qualityred;
     ps->qualityrise = qualityrise;
     ps->bad_frame_count = 0;
@@ -1047,6 +1030,208 @@ bool apa_exit(
 *
 ********************************************************************************
 */
+#ifdef IVAS_FLOAT_FIXED
+UWord8 apa_exec_fx(
+    apa_state_t *ps,     /* i/o: state struct                                  */
+    const Word16 a_in[], /* i  : input samples                                 */
+    UWord16 l_in,        /* i  : number of input samples                       */
+    UWord16 maxScaling,  /* i  : allowed number of inserted/removed samples    */
+    Word16 a_out[],      /* o  : output samples                                */
+    UWord16 *l_out       /* o  : number of output samples                      */
+)
+{
+    UWord16 i;
+    Word16 frm_in[APA_BUF]; /* TODO(mcjbm): this buffer could be smaller - always allocates space for 16 channels */
+    UWord16 l_frm_out;
+    Word16 l_rem;
+    Word32 dl_scaled, dl_copied, l_frm_out_target;
+    Word32 expScaling, actScaling;
+    UWord32 statsResetThreshold, statsResetShift;
+
+    statsResetThreshold = 1637;
+    move32();
+    statsResetShift = 2;
+    move32();
+
+    /* make sure no invalid output is used */
+    *l_out = 0;
+    move16();
+    l_frm_out = 0;
+    move16();
+
+    /* make sure pointer is valid */
+    IF( ps == NULL )
+    {
+        return 1; /* invalid state pointer */
+    }
+
+    /* Convert max_scaling from "per channel" to total (reads ps, so only after the NULL check) */
+    maxScaling = (UWord16) imult3216( maxScaling, ps->num_channels );
+    /* check available rate */
+    IF( ps->rate == 0 )
+    {
+        return 2; /* no rate set in the state */
+    }
+    /* check size of input */
+    IF( NE_32( l_in, ps->l_frm ) )
+    {
+        return 3; /* input length differs from ps->l_frm */
+    }
+
+    /* get target length */
+    test();
+    test();
+    IF( GT_32( ps->scale, 100 ) )
+    {
+        // expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) + 0.5f );
+        expScaling = W_extract_l( W_shr( W_add( W_mult_32_32( Mpy_32_16_1( imult3216( ps->l_frm, sub( ps->scale, 100 ) ), 328 /* 1 / 100.0f in Q15 */ ), UL_addNsD( ps->nFramesSinceSetScale, 1 ) ), 1 ), 1 ) );
+    }
+    ELSE IF( LT_32( ps->scale, 100 ) )
+    {
+        expScaling = L_negate( W_extract_l( W_shr( W_abs( W_sub( W_mult_32_32( Mpy_32_16_1( imult3216( ps->l_frm, sub( ps->scale, 100 ) ), 328 /* 1 / 100.0f in Q15 */ ), UL_addNsD( ps->nFramesSinceSetScale, 1 ) ), 1 ) ), 1 ) ) );
+    }
+    ELSE
+    {
+        expScaling = 0;
+        move32();
+    }
+    actScaling = L_sub( ps->diffSinceSetScale, ps->l_frm );
+    l_frm_out_target = L_sub( expScaling, actScaling );
+
+    /* Wait until we have l_frm outputs samples */
+    /* (required to search for correlation in the past). */
+    /* If we don't have enough samples, simply copy input to output */
+    IF( LT_32( ps->l_buf_out, ps->l_frm ) )
+    {
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            a_out[i] = a_in[i];
+            move16();
+        }
+        l_frm_out = ps->l_frm;
+        move16();
+    }
+    ELSE
+    {
+        Word16 *buf_out_ptr = &( ps->buf_out_fx[ps->l_buf_out - ps->l_frm] );
+        Word16 *frm_in_ptr = &( frm_in[ps->l_frm] );
+
+        /* fill input frame */
+        /* 1st input frame: previous output samples */
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            frm_in[i] = buf_out_ptr[i];
+            move16();
+        }
+        /* 2nd input frame: new input samples */
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            frm_in_ptr[i] = a_in[i];
+            move16();
+        }
+        /* no scaling */
+        IF( EQ_32( ps->scale, 100 ) )
+        {
+            copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+        }
+        /* shrink */
+        ELSE IF( LT_32( ps->scale, 100 ) )
+        {
+            shrink_frm_fx( ps, frm_in, maxScaling, a_out, &l_frm_out );
+        }
+        /* extend */
+        ELSE
+        {
+            extend_frm_fx( ps, frm_in, a_out, &l_frm_out );
+        }
+        /* control the amount/frequency of scaling */
+        IF( NE_32( l_frm_out, ps->l_frm ) )
+        {
+            test();
+            IF( NE_32( maxScaling, 0 ) &&
+                GT_32( abs_s( extract_l( L_sub( ps->l_frm, l_frm_out ) ) ), maxScaling ) )
+            {
+                /* maxScaling exceeded -> discard scaled frame */
+                copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+            }
+            ELSE IF( GT_32( L_abs( l_frm_out_target ), ps->l_frm ) ) /* ignore small difference */
+            {
+                dl_copied = L_sub( l_frm_out_target, ps->l_frm );
+                dl_scaled = L_sub( l_frm_out_target, l_frm_out );
+                /* discard scaled frame if copied frame is closer to target length */
+                IF( LT_32( L_abs( dl_copied ), L_abs( dl_scaled ) ) )
+                {
+                    copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+                }
+            }
+        }
+    }
+
+    /* copy output to internal buffer */
+    /* avoid buffer overflow: */
+    /* discard old samples; always keep at least most recent l_frm samples */
+    IF( GT_32( L_add( ps->l_buf_out, l_frm_out ), ps->buf_out_capacity ) )
+    {
+        Word16 *buf_out_ptr1 = ps->buf_out_fx;
+        Word16 *buf_out_ptr2;
+
+        l_rem = extract_l( L_sub( ps->l_frm, l_frm_out ) );
+        if ( l_rem < 0 )
+        {
+            l_rem = 0;
+            move16();
+        }
+        buf_out_ptr2 = &( ps->buf_out_fx[ps->l_buf_out - l_rem] );
+        FOR( i = 0; i < l_rem; i++ )
+        {
+            buf_out_ptr1[i] = buf_out_ptr2[i];
+            move16();
+        }
+        ps->l_buf_out = l_rem;
+        move16();
+    }
+    /* append new output samples */
+    IF( GT_32( L_add( ps->l_buf_out, l_frm_out ), ps->buf_out_capacity ) )
+    {
+        return 5; /* output still exceeds buf_out_capacity after discarding old samples */
+    }
+    {
+        Word16 *buf_out_ptr = &( ps->buf_out_fx[ps->l_buf_out] );
+        FOR( i = 0; i < l_frm_out; i++ )
+        {
+            buf_out_ptr[i] = a_out[i];
+            move16();
+        }
+    }
+    ps->l_buf_out = (UWord16) L_add( ps->l_buf_out, l_frm_out );
+    move16();
+
+    *l_out = l_frm_out;
+    move16();
+    /* update time */
+    ps->l_in_total = UL_addNsD( ps->l_in_total, ps->l_frm );
+    move32();
+
+    test();
+    IF( LT_32( L_abs( ps->diffSinceSetScale ), L_sub( 0x7FFFFF, L_sub( l_frm_out, ps->l_frm ) ) ) &&
+        LT_64( ps->nFramesSinceSetScale, statsResetThreshold ) )
+    {
+        ps->diffSinceSetScale = L_add( ps->diffSinceSetScale, L_sub( l_frm_out, ps->l_frm ) );
+        move32();
+        ps->nFramesSinceSetScale = UL_addNsD( ps->nFramesSinceSetScale, 1 );
+        move32();
+    }
+    ELSE /* scale statistics down to avoid overflow */
+    {
+        ps->diffSinceSetScale = L_shr( ps->diffSinceSetScale, (Word16) statsResetShift );
+        move32();
+        ps->nFramesSinceSetScale = UL_lshr( ps->nFramesSinceSetScale, (Word16) statsResetShift );
+        move32();
+    }
+
+    return 0;
+}
+#else
 uint8_t apa_exec(
     apa_state_t *ps,     /* i/o: state struct                                  */
     const float a_in[],  /* i  : input samples                                 */
@@ -1222,6 +1407,7 @@ uint8_t apa_exec(
 
     return 0;
 }
+#endif
 
 
 /*---------------------------------------------------------------------*
@@ -1347,8 +1533,6 @@ static void get_scaling_quality(
     return;
 }
 #else
-
-
 static void get_scaling_quality_fx( const apa_state_t *ps,
                                     const Word16 *signal,
                                     Word16 s_len,
@@ -1454,6 +1638,10 @@ static void get_scaling_quality_fx( const apa_state_t *ps,
 }
 
 #endif
+
+
+/* Converts the correlation energy to dB. */
+#ifdef IVAS_FLOAT_FIXED
 Word16 apa_corrEnergy2dB_fx( Word32 energy, Word16 energyExp, Word16 corr_len )
 {
 
@@ -1471,9 +1659,7 @@ Word16 apa_corrEnergy2dB_fx( Word32 energy, Word16 energyExp, Word16 corr_len )
     result = BASOP_Util_lin2dB( L_deposit_l( result ), energyExp, 1 );
     return result;
 }
-
-/* Converts the correlation energy to dB. */
-#ifndef IVAS_FLOAT_FIXED
+#else
 static float apa_corrEnergy2dB(
     float energy,
     uint16_t corr_len )
@@ -1512,8 +1698,7 @@ static float apa_getQualityIncreaseForLowEnergy(
 
     return qualIncForLowEnergy;
 }
-#endif
-
+#else
 Word16 apa_getQualityIncreaseForLowEnergy_fx( Word16 energydBQ8 )
 {
     Word16 qualIncreaseMinEnergy, qualIncreaseMaxEnergy, qualIncForLowEnergy; /* Q8 */
@@ -1552,6 +1737,8 @@ Word16 apa_getQualityIncreaseForLowEnergy_fx( Word16 energydBQ8 )
     }
     return qualIncForLowEnergy;
 }
+#endif
+
 
 /*
 ********************************************************************************
@@ -1646,8 +1833,6 @@ static bool logarithmic_search(
     return 0;
 }
 #else
-
-
 static Word8 logarithmic_search_fx( const apa_state_t *ps,
                                     const Word16 *signal,
                                     Word16 s_start,
@@ -1735,6 +1920,8 @@ static Word8 logarithmic_search_fx( const apa_state_t *ps,
     return 0;
 }
 #endif
+
+
 /*
 ********************************************************************************
 *
@@ -1808,7 +1995,6 @@ static bool find_synch(
     return 0;
 }
 #else
-
 static Word16 find_synch_fx( apa_state_t *ps,
                              const Word16 *in,
                              Word16 l_in,
@@ -1851,6 +2037,8 @@ static Word16 find_synch_fx( apa_state_t *ps,
 }
 
 #endif
+
+
 /*
 ********************************************************************************
 *
@@ -1873,6 +2061,32 @@ static Word16 find_synch_fx( apa_state_t *ps,
 *
 ********************************************************************************
 */
+#ifdef IVAS_FLOAT_FIXED
+static bool copy_frm_fx(
+    apa_state_t *ps,            /* i  : state struct; only ps->l_frm is read here        */
+    const Word16 frm_in_fx[],   /* i  : input samples; copying starts at offset ps->l_frm */
+    Word16 frm_out_fx[],        /* o  : receives ps->l_frm samples                       */
+    UWord16 *l_frm_out )        /* o  : number of samples written (= ps->l_frm)          */
+{
+    UWord16 i;
+
+    /* only 2nd input frame is used: skip the first ps->l_frm samples of the input */
+    frm_in_fx += ps->l_frm;
+
+    /* copy frame: ps->l_frm samples are passed through unmodified (no time scaling) */
+    FOR( i = 0; i < ps->l_frm; i++ )
+    {
+        frm_out_fx[i] = frm_in_fx[i];
+        move16();
+    }
+
+    /* set output length: identical to the input frame length */
+    *l_frm_out = ps->l_frm;
+    move16();
+
+    return 0; /* no failure path in this function: the result is always 0 */
+}
+#else
 static bool copy_frm(
     apa_state_t *ps,
     const float frm_in[],
@@ -1895,6 +2109,7 @@ static bool copy_frm(
 
     return 0;
 }
+#endif
 
 
 /*
@@ -1922,97 +2137,83 @@ static bool copy_frm(
 ********************************************************************************
 */
 #ifdef IVAS_FLOAT_FIXED
-static bool shrink_frm(
+static bool shrink_frm_fx(
     apa_state_t *ps,
-    const float frm_in[],
-    uint16_t maxScaling,
-    float frm_out[],
-    uint16_t *l_frm_out )
+    const Word16 frm_in_fx[],
+    UWord16 maxScaling,
+    Word16 frm_out_fx[],
+    UWord16 *l_frm_out )
 {
     bool findSynchResult = 0;
-    int16_t xtract, l_rem, s_start, s_end;
-    uint16_t i;
-    uint16_t over;
-    float quality = 0.0f;
-    uint16_t l_frm;
-    uint16_t l_seg;
+    Word16 xtract, l_rem, s_start, s_end;
+    UWord16 i;
+    UWord16 over;
+    Word16 energy_fx = 0;
+    Word32 quality_fx = 0;
+    UWord16 l_frm;
+    UWord16 l_seg;
+    move16();
+    move32();
 
     l_frm = ps->l_frm;
+    move16();
     l_seg = ps->l_seg;
+    move16();
 
     /* only 2nd input frame is used */
-    frm_in += l_frm;
+    frm_in_fx += l_frm;
 
-#ifdef IVAS_FLOAT_FIXED
-    Word16 frm_in_fx[APA_BUF];
-    for ( i = 0; i < l_frm /*960*ps->num_channels*/; i++ )
-    {
-        frm_in_fx[i] = (Word16) frm_in[i];
-    }
-#endif
     /* set search range */
     s_start = ( ps->p_min / ps->num_channels ) * ps->num_channels;
-    s_end = s_start + ps->l_search;
-    if ( ( s_end + l_seg ) >= l_frm )
+    s_end = add( s_start, extract_l( ps->l_search ) );
+    IF( GE_32( L_add( s_end, l_seg ), l_frm ) )
     {
-        s_end = ( l_frm - l_seg );
+        s_end = extract_l( L_sub( l_frm, l_seg ) );
     }
 
     /* calculate overlap position */
-#ifdef IVAS_FLOAT_FIXED
-    if ( isSilence_fx( frm_in_fx, l_seg, 10 ) )
-#else
-    if ( isSilence( frm_in, l_seg, 10 ) )
-#endif // !IVAS_FLOAT_FIXED
+    IF( isSilence_fx( frm_in_fx, l_seg, 10 ) )
     {
         /* maximum scaling */
-        // energy = -65;
-        quality = 5;
-        if ( ps->evs_compat_mode == false )
+        energy_fx = -65 * ( 1 << 8 );
+        move16();
+        quality_fx = 5 << Q16;
+        move32();
+        IF( ps->evs_compat_mode == false )
         {
 
             xtract = maxScaling;
+            move16();
             /* take samples already in the renderer buf into account */
-            xtract += ps->l_r_buf;
+            xtract = add( xtract, extract_l( ps->l_r_buf ) );
             /* snap to renderer time slot borders */
-            xtract -= ( ps->l_ts - ( l_frm - xtract + ps->l_r_buf ) % ps->l_ts );
-            while ( xtract < 0 )
+            xtract = sub( xtract, extract_l( L_sub( ps->l_ts, ( L_add( L_sub( l_frm, xtract ), ps->l_r_buf ) ) % ps->l_ts ) ) );
+            WHILE( xtract < 0 )
             {
-                xtract += ps->l_ts;
+                xtract = add( xtract, extract_l( ps->l_ts ) );
             }
-            while ( xtract > ( s_end - ps->num_channels ) )
+            WHILE( GT_32( xtract, sub( s_end, extract_l( ps->num_channels ) ) ) )
             {
                 /* exceeded the possible shrinking, go back one renderer ts*/
-                xtract -= ps->l_ts;
+                xtract = sub( xtract, extract_l( ps->l_ts ) );
             }
         }
-        else if ( maxScaling != 0U && s_end > maxScaling + 1 )
+        ELSE IF( maxScaling != 0U && GT_16( s_end, add( extract_l( maxScaling ), 1 ) ) )
         {
             xtract = maxScaling;
+            move16();
         }
-        else
+        ELSE
         {
             /* set to last valid element (i.e. element[len - 1] but note for stereo last element is last pair of samples) */
-            xtract = s_end - ps->num_channels;
+            xtract = sub( s_end, extract_l( ps->num_channels ) );
         }
     }
-    else
+    ELSE
     {
         /* find synch */
-#ifdef IVAS_FLOAT_FIXED
-        Word16 energyQ8;
-        Word32 qualityQ16 = 0;
-        IF( ps->evs_compat_mode == false )
-        ps->signalScaleForCorrelation += 1;
         scaleSignal16( frm_in_fx, ps->frmInScaled, l_frm, ps->signalScaleForCorrelation );
-        findSynchResult = find_synch_fx( ps, ps->frmInScaled, l_frm, s_start, (uint16_t) ( s_end - s_start ), 0, l_seg, 0, &energyQ8, &qualityQ16, &xtract );
-        // energy = fixedToFloat( energyQ8, 8 );
-        quality = fixedToFloat( qualityQ16, 16 );
-        IF( ps->evs_compat_mode == false )
-        ps->signalScaleForCorrelation -= 1;
-#else
-        findSynchResult = find_synch( ps, frm_in, l_frm, s_start, (uint16_t) ( s_end - s_start ), 0, l_seg, 0, &energy, &quality, &xtract );
-#endif
+        findSynchResult = find_synch_fx( ps, ps->frmInScaled, l_frm, s_start, (UWord16) ( s_end - s_start ), 0, l_seg, 0, &energy_fx, &quality_fx, &xtract );
     }
 
     /* assert synch_pos is cleanly divisible by number of channels */
@@ -2020,42 +2221,50 @@ static bool shrink_frm(
 
     /* set frame overlappable - reset if necessary */
     over = 1;
+    move16();
 
     /* test whether frame has sufficient quality */
-    if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
+    IF( LT_32( quality_fx, L_add( L_sub( ps->targetQuality_fx,
+                                         L_mult0( ps->bad_frame_count, 6554 ) ),
+                                  L_mult0( ps->good_frame_count, 13107 ) ) ) )
     {
         /* not sufficient */
         over = 0;
-        if ( ps->bad_frame_count < ps->qualityred )
+        move16();
+        IF( LT_32( ps->bad_frame_count, ps->qualityred ) )
         {
-            ++ps->bad_frame_count;
+            ps->bad_frame_count = u_extract_l( UL_addNsD( ps->bad_frame_count, 1 ) );
+            move16();
         }
-        if ( ps->good_frame_count > 0U )
+        IF( GT_32( ps->good_frame_count, 0 ) )
         {
-            --ps->good_frame_count;
+            ps->good_frame_count = u_extract_l( UL_subNsD( ps->good_frame_count, 1 ) );
+            move16();
         }
     }
-    else
+    ELSE
     {
         /* sufficient quality */
-        if ( ps->bad_frame_count > 0U )
+        IF( GT_32( ps->bad_frame_count, 0 ) )
         {
-            --ps->bad_frame_count;
+            ps->bad_frame_count = u_extract_l( UL_subNsD( ps->bad_frame_count, 1 ) );
+            move16();
         }
-        if ( ps->good_frame_count < ps->qualityrise )
+        IF( LT_32( ps->good_frame_count, ps->qualityrise ) )
         {
-            ++ps->good_frame_count;
+            ps->good_frame_count = u_extract_l( UL_addNsD( ps->good_frame_count, 1 ) );
+            move16();
         }
     }
 
     /* Calculate output data */
-    if ( over && xtract )
+    test();
+    IF( over && xtract )
     {
-        if ( findSynchResult == 1 )
+        IF( findSynchResult == 1 )
         {
             return 1;
         }
-        Word16 frm_out_fx[960 * 8];
         IF( ps->evs_compat_mode == true )
         {
             // overlapAddEvs_fx( frm_in_fx, frm_in_fx + xtract, frm_out_fx, l_seg, ps->num_channels, ps->win_fx + ps->l_halfwin_fx, ps->win_fx );
@@ -2065,27 +2274,29 @@ static bool shrink_frm(
         {
             overlapAdd( frm_in_fx, frm_in_fx + xtract, frm_out_fx, l_seg, ps->num_channels, ps->win_fx + ps->l_halfwin, ps->win_fx, ps->win_incrementor );
         }
-        for ( i = 0; i < l_seg; i++ )
-            frm_out[i] = (float) frm_out_fx[i];
     }
-    else
+    ELSE
     {
         xtract = 0;
-        for ( i = 0; i < l_seg; i++ )
+        move16();
+        FOR( i = 0; i < l_seg; i++ )
         {
-            frm_out[i] = frm_in[i];
+            frm_out_fx[i] = frm_in_fx[i];
+            move16();
         }
     }
 
     /* append remaining samples */
-    l_rem = l_frm - xtract - l_seg;
-    for ( i = 0; i < l_rem; i++ )
+    l_rem = extract_l( L_sub( L_sub( l_frm, xtract ), l_seg ) );
+    FOR( i = 0; i < l_rem; i++ )
     {
-        frm_out[l_seg + i] = frm_in[l_frm - l_rem + i];
+        frm_out_fx[l_seg + i] = frm_in_fx[l_frm - l_rem + i];
+        move16();
     }
 
     /* set output length */
-    *l_frm_out = l_seg + l_rem;
+    *l_frm_out = u_extract_l( UL_addNsD( l_seg, l_rem ) );
+    move16();
 
     return 0;
 }
@@ -2230,6 +2441,8 @@ static bool shrink_frm(
     return 0;
 }
 #endif
+
+
 /*
 ********************************************************************************
 *
@@ -2251,38 +2464,39 @@ static bool shrink_frm(
 ********************************************************************************
 */
 #ifdef IVAS_FLOAT_FIXED
-static bool extend_frm(
+static bool extend_frm_fx(
     apa_state_t *ps,
-    const float frm_in[],
-    float frm_out[],
-    uint16_t *l_frm_out )
+    const Word16 frm_in_fx[],
+    Word16 frm_out_fx[],
+    UWord16 *l_frm_out )
 {
     bool findSynchResult = 0;
-    uint16_t l_frm_out_target;
-    uint16_t n, i;
-    int16_t N;
-    int16_t s[MAXN + 2], s_max, s_min;
-    int16_t xtract[MAXN + 2], sync_start, s_end;
-    uint16_t over[MAXN + 2];
-    int16_t l_rem;
-    int16_t s_start = 0;
-    float quality = 0.0f;
-    uint16_t l_frm, l_seg;
-    const float *fadeOut, *fadeIn;
-    float *out;
-
+    UWord16 l_frm_out_target;
+    UWord16 n, i;
+    Word16 N;
+    Word16 s[MAXN + 2], s_max, s_min;
+    Word16 xtract[MAXN + 2], sync_start, s_end;
+    UWord16 over[MAXN + 2];
+    Word16 l_rem;
+    Word16 s_start = 0;
+    Word16 energy_fx;
+    Word32 quality_fx = 0;
+    UWord16 l_frm, l_seg;
+    const Word16 *fadeOut_fx, *fadeIn_fx;
+    Word16 *out_fx;
 
     l_frm = ps->l_frm;
     l_seg = ps->l_seg;
 
     /* number of segments/iterations */
-    l_frm_out_target = (uint16_t) ( (float) l_frm * 1.5f );
+    l_frm_out_target = (UWord16) ( L_add( l_frm, L_shr( l_frm, 1 ) ) );
     N = ( l_frm_out_target / l_seg ) - 1;
-    if ( N < 1 )
+    if ( LT_16( N, 1 ) )
     {
         N = 1;
+        move16();
     }
-    if ( N > MAXN )
+    IF( GT_16( N, MAXN ) )
     {
         return 1;
     }
@@ -2308,9 +2522,9 @@ static bool extend_frm(
     }
     /* else, spread linear in between s_min and s_max */
     /* (including s_min and s_max) */
-    else
+    ELSE
     {
-        for ( n = 2; n <= ( N + 1 ); n++ )
+        FOR( n = 2; n <= ( N + 1 ); n++ )
         {
             s[n] = s_min + ( ( s_max - s_min ) * ( n - 2 ) ) / ( N - 1 );
         }
@@ -2328,64 +2542,41 @@ static bool extend_frm(
     over[n] = 1; /* will be reset if overlap is not required */
     /* check end of search region: should be at least p_min */
     /* samples on the left of synch_start */
-    if ( ( s[n] + ps->l_search ) < ( sync_start - ( ps->p_min ) ) )
+    IF( ( s[n] + ps->l_search ) < ( sync_start - ( ps->p_min ) ) )
     {
         s_start = s[n];
         s_end = s_start + ps->l_search;
     }
-    else
+    ELSE
     {
         /* shrink search region to enforce minimum shift */
         s_end = sync_start - ( ps->p_min );
-        if ( s[n] + ps->l_search < sync_start )
+        IF( s[n] + ps->l_search < sync_start )
         {
             s_start = s[n]; /* just do it with normal start position */
         }
-        else if ( n == ( N + 1 ) ) /* move search region left for last segment */
+        ELSE IF( n == ( N + 1 ) ) /* move search region left for last segment */
         {
             s_start = s_end - ( ps->l_search - ps->p_min );
         }
-        else
+        ELSE
         {
             over[n] = 0; /* don't search/overlap (just copy down) */
         }
     }
-#ifdef IVAS_FLOAT_FIXED
-    Word16 frm_in_fx[1920 * 2 * 2 * 2];
-    Word16 max_flag = 0;
-    for ( i = 0; i < 2 * l_frm; i++ )
-    {
-        if ( frm_in[i] > 32767 )
-        {
-            max_flag = 1;
-            frm_in_fx[i] = 32767;
-        }
-        else if ( frm_in[i] < -32767 )
-        {
-            max_flag = 1;
-            frm_in_fx[i] = -32767;
-        }
-        else
-        {
-            frm_in_fx[i] = (Word16) frm_in[i];
-        }
-    }
-#endif
 
-    if ( over[n] )
+    IF( over[n] )
     {
         /* calculate overlap position */
-#ifdef IVAS_FLOAT_FIXED
-        if ( isSilence_fx( frm_in_fx, l_seg, 10 ) )
-#else
-        if ( isSilence( frm_in, l_seg, 10 ) )
-#endif
+        IF( isSilence_fx( frm_in_fx, l_seg, 10 ) )
         {
             /* maximum scaling */
-            // energy = -65;
-            quality = 5;
+            energy_fx = -65 * ( 1 << 8 );
+            move16();
+            quality_fx = 5 << 16;
+            move32();
             xtract[n] = s_start + ps->num_channels;
-            if ( ps->evs_compat_mode == false )
+            IF( ps->evs_compat_mode == false )
             {
                 /* take renderer buffer samples into accout */
                 xtract[n] += ps->l_r_buf;
@@ -2393,86 +2584,73 @@ static bool extend_frm(
                 xtract[n] -= ( ( N - 1 ) * l_seg - xtract[n] + ps->l_r_buf ) % ps->l_ts;
             }
         }
-        else
+        ELSE
         {
-#ifdef IVAS_FLOAT_FIXED
-            Word16 energyQ8 = 0, *frmInScaled;
-            Word32 qualityQ16 = 0;
+            Word16 *frmInScaled;
             frmInScaled = ps->frmInScaled;
-            if ( max_flag )
-            {
-                ps->signalScaleForCorrelation += 1;
-            }
             assert( sizeof( ps->frmInScaled ) / sizeof( ps->frmInScaled[0] ) >= 2 * (size_t) l_frm );
             scaleSignal16( frm_in_fx, frmInScaled, shl( l_frm, 1 ), ps->signalScaleForCorrelation );
-            findSynchResult = find_synch_fx( ps, frmInScaled, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energyQ8, &qualityQ16, &xtract[n] );
-            // energy = fixedToFloat( energyQ8, 8 );
-            quality = fixedToFloat( qualityQ16, 16 );
-            if ( max_flag )
-            {
-                ps->signalScaleForCorrelation -= 1;
-            }
-#else
-            /* find synch */
-            findSynchResult = find_synch( ps, frm_in, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energy, &quality, &xtract[n] );
-#endif
+            findSynchResult = find_synch_fx( ps, frmInScaled, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energy_fx, &quality_fx, &xtract[n] );
         }
         /* assert synch_pos is cleanly divisible by number of channels */
         assert( xtract[n] % ps->num_channels == 0 );
 
         /* test for sufficient quality */
-        if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
+        IF( LT_32( quality_fx, L_add( L_sub( ps->targetQuality_fx,
+                                             L_mult0( ps->bad_frame_count, 6554 ) ),
+                                      L_mult0( ps->good_frame_count, 13107 ) ) ) )
         {
             /* not sufficient */
             over[n] = 0;
+            move16();
             xtract[n] = sync_start;
-            if ( ps->bad_frame_count < ps->qualityred )
+            move16();
+            IF( LT_32( ps->bad_frame_count, ps->qualityred ) )
             {
-                ++ps->bad_frame_count;
+                ps->bad_frame_count = u_extract_l( UL_addNsD( ps->bad_frame_count, 1 ) );
+                move16();
             }
-            if ( ps->good_frame_count > 0U )
+            IF( GT_32( ps->good_frame_count, 0 ) )
             {
-                --ps->good_frame_count;
+                ps->good_frame_count = u_extract_l( UL_subNsD( ps->good_frame_count, 1 ) );
+                move16();
             }
         }
-        else
+        ELSE
         {
             /* sufficient quality */
-            if ( ps->bad_frame_count > 0U )
+            IF( GT_32( ps->bad_frame_count, 0 ) )
             {
-                --ps->bad_frame_count;
+                ps->bad_frame_count = u_extract_l( UL_subNsD( ps->bad_frame_count, 1 ) );
+                move16();
             }
-            if ( ps->good_frame_count < ps->qualityrise )
+            IF( LT_32( ps->good_frame_count, ps->qualityrise ) )
             {
-                ++ps->good_frame_count;
+                ps->good_frame_count = u_extract_l( UL_addNsD( ps->good_frame_count, 1 ) );
+                move16();
             }
         }
-        if ( findSynchResult )
+        IF( findSynchResult )
         {
             return 1;
         }
     }
-    else
+    ELSE
     {
         xtract[n] = sync_start;
+        move16();
     }
 
 
     /* Calculate output data */
-    for ( n = 2; n <= N; n++ )
+    FOR( n = 2; n <= N; n++ )
     {
-        if ( over[n] && xtract[n - 1] + l_seg != xtract[n] )
+        IF( over[n] && xtract[n - 1] + l_seg != xtract[n] )
         {
             /* mix 2nd half of previous segment with 1st half of current segment */
-            fadeOut = frm_in + l_frm + xtract[n - 1] + l_seg;
-            fadeIn = frm_in + l_frm + xtract[n];
-            out = frm_out + ( n - 2 ) * l_seg;
-            Word16 fadeOut_fx[960 * 8], fadeIn_fx[960 * 8], out_fx[960 * 8];
-            for ( i = 0; i < l_seg; i++ )
-            {
-                fadeOut_fx[i] = (Word16) fadeOut[i];
-                fadeIn_fx[i] = (Word16) fadeIn[i];
-            }
+            fadeOut_fx = frm_in_fx + l_frm + xtract[n - 1] + l_seg;
+            fadeIn_fx = frm_in_fx + l_frm + xtract[n];
+            out_fx = frm_out_fx + ( n - 2 ) * l_seg;
             IF( ps->evs_compat_mode == true )
             {
                 // overlapAddEvs_fx( fadeOut_fx, fadeIn_fx, out_fx, l_seg, ps->num_channels, ps->win_fx + ps->l_halfwin_fx, ps->win_fx );
@@ -2482,28 +2660,28 @@ static bool extend_frm(
             {
                 overlapAdd( fadeOut_fx, fadeIn_fx, out_fx, l_seg, ps->num_channels, ps->win_fx + ps->l_halfwin, ps->win_fx, ps->win_incrementor );
             }
-            for ( i = 0; i < l_seg; i++ )
-                out[i] = (float) out_fx[i];
         }
-        else
+        ELSE
         {
             /* just copy down 1st half of current segment (= 2nd half of previous segment) */
-            float *frm_out_ptr;
-            const float *frm_in_ptr;
-            frm_out_ptr = &( frm_out[( n - 2 ) * l_seg] );
-            frm_in_ptr = &( frm_in[l_frm + xtract[n]] );
-            for ( i = 0; i < l_seg; i++ )
+            Word16 *frm_out_ptr;
+            const Word16 *frm_in_ptr;
+            frm_out_ptr = &( frm_out_fx[( n - 2 ) * l_seg] );
+            frm_in_ptr = &( frm_in_fx[l_frm + xtract[n]] );
+            FOR( i = 0; i < l_seg; i++ )
             {
                 frm_out_ptr[i] = frm_in_ptr[i];
+                move16();
             }
         }
     }
 
     /* append remaining samples */
     l_rem = l_frm - ( xtract[N] + l_seg );
-    for ( i = 0; i < l_rem; i++ )
+    FOR( i = 0; i < l_rem; i++ )
     {
-        frm_out[( N - 1 ) * l_seg + i] = frm_in[2 * l_frm - l_rem + i];
+        frm_out_fx[( N - 1 ) * l_seg + i] = frm_in_fx[2 * l_frm - l_rem + i];
+        move16();
     }
 
     /* set output length */
@@ -2511,7 +2689,6 @@ static bool extend_frm(
 
     return 0;
 }
-
 #else
 static bool extend_frm(
     apa_state_t *ps,
diff --git a/lib_dec/jbm_pcmdsp_apa.h b/lib_dec/jbm_pcmdsp_apa.h
index cb3a2fc5b..d521d6c2b 100644
--- a/lib_dec/jbm_pcmdsp_apa.h
+++ b/lib_dec/jbm_pcmdsp_apa.h
@@ -113,7 +113,7 @@ bool apa_set_rate( apa_state_t *ps, const int32_t output_Fs );
  *  Must be in range [APA_MIN_SCALE,APA_MAX_SCALE].
  *  @return 0 on success, 1 on failure */
 #ifdef IVAS_FLOAT_FIXED
-bool apa_set_scale( apa_state_t *s, UWord16 scale );
+bool apa_set_scale_fx( apa_state_t *s, UWord16 scale );
 #else
 bool apa_set_scale( apa_state_t *s, uint16_t scale );
 #endif
@@ -145,5 +145,6 @@ bool apa_set_quality( apa_state_t *s, float quality, uint16_t qualityred, uint16
 bool apa_exit( apa_state_t **s );
 
 uint8_t apa_exec( apa_state_t *s, const float a_in[], uint16_t l_in, uint16_t maxScaling, float a_out[], uint16_t *l_out );
+uint8_t apa_exec_fx( apa_state_t *s, const Word16 a_in[], uint16_t l_in, uint16_t maxScaling, Word16 a_out[], uint16_t *l_out );
 
 #endif /* JBM_PCMDSP_APA_H */
diff --git a/lib_dec/lib_dec_fx.c b/lib_dec/lib_dec_fx.c
index 4dec71a19..ff086ad35 100644
--- a/lib_dec/lib_dec_fx.c
+++ b/lib_dec/lib_dec_fx.c
@@ -80,7 +80,6 @@ struct IVAS_DEC
 #ifndef IVAS_FLOAT_FIXED
     float *apaExecBuffer; /* Buffer for APA scaling */
     float tsm_quality;
-
 #else
     Word32 *apaExecBuffer_fx; /* Buffer for APA scaling */
     Word16 tsm_quality;       /*Q14*/
@@ -1079,33 +1078,34 @@ ivas_error IVAS_DEC_GetSamples(
             return error;
         }
 
-#if 1 // apa_exec
         /* JBM */
         IF( hIvasDec->st_ivas->hDecoderConfig->Opt_tsm )
         {
-            float apaExecBuffer[APA_BUF];
+            IF( apa_set_scale_fx( hIvasDec->hTimeScaler, hIvasDec->tsm_scale ) != 0 )
+            {
+                return IVAS_ERR_UNKNOWN;
+            }
+
+            // temporary Word16 copy of apaExecBuffer_fx, used as both input and output of apa_exec_fx
+            Word16 tmp_apaExecBuffer[APA_BUF];
+
             for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
             {
-                apaExecBuffer[i] = fixedToFloat( hIvasDec->apaExecBuffer_fx[i], Q11 );
+                tmp_apaExecBuffer[i] = extract_l( L_shr( hIvasDec->apaExecBuffer_fx[i], Q12 ) );
             }
-            IF( apa_set_scale( hIvasDec->hTimeScaler, hIvasDec->tsm_scale ) != 0 )
+            IF( apa_exec_fx( hIvasDec->hTimeScaler, tmp_apaExecBuffer, (UWord16) imult3216( hIvasDec->nSamplesFrame, nTransportChannels ), (UWord16) hIvasDec->tsm_max_scaling, tmp_apaExecBuffer, &nTimeScalerOutSamples ) != 0 )
             {
                 return IVAS_ERR_UNKNOWN;
             }
 
-            IF( apa_exec( hIvasDec->hTimeScaler, apaExecBuffer, (UWord16) imult3216( hIvasDec->nSamplesFrame, nTransportChannels ), (UWord16) hIvasDec->tsm_max_scaling, apaExecBuffer, &nTimeScalerOutSamples ) != 0 )
+            for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
             {
-                return IVAS_ERR_UNKNOWN;
+                hIvasDec->apaExecBuffer_fx[i] = L_shl( tmp_apaExecBuffer[i], Q12 );
             }
 
             assert( LE_32( (Word32) nTimeScalerOutSamples, APA_BUF ) );
             nSamplesTcsScaled = idiv1616( extract_l( nTimeScalerOutSamples ), nTransportChannels );
-            for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
-            {
-                hIvasDec->apaExecBuffer_fx[i] = float_to_fix( apaExecBuffer[i], Q11 );
-            }
         }
-#endif
         ELSE
         {
             nSamplesTcsScaled = hIvasDec->nSamplesFrame;
@@ -1354,6 +1354,7 @@ static ivas_error IVAS_DEC_GetTcSamples(
     Decoder_Struct *st_ivas;
     ivas_error error;
 
+
     test();
     IF( hIvasDec == NULL || hIvasDec->st_ivas == NULL )
     {
@@ -1419,14 +1420,12 @@ static ivas_error IVAS_DEC_GetTcSamples(
             }
         }
 
-
         /* Function call: ivas_jbm_dec_tc function */
         IF( NE_32( ( error = ivas_jbm_dec_tc_fx( st_ivas, pcmBuf_fx ) ), IVAS_ERR_OK ) )
         {
             return error;
         }
 
-
         hIvasDec->isInitialized = true; /* Initialization done in ivas_dec() */
 
         test();
@@ -3709,12 +3708,21 @@ static ivas_error IVAS_DEC_VoIP_reconfigure(
             }
             apa_buffer_size = APA_BUF_PER_CHANNEL;
             move16();
+#ifndef IVAS_FLOAT_FIXED
+            free( hIvasDec->apaExecBuffer );
+            IF( ( hIvasDec->apaExecBuffer = malloc( sizeof( float ) * apa_buffer_size * nTransportChannels ) ) == NULL )
+            {
+                return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Could not allocate VoIP handle" );
+            }
+            set_zero( hIvasDec->apaExecBuffer, apa_buffer_size * nTransportChannels );
+#else
             free( hIvasDec->apaExecBuffer_fx );
             IF( ( hIvasDec->apaExecBuffer_fx = malloc( sizeof( Word32 ) * imult1616( apa_buffer_size, (Word16) nTransportChannels ) ) ) == NULL )
             {
                 return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Could not allocate VoIP handle" );
             }
             set_zero_fx( hIvasDec->apaExecBuffer_fx, imult1616( apa_buffer_size, (Word16) nTransportChannels ) );
+#endif
         }
         /* realloc apa_exe_buffer */
     }
diff --git a/lib_dec/stat_dec.h b/lib_dec/stat_dec.h
index 6ba95bf76..cc055680b 100644
--- a/lib_dec/stat_dec.h
+++ b/lib_dec/stat_dec.h
@@ -1269,7 +1269,6 @@ typedef struct hq_dec_structure
 #endif
     Word32 oldOut_fx[L_FRAME48k];  /* HQ core - previous synthesis for OLA */
     Word16 old_out_fx[L_FRAME48k]; /* HQ core - previous synthesis for OLA */
-    Word16 Q_old_out;
 #ifdef IVAS_FLOAT_FIXED
     Word16 exp_old_out;
 #endif
@@ -1278,7 +1277,6 @@ typedef struct hq_dec_structure
     float old_outLB[L_FRAME32k];
 #endif
     Word16 old_out_LB_fx[L_FRAME32k]; /* HQ core - previous synthesis for OLA for Low Band */
-    Word16 Q_old_outLB;
     Word32 old_outLB_fx[L_FRAME32k];
     Word16 q_old_outLB_fx;
 
-- 
GitLab


From 1f6f3d5b00b5bde903819c5f6e3fb807add3f0be Mon Sep 17 00:00:00 2001
From: Sandesh Venkatesh <sandesh.venkatesh@ittiam.com>
Date: Thu, 29 Aug 2024 20:59:05 +0530
Subject: [PATCH 2/2] Fix for EVS JBM streams bitexactness issue

---
 lib_dec/jbm_pcmdsp_apa.c | 206 +++++++++++++++++++++++++++++++++++++++
 lib_dec/jbm_pcmdsp_apa.h |   1 +
 lib_dec/lib_dec_fx.c     |  36 +++++--
 3 files changed, 233 insertions(+), 10 deletions(-)

diff --git a/lib_dec/jbm_pcmdsp_apa.c b/lib_dec/jbm_pcmdsp_apa.c
index d6098aac0..c05aa34d7 100644
--- a/lib_dec/jbm_pcmdsp_apa.c
+++ b/lib_dec/jbm_pcmdsp_apa.c
@@ -1078,6 +1078,211 @@ UWord8 apa_exec_fx(
         return 3;
     }
 
+    /* get target length */
+    test();
+    test();
+    IF( EQ_16( ps->l_frm, 480 ) || EQ_16( ps->l_frm, 960 ) || EQ_16( ps->l_frm, 1920 ) )
+    {
+        /* decompose ps->l_frm into 15<<i, e.g. 480=15<<5 */
+        i = sub( 15 - 4, norm_s( ps->l_frm ) );
+        /* this only works for 20ms framing */
+        assert( ps->l_frm == shl( shr( ps->l_frm, i ), i ) );
+        // assert( i_mult2( sub( ps->scale, 100 ), add( ps->nFramesSinceSetScale, 1 ) ) == ( ps->scale - 100 ) * ( ps->nFramesSinceSetScale + 1 ) );
+        expScaling = L_shr_r( L_mult0( i_mult2( sub( ps->scale, 100 ), add( (Word16) ps->nFramesSinceSetScale, 1 ) ), 19661 /*15*(1<<2)/100.0 Q15*/ ), sub( 15 + 2, i ) );
+    }
+    ELSE
+    {
+        /* decompose ps->l_frm into 5<<i, e.g. 320=5<<6 */
+        i = sub( 15 - 3, norm_s( ps->l_frm ) );
+        /* this only works for 20ms framing */
+        assert( ps->l_frm == shl( shr( ps->l_frm, i ), i ) );
+        // assert( i_mult2( sub( ps->scale, 100 ), add( ps->nFramesSinceSetScale, 1 ) ) == ( ps->scale - 100 ) * ( ps->nFramesSinceSetScale + 1 ) );
+        expScaling = L_shr_r( L_mult0( i_mult2( sub( ps->scale, 100 ), add( (Word16) ps->nFramesSinceSetScale, 1 ) ), 13107 /*5*(1<<3)/100.0 Q15*/ ), sub( 15 + 3, i ) );
+    }
+    actScaling = L_sub( ps->diffSinceSetScale, ps->l_frm );
+    l_frm_out_target = L_sub( expScaling, actScaling );
+
+    /* Wait until we have l_frm output samples */
+    /* (required to search for correlation in the past). */
+    /* If we don't have enough samples, simply copy input to output */
+    IF( LT_32( ps->l_buf_out, ps->l_frm ) )
+    {
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            a_out[i] = a_in[i];
+            move16();
+        }
+        l_frm_out = ps->l_frm;
+        move16();
+    }
+    ELSE
+    {
+        Word16 *buf_out_ptr = &( ps->buf_out_fx[ps->l_buf_out - ps->l_frm] );
+        Word16 *frm_in_ptr = &( frm_in[ps->l_frm] );
+
+        /* fill input frame */
+        /* 1st input frame: previous output samples */
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            frm_in[i] = buf_out_ptr[i];
+            move16();
+        }
+        /* 2nd input frame: new input samples */
+        FOR( i = 0; i < ps->l_frm; i++ )
+        {
+            frm_in_ptr[i] = a_in[i];
+            move16();
+        }
+        /* no scaling */
+        IF( EQ_32( ps->scale, 100 ) )
+        {
+            copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+        }
+        /* shrink */
+        ELSE IF( LT_32( ps->scale, 100 ) )
+        {
+            shrink_frm_fx( ps, frm_in, maxScaling, a_out, &l_frm_out );
+        }
+        /* extend */
+        ELSE
+        {
+            extend_frm_fx( ps, frm_in, a_out, &l_frm_out );
+        }
+        /* control the amount/frequency of scaling */
+        IF( NE_32( l_frm_out, ps->l_frm ) )
+        {
+            test();
+            IF( NE_32( maxScaling, 0 ) &&
+                GT_32( abs_s( extract_l( L_sub( ps->l_frm, l_frm_out ) ) ), maxScaling ) )
+            {
+                /* maxScaling exceeded -> discard scaled frame */
+                copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+            }
+            ELSE IF( GT_32( L_abs( l_frm_out_target ), ps->l_frm ) ) /* ignore small difference */
+            {
+                dl_copied = L_sub( l_frm_out_target, ps->l_frm );
+                dl_scaled = L_sub( l_frm_out_target, l_frm_out );
+                /* discard scaled frame if copied frame is closer to target length */
+                IF( LT_32( L_abs( dl_copied ), L_abs( dl_scaled ) ) )
+                {
+                    copy_frm_fx( ps, frm_in, a_out, &l_frm_out );
+                }
+            }
+        }
+    }
+
+    /* copy output to internal buffer */
+    /* avoid buffer overflow: */
+    /* discard old samples; always keep at least most recent l_frm samples */
+    IF( GT_32( L_add( ps->l_buf_out, l_frm_out ), ps->buf_out_capacity ) )
+    {
+        Word16 *buf_out_ptr1 = ps->buf_out_fx;
+        Word16 *buf_out_ptr2;
+
+        l_rem = extract_l( L_sub( ps->l_frm, l_frm_out ) );
+        if ( l_rem < 0 )
+        {
+            l_rem = 0;
+            move16();
+        }
+        buf_out_ptr2 = &( ps->buf_out_fx[ps->l_buf_out - l_rem] );
+        FOR( i = 0; i < l_rem; i++ )
+        {
+            buf_out_ptr1[i] = buf_out_ptr2[i];
+            move16();
+        }
+        ps->l_buf_out = l_rem;
+        move16();
+    }
+    /* append new output samples */
+    IF( GT_32( L_add( ps->l_buf_out, l_frm_out ), ps->buf_out_capacity ) )
+    {
+        return 5;
+    }
+    {
+        Word16 *buf_out_ptr = &( ps->buf_out_fx[ps->l_buf_out] );
+        FOR( i = 0; i < l_frm_out; i++ )
+        {
+            buf_out_ptr[i] = a_out[i];
+            move16();
+        }
+    }
+    ps->l_buf_out = (UWord16) L_add( ps->l_buf_out, l_frm_out );
+    move16();
+
+    *l_out = l_frm_out;
+    move16();
+    /* update time */
+    ps->l_in_total = UL_addNsD( ps->l_in_total, ps->l_frm );
+    move32();
+
+    test();
+    IF( LT_32( L_abs( ps->diffSinceSetScale ), L_sub( 0x7FFFFF, L_sub( l_frm_out, ps->l_frm ) ) ) &&
+        LT_64( ps->nFramesSinceSetScale, statsResetThreshold ) )
+    {
+        ps->diffSinceSetScale = L_add( ps->diffSinceSetScale, L_sub( l_frm_out, ps->l_frm ) );
+        move32();
+        ps->nFramesSinceSetScale = UL_addNsD( ps->nFramesSinceSetScale, 1 );
+        move32();
+    }
+    ELSE /* scale statistics down to avoid overflow */
+    {
+        ps->diffSinceSetScale = L_shr( ps->diffSinceSetScale, (Word16) statsResetShift );
+        move32();
+        ps->nFramesSinceSetScale = UL_lshr( ps->nFramesSinceSetScale, (Word16) statsResetShift );
+        move32();
+    }
+
+    return 0;
+}
+
+UWord8 apa_exec_ivas_fx(
+    apa_state_t *ps,     /* i/o: state struct                                  */
+    const Word16 a_in[], /* i  : input samples                                 */
+    UWord16 l_in,        /* i  : number of input samples                       */
+    UWord16 maxScaling,  /* i  : allowed number of inserted/removed samples    */
+    Word16 a_out[],      /* o  : output samples                                */
+    UWord16 *l_out       /* o  : number of output samples                      */
+)
+{
+    UWord16 i;
+    Word16 frm_in[APA_BUF]; /* TODO(mcjbm): this buffer could be smaller - always allocates space for 16 channels */
+    UWord16 l_frm_out;
+    Word16 l_rem;
+    Word32 dl_scaled, dl_copied, l_frm_out_target;
+    Word32 expScaling, actScaling;
+    UWord32 statsResetThreshold, statsResetShift;
+
+    statsResetThreshold = 1637;
+    move32();
+    statsResetShift = 2;
+    move32();
+
+    /* make sure no invalid output is used */
+    *l_out = 0;
+    move16();
+    l_frm_out = 0;
+    move16();
+
+    /* make sure pointer is valid; ps must not be dereferenced before this check */
+    IF( ps == NULL )
+    {
+        return 1; /* error: no state struct */
+    }
+
+    /* Convert max_scaling from "per channel" to total (reads ps->num_channels, hence placed after the NULL check) */
+    maxScaling = (UWord16) imult3216( maxScaling, ps->num_channels );
+    /* check available rate */
+    IF( ps->rate == 0 )
+    {
+        return 2; /* error: rate not set */
+    }
+    /* check size of input */
+    IF( NE_32( l_in, ps->l_frm ) )
+    {
+        return 3; /* error: l_in differs from ps->l_frm */
+    }
+
     /* get target length */
     test();
     test();
@@ -1231,6 +1436,7 @@ UWord8 apa_exec_fx(
 
     return 0;
 }
+
 #else
 uint8_t apa_exec(
     apa_state_t *ps,     /* i/o: state struct                                  */
diff --git a/lib_dec/jbm_pcmdsp_apa.h b/lib_dec/jbm_pcmdsp_apa.h
index d521d6c2b..d0de5ea15 100644
--- a/lib_dec/jbm_pcmdsp_apa.h
+++ b/lib_dec/jbm_pcmdsp_apa.h
@@ -145,6 +145,7 @@ bool apa_set_quality( apa_state_t *s, float quality, uint16_t qualityred, uint16
 bool apa_exit( apa_state_t **s );
 
 uint8_t apa_exec( apa_state_t *s, const float a_in[], uint16_t l_in, uint16_t maxScaling, float a_out[], uint16_t *l_out );
+uint8_t apa_exec_ivas_fx( apa_state_t *s, const Word16 a_in[], uint16_t l_in, uint16_t maxScaling, Word16 a_out[], uint16_t *l_out );
 uint8_t apa_exec_fx( apa_state_t *s, const Word16 a_in[], uint16_t l_in, uint16_t maxScaling, Word16 a_out[], uint16_t *l_out );
 
 #endif /* JBM_PCMDSP_APA_H */
diff --git a/lib_dec/lib_dec_fx.c b/lib_dec/lib_dec_fx.c
index ff086ad35..018771d4a 100644
--- a/lib_dec/lib_dec_fx.c
+++ b/lib_dec/lib_dec_fx.c
@@ -1088,21 +1088,37 @@ ivas_error IVAS_DEC_GetSamples(
 
             // tmp apaExecBuffer
             Word16 tmp_apaExecBuffer[APA_BUF];
-
-            for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
+            IF( EQ_16( (Word16) hIvasDec->mode, IVAS_DEC_MODE_EVS ) )
             {
-                tmp_apaExecBuffer[i] = extract_l( L_shr( hIvasDec->apaExecBuffer_fx[i], Q12 ) );
+                for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
+                {
+                    tmp_apaExecBuffer[i] = extract_l( L_shr( hIvasDec->apaExecBuffer_fx[i], Q11 ) );
+                }
+                IF( apa_exec_fx( hIvasDec->hTimeScaler, tmp_apaExecBuffer, (UWord16) imult3216( hIvasDec->nSamplesFrame, nTransportChannels ), (UWord16) hIvasDec->tsm_max_scaling, tmp_apaExecBuffer, &nTimeScalerOutSamples ) != 0 )
+                {
+                    return IVAS_ERR_UNKNOWN;
+                }
+                for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
+                {
+                    hIvasDec->apaExecBuffer_fx[i] = L_shl( tmp_apaExecBuffer[i], Q11 );
+                }
             }
-            IF( apa_exec_fx( hIvasDec->hTimeScaler, tmp_apaExecBuffer, (UWord16) imult3216( hIvasDec->nSamplesFrame, nTransportChannels ), (UWord16) hIvasDec->tsm_max_scaling, tmp_apaExecBuffer, &nTimeScalerOutSamples ) != 0 )
+            ELSE
             {
-                return IVAS_ERR_UNKNOWN;
-            }
+                for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
+                {
+                    tmp_apaExecBuffer[i] = extract_l( L_shr( hIvasDec->apaExecBuffer_fx[i], Q12 ) );
+                }
+                IF( apa_exec_ivas_fx( hIvasDec->hTimeScaler, tmp_apaExecBuffer, (UWord16) imult3216( hIvasDec->nSamplesFrame, nTransportChannels ), (UWord16) hIvasDec->tsm_max_scaling, tmp_apaExecBuffer, &nTimeScalerOutSamples ) != 0 )
+                {
+                    return IVAS_ERR_UNKNOWN;
+                }
 
-            for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
-            {
-                hIvasDec->apaExecBuffer_fx[i] = L_shl( tmp_apaExecBuffer[i], Q12 );
+                for ( int i = 0; i < APA_BUF_PER_CHANNEL * nTransportChannels; ++i )
+                {
+                    hIvasDec->apaExecBuffer_fx[i] = L_shl( tmp_apaExecBuffer[i], Q12 );
+                }
             }
-
             assert( LE_32( (Word32) nTimeScalerOutSamples, APA_BUF ) );
             nSamplesTcsScaled = idiv1616( extract_l( nTimeScalerOutSamples ), nTransportChannels );
         }
-- 
GitLab