From 28c652475434ec6bee1646ff814be438f0853c98 Mon Sep 17 00:00:00 2001 From: vaclav Date: Mon, 25 Aug 2025 13:06:46 +0200 Subject: [PATCH] port CR --- lib_com/cnst.h | 3 + lib_com/options.h | 1 + lib_dec/dec_LPD.c | 8 +++ lib_dec/ivas_core_dec.c | 33 ++++++++++- lib_dec/ivas_cpe_dec.c | 88 +++++++++++++++++++++++++++++ lib_dec/ivas_mdct_core_dec.c | 8 +++ lib_dec/ivas_stereo_mdct_core_dec.c | 8 +++ lib_dec/ivas_tcx_core_dec.c | 8 +++ 8 files changed, 154 insertions(+), 3 deletions(-) diff --git a/lib_com/cnst.h b/lib_com/cnst.h index ce86e4654..585bb4340 100644 --- a/lib_com/cnst.h +++ b/lib_com/cnst.h @@ -526,6 +526,9 @@ enum #define L_FRAME_MAX L_FRAME48k /* Max 20ms frame size @48kHz */ #define L_FRAME_PLUS 1200 /* Max frame size (long TCX frame) */ #define L_MDCT_OVLP_MAX NS2SA( 48000, ACELP_LOOK_NS ) /* = Max mdct overlap */ +#ifdef FIX_1320_STACK_CPE_DECODER +#define L_FRAME_PLUS_INTERNAL 800 /* Max frame size (long TCX frame) at maximum internal sampling rate */ +#endif #define N_TCX10_MAX 480 /* Max size of TCX10 MDCT spectrum */ #define BITS_TEC 1 /* number of bits for TEC */ #define BITS_TFA 1 /* number of bits for TTF */ diff --git a/lib_com/options.h b/lib_com/options.h index d8e6dbc66..14789eea1 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -220,6 +220,7 @@ #define FIX_1369_HQ_LR_OVERFLOW /* FhG: fix BASOP overflow in hq_lr_enc(), brings floating-point code inline with FX */ #define NONBE_1118_EVS_LR_HQ_BITERROR /* VA: issue 1118: fix EVS decoder crash in LR-HQ in case of bit errors */ #define FIX_1139_REV_COLORATION_SHORT_T60 /* Nokia,FhG: Fix issue 1139, prevent sound coloration artefacts at very low reverberation times */ +#define FIX_1320_STACK_CPE_DECODER /* VA: issue 1320: Optimize the stack memory consumption in the CPE decoder */ /* #################### End BASOP porting switches ############################ */ diff --git a/lib_dec/dec_LPD.c b/lib_dec/dec_LPD.c index 89597cae4..bf4aab5e3 100644 --- a/lib_dec/dec_LPD.c +++ b/lib_dec/dec_LPD.c @@ -71,7 +71,11 @@ void decoder_LPD( { int16_t *prm; int16_t param_lpc[NPRM_LPC_NEW]; +#ifdef FIX_1320_STACK_CPE_DECODER + float synth_buf[OLD_SYNTH_INTERNAL_DEC + L_FRAME_PLUS_INTERNAL + M]; +#else float synth_buf[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; +#endif float *synth; float synth_bufFB[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; float *synthFB; @@ -124,7 +128,11 @@ void decoder_LPD( synthFB = synth_bufFB + st->hTcxDec->old_synth_lenFB; mvr2r( st->hTcxDec->old_synth, synth_buf, st->hTcxDec->old_synth_len ); mvr2r( st->hTcxDec->old_synthFB, synth_bufFB, st->hTcxDec->old_synth_lenFB ); +#ifdef FIX_1320_STACK_CPE_DECODER + set_zero( synth, L_FRAME_PLUS_INTERNAL + M ); +#else set_zero( synth, L_FRAME_PLUS + M ); +#endif set_zero( synthFB, L_FRAME_PLUS + M ); diff --git a/lib_dec/ivas_core_dec.c b/lib_dec/ivas_core_dec.c index 9a63526b9..bd65baa90 100644 --- a/lib_dec/ivas_core_dec.c +++ b/lib_dec/ivas_core_dec.c @@ -74,7 +74,12 @@ ivas_error ivas_core_dec( float synth[CPE_CHANNELS][L_FRAME48k]; float tmp_buffer[L_FRAME48k]; int16_t tmps, incr; +#ifdef FIX_1320_STACK_CPE_DECODER + float *bwe_exc_extended[CPE_CHANNELS] = { NULL, NULL }; + int16_t flag_bwe_bws; +#else float bwe_exc_extended[CPE_CHANNELS][L_FRAME32k + NL_BUFF_OFFSET]; +#endif float voice_factors[CPE_CHANNELS][NB_SUBFR16k]; int16_t core_switching_flag[CPE_CHANNELS]; float old_syn_12k8_16k[CPE_CHANNELS][L_FRAME16k]; @@ -201,6 +206,9 @@ ivas_error ivas_core_dec( set_f( voice_factors[n], 0.f, NB_SUBFR16k ); set_f( hb_synth[n], 0.0f, L_FRAME48k ); +#ifdef FIX_1320_STACK_CPE_DECODER + bwe_exc_extended[n] = hb_synth[n]; /* note: reuse the buffer */ +#endif /*------------------------------------------------------------------* * Decision matrix (selection of technologies) @@ -522,6 +530,10 @@ ivas_error ivas_core_dec( * SWB(FB) BWE decoding *---------------------------------------------------------------------*/ +#ifdef FIX_1320_STACK_CPE_DECODER + flag_bwe_bws = ( output_Fs >= 32000 && st->core == ACELP_CORE && st->bwidth > NB && st->bws_cnt > 0 && st->bfi == 0 ); + +#endif if ( st->extl == SWB_TBE || st->extl == FB_TBE || ( st->coder_type != AUDIO && st->coder_type != INACTIVE && st->core_brate >= SID_2k40 && st->core == ACELP_CORE && !st->con_tcx && output_Fs >= 32000 && st->bwidth > NB && st->bws_cnt > 0 ) ) { /* SWB TBE decoder */ @@ -533,12 +545,24 @@ ivas_error ivas_core_dec( fb_tbe_dec( st, tmp_buffer /*fb_exc*/, hb_synth[n], tmp_buffer /*fb_synth_ref*/, output_frame ); } } +#ifdef FIX_1320_STACK_CPE_DECODER + else if ( st->extl == SWB_BWE || st->extl == FB_BWE || flag_bwe_bws ) +#else else if ( st->extl == SWB_BWE || st->extl == FB_BWE || ( output_Fs >= 32000 && st->core == ACELP_CORE && st->bwidth > NB && st->bws_cnt > 0 && !st->ppp_mode_dec && !( st->nelp_mode_dec == 1 && st->bfi == 1 ) ) ) +#endif { /* SWB BWE decoder */ swb_bwe_dec( st, output[n], synth[n], hb_synth[n], use_cldfb_for_dft, output_frame ); } +#ifdef FIX_1320_STACK_CPE_DECODER + if ( ( st->core == ACELP_CORE && ( st->extl == -1 || st->extl == SWB_CNG ) ) && flag_bwe_bws == 0 ) + { + set_f( hb_synth[n], 0.0f, L_FRAME48k ); + } + +#endif + /*---------------------------------------------------------------------* * FEC - recovery after lost HQ core (smoothing of the BWE component) *---------------------------------------------------------------------*/ @@ -572,6 +596,7 @@ ivas_error ivas_core_dec( stereo_icBWE_dec( hCPE, hb_synth[0], hb_synth[1], tmp_buffer /*fb_synth_ref*/, voice_factors[0], output_frame ); } +#ifndef FIX_1320_STACK_CPE_DECODER if ( st->element_mode == EVS_MONO ) { /*----------------------------------------------------------------* @@ -587,7 +612,7 @@ ivas_error ivas_core_dec( st->hPlcInfo->Pitch = 0; } } - +#endif /*----------------------------------------------------------------* * Transition and synchronization of BWE components *----------------------------------------------------------------*/ @@ -602,6 +627,7 @@ ivas_error ivas_core_dec( } else { +#ifndef FIX_1320_STACK_CPE_DECODER if ( st->extl == SWB_BWE_HIGHRATE || st->extl == FB_BWE_HIGHRATE ) { /* HR SWB BWE on top of ACELP@16kHz */ @@ -609,9 +635,12 @@ ivas_error ivas_core_dec( } else { +#endif /* TBE on top of ACELP@16kHz */ tmps = NS2SA( output_Fs, MAX_DELAY_TBE_NS - DELAY_SWB_TBE_16k_NS ); +#ifndef FIX_1320_STACK_CPE_DECODER } +#endif } /* Smooth transitions when switching between different technologies */ @@ -745,11 +774,9 @@ ivas_error ivas_core_dec( } /* n_channels loop */ - #ifdef DEBUG_MODE_INFO output_debug_mode_info_dec( sts, n_channels, output_frame, pitch_buf ); #endif - pop_wmops(); return error; } diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c index 87c480402..3050f8676 100644 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -54,6 +54,10 @@ static void read_stereo_mode_and_bwidth( CPE_DEC_HANDLE hCPE, const Decoder_Stru static void stereo_mode_combined_format_dec( const Decoder_Struct *st_ivas, CPE_DEC_HANDLE hCPE ); +#ifdef FIX_1320_STACK_CPE_DECODER +static ivas_error stereo_dft_dec_main( CPE_DEC_HANDLE hCPE, const int32_t ivas_total_brate, const int16_t n_channels, float *p_res_buf, float *output[], float outputHB[][L_FRAME48k], const int16_t output_frame ); +#endif + /*--------------------------------------------------------------------------* * ivas_cpe_dec() @@ -74,7 +78,11 @@ ivas_error ivas_cpe_dec( int16_t last_bwidth; int16_t tdm_ratio_idx; float outputHB[CPE_CHANNELS][L_FRAME48k]; /* 'float' buffer for output HB synthesis, both channels */ +#ifdef FIX_1320_STACK_CPE_DECODER + float *res_buf = NULL; +#else float res_buf[STEREO_DFT_N_8k]; +#endif CPE_DEC_HANDLE hCPE; Decoder_State **sts; int32_t ivas_total_brate; @@ -258,6 +266,11 @@ ivas_error ivas_cpe_dec( } else { + +#ifdef FIX_1320_STACK_CPE_DECODER + res_buf = outputHB[0]; /* note: temporarily reused buffer */ + +#endif if ( st_ivas->ivas_format == MASA_FORMAT || st_ivas->ivas_format == MASA_ISM_FORMAT ) { nb_bits -= nb_bits_metadata; @@ -446,6 +459,12 @@ ivas_error ivas_cpe_dec( if ( hCPE->element_mode == IVAS_CPE_DFT && !( hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) { +#ifdef FIX_1320_STACK_CPE_DECODER + if ( ( error = stereo_dft_dec_main( hCPE, ivas_total_brate, n_channels, res_buf, output, outputHB, output_frame ) ) != IVAS_ERR_OK ) + { + return error; + } +#else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX]; /* core decoder */ @@ -480,6 +499,7 @@ ivas_error ivas_cpe_dec( { stereo_dft_dec_synthesize( hCPE, DFT, n, output[n], output_frame ); } +#endif } else if ( hCPE->element_mode == IVAS_CPE_TD ) { @@ -604,6 +624,74 @@ ivas_error ivas_cpe_dec( return error; } +#ifdef FIX_1320_STACK_CPE_DECODER + +/*------------------------------------------------------------------------- + * stereo_dft_dec_main() + * + * DFT decoder main function + *-------------------------------------------------------------------------*/ + +static ivas_error stereo_dft_dec_main( + CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ + const int32_t ivas_total_brate, /* i : IVAS total bitrate */ + const int16_t n_channels, /* i : number of channels to be decoded */ + float *p_res_buf, /* i : DFT stereo residual S signal */ + float *output[], /* o : output synthesis signal */ + float outputHB[][L_FRAME48k], /* o : output HB synthesis signal */ + const int16_t output_frame /* i : output frame length per channel */ +) +{ + float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX]; + int16_t n; + Decoder_State *st0; + ivas_error error; + + st0 = hCPE->hCoreCoder[0]; + + /* copy from temporary buffer */ + if ( hCPE->hStereoDft->res_cod_band_max > 0 && !st0->bfi ) + { + mvr2r( p_res_buf, DFT[1], STEREO_DFT_N_8k ); + } + + /* core decoder */ + if ( ( error = ivas_core_dec( NULL, NULL, hCPE, NULL, n_channels, output, outputHB, DFT, 0 ) ) != IVAS_ERR_OK ) + { + return error; + } + + /* DFT Stereo residual decoding */ + if ( hCPE->hStereoDft->res_cod_band_max > 0 && !st0->bfi ) + { + stereo_dft_dec_res( hCPE, DFT[1] /*res_buf*/, output[1] ); + + stereo_dft_dec_analyze( hCPE, output[1], DFT, 1, L_FRAME8k, output_frame, DFT_STEREO_DEC_ANA_LB, 0, 0 ); + } + + /* DFT stereo CNG */ + stereo_dtf_cng( hCPE, ivas_total_brate, DFT, output_frame ); + + /* decoding */ + if ( hCPE->nchan_out == 1 ) + { + stereo_dft_unify_dmx( hCPE->hStereoDft, st0, DFT, hCPE->input_mem[1], hCPE->hStereoCng->prev_sid_nodata ); + } + else + { + stereo_dft_dec( hCPE->hStereoDft, st0, DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0, 0, 0, 0, 0, 0, MAX_PARAM_SPATIAL_SUBFRAMES ); + } + + /* synthesis iFFT */ + for ( n = 0; n < hCPE->nchan_out; n++ ) + { + stereo_dft_dec_synthesize( hCPE, DFT, n, output[n], output_frame ); + } + + return IVAS_ERR_OK; +} + +#endif /*------------------------------------------------------------------------- * create_cpe_dec() diff --git a/lib_dec/ivas_mdct_core_dec.c b/lib_dec/ivas_mdct_core_dec.c index dd0bfac76..29ca975ee 100644 --- a/lib_dec/ivas_mdct_core_dec.c +++ b/lib_dec/ivas_mdct_core_dec.c @@ -804,7 +804,11 @@ void ivas_mdct_core_reconstruct( int16_t L_frame[CPE_CHANNELS], L_frameTCX[CPE_CHANNELS], nSubframes[CPE_CHANNELS]; int16_t L_frame_global[CPE_CHANNELS], L_frame_globalTCX[CPE_CHANNELS]; /* Synth */ +#ifdef FIX_1320_STACK_CPE_DECODER + float synth_buf[OLD_SYNTH_INTERNAL_DEC + L_FRAME_PLUS_INTERNAL + M]; +#else float synth_buf[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; +#endif float *synth; float synth_bufFB[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; float *synthFB; @@ -841,7 +845,11 @@ void ivas_mdct_core_reconstruct( synthFB = synth_bufFB + st->hTcxDec->old_synth_lenFB; mvr2r( st->hTcxDec->old_synth, synth_buf, st->hTcxDec->old_synth_len ); mvr2r( st->hTcxDec->old_synthFB, synth_bufFB, st->hTcxDec->old_synth_lenFB ); +#ifdef FIX_1320_STACK_CPE_DECODER + set_zero( synth, L_FRAME_PLUS_INTERNAL + M ); +#else set_zero( synth, L_FRAME_PLUS + M ); +#endif set_zero( synthFB, L_FRAME_PLUS + M ); if ( st->core != ACELP_CORE ) diff --git a/lib_dec/ivas_stereo_mdct_core_dec.c b/lib_dec/ivas_stereo_mdct_core_dec.c index 5c2ba97c2..5b3839aef 100644 --- a/lib_dec/ivas_stereo_mdct_core_dec.c +++ b/lib_dec/ivas_stereo_mdct_core_dec.c @@ -156,7 +156,9 @@ void stereo_mdct_core_dec( float *x[CPE_CHANNELS][NB_DIV]; /*needed to allocate N_MAX to prevent stereo switching crash */ +#ifndef FIX_1320_STACK_CPE_DECODER float x_0_buf[CPE_CHANNELS][N_MAX]; +#endif float *x_0[CPE_CHANNELS][NB_DIV]; /* Concealment */ @@ -202,9 +204,15 @@ void stereo_mdct_core_dec( x[ch][0] = &signal_out_tmp[ch][0]; x[ch][1] = &signal_out_tmp[ch][0] + L_FRAME_PLUS / 2; +#ifdef FIX_1320_STACK_CPE_DECODER + set_zero( signal_outFB_tmp[ch], N_MAX ); /* length of N_MAX is needed to prevent stereo switching crash -> reuse buffer signal_outFB_tmp[][] */ + x_0[ch][0] = &signal_outFB_tmp[ch][0]; + x_0[ch][1] = &signal_outFB_tmp[ch][0] + L_FRAME48k / 2; +#else set_zero( x_0_buf[ch], N_MAX ); x_0[ch][0] = &x_0_buf[ch][0]; x_0[ch][1] = &x_0_buf[ch][0] + L_FRAME48k / 2; +#endif nTnsBitsTCX10[ch][0] = 0; nTnsBitsTCX10[ch][1] = 0; diff --git a/lib_dec/ivas_tcx_core_dec.c b/lib_dec/ivas_tcx_core_dec.c index 22080cb64..62eabd32a 100644 --- a/lib_dec/ivas_tcx_core_dec.c +++ b/lib_dec/ivas_tcx_core_dec.c @@ -179,7 +179,11 @@ void stereo_tcx_core_dec( Word16 Aind[M + 1], lspind[M]; /*Synth*/ +#ifdef FIX_1320_STACK_CPE_DECODER + float synth_buf[OLD_SYNTH_INTERNAL_DEC + L_FRAME_PLUS_INTERNAL + M]; +#else float synth_buf[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; +#endif float *synth; float synth_bufFB[OLD_SYNTH_SIZE_DEC + L_FRAME_PLUS + M]; float *synthFB; @@ -246,7 +250,11 @@ void stereo_tcx_core_dec( synthFB = synth_bufFB + hTcxDec->old_synth_lenFB; mvr2r( hTcxDec->old_synth, synth_buf, hTcxDec->old_synth_len ); mvr2r( hTcxDec->old_synthFB, synth_bufFB, hTcxDec->old_synth_lenFB ); +#ifdef FIX_1320_STACK_CPE_DECODER + set_zero( synth, L_FRAME_PLUS_INTERNAL + M ); +#else set_zero( synth, L_FRAME_PLUS + M ); +#endif set_zero( synthFB, L_FRAME_PLUS + M ); #ifdef DEBUG_MODE_INFO_PLC -- GitLab