diff --git a/lib_com/options.h b/lib_com/options.h
index 10396a5c34071df32d355449d6e62d8717e547a8..6f6cec49122bf31fb5636b1bca14c260d0a992f5 100644
--- a/lib_com/options.h
+++ b/lib_com/options.h
@@ -96,6 +96,7 @@
 #define HARM_2336_DOTP                                  /* VA: basop 2336; Harmonisation of some dot_product function + some BE optimisation */
 #define FIX_2431_AVOID_CALLOC                           /* VA: basp issue 2431: avoid use of calloc() */
 #define FIX_2424_REMOVE_GAUSS_L2_ENC                    /* VA: basop issue 2424: Remove duplicated code in gauss_L2_ivas_fx() */
+#define FIX_MDCT_STEREO_ENC_STACK                       /* VA: basop issue 2428: Move the IGF temporary 32-bit power spectrum buffers out of stereo_mdct_core_enc_fx() (the highest stack) into ProcessIGF_ivas_fx_64() / ProcessStereoIGF_fx_64() */
 
 /* #################### End BE switches ################################## */
 
diff --git a/lib_com/prot_fx.h b/lib_com/prot_fx.h
index 27e4c19293b0a4ab4f6f065c323d1e73e499f68f..dd0315055eebd4ec7c13d02b6c5e95371af9b189 100644
--- a/lib_com/prot_fx.h
+++ b/lib_com/prot_fx.h
@@ -10027,6 +10027,25 @@ void ProcessStereoIGF_fx(
     const Word32 element_brate, /* i : element bitrate */
     const Word16 mct_on );
 
+#ifdef FIX_MDCT_STEREO_ENC_STACK
+void ProcessStereoIGF_fx_64(
+    STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct,                   /* MDCT stereo encoder structure, forwarded unchanged to ProcessStereoIGF_fx() */
+    Encoder_State *sts[CPE_CHANNELS],                          /* i  : Encoder state, one pointer per channel */
+    Word16 ms_mask[2][MAX_SFB],                                /* i  : bandwise MS mask */
+    Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV],         /* i  : MDCT spectrum for ITF */
+    Word64 powerSpec64[CPE_CHANNELS][N_MAX],                   /* i  : 64-bit MDCT^2 + MDST^2 spectrum, or estimate (only read by the definition) */
+    Word16 exp_powerSpec64[CPE_CHANNELS][NB_DIV],              /* i  : exponent of powerSpec64, one value per channel and subframe */
+    Word32 *pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV],      /* i  : inverse power spectrum */
+    Word16 *q_pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV],    /* i/o: Q of pPowerSpectrumMsInv_fx */
+    Word32 *inv_spectrum_fx[CPE_CHANNELS][NB_DIV],             /* i  : inverse spectrum */
+    Word16 exp_inv_spectrum_fx[CPE_CHANNELS],                  /* i/o: exp of inv_spectrum_fx */
+    const Word16 frameno,                                      /* i  : index of current subframe
 */
+    const Word16 sp_aud_decision0,                             /* i  : sp_aud_decision0 */
+    const Word32 element_brate,                                /* i  : element bitrate */
+    const Word16 mct_on                                        /* i  : flag MCT mode */
+);
+
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
 void IGFEncApplyStereo_fx(
     STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct, /* i/o: MDCT stereo encoder structure */
     Word16 ms_mask[2][MAX_SFB], /* i : bandwise MS mask */
diff --git a/lib_enc/ivas_stereo_mdct_core_enc_fx.c b/lib_enc/ivas_stereo_mdct_core_enc_fx.c
index 705635d887e22490a1c1272d93d120d28835caec..faa0193cb64affdff0db26399f040f6d365caa35 100644
--- a/lib_enc/ivas_stereo_mdct_core_enc_fx.c
+++ b/lib_enc/ivas_stereo_mdct_core_enc_fx.c
@@ -401,7 +401,12 @@ void stereo_mdct_core_enc_fx(
     /*--------------------------------------------------------------*
      * Power spectrum calculation
      *---------------------------------------------------------------*/
+
+#ifdef FIX_MDCT_STEREO_ENC_STACK /* 'length' is dropped: its uses visible in this patch sit inside the #ifndef regions below */
+    Word16 exp, shift1, shift2, norm;
+#else
     Word16 length, exp, shift1, shift2, norm;
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
     Word32 mdct, mdst, imdct, imdst;
 
     FOR( ch = 0; ch < CPE_CHANNELS; ch++ )
@@ -576,6 +581,7 @@ void stereo_mdct_core_enc_fx(
             IF( ( NE_16( hStereoMdct->mdct_stereo_mode[n], hStereoMdct->IGFStereoMode[n] ) || EQ_16( hStereoMdct->mdct_stereo_mode[n], SMDCT_BW_MS ) ) && !hStereoMdct->isSBAStereoMode )
             {
                 IGF_ENC_INSTANCE_HANDLE hIGFEnc[CPE_CHANNELS];
+#ifndef FIX_MDCT_STEREO_ENC_STACK /* legacy path: 32-bit copies on this stack; otherwise they are locals of ProcessStereoIGF_fx_64() */
                 Word32 powerSpec_fx[CPE_CHANNELS][N_MAX], *p_powerSpec_fx[CPE_CHANNELS];
                 Word16 exp_powSpec[CPE_CHANNELS][N_MAX], *p_exp_powSpec[CPE_CHANNELS];
                 p_powerSpec_fx[0] = powerSpec_fx[0];
@@ -615,7 +621,7 @@ void stereo_mdct_core_enc_fx(
                         set16_fx( exp_powSpec[ch] + length, 0, sub( N_MAX, length ) );
                     }
                 }
-
+#endif /* !FIX_MDCT_STEREO_ENC_STACK */
                 hIGFEnc[0] = sts[0]->hIGFEnc;
                 hIGFEnc[1] = sts[1]->hIGFEnc;
                 hIGFEnc[0]->spec_be_igf_e = p_orig_spectrum_e[0];
@@ -623,12 +629,19 @@ void stereo_mdct_core_enc_fx(
                 move16();
                 move16();
 
+#ifdef FIX_MDCT_STEREO_ENC_STACK /* the 64-bit buffers are passed directly; the callee derives the spectrum Q from spec_be_igf_e set just above */
+                ProcessStereoIGF_fx_64( hStereoMdct, sts, ms_mask, orig_spectrum_fx, powerSpec64, exp_powerSpec64,
+                                        powerSpecMsInv_fx, q_powerSpecMsInv_fx, inv_spectrum_fx, exp_inv_spectrum,
+                                        n, hCPE->hCoreCoder[0]->sp_aud_decision0, hCPE->hCoreCoder[0]->element_brate, 0 );
+#else
                 ProcessStereoIGF_fx( hStereoMdct, sts, ms_mask, orig_spectrum_fx, sub( Q31, p_orig_spectrum_e[0] ), sub( Q31, p_orig_spectrum_e[1] ),
                                      p_powerSpec_fx, p_exp_powSpec, powerSpecMsInv_fx, q_powerSpecMsInv_fx, inv_spectrum_fx, exp_inv_spectrum,
                                      n, hCPE->hCoreCoder[0]->sp_aud_decision0, hCPE->hCoreCoder[0]->element_brate, 0 );
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
             }
             ELSE
             {
+#ifndef FIX_MDCT_STEREO_ENC_STACK /* legacy path: 32-bit copies on this stack; otherwise they are locals of ProcessIGF_ivas_fx_64() */
                 Word32 powerSpec_fx[CPE_CHANNELS][N_MAX]; // each value has a different exponent
                 Word16 exp_powerSpec[CPE_CHANNELS][N_MAX];
                 {
@@ -665,6 +678,7 @@ void stereo_mdct_core_enc_fx(
                         set16_fx( exp_powerSpec[ch] + length, 0, sub( N_MAX, length ) );
                     }
                 }
+#endif /* !FIX_MDCT_STEREO_ENC_STACK */
                 FOR( ch = 0; ch < CPE_CHANNELS; ch++ )
                 {
                     st = sts[ch];
@@ -673,6 +687,9 @@ void stereo_mdct_core_enc_fx(
                     {
                         st->hBstr->ind_list = sts[0]->hBstr->ind_list + sts[0]->hBstr->nb_ind_tot;
                     }
+#ifdef FIX_MDCT_STEREO_ENC_STACK /* the fifth argument is the exponent p_orig_spectrum_e[ch], not a Q value */
+                    ProcessIGF_ivas_fx_64( st, N_MAX, st->hTcxEnc->spectrum_fx[n], orig_spectrum_fx[ch][n], p_orig_spectrum_e[ch], powerSpec64[ch], exp_powerSpec64[ch], n, hCPE->hCoreCoder[0]->sp_aud_decision0 );
+#else
                     L_subframeTCX = idiv1616( st->hTcxEnc->L_frameTCX, nSubframes ); /* Q0 */
                     Word16 q_spectrum;
                     q_spectrum = sub( Q31, p_orig_spectrum_e[ch] );
@@ -685,12 +702,14 @@ void stereo_mdct_core_enc_fx(
                     Scale_sig32( orig_spectrum_fx[ch][n], st->hIGFEnc->infoStopLine, sub( q_spectrum, sub( Q31, p_orig_spectrum_e[ch] ) ) ); /* q_spectrum */
 
                     ProcessIGF_ivas_fx( st, N_MAX, st->hTcxEnc->spectrum_fx[n], &q_spectrum, orig_spectrum_fx[ch][n], q_spectrum, &powerSpec_fx[ch][n * L_subframeTCX], &exp_powerSpec[ch][n * L_subframeTCX], st->core == TCX_20_CORE, n, hCPE->hCoreCoder[0]->sp_aud_decision0, 0 );
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
                 }
             }
         }
     }
     ELSE
     {
+#ifndef FIX_MDCT_STEREO_ENC_STACK /* legacy path; NOTE(review): exp_powerSpec is N_MAX + L_MDCT_OVLP_MAX long here, but N_MAX in ProcessIGF_ivas_fx_64() */
         Word32 powerSpec_fx[CPE_CHANNELS][N_MAX]; // each value has a different exponent
         Word16 exp_powerSpec[CPE_CHANNELS][N_MAX + L_MDCT_OVLP_MAX];
         {
@@ -727,6 +746,7 @@ void stereo_mdct_core_enc_fx(
                 set16_fx( exp_powerSpec[ch] + length, 0, sub( N_MAX + L_MDCT_OVLP_MAX, length ) );
             }
         }
+#endif /* !FIX_MDCT_STEREO_ENC_STACK */
         FOR( ch = 0; ch < CPE_CHANNELS; ch++ )
         {
             st = sts[ch];
@@ -749,6 +769,9 @@ void stereo_mdct_core_enc_fx(
             {
                 FOR( n = 0; n < nSubframes; n++ )
                 {
+#ifdef FIX_MDCT_STEREO_ENC_STACK /* the fifth argument is the exponent p_orig_spectrum_e[ch], not a Q value */
+                    ProcessIGF_ivas_fx_64( st, N_MAX, st->hTcxEnc->spectrum_fx[n], orig_spectrum_fx[ch][n], p_orig_spectrum_e[ch], powerSpec64[ch], exp_powerSpec64[ch], n, hCPE->hCoreCoder[0]->sp_aud_decision0 );
+#else
                     Word16 q_spectrum;
                     L_subframeTCX = idiv1616( st->hTcxEnc->L_frameTCX, nSubframes ); /* Q0 */
                     q_spectrum = sub( Q31, p_orig_spectrum_e[ch] );
@@ -760,8 +783,8 @@ void stereo_mdct_core_enc_fx(
                     q_spectrum = sub( Q31, st->hTcxEnc->spectrum_e[n] );
 
                     Scale_sig32( orig_spectrum_fx[ch][n], st->hIGFEnc->infoStopLine, sub( q_spectrum, sub( Q31, p_orig_spectrum_e[ch] ) ) ); /* q_spectrum */
-
                     ProcessIGF_ivas_fx( st, N_MAX, st->hTcxEnc->spectrum_fx[n], &q_spectrum, orig_spectrum_fx[ch][n], q_spectrum, &powerSpec_fx[ch][n * L_subframeTCX], &exp_powerSpec[ch][n * L_subframeTCX], st->core == TCX_20_CORE, n, hCPE->hCoreCoder[0]->sp_aud_decision0, 0 );
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
                 }
             }
         }
diff --git a/lib_enc/prot_fx_enc.h b/lib_enc/prot_fx_enc.h
index 8f99230c38de989d8b73e77cb2b9c68792ec3e13..153c24b36f1ef36da4b2ae2f5fc80081308e8533 100644
--- a/lib_enc/prot_fx_enc.h
+++ b/lib_enc/prot_fx_enc.h
@@ -1097,6 +1097,20 @@ void ProcessIGF_ivas_fx(
     const Word16 vad_hover_flag /* i : VAD hangover flag */
 );
 
+#ifdef FIX_MDCT_STEREO_ENC_STACK
+void ProcessIGF_ivas_fx_64(
+    Encoder_State *st,                            /* i/o: Encoder state */
+    const Word16 powerSpec_len,                   /* i  : length of pPowerSpectrum buffer, forwarded to ProcessIGF_ivas_fx() */
+    Word32 *pMDCTSpectrum,                        /* i  : MDCT spectrum (*q_spectrum) */
+    Word32 *pITFMDCTSpectrum,                     /* i  : MDCT spectrum for ITF; passed through Scale_sig32() by the definition */
+    const Word16 q_ITFMDCTSpectrum,               /* i  : exponent of the MDCT spectrum for ITF (the definition uses Q31 - value; callers pass p_orig_spectrum_e[ch]) */
+    const Word64 *pPowerSpectrum64,               /* i  : 64-bit MDCT^2 + MDST^2 spectrum, or estimate */
+    const Word16 *exp_powerSpec64,                /* i 
: Q of power spectrum */
+    const Word16 frameno,                         /* i  : index of current subframe */
+    const Word16 sp_aud_decision0                 /* i  : first stage switching decision */
+);
+
+#endif /* FIX_MDCT_STEREO_ENC_STACK */
 void ProcessIGF_fx(
     IGF_ENC_INSTANCE_HANDLE const hInstance, /**< in: instance handle of IGF Encoder */
     Encoder_State *st, /**< in: Encoder state */
diff --git a/lib_enc/tcx_utils_enc_fx.c b/lib_enc/tcx_utils_enc_fx.c
index 6b2543a7591f0a53b65a907457f7badc4f2c7677..01fe7c1d58ac2f582c47b089761708bf8ade47fc 100644
--- a/lib_enc/tcx_utils_enc_fx.c
+++ b/lib_enc/tcx_utils_enc_fx.c
@@ -4095,3 +4095,156 @@ void ProcessStereoIGF_fx(
 
     return;
 }
+
+#ifdef FIX_MDCT_STEREO_ENC_STACK
+/*---------------------------------------------------------------------*
+ * ProcessIGF_ivas_fx_64()
+ * Wrapper around ProcessIGF_ivas_fx(): narrows the 64-bit power spectrum to per-bin normalized 32-bit values held in local buffers,
+ * sets q_spectrum from st->hTcxEnc->spectrum_e[frameno], passes the ITF spectrum through Scale_sig32() and calls ProcessIGF_ivas_fx().
+ *---------------------------------------------------------------------*/
+
+void ProcessIGF_ivas_fx_64(
+    Encoder_State *st,                            /* i/o: Encoder state */
+    const Word16 powerSpec_len,                   /* i  : length of pPowerSpectrum buffer, forwarded to ProcessIGF_ivas_fx() */
+    Word32 *pMDCTSpectrum,                        /* i  : MDCT spectrum, forwarded with Q = Q31 - st->hTcxEnc->spectrum_e[frameno] */
+    Word32 *pITFMDCTSpectrum,                     /* i  : MDCT spectrum for ITF; passed through Scale_sig32() before the IGF call */
+    const Word16 q_ITFMDCTSpectrum,               /* i  : exponent of the MDCT spectrum for ITF (used below as Q31 - value, despite the q_ prefix) */
+    const Word64 *pPowerSpectrum64,               /* i  : 64-bit MDCT^2 + MDST^2 spectrum, or estimate */
+    const Word16 *exp_powerSpec64,                /* i  : exponent of pPowerSpectrum64, one value per subframe (indexed by n1 below) */
+    const Word16 frameno,                         /* i  : index of current subframe */
+    const Word16 sp_aud_decision0                 /* i  : first stage switching decision */
+)
+{
+    Word16 i, n1, nsub, length, norm;
+    Word16 L_subframeTCX;
+    Word16 q_spectrum;
+    Word32 powerSpec_fx[N_MAX]; // each value has a different exponent
+    Word16 exp_powerSpec[N_MAX]; /* exponent of the corresponding powerSpec_fx[] entry */
+
+    IF( EQ_16( st->hTcxEnc->tcxMode, TCX_20 ) )
+    {
+        L_subframeTCX = st->hTcxEnc->L_frameTCX;
+        move16();
+    }
+    ELSE
+    {
+        L_subframeTCX = shr( st->hTcxEnc->L_frameTCX, 1 ); /* any other TCX mode: half of L_frameTCX per subframe */
+    }
+
+    nsub = 1;
+    length = st->hTcxEnc->L_frameTCX;
+    move16();
+    move16();
+    IF( NE_16( st->hTcxEnc->tcxMode, TCX_20 ) )
+    {
+        length = shr( st->hTcxEnc->L_frameTCX, 1 );
+        nsub = NB_DIV;
+        move16();
+    }
+    IF( EQ_16( st->last_core, ACELP_CORE ) )
+    {
+        length = add( length, shr( length, 2 ) ); /* length + length/4 when the previous core was ACELP */
+    }
+
+    /* Copy powerSpec values from 64 bit buffer to 32 bit buffer */
+    FOR( n1 = 0; n1 < nsub; n1++ )
+    {
+        FOR( i = 0; i < length; i++ )
+        {
+            norm = W_norm( pPowerSpectrum64[i + n1 * length] );
+            powerSpec_fx[i + n1 * length] = W_extract_h( W_shl( pPowerSpectrum64[i + n1 * length], norm ) ); // exp = exp_powerSpec64[n1]-norm
+            exp_powerSpec[i + n1 * length] = sub( exp_powerSpec64[n1], norm );
+            move32();
+            move16();
+        }
+    }
+    set32_fx( powerSpec_fx + length, 0, sub( N_MAX, length ) ); /* NOTE(review): fill starts at 'length', so it also covers bins [length, 2*length) written above when nsub == NB_DIV - confirm intended */
+    set16_fx( exp_powerSpec + length, 0, sub( N_MAX, length ) ); /* same range as the fill above */
+
+    q_spectrum = sub( Q31, q_ITFMDCTSpectrum );
+    IF( st->hIGFEnc ) /* NOTE(review): hIGFEnc is checked here but dereferenced unconditionally in the Scale_sig32() call below */
+    {
+        q_spectrum = s_min( q_spectrum, sub( Q31, st->hIGFEnc->spec_be_igf_e ) );
+    }
+
+    q_spectrum = sub( Q31, st->hTcxEnc->spectrum_e[frameno] ); /* NOTE(review): overwrites the q_spectrum computed above; only this value reaches the calls below */
+
+    Scale_sig32( pITFMDCTSpectrum, st->hIGFEnc->infoStopLine, sub( q_spectrum, sub( Q31, q_ITFMDCTSpectrum ) ) ); /* q_spectrum */
+
+    ProcessIGF_ivas_fx( st, powerSpec_len, pMDCTSpectrum, &q_spectrum, pITFMDCTSpectrum, q_spectrum, &powerSpec_fx[frameno * L_subframeTCX], &exp_powerSpec[frameno * L_subframeTCX], st->core == TCX_20_CORE, frameno, sp_aud_decision0, 0 ); /* subframe offset uses L_subframeTCX, whereas the copy above strides by 'length' */
+
+    return;
+}
+
+
+/*---------------------------------------------------------------------*
+ * ProcessStereoIGF_fx_64()
+ * Wrapper around ProcessStereoIGF_fx(): narrows the 64-bit power spectra of both channels to per-bin normalized 32-bit values
+ * with individual exponents (local buffers) and forwards them, using Q31 - spec_be_igf_e of each channel as the spectrum Q values.
+ *---------------------------------------------------------------------*/
+
+void ProcessStereoIGF_fx_64(
+    STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct,                   /* MDCT stereo encoder structure, forwarded unchanged to ProcessStereoIGF_fx() */
+    Encoder_State *sts[CPE_CHANNELS],                          /* i  : Encoder state, one pointer per channel */
+    Word16 ms_mask[2][MAX_SFB],                                /* i  : bandwise MS mask */
+    Word32 *pITFMDCTSpectrum_fx[CPE_CHANNELS][NB_DIV],         /* i  : MDCT spectrum for ITF */
+    Word64 powerSpec64[CPE_CHANNELS][N_MAX],                   /* i  : 64-bit MDCT^2 + MDST^2 spectrum, or estimate (only read here) */
+    Word16 exp_powerSpec64[CPE_CHANNELS][NB_DIV],              /* i  : exponent of powerSpec64, one value per channel and subframe */
+    Word32 *pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV],      /* i  : inverse power spectrum */
+    Word16 *q_pPowerSpectrumMsInv_fx[CPE_CHANNELS][NB_DIV],    /* i/o: Q of pPowerSpectrumMsInv_fx */
+    Word32 *inv_spectrum_fx[CPE_CHANNELS][NB_DIV],             /* i  : inverse spectrum */
+    Word16 exp_inv_spectrum_fx[CPE_CHANNELS],                  /* i/o: exp of inv_spectrum_fx */
+    const Word16 frameno,                                      /* i  : index of current subframe */
+    const Word16 sp_aud_decision0,                             /* i  : sp_aud_decision0 */
+    const Word32 element_brate,                                /* i  : element bitrate */
+    const Word16 mct_on                                        /* i  : flag MCT mode */
+)
+{
+    Word16 ch, i, n1, nsub, length, shift1;
+    Word32 powerSpec_fx[CPE_CHANNELS][N_MAX], *p_powerSpec_fx[CPE_CHANNELS]; /* 32-bit copy of powerSpec64 and its row pointers */
+    Word16 exp_powSpec[CPE_CHANNELS][N_MAX], *p_exp_powSpec[CPE_CHANNELS];   /* per-bin exponents of powerSpec_fx and their row pointers */
+
+    p_powerSpec_fx[0] = powerSpec_fx[0];
+    p_exp_powSpec[0] = exp_powSpec[0];
+    p_powerSpec_fx[1] = powerSpec_fx[1];
+    p_exp_powSpec[1] = exp_powSpec[1];
+
+    /* Copy powerSpec values from 64 bit buffer to 32 bit buffer */
+    FOR( ch = 0; ch < CPE_CHANNELS; ch++ )
+    {
+        nsub = 1;
+        length = sts[ch]->hTcxEnc->L_frameTCX;
+        move16();
+        move16();
+        IF( NE_16( sts[ch]->hTcxEnc->tcxMode, TCX_20 ) )
+        {
+            length = shr( sts[ch]->hTcxEnc->L_frameTCX, 1 );
+            nsub = NB_DIV;
+            move16();
+        }
+        IF( EQ_16( sts[ch]->last_core, ACELP_CORE ) )
+        {
+            length = add( length, shr( length, 2 ) ); /* length + length/4 when the previous core was ACELP */
+        }
+        FOR( n1 = 0; n1 < nsub; n1++ )
+        {
+            FOR( i = 0; i < length; i++ )
+            {
+                /* This doesn't result in saturation */
+                shift1 = W_norm( powerSpec64[ch][i + n1 * length] );
+                powerSpec_fx[ch][i + n1 * length] = W_extract_h( W_shl( powerSpec64[ch][i + n1 * length], shift1 ) ); // exp: exp_powerSpec64[ch][n1] - shift1
+                move32();
+                exp_powSpec[ch][i + n1 * length] = sub( exp_powerSpec64[ch][n1], shift1 ); /* NOTE(review): no move16() follows this store, unlike the same copy in ProcessIGF_ivas_fx_64() */
+            }
+        }
+        set32_fx( powerSpec_fx[ch] + length, 0, sub( N_MAX, length ) ); /* NOTE(review): fill starts at 'length', so it also covers bins [length, 2*length) written above when nsub == NB_DIV - confirm intended */
+        set16_fx( exp_powSpec[ch] + length, 0, sub( N_MAX, length ) ); /* same range as the fill above */
+    }
+
+    ProcessStereoIGF_fx( hStereoMdct, sts, ms_mask, pITFMDCTSpectrum_fx, sub( Q31, sts[0]->hIGFEnc->spec_be_igf_e ), sub( Q31, sts[1]->hIGFEnc->spec_be_igf_e ),
+                         p_powerSpec_fx, p_exp_powSpec, pPowerSpectrumMsInv_fx, q_pPowerSpectrumMsInv_fx, inv_spectrum_fx, exp_inv_spectrum_fx,
+                         frameno, sp_aud_decision0, element_brate, mct_on );
+
+    return;
+}
+#endif /* FIX_MDCT_STEREO_ENC_STACK */