From bf821b0db0af2ea5fcc5ebd8877f4ecfd75dc9d8 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Tue, 23 Jan 2024 09:33:35 +0530 Subject: [PATCH] Fix for overflow issues with +10dB test cases in sns_shape_spectrum [x] Dynamic Q-factor is used and passed to sns_shape_spectrum() instead of static Q-factor to avoid overflow and to increase precision. --- lib_com/ivas_prot_fx.h | 2 + lib_com/ivas_sns_com_fx.c | 48 +++++++++-- lib_dec/TonalComponentDetection.c | 33 ++++++-- lib_dec/ivas_mdct_core_dec.c | 33 ++++++-- lib_dec/tonalMDCTconcealment.c | 133 +++++++++++++++++++++++++----- 5 files changed, 210 insertions(+), 39 deletions(-) diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index adb8ab969..43abf9c1c 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -170,8 +170,10 @@ void sns_interpolate_scalefactors_fx( void sns_shape_spectrum_fx( Word32 spectrum[], /* i/o: spectrum to be shaped */ + Word16 *q_spectrum, /* i/o: Q of spectrum */ const PsychoacousticParameters *pPsychParams, /* i : psychoacoustic parameters used to get the frequency bands */ const Word32 *scf_int, /* i : already interpolated SNS scalefactors */ + const Word16 q_scf_int, /* i : Q of interpolated SNS scalefactors */ const Word16 L_frame /* i : frame length */ ); diff --git a/lib_com/ivas_sns_com_fx.c b/lib_com/ivas_sns_com_fx.c index 57c621abc..0e414f6d5 100644 --- a/lib_com/ivas_sns_com_fx.c +++ b/lib_com/ivas_sns_com_fx.c @@ -277,13 +277,15 @@ void sns_interpolate_scalefactors_fx( void sns_shape_spectrum_fx( Word32 spectrum[], /* i/o: spectrum to be shaped */ + Word16 *q_spectrum, /* i/o: Q of spectrum */ const PsychoacousticParameters *pPsychParams, /* i : psychoacoustic parameters used to get the frequency bands */ const Word32 *scf_int, /* i : already interpolated SNS scalefactors */ + const Word16 q_scf_int, /* i : Q of interpolated SNS scalefactors */ const Word16 L_frame /* i : frame length */ ) { - Word16 i, n, k, bw; - Word64 L64_tmp; + Word16 i, n, k, 
tmp_k, bw, q_tmp, shift, min_shift = 63; + Word64 L64_tmp[L_FRAME48k]; const UWord8 nBands = pPsychParams->nBands; const UWord8 *bandLengths = pPsychParams->bandLengths; @@ -297,11 +299,26 @@ void sns_shape_spectrum_fx( { FOR( n = 0; n < bw; ( ++n, ++k ) ) { - L64_tmp = W_mult_32_32( spectrum[k], scf_int[i] ); // Q32 - L64_tmp = W_shr( L64_tmp, 26 ); // Q6 - spectrum[k] = W_sat_l( L64_tmp ); // Q6 + L64_tmp[k] = W_mult_32_32( spectrum[k], scf_int[i] ); // Q = q_spectrum + q_scf_int + 1 + shift = W_norm( L64_tmp[k] ); + IF( LT_16( shift, min_shift ) && NE_64( L64_tmp[k], 0 ) ) /* skip zero products, same guard as the ELSE branch, so they cannot pull min_shift down */ + { + min_shift = shift; + } } } + tmp_k = k; + q_tmp = sub( add( add( *q_spectrum, q_scf_int ), min_shift ), 32 ); /* smallest headroom over all non-zero products decides the common output Q */ + IF( GT_16( q_tmp, 30 ) ) + { + q_tmp = 30; + } + FOR( k = 0; k < tmp_k; k++ ) + { + L64_tmp[k] = W_shr( L64_tmp[k], sub( add( *q_spectrum, q_scf_int ), q_tmp ) ); + spectrum[k] = W_sat_l( L64_tmp[k] ); // Q = q_tmp + 1 + } + *q_spectrum = q_tmp; /* NOTE(review): samples are in Q(q_tmp + 1); the callers in this patch divide by 2^(q + 1) */ } ELSE { @@ -311,11 +328,26 @@ void sns_shape_spectrum_fx( { FOR( n = 0; n < bandLengths[i]; ( ++n, ++k ) ) { - L64_tmp = W_mult_32_32( spectrum[k], scf_int[i] ); // Q32 - L64_tmp = W_shr( L64_tmp, 26 ); // Q6 - spectrum[k] = W_sat_l( L64_tmp ); // Q6 + L64_tmp[k] = W_mult_32_32( spectrum[k], scf_int[i] ); // Q = q_spectrum + q_scf_int + 1 + shift = W_norm( L64_tmp[k] ); + IF( LT_16( shift, min_shift ) && NE_64( L64_tmp[k], 0 ) ) + { + min_shift = shift; + } } } + tmp_k = k; + q_tmp = sub( add( add( *q_spectrum, q_scf_int ), min_shift ), 32 ); /* smallest headroom over all non-zero products decides the common output Q */ + IF( GT_16( q_tmp, 30 ) ) + { + q_tmp = 30; + } + FOR( k = 0; k < tmp_k; k++ ) + { + L64_tmp[k] = W_shr( L64_tmp[k], sub( add( *q_spectrum, q_scf_int ), q_tmp ) ); + spectrum[k] = W_sat_l( L64_tmp[k] ); // Q = q_tmp + 1 + } + *q_spectrum = q_tmp; /* NOTE(review): samples are in Q(q_tmp + 1); the callers in this patch divide by 2^(q + 1) */ } return; diff --git a/lib_dec/TonalComponentDetection.c b/lib_dec/TonalComponentDetection.c index 77ce38722..d52f83003 100644 --- a/lib_dec/TonalComponentDetection.c +++ b/lib_dec/TonalComponentDetection.c @@ -101,20 +101,43 @@ void DetectTonalComponents_flt( { #ifdef 
IVAS_FLOAT_FIXED Word32 pScaledMdctSpectrum_fx[L_FRAME_MAX], scaleFactors_fx[FDNS_NPTS]; + Word16 q_pScaledMdctSpectrum_fx = 31, q_scaleFactors_fx = 31; + FOR( Word16 k = 0; k < nSamplesCore; k++ ) + { + if ( abs( (Word32) pScaledMdctSpectrum[k] ) != 0 ) + q_pScaledMdctSpectrum_fx = s_min( q_pScaledMdctSpectrum_fx, norm_l( pScaledMdctSpectrum[k] ) ); + } + FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) + { + if ( abs( (Word32) scaleFactors[k] ) != 0 ) + q_scaleFactors_fx = s_min( q_scaleFactors_fx, norm_l( scaleFactors[k] ) ); + } + q_pScaledMdctSpectrum_fx -= 1; + q_scaleFactors_fx -= 1; FOR( Word16 c = 0; c < nSamplesCore; c++ ) { - pScaledMdctSpectrum_fx[c] = (Word32) ( pScaledMdctSpectrum[c] * ONE_IN_Q7 ); + pScaledMdctSpectrum_fx[c] = (Word32) ( pScaledMdctSpectrum[c] * ( 1<< q_pScaledMdctSpectrum_fx ) ); } FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) { - scaleFactors_fx[c] = (Word32) ( scaleFactors[c] * ONE_IN_Q24 ); + scaleFactors_fx[c] = (Word32) ( scaleFactors[c] * ( 1<< q_scaleFactors_fx ) ); } - sns_shape_spectrum_fx( pScaledMdctSpectrum_fx, psychParamsCurrent, scaleFactors_fx, nSamplesCore ); + sns_shape_spectrum_fx( pScaledMdctSpectrum_fx, &q_pScaledMdctSpectrum_fx, psychParamsCurrent, scaleFactors_fx, q_scaleFactors_fx, nSamplesCore ); - FOR( Word16 c = 0; c < nSamplesCore; c++ ) + IF ( q_pScaledMdctSpectrum_fx + 1 < 31 ) { - pScaledMdctSpectrum[c] = ( (float) pScaledMdctSpectrum_fx[c] / ONE_IN_Q6 ); + FOR( Word16 c = 0; c < nSamplesCore; c++ ) + { + pScaledMdctSpectrum[c] = ( (float) pScaledMdctSpectrum_fx[c] / ( 1 << ( q_pScaledMdctSpectrum_fx + 1 ) ) ); + } + } + ELSE IF ( q_pScaledMdctSpectrum_fx + 1 == 31 ) + { + FOR( Word16 c = 0; c < nSamplesCore; c++ ) + { + pScaledMdctSpectrum[c] = ( (float) pScaledMdctSpectrum_fx[c] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( pScaledMdctSpectrum, psychParamsCurrent, scaleFactors, nSamplesCore ); diff --git a/lib_dec/ivas_mdct_core_dec.c b/lib_dec/ivas_mdct_core_dec.c index cdc8d3853..1fc6bef60 100644 --- 
a/lib_dec/ivas_mdct_core_dec.c +++ b/lib_dec/ivas_mdct_core_dec.c @@ -1160,20 +1160,43 @@ void ivas_mdct_core_tns_ns( #ifdef IVAS_FLOAT_FIXED Word32 x_fx[L_FRAME48k], sns_int_scf_fx[FDNS_NPTS]; + Word16 q_x = 31, q_sns_int_scf = 31; /* start from the largest Q and lower it to the smallest headroom found below */ + FOR( Word16 c = 0; c < st->hTcxCfg->psychParamsCurrent->nBins; c++ ) /* scan exactly the bins that are converted and shaped below */ + { + if ( abs( (Word32) x[ch][k][c] ) != 0 ) + q_x = s_min( q_x, norm_l( x[ch][k][c] ) ); + } + FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) + { + if ( abs( (Word32) sns_int_scf[c] ) != 0 ) + q_sns_int_scf = s_min( q_sns_int_scf, norm_l( sns_int_scf[c] ) ); + } + q_x -= 1; + q_sns_int_scf -= 1; FOR( Word16 c = 0; c < st->hTcxCfg->psychParamsCurrent->nBins; c++ ) { - x_fx[c] = (Word32) ( x[ch][k][c] * ONE_IN_Q7 ); + x_fx[c] = (Word32) ( x[ch][k][c] * ( 1 << q_x ) ); } FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) { - sns_int_scf_fx[c] = (Word32) ( sns_int_scf[c] * ONE_IN_Q24 ); + sns_int_scf_fx[c] = (Word32) ( sns_int_scf[c] * ( 1 << q_sns_int_scf ) ); } - sns_shape_spectrum_fx( x_fx, st->hTcxCfg->psychParamsCurrent, sns_int_scf_fx, st->hTcxCfg->psychParamsCurrent->nBins ); + sns_shape_spectrum_fx( x_fx, &q_x, st->hTcxCfg->psychParamsCurrent, sns_int_scf_fx, q_sns_int_scf, st->hTcxCfg->psychParamsCurrent->nBins ); - FOR( Word16 c = 0; c < st->hTcxCfg->psychParamsCurrent->nBins; c++ ) + IF ( q_x + 1 < 31 ) { - x[ch][k][c] = ( (float) x_fx[c] / ONE_IN_Q6 ); + FOR( Word16 c = 0; c < st->hTcxCfg->psychParamsCurrent->nBins; c++ ) + { + x[ch][k][c] = ( (float) x_fx[c] / ( 1 << ( q_x + 1 ) ) ); + } + } + ELSE + { + FOR( Word16 c = 0; c < st->hTcxCfg->psychParamsCurrent->nBins; c++ ) + { + x[ch][k][c] = ( (float) x_fx[c] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( x[ch][k], st->hTcxCfg->psychParamsCurrent, &sns_int_scf[0], st->hTcxCfg->psychParamsCurrent->nBins ); diff --git a/lib_dec/tonalMDCTconcealment.c b/lib_dec/tonalMDCTconcealment.c index 649fae812..408151569 100644 --- a/lib_dec/tonalMDCTconcealment.c +++ b/lib_dec/tonalMDCTconcealment.c @@ -373,20 +373,43 @@ static void 
CalcPowerSpecAndDetectTonalComponents( { #ifdef IVAS_FLOAT_FIXED Word32 powerSpectrum_fx[L_FRAME_MAX], invScaleFactors_fx[FDNS_NPTS]; + Word16 q_ps = 31, q_isf = 31; + FOR( Word16 k = 0; k < hTonalMDCTConc->nSamplesCore; k++ ) + { + if ( abs( (Word32) powerSpectrum[k] ) != 0 ) + q_ps = s_min( q_ps, norm_l( powerSpectrum[k] ) ); + } + FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) + { + if ( abs( (Word32) invScaleFactors[k] ) != 0 ) + q_isf = s_min( q_isf, norm_l( invScaleFactors[k] ) ); + } + q_ps -= 1; + q_isf -= 1; FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) { - powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ONE_IN_Q7 ); + powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ( 1 << q_ps ) ); } FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) { - invScaleFactors_fx[c] = (Word32) ( invScaleFactors[c] * ONE_IN_Q24 ); + invScaleFactors_fx[c] = (Word32) ( invScaleFactors[c] * ( 1 << q_isf ) ); } - sns_shape_spectrum_fx( powerSpectrum_fx, psychParamsCurrent, invScaleFactors_fx, hTonalMDCTConc->nSamplesCore ); + sns_shape_spectrum_fx( powerSpectrum_fx, &q_ps, psychParamsCurrent, invScaleFactors_fx, q_isf, hTonalMDCTConc->nSamplesCore ); - FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + IF( q_ps + 1 < 31 ) + { + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ( 1 << ( q_ps + 1 ) ) ); + } + } + ELSE IF( q_ps + 1 == 31 ) { - powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q6 ); + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( powerSpectrum, psychParamsCurrent, invScaleFactors, hTonalMDCTConc->nSamplesCore ); @@ -478,20 +501,43 @@ void TonalMDCTConceal_Detect_ivas( { #ifdef IVAS_FLOAT_FIXED Word32 powerSpectrum_fx[L_FRAME_MAX], scaleFactors_fx[FDNS_NPTS]; + Word16 q_ps = 31, q_sf = 31; + FOR( Word16 k = 0; k < hTonalMDCTConc->nSamplesCore; k++ ) + { + if ( abs( 
(Word32) powerSpectrum[k] ) != 0 ) + q_ps = s_min( q_ps, norm_l( powerSpectrum[k] ) ); + } + FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) + { + if ( abs( (Word32) hTonalMDCTConc->secondLastBlockData.scaleFactors_float[k] ) != 0 ) + q_sf = s_min( q_sf, norm_l( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[k] ) ); + } + q_ps -= 1; + q_sf -= 1; FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) { - powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ONE_IN_Q7 ); + powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ( 1 << q_ps ) ); } FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) { - scaleFactors_fx[c] = (Word32) ( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[c] * ONE_IN_Q24 ); + scaleFactors_fx[c] = (Word32) ( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[c] * ( 1 << q_sf ) ); } - sns_shape_spectrum_fx( powerSpectrum_fx, psychParamsCurrent, scaleFactors_fx, hTonalMDCTConc->nSamplesCore ); + sns_shape_spectrum_fx( powerSpectrum_fx, &q_ps, psychParamsCurrent, scaleFactors_fx, q_sf, hTonalMDCTConc->nSamplesCore ); - FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + IF ( q_ps + 1 < 31 ) + { + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ( 1 << ( q_ps + 1 ) ) ); + } + } + ELSE IF ( q_ps + 1 == 31 ) { - powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q6 ); + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( powerSpectrum, psychParamsCurrent, hTonalMDCTConc->secondLastBlockData.scaleFactors_float, hTonalMDCTConc->nSamplesCore ); @@ -929,20 +975,43 @@ void TonalMDCTConceal_Apply_ivas( { #ifdef IVAS_FLOAT_FIXED Word32 powerSpectrum_fx[L_FRAME_MAX], scaleFactors_fx[FDNS_NPTS]; + Word16 q_ps = 31, q_sf = 31; + FOR( Word16 k = 0; k < hTonalMDCTConc->nSamplesCore; k++ ) + { + if ( abs( (Word32) powerSpectrum[k] ) != 0 ) + q_ps = s_min( q_ps, norm_l( 
powerSpectrum[k] ) ); + } + FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) + { + if ( abs( (Word32) hTonalMDCTConc->secondLastBlockData.scaleFactors_float[k] ) != 0 ) + q_sf = s_min( q_sf, norm_l( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[k] ) ); + } + q_ps -= 1; + q_sf -= 1; FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) { - powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ONE_IN_Q7 ); + powerSpectrum_fx[c] = (Word32) ( powerSpectrum[c] * ( 1 << q_ps ) ); } FOR( Word16 c = 0; c < FDNS_NPTS; c++ ) { - scaleFactors_fx[c] = (Word32) ( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[c] * ONE_IN_Q24 ); + scaleFactors_fx[c] = (Word32) ( hTonalMDCTConc->secondLastBlockData.scaleFactors_float[c] * ( 1 << q_sf ) ); } - sns_shape_spectrum_fx( powerSpectrum_fx, psychParamsCurrent, scaleFactors_fx, hTonalMDCTConc->nSamplesCore ); + sns_shape_spectrum_fx( powerSpectrum_fx, &q_ps, psychParamsCurrent, scaleFactors_fx, q_sf, hTonalMDCTConc->nSamplesCore ); - FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + IF( q_ps + 1 < 31 ) + { + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ( 1 << ( q_ps + 1 ) ) ); + } + } + ELSE IF( q_ps + 1 == 31 ) { - powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q6 ); + FOR( Word16 c = 0; c < hTonalMDCTConc->nSamplesCore; c++ ) + { + powerSpectrum[c] = ( (float) powerSpectrum_fx[c] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( powerSpectrum, psychParamsCurrent, hTonalMDCTConc->secondLastBlockData.scaleFactors_float, hTonalMDCTConc->nSamplesCore ); @@ -1267,21 +1336,43 @@ void TonalMdctConceal_whiten_noise_shape_ivas( #ifdef IVAS_FLOAT_FIXED Word32 whitenend_noise_shape_fx[L_FRAME16k]; Word32 scfs_for_shaping_fx[FDNS_NPTS]; - + Word16 q_wns = 31, q_sfs = 31; FOR( Word16 k = 0; k < L_FRAME16k; k++ ) { - whitenend_noise_shape_fx[k] = (Word32) ( whitenend_noise_shape[k] * ONE_IN_Q7 ); + if ( abs( (Word32) whitenend_noise_shape[k] ) != 0 ) 
+ q_wns = s_min( q_wns, norm_l( whitenend_noise_shape[k] ) ); } FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) { - scfs_for_shaping_fx[k] = (Word32) ( scfs_for_shaping[k] * ONE_IN_Q24 ); + if ( abs( (Word32) scfs_for_shaping[k] ) != 0 ) + q_sfs = s_min( q_sfs, norm_l( scfs_for_shaping[k] ) ); + } + q_wns -= 1; + q_sfs -= 1; + FOR( Word16 k = 0; k < L_FRAME16k; k++ ) + { + whitenend_noise_shape_fx[k] = (Word32) ( whitenend_noise_shape[k] * ( 1 << q_wns ) ); + } + FOR( Word16 k = 0; k < FDNS_NPTS; k++ ) + { + scfs_for_shaping_fx[k] = (Word32) ( scfs_for_shaping[k] * ( 1 << q_sfs ) ); } - sns_shape_spectrum_fx( whitenend_noise_shape_fx, psychParams, scfs_for_shaping_fx, L_frame ); + sns_shape_spectrum_fx( whitenend_noise_shape_fx, &q_wns, psychParams, scfs_for_shaping_fx, q_sfs, L_frame ); - FOR( Word16 k = 0; k < L_FRAME16k; k++ ) + IF( q_wns + 1 < 31 ) { - whitenend_noise_shape[k] = ( (float) whitenend_noise_shape_fx[k] / ONE_IN_Q6 ); + FOR( Word16 k = 0; k < L_FRAME16k; k++ ) + { + whitenend_noise_shape[k] = ( (float) whitenend_noise_shape_fx[k] / ( 1 << ( q_wns + 1 ) ) ); + } + } + ELSE IF( q_wns + 1 == 31 ) + { + FOR( Word16 k = 0; k < L_FRAME16k; k++ ) + { + whitenend_noise_shape[k] = ( (float) whitenend_noise_shape_fx[k] / ONE_IN_Q31 ); + } } #else sns_shape_spectrum( whitenend_noise_shape, psychParams, scfs_for_shaping, L_frame ); -- GitLab