From e0fa304b8c33bd76f5f2319b6084c353a043d27e Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 8 Jul 2024 21:10:48 +0530 Subject: [PATCH 1/2] BASOP and instrumentation changes for lib_com, lib_rend, lib_dec --- lib_com/fd_cng_com.c | 80 +- lib_com/fd_cng_com_fx.c | 40 +- lib_com/fft.c | 72 +- lib_com/fft_evs.c | 6 + lib_com/fft_fx_evs.c | 89 +- lib_com/fft_rel.c | 65 +- lib_com/fft_rel_fx.c | 6 + lib_com/gs_bitallocation_fx.c | 4 +- lib_com/gs_bitallocation_ivas_fx.c | 247 +++-- lib_com/gs_gains_fx.c | 102 +- lib_com/gs_inact_switching_fx.c | 5 + lib_com/gs_noisefill_fx.c | 71 +- lib_com/gs_preech.c | 8 +- lib_com/guided_plc_util_fx.c | 2 +- lib_com/hp50.c | 25 +- lib_com/hq2_bit_alloc.c | 16 +- lib_com/hq2_bit_alloc_fx.c | 36 +- lib_com/ivas_prot.h | 106 +- lib_com/ivas_prot_fx.h | 42 +- lib_com/ivas_tools.c | 820 +++++++++------ lib_com/wi_fx.c | 393 ++++--- lib_com/window_fx.c | 10 +- lib_com/window_ola_fx.c | 102 +- lib_com/wtda.c | 126 ++- lib_com/wtda_fx.c | 5 +- lib_dec/fd_cng_dec.c | 37 +- lib_dec/ivas_init_dec.c | 2 +- lib_dec/ivas_sba_dec.c | 2 +- lib_dec/ivas_spar_decoder.c | 662 ++++++++---- lib_dec/ivas_spar_md_dec.c | 1524 ++++++++++++++++------------ lib_dec/ivas_stereo_adapt_GR_dec.c | 129 ++- lib_dec/ivas_stereo_cng_dec.c | 527 ++++++---- lib_dec/ivas_stereo_dft_dec.c | 159 +-- lib_enc/ivas_mcmasa_enc.c | 27 + lib_enc/speech_music_classif.c | 50 + 35 files changed, 3580 insertions(+), 2017 deletions(-) diff --git a/lib_com/fd_cng_com.c b/lib_com/fd_cng_com.c index 5e0ec84ed..32df6893f 100644 --- a/lib_com/fd_cng_com.c +++ b/lib_com/fd_cng_com.c @@ -961,56 +961,63 @@ void SynthesisSTFT_fx( Word32 *timeDomainOutput, Word32 *olapBuffer, const Word16 *olapWin, - const int16_t tcx_transition, + const Word16 tcx_transition, HANDLE_FD_CNG_COM hFdCngCom, /* i/o: FD_CNG structure containing all buffers and variables */ - const int16_t element_mode, /* i : element mode */ - const int16_t nchan_out /* i : number of output channels */ + const Word16 element_mode, /* i : element mode */ + const Word16 nchan_out /* i : number of output channels */ ) { - int16_t i; + Word16 i; Word32 buf_fx[M + 1 + 320], tmp_fx; /* Perform IFFT */ RFFTN_fx( fftBuffer, hFdCngCom->fftSineTab_fx, hFdCngCom->fftlen, 1 ); /* Handle overlap in P/S domain for stereo */ - IF( ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_DFT ) && nchan_out == 2 ) + test(); + test(); + IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) ) { - mvl2l( olapBuffer + 3 * hFdCngCom->frameSize / 4 - ( M + 1 ), buf_fx, hFdCngCom->frameSize + M + 1 ); - set_l( olapBuffer, 0, hFdCngCom->fftlen ); + Copy32( olapBuffer + sub( i_mult( 3, shr( hFdCngCom->frameSize, 2 ) ), ( M + 1 ) ), buf_fx, add( hFdCngCom->frameSize, M + 1 ) ); + set32_fx( olapBuffer, 0, hFdCngCom->fftlen ); } ELSE { - mvl2l( olapBuffer + hFdCngCom->frameSize, olapBuffer, hFdCngCom->frameSize ); - set_l( olapBuffer + hFdCngCom->frameSize, 0, hFdCngCom->frameSize ); /*olapBuffer, fftBuffer, olapWin*/ + Copy32( olapBuffer + hFdCngCom->frameSize, olapBuffer, hFdCngCom->frameSize ); + set32_fx( olapBuffer + hFdCngCom->frameSize, 0, hFdCngCom->frameSize ); /*olapBuffer, fftBuffer, olapWin*/ } IF( tcx_transition ) { - FOR( i = 0; i < 5 * hFdCngCom->frameSize / 4; i++ ) + FOR( i = 0; i < i_mult( 5, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { olapBuffer[i] = fftBuffer[i]; + move32(); } } ELSE { - FOR( i = hFdCngCom->frameSize / 4; i < 3 * hFdCngCom->frameSize / 4; i++ ) + FOR( i = hFdCngCom->frameSize / 4; i < i_mult( 3, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { - olapBuffer[i] = L_add( olapBuffer[i], Mpy_32_16_1( fftBuffer[i], olapWin[i - hFdCngCom->frameSize / 4] ) ); + olapBuffer[i] = L_add( olapBuffer[i], Mpy_32_16_1( fftBuffer[i], olapWin[sub( i, shr( hFdCngCom->frameSize, 2 ) )] ) ); + move32(); } - FOR( ; i < 5 * hFdCngCom->frameSize / 4; i++ ) + FOR( ; i < i_mult( 5, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { olapBuffer[i] = fftBuffer[i]; + move32(); } } - FOR( ; i < 7 * hFdCngCom->frameSize / 4; i++ ) + FOR( ; i < i_mult( 7, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { - olapBuffer[i] = Mpy_32_16_1( fftBuffer[i], olapWin[i - 3 * hFdCngCom->frameSize / 4] ); + olapBuffer[i] = Mpy_32_16_1( fftBuffer[i], olapWin[sub( i, i_mult( 3, shr( hFdCngCom->frameSize, 2 ) ) )] ); + move32(); } FOR( ; i < hFdCngCom->fftlen; i++ ) { olapBuffer[i] = 0; + move32(); } Word32 fftScale = 0; @@ -1018,43 +1025,49 @@ void SynthesisSTFT_fx( { case 640: fftScale = FFT_SCALING_640; - break; + move32(); + BREAK; case 512: fftScale = FFT_SCALING_512; - break; + move32(); + BREAK; default: assert( !"Not supported FFT length!" ); } /* Get time-domain signal */ // v_multc(olapBuffer + hFdCngCom->frameSize / 4, (float)(hFdCngCom->fftlen / 2), timeDomainOutput, hFdCngCom->frameSize); v_multc_fixed( olapBuffer + hFdCngCom->frameSize / 4, fftScale, timeDomainOutput, hFdCngCom->frameSize ); // Q_in - 9 - /* Get excitation */ - IF( ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_DFT ) && nchan_out == 2 ) + /* Get excitation */ + test(); + test(); + IF( ( EQ_16( element_mode, IVAS_CPE_TD ) || EQ_16( element_mode, IVAS_CPE_DFT ) ) && EQ_16( nchan_out, 2 ) ) { FOR( i = 0; i < hFdCngCom->frameSize / 2; i++ ) { - buf_fx[i + ( M + 1 )] = L_add( buf_fx[i + ( M + 1 )], olapBuffer[i + hFdCngCom->frameSize / 4] ); + buf_fx[i + ( M + 1 )] = L_add( buf_fx[i + ( M + 1 )], olapBuffer[add( i, shr( hFdCngCom->frameSize, 2 ) )] ); + move32(); } // v_multc(buf, (float)(hFdCngCom->fftlen / 2), buf, M + 1 + hFdCngCom->frameSize); - v_multc_fixed( buf_fx, fftScale, buf_fx, M + 1 + hFdCngCom->frameSize ); + v_multc_fixed( buf_fx, fftScale, buf_fx, add( M + 1, hFdCngCom->frameSize ) ); } ELSE { // v_multc(olapBuffer + hFdCngCom->frameSize / 4 - (M + 1), (float)(hFdCngCom->fftlen / 2), buf, M + 1 + hFdCngCom->frameSize); - v_multc_fixed( olapBuffer + ( hFdCngCom->frameSize / 4 ) - ( M + 1 ), fftScale, buf_fx, M + 1 + hFdCngCom->frameSize ); + v_multc_fixed( olapBuffer + sub( shr( hFdCngCom->frameSize, 2 ), ( M + 1 ) ), fftScale, buf_fx, add( M + 1, hFdCngCom->frameSize ) ); } tmp_fx = buf_fx[0]; + move32(); // preemph(buf + 1, PREEMPH_FAC_FLT, M + hFdCngCom->frameSize, &tmp); - preemph_ivas_fx( buf_fx + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp_fx ); + preemph_ivas_fx( buf_fx + 1, PREEMPH_FAC, add( M, hFdCngCom->frameSize ), &tmp_fx ); // residu(hFdCngCom->A_cng_flt, M, buf + 1 + M, hFdCngCom->exc_cng_flt, hFdCngCom->frameSize); // floatToFixed_arr( hFdCngCom->A_cng_flt, hFdCngCom->A_cng, Q13, M + 1 ); // residu_ivas_fx( hFdCngCom->A_cng, Q13, M, buf_fx + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); - residu_ivas_fx( hFdCngCom->A_cng, ( 15 - norm_s( hFdCngCom->A_cng[0] - 1 ) ), M, buf_fx + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); + residu_ivas_fx( hFdCngCom->A_cng, sub( 15, norm_s( hFdCngCom->A_cng[0] - 1 ) ), M, buf_fx + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); for ( i = 0; i < hFdCngCom->frameSize; i++ ) { - hFdCngCom->exc_cng_flt[i] = fix_to_float( hFdCngCom->exc_cng_32fx[i], Q_in - 9 ); + hFdCngCom->exc_cng_flt[i] = fix_to_float( hFdCngCom->exc_cng_32fx[i], sub( Q_in, 9 ) ); } return; @@ -1149,7 +1162,8 @@ void SynthesisSTFT_dirac_fx( FOR( i = shr( hFdCngCom->frameSize, 2 ); i < i_mult( 3, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { - olapBuffer[i] = L_add( olapBuffer[i], Mpy_32_16_1( fftBuffer[i], olapWin[i - hFdCngCom->frameSize / 4] ) ); + olapBuffer[i] = L_add( olapBuffer[i], Mpy_32_16_1( fftBuffer[i], olapWin[sub( i, shr( hFdCngCom->frameSize, 2 ) )] ) ); + move32(); } FOR( ; i < i_mult( 5, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { @@ -1186,14 +1200,15 @@ void SynthesisSTFT_dirac_fx( } /* Get time-domain signal */ - v_multc_fixed( olapBuffer + hFdCngCom->frameSize / 4, fftScale, timeDomainOutput, samples_out ); // Q_in - 9 + v_multc_fixed( olapBuffer + shr( hFdCngCom->frameSize, 2 ), fftScale, timeDomainOutput, samples_out ); // Q_in - 9 /* Get excitation */ - v_multc_fixed( olapBuffer + ( hFdCngCom->frameSize / 4 ) - ( M + 1 ), fftScale, buf, M + 1 + hFdCngCom->frameSize ); + v_multc_fixed( olapBuffer + sub( shr( hFdCngCom->frameSize, 2 ), ( M + 1 ) ), fftScale, buf, add( M + 1, hFdCngCom->frameSize ) ); tmp = buf[0]; + move32(); preemph_ivas_fx( buf + 1, PREEMPH_FAC, M + hFdCngCom->frameSize, &tmp ); // residu_ivas_fx( hFdCngCom->A_cng, Q13, M, buf + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); - residu_ivas_fx( hFdCngCom->A_cng, 15 - norm_s( hFdCngCom->A_cng[0] - 1 ), M, buf + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); + residu_ivas_fx( hFdCngCom->A_cng, sub( 15, norm_s( hFdCngCom->A_cng[0] - 1 ) ), M, buf + 1 + M, hFdCngCom->exc_cng_32fx, hFdCngCom->frameSize ); /* update and window olapBuf if we have a output frame that is shorter than the default frame size...*/ IF( LT_16( samples_out, hFdCngCom->frameSize ) ) @@ -1202,7 +1217,8 @@ void SynthesisSTFT_dirac_fx( } FOR( i = i_mult( 5, shr( hFdCngCom->frameSize, 2 ) ); i < i_mult( 7, shr( hFdCngCom->frameSize, 2 ) ); i++ ) { - olapBuffer[i] = Mpy_32_16_1( olapBuffer[i], olapWin[i - 3 * hFdCngCom->frameSize / 4] ); + olapBuffer[i] = Mpy_32_16_1( olapBuffer[i], olapWin[sub( i, i_mult( 3, shr( hFdCngCom->frameSize, 2 ) ) )] ); + move32(); } return; @@ -1286,9 +1302,10 @@ Word32 rand_gauss_fx( temp = own_random( seed ); temp = L_add( temp, own_random( seed ) ); temp = L_add( temp, own_random( seed ) ); - temp = L_shr( temp, 15 - q ); + temp = L_shr( temp, sub( 15, q ) ); *x = temp; + move32(); return temp; } @@ -1311,6 +1328,7 @@ Word16 rand_gauss_fix( temp = L_add( temp, shr( own_random( seed ), Q2 ) ); *x = (Word16) temp; + move32(); return (Word16) temp; } diff --git a/lib_com/fd_cng_com_fx.c b/lib_com/fd_cng_com_fx.c index 7125ec40c..b024df61d 100644 --- a/lib_com/fd_cng_com_fx.c +++ b/lib_com/fd_cng_com_fx.c @@ -395,10 +395,12 @@ void compress_range( if ( in_s != 0 ) { out[i] = extract_h( L_tmp ); + move16(); } if ( out[i] == 0 ) { out[i] = 1; + move16(); } } } @@ -417,10 +419,12 @@ void compress_range( if ( in[i] != 0 ) { out[i] = extract_h( L_tmp ); + move16(); } if ( out[i] == 0 ) { out[i] = 1; + move16(); } } } @@ -523,7 +527,7 @@ void expand_range_var_exp( move32(); Word32 tmp_low_lim = L_shr( low_lim, maxOutExp ); - IF( LT_32( out[i], tmp_low_lim ) ) + if ( LT_32( out[i], tmp_low_lim ) ) { out[i] = tmp_low_lim; move32(); @@ -603,6 +607,7 @@ void minimum_statistics( Word16 msNoiseFloor16; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif @@ -765,7 +770,7 @@ void minimum_statistics( FOR( j = start; j < stop; j++ ) { - /* Compute optimal smoothing parameter for PSD estimation */ test(); + /* Compute optimal smoothing parameter for PSD estimation */ test(); IF( ( scalar == 0 ) || ( msNoiseFloor[j] == 0 ) ) { @@ -827,6 +832,7 @@ void minimum_statistics( /* Compute the PSD (smoothed periodogram) in each band */ msPsd[j] = round_fx( L_add( Mpy_32_16_1( msAlpha[j], msPsd[j] ), Mpy_32_16_1( L_sub( 2147483647l /*1.0 Q31*/, msAlpha[j] ), msPeriodog[j] ) ) ); + move16(); } msPsdSum[cnt] = dotp_s_fx( msPsd + start, psize + start, current_len, CNG_HS ); move32(); @@ -1077,6 +1083,7 @@ void minimum_statistics( FOR( j = 0; j < len; j++ ) { msCurrentMinOut[j] = L_deposit_h( msPeriodog[j] ); + move32(); } set32_fx( hFdCngCom->msAlphaCor, 2147483647l /*1.0 Q31*/, cnt ); set32_fx( msAlpha, 0l /*0.0 Q31*/, len ); @@ -1123,6 +1130,7 @@ void minimum_statistics( FOR( j = 0; j < len; j++ ) { msNoiseEst[j] = round_fx( L_mac( L_mult( 31130 /*0.95 Q15*/, msNoiseEst[j] ), 1638 /*0.05 Q15*/, msNoiseFloor[j] ) ); + move16(); } } #ifdef IVAS_CODE_CNG_COM @@ -1221,6 +1229,7 @@ void minimum_statistics_fx( Word16 msNoiseFloor16; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif @@ -1379,7 +1388,7 @@ void minimum_statistics_fx( FOR( j = start; j < stop; j++ ) { - /* Compute optimal smoothing parameter for PSD estimation */ test(); + /* Compute optimal smoothing parameter for PSD estimation */ test(); IF( ( scalar == 0 ) || ( msNoiseFloor[j] == 0 ) ) { @@ -1441,6 +1450,7 @@ void minimum_statistics_fx( /* Compute the PSD (smoothed periodogram) in each band */ msPsd[j] = round_fx( L_add( Mpy_32_16_1( msAlpha[j], msPsd[j] ), Mpy_32_16_1( L_sub( 2147483647l /*1.0 Q31*/, msAlpha[j] ), msPeriodog[j] ) ) ); + move16(); } msPsdSum[cnt] = dotp_s_fx( msPsd + start, psize + start, current_len, CNG_HS ); move32(); @@ -1691,6 +1701,7 @@ void minimum_statistics_fx( FOR( j = 0; j < len; j++ ) { msCurrentMinOut[j] = L_deposit_h( msPeriodog[j] ); + move32(); } set32_fx( hFdCngCom->msAlphaCor, 2147483647l /*1.0 Q31*/, cnt ); set32_fx( msAlpha, 0l /*0.0 Q31*/, len ); @@ -1731,12 +1742,14 @@ void minimum_statistics_fx( ELSE { hFdCngCom->msFrCnt = add( hFdCngCom->msFrCnt, 1 ); + move16(); } /* Smooth noise estimate during CNG phases */ FOR( j = 0; j < len; j++ ) { msNoiseEst[j] = round_fx( L_mac( L_mult( 31130 /*0.95 Q15*/, msNoiseEst[j] ), 1638 /*0.05 Q15*/, msNoiseFloor[j] ) ); + move16(); } } if ( enc_dec == DEC && element_mode == IVAS_CPE_TD ) @@ -1804,6 +1817,7 @@ void apply_scale( { *scale = L_add( *scale, L_deposit_h( scaleTable[i].scale ) ); + move32(); } } @@ -1837,6 +1851,7 @@ Word16 apply_scale_ind( { *scale = L_add( *scale, L_deposit_h( scaleTable[i].scale ) ); + move32(); } return i; } @@ -1864,7 +1879,9 @@ void apply_scale_ivas_fx( assert( i < scaleTableSize ); *scale = L_add( *scale, L_deposit_h( scaleTable[i].scale ) ); + move32(); *index = i; + move16(); } #endif // IVAS_FLOAT_FIXED /*------------------------------------------------------------------- @@ -1961,6 +1978,7 @@ void scalebands( delta = 0; move16(); partpowLD64M1 = 0L; /* to avoid compilation warnings */ + move32(); /* Interpolate the bin/band-wise levels from the partition levels */ IF( EQ_16( nband, npart ) ) @@ -2102,6 +2120,7 @@ void scalebands_fx( delta = 0; move16(); partpowLD64M1 = 0L; /* to avoid compilation warnings */ + move32(); /* Interpolate the bin/band-wise levels from the partition levels */ IF( EQ_16( nband, npart ) ) @@ -2241,6 +2260,7 @@ static void getmidbands( Word16 j, max_psize, shift; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif @@ -2248,6 +2268,7 @@ static void getmidbands( move16(); /* first half partition */ move16(); midband[0] = part[0]; + move16(); psize[0] = add( part[0], 1 ); move16(); psize_inv[0] = getNormReciprocalWord16( psize[0] ); @@ -2401,8 +2422,10 @@ void AnalysisSTFT_fx( FOR( i = 0; i < hFdCngCom->fftlen; i++ ) { fftBuffer[i] = L_shr( fftBuffer[i], 11 ); + move32(); } *fftBuffer_exp = WORD16_BITS + 11; + move16(); /* Perform FFT */ RFFTN_fx( fftBuffer, hFdCngCom->fftSineTab_fx, hFdCngCom->fftlen, -1 ); @@ -2439,9 +2462,11 @@ void SynthesisSTFT( /* Perform IFFT */ scale = 0; + move16(); BASOP_rfft( fftBuffer, hFdCngCom->fftlen, &scale, 1 ); fftBufferExp = add( fftBufferExp, scale ); hFdCngCom->fftBuffer_exp = fftBufferExp; + move16(); fftBufferExp = add( fftBufferExp, hFdCngCom->fftlenShift ); @@ -2466,6 +2491,7 @@ void SynthesisSTFT( FOR( i = 0; i < len; i++ ) { olapBuffer[i] = round_fx( L_shl( fftBuffer[i], fftBufferExp - 15 ) ); + move16(); } } ELSE @@ -2685,6 +2711,7 @@ void mhvals( tmp2_m = BASOP_Util_Add_Mant32Exp( tmp2_m, tmp2_e, L_deposit_h( m_array[i] ), 0, &tmp2_e ); assert( tmp2_e == 0 ); *m = extract_h( tmp2_m ); + move32(); } } @@ -2735,9 +2762,12 @@ void lpc_from_spectrum( Word32 *powspec = hFdCngCom->cngNoiseLevel; /*i : pointer to noise levels format Q5.27*/ Word16 powspec_exp = hFdCngCom->cngNoiseLevelExp; + move16(); Word16 fftlen = hFdCngCom->fftlen; /*i : size of fft*/ Word16 *A = hFdCngCom->A_cng; /*o : lpc coefficients format Q3.12*/ + move16(); Word16 lpcorder = M; + move16(); scale = 0; move16(); @@ -2767,6 +2797,7 @@ void lpc_from_spectrum( *ptr = nf; move32(); *pti = L_deposit_l( 0 ); + move32(); ptr += 2; pti += 2; } @@ -2776,6 +2807,7 @@ void lpc_from_spectrum( *ptr = L_max( nf, L_shl( powspec[i - start], s1 ) ); move32(); *pti = L_deposit_l( 0 ); + move32(); ptr += 2; pti += 2; } @@ -2785,6 +2817,7 @@ void lpc_from_spectrum( *ptr = nf; move32(); *pti = L_deposit_l( 0 ); + move32(); ptr += 2; pti += 2; } @@ -2960,6 +2993,7 @@ void FdCng_exc( Word16 i; *CNG_mode = -1; + move16(); FOR( i = 0; i < L_frame / L_SUBFR; i++ ) { diff --git a/lib_com/fft.c b/lib_com/fft.c index 75f54e2c3..c40556298 100644 --- a/lib_com/fft.c +++ b/lib_com/fft.c @@ -6513,21 +6513,21 @@ static void BASOP_fft8( move32(); im[s * 6] = L_add( s05, s07 ); move32(); - re[s * 3] = L_add( s08, s14 ); + re[i_mult( s, 3 )] = L_add( s08, s14 ); move32(); - re[s * 7] = L_sub( s08, s14 ); + re[i_mult( s, 7 )] = L_sub( s08, s14 ); move32(); - im[s * 3] = L_add( s09, s15 ); + im[i_mult( s, 3 )] = L_add( s09, s15 ); move32(); - im[s * 7] = L_sub( s09, s15 ); + im[i_mult( s, 7 )] = L_sub( s09, s15 ); move32(); re[s * 1] = L_add( s10, s12 ); move32(); - re[s * 5] = L_sub( s10, s12 ); + re[i_mult( s, 5 )] = L_sub( s10, s12 ); move32(); im[s * 1] = L_add( s11, s13 ); move32(); - im[s * 5] = L_sub( s11, s13 ); + im[i_mult( s, 5 )] = L_sub( s11, s13 ); move32(); return; @@ -6581,23 +6581,23 @@ static void BASOP_fftN2( IF( i == 0 ) { - cplxMpy4_8_1( x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1] ); - cplxMpy4_8_1( x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1] ); - cplxMpy4_8_1( x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1] ); - cplxMpy4_8_1( x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1] ); - cplxMpy4_8_1( x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1] ); - cplxMpy4_8_1( x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1] ); - cplxMpy4_8_1( x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1] ); + cplxMpy4_8_1( x02, x03, x[add( shl( i, 1 ), i_mult( 2 * 1, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 1, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x04, x05, x[add( shl( i, 1 ), i_mult( 2 * 2, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 2, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x06, x07, x[add( shl( i, 1 ), i_mult( 2 * 3, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 3, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x08, x09, x[add( shl( i, 1 ), i_mult( 2 * 4, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 4, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x10, x11, x[add( shl( i, 1 ), i_mult( 2 * 5, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 5, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x12, x13, x[add( shl( i, 1 ), i_mult( 2 * 6, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 6, dim1 ) ), 1 )] ); + cplxMpy4_8_1( x14, x15, x[add( shl( i, 1 ), i_mult( 2 * 7, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 7, dim1 ) ), 1 )] ); } ELSE { - cplxMpy4_8_0( x02, x03, x[2 * i + 2 * 1 * dim1], x[2 * i + 2 * 1 * dim1 + 1], W[sc * i + sc * 1 * dim1 - Woff], W[sc * i + sc * 1 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x04, x05, x[2 * i + 2 * 2 * dim1], x[2 * i + 2 * 2 * dim1 + 1], W[sc * i + sc * 2 * dim1 - Woff], W[sc * i + sc * 2 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x06, x07, x[2 * i + 2 * 3 * dim1], x[2 * i + 2 * 3 * dim1 + 1], W[sc * i + sc * 3 * dim1 - Woff], W[sc * i + sc * 3 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x08, x09, x[2 * i + 2 * 4 * dim1], x[2 * i + 2 * 4 * dim1 + 1], W[sc * i + sc * 4 * dim1 - Woff], W[sc * i + sc * 4 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x10, x11, x[2 * i + 2 * 5 * dim1], x[2 * i + 2 * 5 * dim1 + 1], W[sc * i + sc * 5 * dim1 - Woff], W[sc * i + sc * 5 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x12, x13, x[2 * i + 2 * 6 * dim1], x[2 * i + 2 * 6 * dim1 + 1], W[sc * i + sc * 6 * dim1 - Woff], W[sc * i + sc * 6 * dim1 + 1 - Woff] ); - cplxMpy4_8_0( x14, x15, x[2 * i + 2 * 7 * dim1], x[2 * i + 2 * 7 * dim1 + 1], W[sc * i + sc * 7 * dim1 - Woff], W[sc * i + sc * 7 * dim1 + 1 - Woff] ); + cplxMpy4_8_0( x02, x03, x[add( shl( i, 1 ), i_mult( 2 * 1, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 1, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 1, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 1, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x04, x05, x[add( shl( i, 1 ), i_mult( 2 * 2, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 2, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 2, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 2, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x06, x07, x[add( shl( i, 1 ), i_mult( 2 * 3, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 3, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 3, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 3, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x08, x09, x[add( shl( i, 1 ), i_mult( 2 * 4, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 4, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 4, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 4, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x10, x11, x[add( shl( i, 1 ), i_mult( 2 * 5, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 5, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 5, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 5, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x12, x13, x[add( shl( i, 1 ), i_mult( 2 * 6, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 6, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 6, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 6, dim1 ) ) ), 1 ), Woff )] ); + cplxMpy4_8_0( x14, x15, x[add( shl( i, 1 ), i_mult( 2 * 7, dim1 ) )], x[add( add( shl( i, 1 ), i_mult( 2 * 7, dim1 ) ), 1 )], W[sub( add( i_mult( sc, i ), i_mult( sc, i_mult( 7, dim1 ) ) ), Woff )], W[sub( add( add( i_mult( sc, i ), i_mult( sc, i_mult( 7, dim1 ) ) ), 1 ), Woff )] ); } t00 = L_shr( L_add( x00, x08 ), SCALEFACTORN2 - 1 ); t02 = L_shr( L_sub( x00, x08 ), SCALEFACTORN2 - 1 ); @@ -6639,37 +6639,37 @@ static void BASOP_fftN2( s13 = Mpy_32_16_1( L_sub( t03, t01 ), C81_FX ); s15 = Mpy_32_16_1( L_add( t01, t03 ), C82_FX ); - re[sx * i + sx * 0 * dim1] = L_add( s00, s02 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 0, dim1 ) ) )] = L_add( s00, s02 ); move32(); - im[sx * i + sx * 0 * dim1] = L_add( s01, s03 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 0, dim1 ) ) )] = L_add( s01, s03 ); move32(); - re[sx * i + sx * 1 * dim1] = L_add( s10, s12 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 1, dim1 ) ) )] = L_add( s10, s12 ); move32(); - im[sx * i + sx * 1 * dim1] = L_add( s11, s13 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 1, dim1 ) ) )] = L_add( s11, s13 ); move32(); - re[sx * i + sx * 2 * dim1] = L_sub( s04, s06 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 2, dim1 ) ) )] = L_sub( s04, s06 ); move32(); - im[sx * i + sx * 2 * dim1] = L_sub( s05, s07 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 2, dim1 ) ) )] = L_sub( s05, s07 ); move32(); - re[sx * i + sx * 3 * dim1] = L_add( s08, s14 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 3, dim1 ) ) )] = L_add( s08, s14 ); move32(); - im[sx * i + sx * 3 * dim1] = L_add( s09, s15 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 3, dim1 ) ) )] = L_add( s09, s15 ); move32(); - re[sx * i + sx * 4 * dim1] = L_sub( s00, s02 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 4, dim1 ) ) )] = L_sub( s00, s02 ); move32(); - im[sx * i + sx * 4 * dim1] = L_sub( s01, s03 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 4, dim1 ) ) )] = L_sub( s01, s03 ); move32(); - re[sx * i + sx * 5 * dim1] = L_sub( s10, s12 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 5, dim1 ) ) )] = L_sub( s10, s12 ); move32(); - im[sx * i + sx * 5 * dim1] = L_sub( s11, s13 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 5, dim1 ) ) )] = L_sub( s11, s13 ); move32(); - re[sx * i + sx * 6 * dim1] = L_add( s04, s06 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 6, dim1 ) ) )] = L_add( s04, s06 ); move32(); - im[sx * i + sx * 6 * dim1] = L_add( s05, s07 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 6, dim1 ) ) )] = L_add( s05, s07 ); move32(); - re[sx * i + sx * 7 * dim1] = L_sub( s08, s14 ); + re[add( i_mult( sx, i ), i_mult( sx, i_mult( 7, dim1 ) ) )] = L_sub( s08, s14 ); move32(); - im[sx * i + sx * 7 * dim1] = L_sub( s09, s15 ); + im[add( i_mult( sx, i ), i_mult( sx, i_mult( 7, dim1 ) ) )] = L_sub( s09, s15 ); move32(); } diff --git a/lib_com/fft_evs.c b/lib_com/fft_evs.c index 9f98faaa4..3ff15a5c6 100644 --- a/lib_com/fft_evs.c +++ b/lib_com/fft_evs.c @@ -555,12 +555,15 @@ void fft16( Word32 *re, Word32 *im, Word16 s, Word16 bScale ) FOR( i = 0; i < 16; i++ ) { inp_data[i] = CL_form( re[s * i], im[s * i] ); + move64(); } fft16_with_cmplx_data( inp_data, bScale ); FOR( i = 0; i < 16; i++ ) { re[s * i] = CL_Extract_real( inp_data[i] ); + move32(); im[s * i] = CL_Extract_imag( inp_data[i] ); + move32(); } } } @@ -1927,6 +1930,7 @@ static void fftN2( cmplx s0, s1, s2, s3, s4, s5, s6, s7; i = 0; + move16(); { y0 = CL_shr( x_cmplx[i + 0 * dim1], 1 ); y1 = CL_shr( x_cmplx[i + 1 * dim1], 1 ); @@ -2463,7 +2467,9 @@ void BASOP_rfft( Word32 *x, Word16 sizeOfFft, Word16 *scale, Word16 isign ) case 320: case 640: c1 = FFTC( 0x66666680 ); + move16(); c2 = FFTC( 0x99999980 ); + move16(); FOR( i = 0; i < sizeOfFft2; i++ ) { x[2 * i] = Mpy_32_xx( x[2 * i], c1 ); diff --git a/lib_com/fft_fx_evs.c b/lib_com/fft_fx_evs.c index b36bac55f..fa25f6847 100644 --- a/lib_com/fft_fx_evs.c +++ b/lib_com/fft_fx_evs.c @@ -808,10 +808,8 @@ static void cftbsub_fx( move32(); a[j1] = x0r; move32(); - move32(); a[j1 + 1] = x0i; move32(); - move32(); } } } @@ -842,7 +840,7 @@ static void rftfsub_fx( { k = sub( n, j ); kk = add( kk, ks ); - wkr = sub( 8192, c[nc - kk] ); + wkr = sub( 8192, c[sub( nc, kk )] ); wki = c[kk]; move16(); xr = L_sub( a[j], a[k] ); @@ -888,7 +886,7 @@ static void rftbsub_fx( { k = sub( n, j ); kk = add( kk, ks ); - wkr = sub( 8192, c[nc - kk] ); + wkr = sub( 8192, c[sub( nc, kk )] ); wki = c[kk]; move16(); xr = L_sub( a[j], a[k] ); @@ -936,8 +934,8 @@ static void dctsub_fx( { k = sub( n, j ); kk = add( kk, ks ); - wkr = sub( c[kk], c[nc - kk] ); - wki = add( c[kk], c[nc - kk] ); + wkr = sub( c[kk], c[sub( nc, kk )] ); + wki = add( c[kk], c[sub( nc, kk )] ); xr = L_sub( Mult_32_16( L_shl( a[j], 1 ), wki ), Mult_32_16( L_shl( a[k], 1 ), wkr ) ); a[j] = L_add( Mult_32_16( L_shl( a[j], 1 ), wkr ), Mult_32_16( L_shl( a[k], 1 ), wki ) ); move32(); @@ -945,6 +943,7 @@ static void dctsub_fx( move32(); } a[m] = Mult_32_16( L_shl( a[m], 1 ), c[0] ); + move16(); } /*-----------------------------------------------------------------* @@ -967,7 +966,9 @@ void edct2_fx( Word32 xr; *q = Exp16Array( n, in ); + move16(); *q = add( *q, 6 ); + move16(); FOR( j = 0; j < n; j++ ) { a[j] = L_shl( (Word32) in[j], *q ); @@ -1264,6 +1265,7 @@ static void fft5_32_16fx( Word32 L_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif i0 = Idx[0]; move16(); @@ -1509,7 +1511,6 @@ static void fft32_5_16fx( Word16 i, id, jd; Word16 z[64]; - move16(); /*penalty for 1 ptr init */ FOR( i = 0; i < 32; i++ ) { id = Idx[i]; @@ -1522,7 +1523,6 @@ static void fft32_5_16fx( cdftForw_16fx( 64, z, Ip_fft32, w_fft32_16fx ); - move16(); /*penalty for 1 ptr init */ FOR( i = 0; i < 32; i++ ) { jd = Odx_fft32_5[i]; @@ -1604,7 +1604,6 @@ void DoRTFT128_16fx( Word16 i; Word16 z[256]; - move16(); /*penalty for 1 ptr init */ FOR( i = 0; i < 128; i++ ) { z[2 * i] = x[i]; @@ -1619,8 +1618,6 @@ void DoRTFT128_16fx( move16(); y[0] = z[1]; move16(); - move16(); /*penalty for 1 ptr init */ - move16(); /*penalty for 1 ptr init */ FOR( i = 1; i < 128; i++ ) { x[128 - i] = z[2 * i]; @@ -1840,6 +1837,7 @@ static void cftfsub_16fx( Word16 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif l = 2; @@ -1940,6 +1938,7 @@ static void cft1st_16fx( Word32 L_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif #ifdef BASOP_NOGLOB @@ -2042,20 +2041,24 @@ static void cft1st_16fx( #ifdef BASOP_NOGLOB a[10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = add_o( x0r, x0i, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /* Q(Qx+Q_edct) */ + move16(); x0r = add_o( x3i, x1r, &Overflow ); x0i = sub_o( x3r, x1i, &Overflow ); tmp = sub_o( x0i, x0r, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = add_o( x0i, x0r, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); #else a[10] = round_fx( L_shl( L_tmp, 1 ) ); /*Q(Qx+Q_edct) */ @@ -2114,30 +2117,36 @@ static void cft1st_16fx( L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 4] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 5] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = sub_o( x1r, x3i, &Overflow ); x0i = add_o( x1i, x3r, &Overflow ); L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = add_o( x1r, x3i, &Overflow ); x0i = sub_o( x1i, x3r, &Overflow ); L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 6] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 7] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); wk1r = w[k2 + 2]; move32(); @@ -2168,21 +2177,25 @@ static void cft1st_16fx( L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 12] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = negate( x0i ); L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 13] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = sub_o( x1r, x3i, &Overflow ); x0i = add_o( x1i, x3r, &Overflow ); L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 10] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 11] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = add_o( x1r, x3i, &Overflow ); x0i = sub_o( x1i, x3r, &Overflow ); @@ -2190,10 +2203,12 @@ static void cft1st_16fx( L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */ a[j + 14] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */ a[j + 15] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); #else x0r = add( a[j], a[j + 2] ); x0i = add( a[j + 1], a[j + 3] ); @@ -2318,9 +2333,9 @@ static void cftmdl_16fx( Word32 L_x0r, L_x0i; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif m = shl( l, 2 ); - move16(); FOR( j = 0; j < l; j += 2 ) { #ifdef BASOP_NOGLOB @@ -2413,20 +2428,24 @@ static void cftmdl_16fx( tmp = sub_o( x0r, x0i, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = add_o( x0r, x0i, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = add_o( x3i, x1r, &Overflow ); x0i = sub_o( x3r, x1i, &Overflow ); tmp = sub_o( x0i, x0r, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = add_o( x0i, x0r, &Overflow ); L_tmp = Mult_32_16( wk1r, tmp ); /*Q(15+Qx+Q_edct) */ a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); #else j1 = add( j, l ); j2 = add( j1, l ); @@ -2517,10 +2536,12 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk2r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk2i, x0i ); /*Q(15+Qx+Q_edct) */ a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk2r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk2i, x0r ); /*Q(15+Qx+Q_edct) */ a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = sub_o( x1r, x3i, &Overflow ); x0i = add_o( x1i, x3r, &Overflow ); @@ -2528,10 +2549,12 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */ a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */ a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_x0r = L_add( (Word32) x1r, (Word32) x3i ); L_x0i = L_sub( (Word32) x1i, (Word32) x3r ); @@ -2540,10 +2563,12 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */ a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */ a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); #else j1 = add( j, l ); j2 = add( j1, l ); @@ -2641,11 +2666,13 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk2r, x0i ); /*Q(15+Qx+Q_edct) */ a[j2] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); tmp = negate( x0i ); L_tmp = Mult_32_16( wk2i, tmp ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk2r, x0r ); /*Q(15+Qx+Q_edct) */ a[j2 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = sub_o( x1r, x3i, &Overflow ); x0i = add_o( x1i, x3r, &Overflow ); @@ -2653,10 +2680,12 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk1r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk1i, x0i ); /*Q(15+Qx+Q_edct) */ a[j1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk1r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk1i, x0r ); /*Q(15+Qx+Q_edct) */ a[j1 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); x0r = add_o( x1r, x3i, &Overflow ); x0i = sub_o( x1i, x3r, &Overflow ); @@ -2664,10 +2693,12 @@ static void cftmdl_16fx( L_tmp = Mult_32_16( wk3r, x0r ); /*Q(15+Qx+Q_edct) */ L_tmp = Msub_32_16( L_tmp, wk3i, x0i ); /*Q(15+Qx+Q_edct) */ a[j3] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); L_tmp = Mult_32_16( wk3r, x0i ); /*Q(15+Qx+Q_edct) */ L_tmp = Madd_32_16( L_tmp, wk3i, x0r ); /*Q(15+Qx+Q_edct) */ a[j3 + 1] = round_fx_o( L_shl_o( L_tmp, 1, &Overflow ), &Overflow ); /*Q(Qx+Q_edct) */ + move16(); #else j1 = add( j, l ); j2 = add( j1, l ); @@ -2843,6 +2874,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); /* bit growth = 1 (compensated by non-fractional mode MAC). */ + move16(); acc = L_shl( *IZ0--, 15 ); acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); @@ -2850,6 +2882,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = add( c1_ind, c1_step ); s1_ind = add( s1_ind, s1_step ); @@ -2867,6 +2900,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_shl( *IZ0--, 15 ); acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); @@ -2874,6 +2908,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2++, t_sin[s2_ind] ); acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = add( c1_ind, c1_step ); s1_ind = add( s1_ind, s1_step ); @@ -2886,11 +2921,14 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ1, t_sin[c1_ind] ); acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = 0; + move32(); acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); *IY-- = round_fx( acc ); + move16(); IZ0++; IZ1++; IZ2++; @@ -2910,6 +2948,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0++, -32768 ); acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); @@ -2917,6 +2956,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = add( c1_ind, c1_step ); s1_ind = add( s1_ind, s1_step ); @@ -2935,6 +2975,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0++, -32768 ); acc = L_msu0( acc, *RZ1--, t_sin[s1_ind] ); @@ -2942,6 +2983,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); acc = L_mac0( acc, *IZ2++, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = sub( c1_ind, c1_step ); s1_ind = sub( s1_ind, s1_step ); @@ -2954,11 +2996,13 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_deposit_l( 0 ); acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); *IY-- = round_fx( acc ); + move16(); IZ0--; /* Just decrement the address counter */ IZ1--; IZ2--; @@ -2978,6 +3022,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ2, t_sin[c2_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_shl( *IZ0--, 15 ); acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); @@ -2985,6 +3030,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); acc = L_msu0( acc, *IZ2--, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = sub( c1_ind, c1_step ); s1_ind = sub( s1_ind, s1_step ); @@ -3002,6 +3048,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY++ = round_fx( acc ); + move16(); acc = L_shl( *IZ0--, 15 ); acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); @@ -3009,6 +3056,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_mac0( acc, *RZ2++, t_sin[s2_ind] ); acc = L_mac0( acc, *IZ2--, t_sin[c2_ind] ); *IY-- = round_fx( acc ); + move16(); c1_ind = sub( c1_ind, c1_step ); s1_ind = sub( s1_ind, s1_step ); @@ -3021,6 +3069,7 @@ void fft3_fx( const Word16 X[], Word16 Y[], const Word16 n ) acc = L_msu0( acc, *RZ1, t_sin[c1_ind] ); acc = L_mac0( acc, *RZ2, t_sin[c2_ind] ); *RY = round_fx( acc ); + move16(); return; } @@ -3151,6 +3200,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *IZ1--, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2++, t_sin[s2_ind] ); *RY1++ = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step ); s0_ind = add( s0_ind, step ); @@ -3170,6 +3220,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY1++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0--, t_sin[c0_ind] ); acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); @@ -3178,6 +3229,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); *IY1-- = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step ); s0_ind = add( s0_ind, step ); @@ -3198,6 +3250,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY1++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0--, t_sin[c0_ind] ); acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); @@ -3206,6 +3259,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); *IY1-- = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step ); s0_ind = add( s0_ind, step ); @@ -3223,6 +3277,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY1++ = round_fx( acc ); + move16(); /* Construction of Y2 */ c0_ind = T_SIN_PI_2; @@ -3246,6 +3301,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1--, t_sin[s1_ind] ); acc = L_mac0( acc, *IZ2++, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step2 ); s0_ind = add( s0_ind, step2 ); @@ -3265,6 +3321,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0--, t_sin[c0_ind] ); acc = L_msu0( acc, *IZ1--, t_sin[c1_ind] ); @@ -3273,6 +3330,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); *IY2-- = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step2 ); s0_ind = add( s0_ind, step2 ); @@ -3293,6 +3351,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); acc = L_mac0( acc, *IZ2, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0--, t_sin[c0_ind] ); acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); @@ -3301,6 +3360,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_mac0( acc, *RZ2--, t_sin[s2_ind] ); *IY2-- = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step2 ); s0_ind = add( s0_ind, step2 ); @@ -3321,6 +3381,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ0--, t_sin[c0_ind] ); acc = L_mac0( acc, *IZ1--, t_sin[c1_ind] ); @@ -3329,6 +3390,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); *IY2-- = round_fx( acc ); + move16(); c0_ind = sub( c0_ind, step2 ); s0_ind = add( s0_ind, step2 ); @@ -3349,6 +3411,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); acc = L_mult0( *IZ1--, t_sin[c1_ind] ); acc = L_msu0( acc, *IZ0--, t_sin[c0_ind] ); @@ -3357,6 +3420,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_msu0( acc, *RZ1++, t_sin[s1_ind] ); acc = L_msu0( acc, *RZ2--, t_sin[s2_ind] ); *IY2-- = round_fx( acc ); + move16(); c0_ind = add( c0_ind, step2 ); s0_ind = sub( s0_ind, step2 ); @@ -3374,6 +3438,7 @@ void ifft3_fx( const Word16 Z[], Word16 X[], const Word16 n ) acc = L_mac0( acc, *IZ1, t_sin[s1_ind] ); acc = L_msu0( acc, *IZ2, t_sin[s2_ind] ); *RY2++ = round_fx( acc ); + move16(); /* Compute the inverse FFT for all 3 blocks. */ RY0 = &Y[0]; /* Rewind the pointers. */ diff --git a/lib_com/fft_rel.c b/lib_com/fft_rel.c index 0891367c6..1cf917f8f 100644 --- a/lib_com/fft_rel.c +++ b/lib_com/fft_rel.c @@ -317,6 +317,7 @@ void fft_rel_fx( Word16 *xi2, *xi3, *xi4, *xi1; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif @@ -357,7 +358,7 @@ void fft_rel_fx( move16(); x1 = &x[1]; move16(); - FOR( i = 0; i < n / 2; i++ ) + FOR( i = 0; i < shr( n, 1 ); i++ ) { xt = *x0; move16(); @@ -400,7 +401,7 @@ void fft_rel_fx( n2 = shl( n4, 1 ); n1 = shl( n2, 1 ); - step = N_MAX_SAS / n1; + step = idiv1616( N_MAX_SAS, n1 ); x0 = x; x1 = x + n2; @@ -679,6 +680,8 @@ void fft_rel_fx32( Word32 *x2even, *x2odd; Word32 temp[512]; + test(); + test(); IF( EQ_16( n, 128 ) || EQ_16( n, 256 ) || EQ_16( n, 512 ) ) { idx = fft256_read_indexes; @@ -690,10 +693,14 @@ void fft_rel_fx32( FOR( i = 0; i < 64; i++ ) { j = *idx++; + move16(); k = *idx++; + move16(); *x2++ = L_add( x[shr( j, 1 )], x[shr( k, 1 )] ); + move16(); *x2++ = L_sub( x[shr( j, 1 )], x[shr( k, 1 )] ); + move16(); } } ELSE IF( EQ_16( n, 256 ) ) @@ -702,10 +709,14 @@ void fft_rel_fx32( FOR( i = 0; i < 128; i++ ) { j = *idx++; + move16(); k = *idx++; + move16(); *x2++ = L_add( x[j], x[k] ); + move16(); *x2++ = L_sub( x[j], x[k] ); + move16(); } } ELSE IF( EQ_16( n, 512 ) ) @@ -721,11 +732,15 @@ void fft_rel_fx32( idx++; *x2even++ = L_add( x[j], x[k] ); + move16(); *x2even++ = L_sub( x[j], x[k] ); - j++; - k++; + move16(); + j = add( j, 1 ); + k = add( k, 1 ); *x2odd++ = L_add( x[j], x[k] ); + move16(); *x2odd++ = L_sub( x[j], x[k] ); + move16(); } } @@ -746,11 +761,15 @@ void fft_rel_fx32( FOR( i = 0; i < n; i += 4 ) { *x2++ = L_add( *x0++, *x1 ); /* x[i] = xt + x[i+n2]; */ + move16(); *x2++ = *x0; + move16(); x0--; *x2++ = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */ + move16(); x1++; *x2++ = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; */ + move16(); x0 += 4; x1 += 3; /* x1 has already advanced */ @@ -764,23 +783,27 @@ void fft_rel_fx32( *-----------------------------------------------------------------*/ j = 0; + move16(); x0 = &x[0]; - FOR( i = 0; i < n - 1; i++ ) + FOR( i = 0; i < sub( n, 1 ); i++ ) { IF( LT_16( i, j ) ) { xt = x[j]; + move32(); x[j] = *x0; + move32(); *x0 = xt; + move32(); } x0++; - k = n / 2; + k = shr( n, 1 ); WHILE( LE_16( k, j ) ) { - j -= k; + j = sub( j, k ); k = shr( k, 1 ); } - j += k; + j = add( j, k ); } /*-----------------------------------------------------------------* @@ -789,10 +812,12 @@ void fft_rel_fx32( x0 = &x[0]; x1 = &x[1]; - FOR( i = 0; i < n / 2; i++ ) + FOR( i = 0; i < shr( n, 1 ); i++ ) { *x1 = L_sub( *x0, *x1 ); + move32(); *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); + move32(); x0++; x0++; @@ -813,9 +838,12 @@ void fft_rel_fx32( FOR( i = 0; i < n; i += 4 ) { - *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */ + *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */ + move32(); *x0 = L_sub( L_shl( *x0, 1 ), *x1++ ); /* x[i] = xt + x[i+n2]; */ - *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; */ + move32(); + *x1 = L_negate( *x1 ); /* x[i+n2+n4] = -x[i+n2+n4]; */ + move32(); x0 += 4; x1 += 3; /* x1 has already advanced */ @@ -835,10 +863,14 @@ void fft_rel_fx32( *-----------------------------------------------------------------*/ n4 = 1; + move16(); n2 = 2; + move16(); n1 = 4; + move16(); step = N_MAX_DIV4; + move16(); FOR( k = 3; k <= m; k++ ) { @@ -853,9 +885,12 @@ void fft_rel_fx32( FOR( i = 0; i < n; i += n1 ) { - *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */ + *x1 = L_sub( *x0, *x1 ); /* x[i+n2] = xt - x[i+n2]; */ + move32(); *x0 = L_sub( L_shl( *x0, 1 ), *x1 ); /* x[i] = xt + x[i+n2]; */ - *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4]; */ + move32(); + *x2 = L_negate( *x2 ); /* x[i+n2+n4] = -x[i+n2+n4]; */ + move32(); s = sincos_t_ext_fx; c = s + N_MAX_FFT / 4; /* 1024/4 = 256, 256/4=64 */ @@ -878,9 +913,13 @@ void fft_rel_fx32( t2 = L_sub( Mpy_32_16_1( *xi3, *s ), Mpy_32_16_1( *xi4, *c ) ); /* t2 = *xi3**(pt_s+ind) - *xi4**(pt_c+ind); */ *xi4 = L_sub( *xi2, t2 ); + move32(); *xi2 = L_sub( *xi1, t1 ); + move32(); *xi1 = L_sub( L_shl( *xi1, 1 ), *xi2 ); + move32(); *xi3 = L_negate( L_add( L_shl( t2, 1 ), *xi4 ) ); + move32(); } x1 += n1; diff --git a/lib_com/fft_rel_fx.c b/lib_com/fft_rel_fx.c index 405c500b5..846099592 100644 --- a/lib_com/fft_rel_fx.c +++ b/lib_com/fft_rel_fx.c @@ -85,6 +85,7 @@ static void c_fft_fx( const Word16 *table_ptr; const Word16 *input_ptr1, *input_ptr2, *input_ptr3, *input_ptr4; Word16 shift = 0; + move16(); /* Setup Reorder Variables */ table_ptr = NULL; table_ptr = FFT_REORDER_1024; @@ -92,18 +93,23 @@ static void c_fft_fx( { case 1024: shift = 0; + move16(); BREAK; case 512: shift = 1; + move16(); BREAK; case 256: shift = 2; + move16(); BREAK; case 128: shift = 3; + move16(); BREAK; case 64: shift = 4; + move16(); BREAK; } /* The FFT part */ diff --git a/lib_com/gs_bitallocation_fx.c b/lib_com/gs_bitallocation_fx.c index b3da54d2a..112d94e51 100644 --- a/lib_com/gs_bitallocation_fx.c +++ b/lib_com/gs_bitallocation_fx.c @@ -579,7 +579,7 @@ void bands_and_bit_alloc_fx( #endif test(); - if ( EQ_16( bwidth, NB ) && GT_16( nb_bands_max, 10 ) ) + if ( ( bwidth == NB ) && GT_16( nb_bands_max, 10 ) ) { nb_bands_max = 10; move16(); @@ -588,8 +588,10 @@ void bands_and_bit_alloc_fx( /*------------------------------------------------------------------------ * Find extra number of band to code according to bit rate availables *-----------------------------------------------------------------------*/ + test(); WHILE( GE_16( bit_tmp, bit_new_bands ) && LE_16( nb_bands, sub( nb_bands_max, 1 ) ) ) { + test(); bit_tmp = sub( bit_tmp, bit_new_bands ); nb_bands = add( nb_bands, 1 ); } diff --git a/lib_com/gs_bitallocation_ivas_fx.c b/lib_com/gs_bitallocation_ivas_fx.c index 107236eb6..0e7426120 100644 --- a/lib_com/gs_bitallocation_ivas_fx.c +++ b/lib_com/gs_bitallocation_ivas_fx.c @@ -121,7 +121,7 @@ void bands_and_bit_alloc_ivas_fx( #endif nb_tot_bands = MBANDS_GN; move16(); - IF( EQ_16( L_frame, L_FRAME16k ) ) + if ( EQ_16( L_frame, L_FRAME16k ) ) { nb_tot_bands = MBANDS_GN_BITALLOC16k; move16(); @@ -137,6 +137,7 @@ void bands_and_bit_alloc_ivas_fx( #else /* BASOP_NOGLOB */ ener_vec[0] = add( Ener_per_bd_iQ[0], Ener_per_bd_iQ[1] ); /*Q12 */ #endif + move16(); Copy( Ener_per_bd_iQ_tmp + 1, ener_vec, MBANDS_GN - 1 ); /*Q12 */ ener_vec[MBANDS_GN - 1] = ener_vec[MBANDS_GN - 2]; move16(); @@ -162,7 +163,8 @@ void bands_and_bit_alloc_ivas_fx( bit_new_bands = 5; move16(); #if 1 // def ADD_LRTD - IF( GT_32( core_brate, ACELP_16k40 ) && EQ_16( L_frame, L_FRAME16k ) ) + test(); + if ( GT_32( core_brate, ACELP_16k40 ) && EQ_16( L_frame, L_FRAME16k ) ) { bit_new_bands = 7; move16(); @@ -185,8 +187,11 @@ void bands_and_bit_alloc_ivas_fx( i = add( i, 1 ); } - IF( GT_16( element_mode, EVS_MONO ) && EQ_16( coder_type, AUDIO ) && - LE_32( core_brate, STEREO_GSC_BIT_RATE_ALLOC ) && EQ_32( brate_intermed_tbl[i], ACELP_9k60 ) ) /* Bit allocation should be mapped to 8 kb/s instead of 9.6 kb/s in this case */ + test(); + test(); + test(); + if ( ( element_mode > EVS_MONO ) && EQ_16( coder_type, AUDIO ) && + LE_32( core_brate, STEREO_GSC_BIT_RATE_ALLOC ) && EQ_32( brate_intermed_tbl[i], ACELP_9k60 ) ) /* Bit allocation should be mapped to 8 kb/s instead of 9.6 kb/s in this case */ { i = sub( i, 1 ); } @@ -200,7 +205,7 @@ void bands_and_bit_alloc_ivas_fx( test(); test(); - IF( ( EQ_16( coder_type, AUDIO ) || EQ_16( coder_type, INACTIVE ) ) && EQ_16( bwidth, NB ) ) + IF( ( EQ_16( coder_type, AUDIO ) || ( coder_type == INACTIVE ) ) && ( bwidth == NB ) ) { IF( GE_32( core_brate, ACELP_9k60 ) ) { @@ -231,14 +236,17 @@ void bands_and_bit_alloc_ivas_fx( } #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k ) + IF( EQ_16( L_frame, L_FRAME16k ) ) { - *bit -= 8; + *bit = sub( *bit, 8 ); + move16(); } - IF( coder_type == INACTIVE && core_brate <= GSC_LRES_GAINQ_LIMIT ) /* can happen only for 2nd channel inactive */ + test(); + IF( coder_type == INACTIVE && LE_32( core_brate, GSC_LRES_GAINQ_LIMIT ) ) /* can happen only for 2nd channel inactive */ { - *bit += GSC_LRES_NB_NITS; + *bit = add( *bit, GSC_LRES_NB_NITS ); + move16(); } IF( *bit > 0 ) @@ -247,7 +255,9 @@ void bands_and_bit_alloc_ivas_fx( { #ifndef FIX_802_1137_1137_GSC_IVAS_FXFLT_DECODING SWB_bit_budget = *bit; // Q0 + move16(); st_band = 5; + move16(); set_l( bits_per_bands, 0, MBANDS_GN_BITALLOC16k ); @@ -262,45 +272,48 @@ void bands_and_bit_alloc_ivas_fx( 2 = GSC bit alloc for tc frame 3 = more music like (should not happen often given music is coded with dft) */ - IF( GSC_IVAS_mode <= 3 ) + if ( LE_16( GSC_IVAS_mode, 3 ) ) { - nb_bands_max -= 6; + nb_bands_max = sub( nb_bands_max, 6 ); } - IF( GSC_IVAS_mode == 2 ) + IF( EQ_16( GSC_IVAS_mode, 2 ) ) { // bit_fracf += 0.1f; bit_fracf = L_add( bit_fracf, 26214 ); // Q18 - nb_bands_max--; + nb_bands_max = sub( nb_bands_max, 1 ); } - IF( GSC_IVAS_mode == 3 ) + IF( EQ_16( GSC_IVAS_mode, 3 ) ) { // bit_fracf -= 0.1f; bit_fracf = L_sub( bit_fracf, 26214 ); // Q18 - nb_bands_max += 3; + nb_bands_max = add( nb_bands_max, 3 ); } /* First find how much we want to share between LF and HF, at low bitrate, a miminum of bits is needed in LF by limitating the number of bands*/ /* Adjust the number of band based on the content type and bitrate */ // nb_bands_adj = 1.0f; nb_bands_adj = ONE_IN_Q18; - IF( GSC_IVAS_mode == 1 && core_brate < GSC_L_RATE_STG ) + move32(); + test(); + test(); + IF( EQ_16( GSC_IVAS_mode, 1 ) && LT_32( core_brate, GSC_L_RATE_STG ) ) { // nb_bands_adj = 0.0125f * SWB_bit_budget - 0.75f; nb_bands_adj = L_sub( Mpy_32_32( 26843546, L_shl( SWB_bit_budget, Q18 ) ), 196608 ); // Q18 } - ELSE IF( GSC_IVAS_mode != 2 && core_brate > GSC_H_RATE_STG ) + ELSE IF( NE_16( GSC_IVAS_mode, 2 ) && GT_32( core_brate, GSC_H_RATE_STG ) ) { // nb_bands_adj = 0.02f * SWB_bit_budget - 1.2f; nb_bands_adj = L_sub( Mpy_32_32( 42949673, L_shl( SWB_bit_budget, Q18 ) ), 314572 ); // Q18 } // nb_bands_max = (int16_t)(nb_bands_max * nb_bands_adj + 0.5f); - nb_bands_max = (Word16) L_shr_r( nb_bands_max * nb_bands_adj, Q18 ); // Q0 + nb_bands_max = extract_l( L_shr_r( nb_bands_max * nb_bands_adj, Q18 ) ); // Q0 nb_bands_max = check_bounds_s( nb_bands_max, 5, nb_tot_bands ); // bit_fracf *= SWB_bit_budget; - bit_fracf = bit_fracf * SWB_bit_budget; // Q18 + bit_fracf = imult3216( bit_fracf, extract_l( SWB_bit_budget ) ); // Q18 /* Estimation of the number of bit used in HF */ /* with only the first weigthing The number of bits in max_ener_band[st_band-1] = 17% of bit_fracf */ @@ -308,43 +321,52 @@ void bands_and_bit_alloc_ivas_fx( mb = Mpy_32_32( 365072220, bit_fracf ); // Q18 // mp = 2 * DSR_NB_PULSE); mp = 2359296; // Q18 - IF( core_brate < GSC_L_RATE_STG && GSC_IVAS_mode == 3 ) + move32(); + test(); + IF( LT_32( core_brate, GSC_L_RATE_STG ) && EQ_16( GSC_IVAS_mode, 3 ) ) { // mp = 1.5f * DSR_NB_PULSE; mp = 1769472; // Q18 + move32(); } - ELSE IF( core_brate < GSC_L_RATE_STG ) + ELSE IF( LT_32( core_brate, GSC_L_RATE_STG ) ) { // mp = DSR_NB_PULSE; mp = DSR_NB_PULSE_Q18; + move32(); } /* We want max_ener_band[st_band] <= max_ener_band[st_band-1] and max_ener_band[nb_bands_max-1] <= max_ener_band[st_band]*/ /* We will estimate the number of bits to allocate of HF and put the remaining bits, if any, back on LF */ /* compute the total possible number of band to be coded */ // nb_tot_bands = (int16_t)((SWB_bit_budget - bit_fracf) / (mp + (mb - mp) / 2.0f)); - nb_tot_bands = (Word16) ( L_sub( L_shl( SWB_bit_budget, Q18 ), bit_fracf ) / L_add( mp, L_shr( L_sub( mb, mp ), 1 ) ) ); - mp = min( mp, mb ); - IF( nb_tot_bands + st_band > nb_bands_max ) + nb_tot_bands = extract_l( L_sub( L_shl( SWB_bit_budget, Q18 ), bit_fracf ) / L_add( mp, L_shr( L_sub( mb, mp ), 1 ) ) ); + mp = L_min( mp, mb ); + IF( GT_16( add( nb_tot_bands, st_band ), nb_bands_max ) ) { - bit_adj = ( ( mb + mp ) / 2 ) * ( nb_tot_bands + st_band - nb_bands_max ); - bit_adj = max( 0, bit_adj ); - nb_tot_bands = nb_bands_max - st_band; + bit_adj = imult3216( L_shr( L_add( mb, mp ), 1 ), sub( add( nb_tot_bands, st_band ), nb_bands_max ) ); + bit_adj = L_max( 0, bit_adj ); + nb_tot_bands = sub( nb_bands_max, st_band ); // bit_fracf += bit_adj; bit_fracf = L_add( bit_fracf, bit_adj ); // Q18 } - nb_tot_bands += st_band; + nb_tot_bands = add( nb_tot_bands, st_band ); /* Allocate bits to LF */ // etmp = 0.23f; Word32 etmp_32fx = 493921239; // Q15 + move32(); FOR( j = 0; j < st_band; j++ ) { i = j; + move16(); max_ener_band[j] = i; + move16(); ener_vec[i] = MIN16B; + move16(); // bits_per_bands[j] = etmp * bit_fracf; bits_per_bands[j] = Mpy_32_32( bit_fracf, etmp_32fx ); // 33 - 15 = Q18 + move32(); // etmp -= 0.015f; etmp_32fx = L_sub( etmp_32fx, 32212255 ); } @@ -356,44 +378,51 @@ void bands_and_bit_alloc_ivas_fx( set_s( nb_pulse_per_band, 2, MBANDS_GN_BITALLOC16k ); FOR( i = st_band + 2; i < nb_tot_bands - 1; i++ ) { - IF( ener_vec[i] < ener_vec[i - 1] && ener_vec[i] < ener_vec[i + 1] ) + test(); + if ( LT_16( ener_vec[i], ener_vec[i - 1] ) && LT_16( ener_vec[i], ener_vec[i + 1] ) ) { nb_pulse_per_band[i] = 1; + move16(); } } FOR( j = st_band; j < nb_tot_bands; j++ ) { - IF( j > 6 ) + IF( GT_16( j, 6 ) ) { i = maximum_fx( ener_vec, nb_tot_bands, &etmp ); } ELSE { i = j; + move16(); } max_ener_band[j] = i; + move16(); ener_vec[i] = MIN16B; + move16(); } /* Recompute the final bit distribution for HF */ - IF( nb_tot_bands > st_band ) + IF( GT_16( nb_tot_bands, st_band ) ) { // bit_fracf = DSR_NB_PULSE; - mb = ( SWB_bit_budget * 2 / ( nb_tot_bands - st_band ) ) - mp; // Q18 - bit_fracf = ( mb - mp ) / ( nb_tot_bands - st_band ); + mb = ( L_shl( SWB_bit_budget, 1 ) / sub( nb_tot_bands, st_band ) ) - mp; // Q18 + bit_fracf = L_sub( mb, mp ) / sub( nb_tot_bands, st_band ); mb = L_sub( mb, bit_fracf ); /* Do the distribution */ FOR( j = st_band; j < nb_tot_bands; j++ ) { - IF( nb_pulse_per_band[max_ener_band[j]] > 1 ) + IF( GT_16( nb_pulse_per_band[max_ener_band[j]], 1 ) ) { bits_per_bands[max_ener_band[j]] = mb; + move32(); } ELSE { // bits_per_bands[max_ener_band[j]] = 4.5f; bits_per_bands[max_ener_band[j]] = 1179648; + move32(); } mb = L_sub( mb, bit_fracf ); SWB_bit_budget = L_sub( SWB_bit_budget, bits_per_bands[max_ener_band[j]] ); // Q18 @@ -403,52 +432,56 @@ void bands_and_bit_alloc_ivas_fx( /* Series of verification in case bit allocated != the budget */ IF( SWB_bit_budget > 0 ) { - i = st_band - 1; + i = sub( st_band, 1 ); WHILE( SWB_bit_budget > 0 ) { // bits_per_bands[i]++; bits_per_bands[i] = L_add( bits_per_bands[i], 262144 ); // SWB_bit_budget--; SWB_bit_budget = L_sub( SWB_bit_budget, 262144 ); - i--; - IF( i == -1 ) + i = sub( i, 1 ); + IF( EQ_16( i, -1 ) ) { - i = st_band - 1; + i = sub( st_band, 1 ); } } } nb_bands = nb_tot_bands; + move16(); sum_bit = 0; + move32(); j = 0; + move16(); FOR( i = 0; i < nb_bands; i++ ) { // if (bits_per_bands[i] > 112) - IF( bits_per_bands[i] > 29360128 ) + IF( GT_32( bits_per_bands[i], 29360128 ) ) { - sum_bit += bits_per_bands[i] - 112; + sum_bit = L_add( sum_bit, L_sub( bits_per_bands[i], 112 ) ); sum_bit = L_add( sum_bit, L_sub( bits_per_bands[i], 29360128 ) ); // bits_per_bands[i] = 112; bits_per_bands[i] = 29360128; - j = i + 1; + move32(); + j = add( i, 1 ); } /* safety check for overage bit reallocation */ // else if (bits_per_bands[i] + sum_bit / 3 > 112) - ELSE IF( bits_per_bands[i] + sum_bit / 3 > 29360128 ) + ELSE IF( GT_32( L_add( bits_per_bands[i], divide3216( L_shl( sum_bit, 1 ), 3 ) ), 29360128 ) ) { - j = i + 1; + j = add( i, 1 ); } } IF( sum_bit != 0 ) { // sum_bit /= (nb_bands - j); - sum_bit = sum_bit / ( nb_bands - j ); + sum_bit = divide3216( L_shl( sum_bit, 1 ), sub( nb_bands, j ) ); FOR( i = j; i < nb_bands; i++ ) { - bits_per_bands[i] += sum_bit; + bits_per_bands[i] = L_add( bits_per_bands[i], sum_bit ); } } #else @@ -461,7 +494,7 @@ void bands_and_bit_alloc_ivas_fx( IF( EQ_16( GSC_noisy_speech, 1 ) ) { SWB_bit_budget = *bit; - move16(); + move32(); nb_bands = 5; move16(); @@ -469,22 +502,30 @@ void bands_and_bit_alloc_ivas_fx( // fzero_val = 0.0f; fzero_val = 0; - IF( element_mode > EVS_MONO ) + move16(); + + if ( element_mode > EVS_MONO ) { fzero_val = MIN16B; + move16(); } - IF( coder_type == UNVOICED && element_mode > EVS_MONO ) + test(); + IF( EQ_16( coder_type, UNVOICED ) && element_mode > EVS_MONO ) { nb_bands = 3; - IF( SWB_bit_budget > 20 ) + move16(); + + if ( GT_32( SWB_bit_budget, 20 ) ) { nb_bands = 5; + move16(); } } - ELSE IF( bwidth < SWB ) + ELSE IF( LT_16( bwidth, SWB ) ) { nb_bands = 7; + move16(); } #endif @@ -513,11 +554,13 @@ void bands_and_bit_alloc_ivas_fx( move16(); } #if 1 // def ADD_LRTD - IF( bwidth < SWB ) + IF( LT_16( bwidth, SWB ) ) { - IF( coder_type == UNVOICED && element_mode > EVS_MONO ) + test(); + if ( EQ_16( coder_type, UNVOICED ) && element_mode > EVS_MONO ) { nb_tot_bands = 5; + move16(); } #endif FOR( ; j < nb_bands; j++ ) @@ -537,7 +580,9 @@ void bands_and_bit_alloc_ivas_fx( { i = maximum_fx( ener_vec, nb_tot_bands, &etmp ); max_ener_band[j] = i; + move16(); ener_vec[i] = fzero_val; + move16(); } } #endif @@ -545,24 +590,28 @@ void bands_and_bit_alloc_ivas_fx( } ELSE { - bit_index++; + bit_index = add( bit_index, 1 ); bit_tmp = sub( *bit, GSC_freq_bits[bit_index] ); - bit_index++; + bit_index = add( bit_index, 1 ); nb_bands_max = add( nb_bands_max, GSC_freq_bits[bit_index] ); - bit_index++; + bit_index = add( bit_index, 1 ); *pvq_len = 112; move16(); st_band = 7; move16(); #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k && core_brate > ACELP_16k40 ) + test(); + IF( EQ_16( L_frame, L_FRAME16k ) && GT_32( core_brate, ACELP_16k40 ) ) { *pvq_len = 160; + move16(); st_band = 10; - nb_bands = *pvq_len / 16; - bit_tmp -= 35; + move16(); + nb_bands = shr( *pvq_len, 4 ); + bit_tmp = sub( bit_tmp, 35 ); bit_new_bands = 5; + move16(); } #endif IF( LE_32( core_brate, ACELP_9k60 ) ) @@ -587,7 +636,7 @@ void bands_and_bit_alloc_ivas_fx( nb_bands = shr( *pvq_len, 4 ); #if 1 // def ADD_LRTD - nb_bands_max = min( nb_bands_max, MBANDS_GN_BITALLOC16k ); + nb_bands_max = s_min( nb_bands_max, MBANDS_GN_BITALLOC16k ); #endif /*------------------------------------------------------------------------ * Ajustement of the maximum number of bands in function of the @@ -597,17 +646,17 @@ void bands_and_bit_alloc_ivas_fx( test(); test(); test(); - IF( EQ_16( coder_type, INACTIVE ) || GE_16( noise_lev, NOISE_LEVEL_SP3 ) ) + IF( coder_type == INACTIVE || GE_16( noise_lev, NOISE_LEVEL_SP3 ) ) { /* Probably classification error -> concentrate bits on LF */ #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k && core_brate >= ACELP_24k40 ) + IF( EQ_16( L_frame, L_FRAME16k ) && GE_32( core_brate, ACELP_24k40 ) ) { - nb_bands_max = nb_tot_bands - 2; + nb_bands_max = sub( nb_tot_bands, 2 ); } - ELSE IF( core_brate >= ACELP_16k40 ) + ELSE IF( GE_32( core_brate, ACELP_16k40 ) ) { - nb_bands_max = nb_bands + 2; + nb_bands_max = add( nb_bands, 2 ); } ELSE #endif @@ -631,29 +680,30 @@ void bands_and_bit_alloc_ivas_fx( nb_bands_max = sub( nb_bands_max, 1 ); } #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k ) + IF( EQ_16( L_frame, L_FRAME16k ) ) { - IF( core_brate < ACELP_24k40 ) + IF( LT_32( core_brate, ACELP_24k40 ) ) { - nb_bands_max -= 4; + nb_bands_max = sub( nb_bands_max, 4 ); } - ELSE IF( core_brate < ACELP_32k ) + ELSE IF( LT_32( core_brate, ACELP_32k ) ) { - IF( Diff_len > 0 || noise_lev >= NOISE_LEVEL_SP2 ) + test(); + IF( GT_16( Diff_len, 0 ) || GE_16( noise_lev, NOISE_LEVEL_SP2 ) ) { - nb_bands_max -= 2; - bit_new_bands *= 2; + nb_bands_max = sub( nb_bands_max, 2 ); + bit_new_bands = shl( bit_new_bands, 1 ); } } ELSE IF( core_brate >= ACELP_32k ) { - bit_new_bands *= 2; + bit_new_bands = shl( bit_new_bands, 1 ); } } #endif test(); - IF( EQ_16( bwidth, NB ) && GT_16( nb_bands_max, 10 ) ) + if ( ( bwidth == NB ) && GT_16( nb_bands_max, 10 ) ) { nb_bands_max = 10; move16(); @@ -662,8 +712,10 @@ void bands_and_bit_alloc_ivas_fx( /*------------------------------------------------------------------------ * Find extra number of band to code according to bit rate availables *-----------------------------------------------------------------------*/ + test(); WHILE( GE_16( bit_tmp, bit_new_bands ) && LE_16( nb_bands, sub( nb_bands_max, 1 ) ) ) { + test(); bit_tmp = sub( bit_tmp, bit_new_bands ); nb_bands = add( nb_bands, 1 ); } @@ -672,9 +724,11 @@ void bands_and_bit_alloc_ivas_fx( * Fractional bits to distribute on the first x bands *-----------------------------------------------------------------------*/ #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k && core_brate > ACELP_32k ) + test(); + IF( EQ_16( L_frame, L_FRAME16k ) && GT_32( core_brate, ACELP_32k ) ) { bit_fracf = 0; + move32(); } ELSE #endif @@ -703,20 +757,22 @@ void bands_and_bit_alloc_ivas_fx( bit_index = add( bit_index, 1 ); } #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k && core_brate > ACELP_16k40 ) + IF( EQ_16( L_frame, L_FRAME16k ) && GT_32( core_brate, ACELP_16k40 ) ) { bit_index = 0; - i = imax - 1; + move16(); + i = sub( imax, 1 ); // bits_per_bands[i] += Compl_GSC_freq_bits[bit_index]; bits_per_bands[i] = L_add( bits_per_bands[i], L_shl( Compl_GSC_freq_bits[bit_index], Q18 ) ); - i++; - bit_index++; + i = add( i, 1 ); + bit_index = add( bit_index, 1 ); FOR( ; i < 10; i++ ) { // bits_per_bands[i] += Compl_GSC_freq_bits[bit_index] + bit_fracf; bits_per_bands[i] = L_add( bits_per_bands[i], L_add( L_shl( Compl_GSC_freq_bits[bit_index], Q18 ), bit_fracf ) ); - bit_index++; + move32(); + bit_index = add( bit_index, 1 ); } } #endif @@ -735,7 +791,8 @@ void bands_and_bit_alloc_ivas_fx( { FOR( j = 0; j < nb_tot_bands; j++ ) { - bits_per_bands[j] = max( bits_per_bands[j], 0 ); + bits_per_bands[j] = L_max( bits_per_bands[j], 0 ); + move32(); } } @@ -744,26 +801,28 @@ void bands_and_bit_alloc_ivas_fx( * Complete the bit allocation per frequency band for 16kHz high brate mode *--------------------------------------------------------------------------*/ #if 1 // def ADD_LRTD - IF( L_frame == L_FRAME16k && core_brate > ACELP_32k ) + IF( EQ_16( L_frame, L_FRAME16k ) && GT_32( core_brate, ACELP_32k ) ) { FOR( j = st_band; j < nb_bands; j++ ) { // bits_per_bands[j] = bit_new_bands; bits_per_bands[j] = L_shl( bit_new_bands, Q18 ); + move32(); } // bit_fracf = (1.0f / nb_bands) * (bit_tmp); - bit_fracf = ( 262144 / nb_bands ) * ( bit_tmp ); + bit_fracf = L_shl( L_mult0( idiv1616( 16384, nb_bands ), bit_tmp ), 4 ); // etmp = 2.0f * bit_fracf / (nb_bands + 1); - etmp = (Word16) L_shr( L_shl( bit_fracf, Q1 ) / ( nb_bands + 1 ), Q3 ); // Q15 + etmp = divide3216( L_shr( bit_fracf, Q2 ), add( nb_bands, 1 ) ); // Q15 // bit_fracf = etmp; bit_fracf = L_shl( etmp, Q3 ); // Q18 - FOR( j = nb_bands - 1; j >= 0; j-- ) + FOR( j = sub( nb_bands, 1 ); j >= 0; j-- ) { // bits_per_bands[j] = etmp; // etmp += bit_fracf; bits_per_bands[j] = L_add( bits_per_bands[j], L_shl( etmp, Q3 ) ); // Q18 + move32(); etmp = (Word16) L_add( etmp, L_shr( bit_fracf, Q3 ) ); } } @@ -927,15 +986,17 @@ void bands_and_bit_alloc_ivas_fx( { // bits_per_bands[i] = (float)floor(bits_per_bands[i]); bits_per_bands[i] = L_shl( L_shr( bits_per_bands[i], Q18 ), Q18 ); + move32(); sum_bit = L_add( sum_bit, L_shr( bits_per_bands[i], Q18 ) ); } - IF( GSC_IVAS_mode != 0 && sum_bit < *bit ) /* If we need to add bits, we are doing it on the LF */ + test(); + IF( GSC_IVAS_mode != 0 && LT_32( sum_bit, *bit ) ) /* If we need to add bits, we are doing it on the LF */ { - reajust_bits_fx( bits_per_bands, 0, nb_bands, (int16_t) sum_bit, *bit ); + reajust_bits_fx( bits_per_bands, 0, nb_bands, (Word16) sum_bit, *bit ); } ELSE { - reajust_bits_fx( bits_per_bands, nb_bands - 1, 0, (int16_t) sum_bit, *bit ); + reajust_bits_fx( bits_per_bands, sub( nb_bands, 1 ), 0, (Word16) sum_bit, *bit ); } w_sum_bit = 0; move16(); @@ -986,7 +1047,9 @@ void bands_and_bit_alloc_ivas_fx( { set_s( out_bits_per_bands, 0, nb_tot_bands ); *nb_subbands = 0; + move16(); *pvq_len = 0; + move16(); } #endif return; @@ -1019,19 +1082,19 @@ static void reajust_bits_fx( IF( LT_16( bit_bdgt_in, sum_bit_in ) ) { amount_to_add = -1; - bit_bdgt = sum_bit_in; - sum_bit = bit_bdgt_in; move16(); + bit_bdgt = sum_bit_in; move16(); + sum_bit = bit_bdgt_in; move16(); } ELSE { bit_bdgt = bit_bdgt_in; - sum_bit = sum_bit_in; - amount_to_add = 1; move16(); + sum_bit = sum_bit_in; move16(); + amount_to_add = 1; move16(); } @@ -1051,7 +1114,7 @@ static void reajust_bits_fx( } i = add( i, incr ); - IF( EQ_16( i, end_band ) ) + if ( EQ_16( i, end_band ) ) { i = st_band; move16(); diff --git a/lib_com/gs_gains_fx.c b/lib_com/gs_gains_fx.c index e81af00c8..048099e25 100644 --- a/lib_com/gs_gains_fx.c +++ b/lib_com/gs_gains_fx.c @@ -111,6 +111,7 @@ void Comp_and_apply_gain_fx( L32 = L_shl( L_mult( exc_diffQ[i], y_gain ), tmp_exp ); /*Q_exc+16 */ exc_diffQ[i] = round_fx( L32 ); /*Q_exc */ #endif /* BASOP_NOGLOB */ + move16(); } } ELSE @@ -223,7 +224,7 @@ void Ener_per_band_comp_fx( IF( EQ_16( Eflag, 1 ) ) { - y_gain4_fx[j + 1] = Comp_band_log_ener( pt_fx, 32, Q_exc, -1 ); + y_gain4_fx[add( j, 1 )] = Comp_band_log_ener( pt_fx, 32, Q_exc, -1 ); move16(); pt_fx += 32; } @@ -237,7 +238,7 @@ void Ener_per_band_comp_ivas_fx( const Word16 Q_exc, /* i : frame length */ const Word16 Mband, /* i : Max band */ const Word16 Eflag, /* i : flag of highest band */ - const int16_t L_frame /* i : frame length */ + const Word16 L_frame /* i : frame length */ ) { const Word16 *pt_fx; @@ -260,12 +261,12 @@ void Ener_per_band_comp_ivas_fx( IF( EQ_16( Eflag, 1 ) ) { - y_gain4_fx[j + 1] = Comp_band_log_ener( pt_fx, 32, Q_exc, -1 ); + y_gain4_fx[add( j, 1 )] = Comp_band_log_ener( pt_fx, 32, Q_exc, -1 ); move16(); pt_fx += 32; } - if ( L_frame == L_FRAME16k ) + IF( EQ_16( L_frame, L_FRAME16k ) ) { y_gain4_fx[j + 2] = Comp_band_log_ener( pt_fx, 32, Q_exc, -1 ); move16(); @@ -296,7 +297,7 @@ static void GSC_gain_adj( /* Gain adjustment to fit ACELP generic inactive coding gain at low rate */ Word16 Gain_off, i; - IF( NE_16( coder_type, INACTIVE ) ) + IF( coder_type != INACTIVE ) { FOR( i = 0; i < MBANDS_GN; i++ ) { @@ -369,6 +370,7 @@ static void GSC_gain_adj_ivas_fx( /* Gain adjustment to fit ACELP generic inactive coding gain at low rate */ Word16 Gain_off, i; + test(); IF( NE_16( coder_type, INACTIVE ) && NE_16( coder_type, UNVOICED ) ) { FOR( i = 0; i < Mbands_gn; i++ ) @@ -383,6 +385,8 @@ static void GSC_gain_adj_ivas_fx( { Gain_off = 0; move16(); + + test(); IF( LE_32( core_brate, ACELP_5k00 ) && EQ_16( coder_type, UNVOICED ) ) { Gain_off = 18432; @@ -467,27 +471,27 @@ Word16 gsc_gaindec_fx( /* o : average frequency gai test(); IF( ( EQ_16( coder_type, AUDIO ) || EQ_16( coder_type, INACTIVE ) ) && EQ_16( bwidth_fx, NB ) ) { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( &mean_4g_fx, Gain_meanNB_fx, Gain_mean_dicNB_fx, idx_g_fx, 1 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); move16(); VDQ_vec_fx( y_gainQ_fx, Mean_dic_NB_fx, Gain_dic1_NB_fx, idx_g_fx, 3 ); IF( LT_32( core_brate_fx, ACELP_9k60 ) ) { - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 3, Mean_dic_NB_fx + 3, Gain_dic2_NB_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 4 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 4 ); VDQ_vec_fx( y_gainQ_fx + 6, Mean_dic_NB_fx + 6, Gain_dic3_NB_fx, idx_g_fx, 4 ); } ELSE { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx + 3, Mean_dic_NB_fx + 3, Gain_dic2_NBHR_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 7 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 7 ); VDQ_vec_fx( y_gainQ_fx + 6, Mean_dic_NB_fx + 6, Gain_dic3_NBHR_fx, idx_g_fx, 4 ); } test(); @@ -515,7 +519,7 @@ Word16 gsc_gaindec_fx( /* o : average frequency gai } ELSE { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( &mean_4g_fx, mean_m_fx, mean_gain_dic_fx, idx_g_fx, 1 ); @@ -524,10 +528,10 @@ Word16 gsc_gaindec_fx( /* o : average frequency gai /*--------------------------------------------------------------------------------------* * UQ of the first 8 bands and half of the last 8 bands *--------------------------------------------------------------------------------------*/ - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx, YGain_mean_LR_fx, YGain_dic1_LR_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 3, YGain_mean_LR_fx + 3, YGain_dic2_LR_fx, idx_g_fx, 4 ); /*----------------------------------------------------------------------* @@ -535,7 +539,7 @@ Word16 gsc_gaindec_fx( /* o : average frequency gai * And scaling *----------------------------------------------------------------------*/ - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 7, YGain_mean_LR_fx + 7, YGain_dic3_LR_fx, idx_g_fx, 5 ); @@ -568,16 +572,16 @@ Word16 gsc_gaindec_fx( /* o : average frequency gai } ELSE { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx, YG_mean16_fx, YG_dicMR_1_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 4, YG_mean16_fx + 4, YG_dicMR_2_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 8, YG_mean16_fx + 8, YG_dicMR_3_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 4 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 4 ); VDQ_vec_fx( y_gainQ_fx + 12, YG_mean16_fx + 12, YG_dicMR_4_fx, idx_g_fx, 4 ); } } @@ -631,27 +635,26 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc test(); IF( ( EQ_16( coder_type, AUDIO ) || EQ_16( coder_type, INACTIVE ) ) && EQ_16( bwidth_fx, NB ) ) { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( &mean_4g_fx, Gain_meanNB_fx, Gain_mean_dicNB_fx, idx_g_fx, 1 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); - move16(); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx, Mean_dic_NB_fx, Gain_dic1_NB_fx, idx_g_fx, 3 ); IF( LT_32( core_brate_fx, ACELP_9k60 ) ) { - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 3, Mean_dic_NB_fx + 3, Gain_dic2_NB_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 4 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 4 ); VDQ_vec_fx( y_gainQ_fx + 6, Mean_dic_NB_fx + 6, Gain_dic3_NB_fx, idx_g_fx, 4 ); } ELSE { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx + 3, Mean_dic_NB_fx + 3, Gain_dic2_NBHR_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 7 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 7 ); VDQ_vec_fx( y_gainQ_fx + 6, Mean_dic_NB_fx + 6, Gain_dic3_NBHR_fx, idx_g_fx, 4 ); } test(); @@ -679,7 +682,7 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc } ELSE { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( &mean_4g_fx, mean_m_fx, mean_gain_dic_fx, idx_g_fx, 1 ); @@ -688,10 +691,10 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc /*--------------------------------------------------------------------------------------* * UQ of the first 8 bands and half of the last 8 bands *--------------------------------------------------------------------------------------*/ - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx, YGain_mean_LR_fx, YGain_dic1_LR_fx, idx_g_fx, 3 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 3, YGain_mean_LR_fx + 3, YGain_dic2_LR_fx, idx_g_fx, 4 ); /*----------------------------------------------------------------------* @@ -699,7 +702,7 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc * And scaling *----------------------------------------------------------------------*/ - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 7, YGain_mean_LR_fx + 7, YGain_dic3_LR_fx, idx_g_fx, 5 ); @@ -717,6 +720,7 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc { /*y_gainQ_fx[i] *= 1.41f;*/ y_gainQ_fx[i] = round_fx( L_shl( L_mult( y_gainQ_fx[i], 23101 ), 1 ) ); /*Q12 */ + move16(); } /*----------------------------------------------------------------------* * Copy the true Q values in the specific bands @@ -732,35 +736,35 @@ Word16 gsc_gaindec_ivas_fx( /* o : average frequenc } ELSE { - if ( st_fx->L_frame == L_FRAME ) + IF( EQ_16( st_fx->L_frame, L_FRAME ) ) { - idx_g_fx = (Word16) get_next_indice( st_fx, 6 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx, YG_mean16_fx, YG_dicMR_1_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 4, YG_mean16_fx + 4, YG_dicMR_2_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 5 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 5 ); VDQ_vec_fx( y_gainQ_fx + 8, YG_mean16_fx + 8, YG_dicMR_3_fx, idx_g_fx, 4 ); - idx_g_fx = (Word16) get_next_indice( st_fx, 4 ); + idx_g_fx = (Word16) get_next_indice_fx( st_fx, 4 ); VDQ_vec_fx( y_gainQ_fx + 12, YG_mean16_fx + 12, YG_dicMR_4_fx, idx_g_fx, 4 ); } - else + ELSE { - idx_g_fx = get_next_indice( st_fx, 7 ); + idx_g_fx = get_next_indice_fx( st_fx, 7 ); VDQ_vec_fx( y_gainQ_fx, YG_mean16HR_fx, YG_dicHR_1_fx, idx_g_fx, 4 ); - idx_g_fx = get_next_indice( st_fx, 6 ); + idx_g_fx = get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx + 4, YG_mean16HR_fx + 4, YG_dicHR_2_fx, idx_g_fx, 4 ); - idx_g_fx = get_next_indice( st_fx, 6 ); + idx_g_fx = get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx + 8, YG_mean16HR_fx + 8, YG_dicHR_3_fx, idx_g_fx, 4 ); - idx_g_fx = get_next_indice( st_fx, 6 ); + idx_g_fx = get_next_indice_fx( st_fx, 6 ); VDQ_vec_fx( y_gainQ_fx + 12, YG_mean16HR_16kHz_fx, YG_dicHR_4_16kHz_fx, idx_g_fx, 4 ); - idx_g_fx = get_next_indice( st_fx, 3 ); + idx_g_fx = get_next_indice_fx( st_fx, 3 ); VDQ_vec_fx( y_gainQ_fx + 16, YG_meanL2G_16kHz_fx, YG_dicL2G_16kHz_fx, idx_g_fx, 2 ); } } @@ -799,7 +803,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ test(); test(); - IF( ( EQ_16( coder_type, AUDIO ) || EQ_16( coder_type, INACTIVE ) ) && EQ_16( bwidth, NB ) ) + IF( ( EQ_16( coder_type, AUDIO ) || ( coder_type == INACTIVE ) ) && ( bwidth == NB ) ) { /*ftmp1 = mean(y_gain4, 10)-0.6f;*/ @@ -827,6 +831,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ /* Quantized mean gain without clipping */ mean_4g[0] = round_fx( L_tmp ); + move16(); idx_g = vquant_fx( mean_4g, Gain_meanNB_fx, mean_4g, Gain_mean_dicNB_fx, 1, 64 ); push_indice_fx( hBstr, IND_MEAN_GAIN2, idx_g, 6 ); @@ -863,12 +868,18 @@ Word16 gsc_gainQ_fx( /*Q12*/ /* Some energy is needed in high band for stat_noise_uv_enc to be functional in inactive speech */ y_gain_tmp[10] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[6], 8192 ), y_gain_tmp[7], 8192 ), y_gain_tmp[8], 8192 ) ); + move16(); y_gain_tmp[11] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[7], 8192 ), y_gain_tmp[8], 8192 ), y_gain_tmp[9], 8192 ) ); + move16(); y_gain_tmp[12] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[8], 8192 ), y_gain_tmp[9], 8192 ), y_gain_tmp[10], 8192 ) ); + move16(); y_gain_tmp[13] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[9], 8192 ), y_gain_tmp[10], 8192 ), y_gain_tmp[11], 8192 ) ); + move16(); y_gain_tmp[14] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[10], 8192 ), y_gain_tmp[11], 8192 ), y_gain_tmp[12], 8192 ) ); + move16(); y_gain_tmp[15] = round_fx( L_mac( L_mac( L_mult( y_gain_tmp[11], 8192 ), y_gain_tmp[12], 8192 ), y_gain_tmp[13], 8192 ) ); + move16(); } ELSE { @@ -880,6 +891,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ /*ftmp1 = mean(y_gain4, 16);*/ L_tmp = 0; + move32(); FOR( cnt = 0; cnt < 16; cnt++ ) { L_tmp = L_mac( L_tmp, y_gain4[cnt], 2048 ); @@ -889,6 +901,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ tmp1 = sub( tmp16, 4915 ); tmp2 = add( tmp16, 4915 ); L_tmp = 0; + move32(); FOR( i = 0; i < 16; i++ ) { y_gain_tmp2[i] = y_gain4[i]; @@ -904,8 +917,10 @@ Word16 gsc_gainQ_fx( /*Q12*/ FOR( ; i < Mbands_gn; i++ ) { y_gain_tmp2[i] = y_gain4[i]; + move16(); /*if(y_gain4[i] < ftmp1-0.6f)*/ y_gain_tmp2[i] = s_max( y_gain_tmp2[i], tmp1 ); /* Just the last move is needed, because s_max and s_min could be done in 1 line*/ + move16(); /*else if(y_gain4[i] > ftmp1+0.6f)*/ y_gain_tmp2[i] = s_min( y_gain_tmp2[i], tmp2 ); move16(); @@ -913,6 +928,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ /* Quantized mean gain without clipping */ mean_4g[0] = round_fx( L_tmp ); + move16(); /*idx_g = (short)vquant(mean_4g, mean_m, mean_4g, mean_gain_dic, 1, 64);*/ @@ -940,6 +956,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ move16(); idx_g = 0; + move16(); /*idx_g = (short)vquant(y_gain_tmp2, YGain_mean_LR, y_gain_tmp2, YGain_dic1_LR, 3, 32);*/ idx_g = vquant_fx( y_gain_tmp2, YGain_mean_LR_fx, y_gain_tmp2, YGain_dic1_LR_fx, 3, 32 ); @@ -962,6 +979,7 @@ Word16 gsc_gainQ_fx( /*Q12*/ Copy( y_gain_tmp2 + 8, y_gain_tmp + 8, 3 ); y_gain_tmp[15] = y_gain_tmp2[11]; + move16(); ifft_rel_fx( y_gain_tmp + 8, 8, 3 ); FOR( i = 8; i < 16; i++ ) diff --git a/lib_com/gs_inact_switching_fx.c b/lib_com/gs_inact_switching_fx.c index 16fbddb41..446f9849b 100644 --- a/lib_com/gs_inact_switching_fx.c +++ b/lib_com/gs_inact_switching_fx.c @@ -137,6 +137,7 @@ void Inac_swtch_ematch_fx( L_tmp = L_shl( L_tmp, add( exp, 15 ) ); /* Q(Q_exc+1) -> Q(16+Q_exc)*/ *pt_exc = round_fx( L_tmp ); #endif + move16(); pt_exc++; } } @@ -152,6 +153,7 @@ void Inac_swtch_ematch_fx( L_tmp = L_shl( L_tmp, add( exp, 15 ) ); /* Q(Q_exc+1) -> Q(16+Q_exc)*/ *pt_exc = round_fx( L_tmp ); /*Q_exc*/ #endif + move16(); pt_exc++; } } @@ -243,6 +245,7 @@ void Inac_switch_ematch_ivas_fx( L_tmp = L_mult( ALPHA0_FX, lt_ener_per_band[i] ); /*Q(15+12+1)=Q(28) */ L_tmp = L_mac( L_tmp, BETA0_FX, Ener_per_bd[i] ); /*Q28 */ lt_ener_per_band[i] = round_fx( L_tmp ); /*Q12 */ + move16(); ftmp = sub( lt_ener_per_band[i], Ener_per_bd[i] ); /*Q12 */ @@ -263,6 +266,7 @@ void Inac_switch_ematch_ivas_fx( L_tmp = L_mult0( *pt_exc, ftmp ); L_tmp = L_shl_sat( L_tmp, add( exp, 15 ) ); /* Q(Q_exc) -> Q(15+Q_exc)*/ *pt_exc = round_fx_sat( L_tmp ); /*Q_exc - 1*/ + move16(); pt_exc++; } } @@ -273,6 +277,7 @@ void Inac_switch_ematch_ivas_fx( L_tmp = L_mult0( *pt_exc, ftmp ); L_tmp = L_shl_sat( L_tmp, add( exp, 15 ) ); /* Q(Q_exc) -> Q(15+Q_exc)*/ *pt_exc = round_fx_sat( L_tmp ); /*Q_exc - 1*/ + move16(); pt_exc++; } } diff --git a/lib_com/gs_noisefill_fx.c b/lib_com/gs_noisefill_fx.c index 1809a1f6f..3e5bfab96 100644 --- a/lib_com/gs_noisefill_fx.c +++ b/lib_com/gs_noisefill_fx.c @@ -96,17 +96,17 @@ static void EstimateNoiseLevel_inner_fx( noise_offset = 8192; move16(); /*0.25f * 32768 */ - IF( bitrate > ACELP_24k40 ) + IF( GT_32( bitrate, ACELP_24k40 ) ) { noise_offset = 6554; move16(); /*.2f * 32768 */ } - ELSE IF( bitrate >= ACELP_22k60 ) + ELSE IF( GE_32( bitrate, ACELP_22k60 ) ) { noise_offset = 9830; move16(); /*.3f * 32768 */ } - ELSE IF( bitrate >= ACELP_9k60 ) + ELSE IF( GE_32( bitrate, ACELP_9k60 ) ) { noise_offset = 11469; move16(); /*0.35f * 32768 */ @@ -194,6 +194,7 @@ static void EstimateNoiseLevel_fx( } } test(); + test(); IF( ( EQ_16( coder_type, INACTIVE ) || GE_16( noise_lev, NOISE_LEVEL_SP3 ) ) && EQ_16( L_frame, L_FRAME ) ) { FOR( i_band = 9; i_band < Mbands_gn; i_band++ ) @@ -280,8 +281,7 @@ static void Apply_NoiseFill_fx( FOR( i_band = 0; i_band < Mbands_gn; i_band++ ) { - StartBin += NB_Qbins; - move16(); + StartBin = add( StartBin, NB_Qbins ); NB_Qbins = freq_nsbin_per_band[i_band]; move16(); @@ -333,7 +333,7 @@ void freq_dnw_scaling_fx( start_sc = L_frame; move16(); test(); - IF( LE_32( core_brate, ACELP_8k00 ) && EQ_16( coder_type, INACTIVE ) ) + IF( LE_32( core_brate, ACELP_8k00 ) && ( coder_type == INACTIVE ) ) { sc_dyn = mult_r( sc_dyn, 4915 ); /*Q15 (0.15 in Q15) */ start_sc = 64; @@ -357,6 +357,7 @@ void freq_dnw_scaling_fx( } } + test(); IF( EQ_16( L_frame, L_FRAME16k ) && LE_32( core_brate, ACELP_24k40 ) ) { /*sc_dyn += 0.125f;*/ @@ -411,12 +412,15 @@ static void Decreas_freqPeak_fx( Word16 tmp2; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move16(); #endif move16(); /*ptr init*/ lsf_new_diff[0] = 0; /* prevent unitialized value */ + move16(); FOR( j = 1; j < ( M - 1 ); j++ ) { lsf_new_diff[j] = sub( lsf_new[j], lsf_new[j - 1] ); /*Qx2.56 */ + move16(); } avrg = 0; @@ -479,6 +483,7 @@ static void Decreas_freqPeak_fx( tmp1 = negate( tmp ); tmp2 = *src; + move16(); *( src ) = tmp1; move16(); if ( tmp2 > 0 ) @@ -528,6 +533,7 @@ static void envelop_modify_fx( Word16 Q_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move16(); #endif start_band = i_mult( last_bin, 16 ); @@ -550,7 +556,8 @@ static void envelop_modify_fx( move16(); Ener_fx = Isqrt_lc( Ener_fx, &exp1 ); /*Q(31-exp1) */ - weight_fx = 16384; /*Q15 */ + weight_fx = 16384; /*Q15 */ + move16(); src_fx = &exc_diffQ_fx[start_band]; /*Q_exc */ FOR( i = last_bin; i < last_bin + 4; i++ ) { @@ -578,7 +585,7 @@ static void envelop_modify_fx( L_tmp = L_mult0( sub( 32767, weight_fx ), Random( seed_tcx ) ); /*Q30 */ tmp1 = round_fx( L_shr( L_tmp, 2 ) ); - L_exc_diffQ_fx[16 * i + j] = L_mult0( Ener1_fx, add( tmp, tmp1 ) ); /*Q12 */ + L_exc_diffQ_fx[add( i_mult( 16, i ), j )] = L_mult0( Ener1_fx, add( tmp, tmp1 ) ); /*Q12 */ move32(); src_fx++; } @@ -609,7 +616,7 @@ static void envelop_modify_fx( L_tmp = L_mult0( sub( 32767, weight_fx ), Random( seed_tcx ) ); /*Q30 */ tmp1 = round_fx( L_shr( L_tmp, 2 ) ); /*Q12 */ - L_exc_diffQ_fx[16 * i + j] = L_mult0( Ener1_fx, add( tmp, tmp1 ) ); /*Q12 */ + L_exc_diffQ_fx[add( i_mult( 16, i ), j )] = L_mult0( Ener1_fx, add( tmp, tmp1 ) ); /*Q12 */ move32(); src_fx++; } @@ -618,7 +625,7 @@ static void envelop_modify_fx( move16(); FOR( i = start_band; i < L_FRAME; i++ ) { - IF( GT_32( L_abs( L_exc_diffQ_fx[i] ), exc_diffQ_max ) ) + if ( GT_32( L_abs( L_exc_diffQ_fx[i] ), exc_diffQ_max ) ) { exc_diffQ_max = L_abs( L_exc_diffQ_fx[i] ); } @@ -632,6 +639,7 @@ static void envelop_modify_fx( FOR( i = start_band; i < L_FRAME; i++ ) { exc_diffQ_fx[i] = extract_l( L_exc_diffQ_fx[i] ); + move16(); } } ELSE @@ -641,6 +649,7 @@ static void envelop_modify_fx( FOR( i = start_band; i < L_FRAME; i++ ) { exc_diffQ_fx[i] = extract_l( L_shr( L_exc_diffQ_fx[i], Q_tmp ) ); + move16(); } } @@ -783,11 +792,13 @@ void highband_exc_dct_in_fx( } test(); - + test(); + test(); IF( GSC_IVAS_mode == 0 && GSC_noisy_speech && !bfi && LE_16( element_mode, IVAS_SCE ) ) { set16_fx( noisepb, 3277, MBANDS_GN ); } + test(); IF( LT_32( core_brate, 6000 ) && LE_16( coder_type, UNVOICED ) ) { FOR( i = 0; i < L_frame; i++ ) @@ -829,6 +840,7 @@ void highband_exc_dct_in_fx( tmp = msu_r( -7680 * 65536, -17564, shl( i, 6 ) ); /*-15 in Q9; -0.067 in Q18 and i in Q6= Q9 */ L_tmp = L_mult( exc_diffQ[i], tmp ); /*Q(Qexc_diffQ+10) */ exc_diffQ[i] = round_fx( L_shl( L_tmp, 16 - 10 ) ); /*Qexc_diffQ */ + move16(); } } } @@ -1211,6 +1223,10 @@ void highband_exc_dct_in_ivas_fx( move16(); } + test(); + test(); + test(); + test(); IF( bfi || LT_32( core_brate, 6000 ) || ( LT_32( core_brate, 8600 ) && EQ_16( coder_type, UNVOICED ) ) ) { set16_fx( noisepb, 13107, MBANDS_GN ); /*0.4 in Q15 */ @@ -1229,12 +1245,15 @@ void highband_exc_dct_in_ivas_fx( Copy( exc_diffQ, exc_wo_nf, L_frame ); } + test(); test(); IF( GSC_IVAS_mode == 0 && GSC_noisy_speech && !bfi && LE_16( element_mode, IVAS_SCE ) ) { set16_fx( noisepb, 3277, MBANDS_GN ); } + + test(); IF( LT_32( core_brate, 6000 ) && LE_16( coder_type, UNVOICED ) ) { FOR( i = 0; i < L_frame; i++ ) @@ -1268,6 +1287,7 @@ void highband_exc_dct_in_ivas_fx( { Ener_per_band_comp_ivas_fx( exc_diffQ, Ener_per_bd_yQ, Qexc_diffQ, MBANDS_GN, 1, L_frame ); + test(); IF( LT_16( nb_subfr, 4 ) && LT_16( L_frame, L_FRAME16k ) ) { FOR( i = L_FRAME - 16; i < L_FRAME; i++ ) @@ -1276,6 +1296,7 @@ void highband_exc_dct_in_ivas_fx( tmp = msu_r( -7680 * 65536, -17564, shl( i, 6 ) ); /*-15 in Q9; -0.067 in Q18 and i in Q6= Q9 */ L_tmp = L_mult( exc_diffQ[i], tmp ); /*Q(Qexc_diffQ+10) */ exc_diffQ[i] = round_fx( L_shl( L_tmp, 16 - 10 ) ); /*Qexc_diffQ */ + move16(); } } } @@ -1328,23 +1349,30 @@ void highband_exc_dct_in_ivas_fx( { // float scale_factLF = 0.9f; Word16 scale_factLF = 29491; + move16(); // float scale_factHF = 0.9f; Word16 scale_factHF = 29491; + move16(); - IF( GSC_IVAS_mode == 1 && GSC_noisy_speech == 0 ) + test(); + test(); + IF( EQ_16( GSC_IVAS_mode, 1 ) && GSC_noisy_speech == 0 ) { // scale_factHF = 0.8f; scale_factHF = 26214; + move16(); } - ELSE IF( GSC_IVAS_mode == 2 || GSC_noisy_speech == 1 ) + ELSE IF( EQ_16( GSC_IVAS_mode, 2 ) || EQ_16( GSC_noisy_speech, 1 ) ) { // scale_factHF = 0.71f; scale_factHF = 23265; + move16(); } - ELSE IF( GSC_IVAS_mode == 3 ) + ELSE IF( EQ_16( GSC_IVAS_mode, 3 ) ) { // scale_factHF = 0.9f; scale_factHF = 29491; + move16(); } FOR( i = 0; i < pit_band_idx * 16; i++ ) { @@ -1355,44 +1383,51 @@ void highband_exc_dct_in_ivas_fx( { // exc_diffQ[i] *= scale_factHF; exc_diffQ[i] = mult_r( exc_diffQ[i], scale_factHF ); + move16(); } } ELSE IF( GSC_noisy_speech ) { // float scale_fact = 0.9f; Word16 scale_fact = 29491; + move16(); - IF( element_mode == IVAS_CPE_TD ) + IF( EQ_16( element_mode, IVAS_CPE_TD ) ) { IF( coder_type == INACTIVE ) { // scale_fact = 1.0f; scale_fact = 32767; + move16(); } ELSE { // scale_fact = 0.95f; scale_fact = 31129; + move16(); } } - ELSE IF( element_mode > IVAS_SCE ) + ELSE IF( GT_16( element_mode, IVAS_SCE ) ) { // scale_fact = 0.71f; scale_fact = 23265; + move16(); } FOR( i = 0; i < L_frame; i++ ) { // exc_diffQ[i] *= scale_fact; exc_diffQ[i] = mult_r( exc_diffQ[i], scale_fact ); + move16(); } } - IF( GSC_noisy_speech && element_mode > IVAS_SCE && core_brate < ACELP_7k20 ) + IF( GSC_noisy_speech && GT_16( element_mode, IVAS_SCE ) && LT_32( core_brate, ACELP_7k20 ) ) { FOR( i = 80; i < L_frame; i++ ) { // exc_diffQ[i] *= (+0.0024f * (float)i + 1.192f); exc_diffQ[i] = mult_r( shl( exc_diffQ[i], 1 ) /*Q16*/, (Word16) L_shr( L_add( 629 * i, 312475 ) /*Q18*/, Q4 ) /*Q14*/ ); + move16(); } } #else @@ -1421,7 +1456,7 @@ void highband_exc_dct_in_ivas_fx( Vr_add( exc_dct_in, exc_diffQ, exc_dct_in, L_frame ); test(); - IF( core_brate == ACELP_8k00 && bwidth != NB ) + IF( EQ_32( core_brate, ACELP_8k00 ) && bwidth != NB ) { IF( EQ_16( bwe_flag, 1 ) ) { diff --git a/lib_com/gs_preech.c b/lib_com/gs_preech.c index 36487d90f..8765dc7e0 100644 --- a/lib_com/gs_preech.c +++ b/lib_com/gs_preech.c @@ -159,7 +159,7 @@ void pre_echo_att_fx( Word16 att_len; test(); - IF( GT_16( gsc_attack_flag_fx, 0 ) && EQ_16( last_coder_type, AUDIO ) ) /*gsc_attack_flag_fx does not get set for all the test cases */ + IF( gsc_attack_flag_fx > 0 && EQ_16( last_coder_type, AUDIO ) ) /*gsc_attack_flag_fx does not get set for all the test cases */ { /*-------------------------------------------------------------------------* * Find where the onset (attack) occurs by computing the energy per section @@ -167,9 +167,11 @@ void pre_echo_att_fx( * gradual onset *-------------------------------------------------------------------------*/ att_len = ATT_LENGHT; - if ( L_frame == L_FRAME16k ) + move16(); + if ( EQ_16( L_frame, L_FRAME16k ) ) { att_len = ATT_LENGHT16k; + move16(); } FOR( i = 0; i < att_len; i++ ) { @@ -210,6 +212,7 @@ void pre_echo_att_fx( /* = isqrt(etmp/(*Last_frame_ener)) */ etmp_fx = L_max( etmp_fx, 1 ); *Last_frame_ener_fx = L_max( *Last_frame_ener_fx, 1 ); + move32(); n1 = norm_l( etmp_fx ); n2 = norm_l( *Last_frame_ener_fx ); @@ -234,6 +237,7 @@ void pre_echo_att_fx( { /*exc_fx[i] *= ratio_fx;*/ exc_fx[i] = round_fx( L_shl( L_mac( -8192, exc_fx[i], ratio_fx ), 2 ) ); + move16(); } } *Last_frame_ener_fx = etmp1_fx; diff --git a/lib_com/guided_plc_util_fx.c b/lib_com/guided_plc_util_fx.c index 87ab48143..a95e9a5c4 100644 --- a/lib_com/guided_plc_util_fx.c +++ b/lib_com/guided_plc_util_fx.c @@ -71,6 +71,7 @@ void updateLSFForConcealment( L_tmp = L_mult( divide_by_3_Q15, decState->lsfoldbfi1_14Q1[i] ); L_tmp = L_mac( L_tmp, divide_by_3_Q15, decState->lsfoldbfi0_14Q1[i] ); decState->lsf_adaptive_mean_14Q1[i] = mac_r( L_tmp, divide_by_3_Q15, lsf_14Q1[i] ); + move16(); decState->lsfoldbfi1_14Q1[i] = decState->lsfoldbfi0_14Q1[i]; move16(); decState->lsfoldbfi0_14Q1[i] = lsf_14Q1[i]; @@ -96,7 +97,6 @@ void getConcealedLP( Word16 lsp[( NB_DIV + 1 ) * M]; Word32 int_fs; - move16(); lsf = memDecState->lsf_con; diff --git a/lib_com/hp50.c b/lib_com/hp50.c index b080d0431..d6aaebea2 100644 --- a/lib_com/hp50.c +++ b/lib_com/hp50.c @@ -200,7 +200,7 @@ static void filter_2nd_order( BASOP_SATURATE_ERROR_ON_EVS L_sum = HP50_Mpy_32_32_fix( b2, mem[2] ); /* b2*x2 */ L_sum = L_add( L_sum, HP50_Mpy_32_32_fix( b1, mem[3] ) ); /* b1*x1 */ - x2 = shr( signal[0 * stride], prescale ); + x2 = shr( signal[0], prescale ); L_sum = L_add( L_sum, HP50_Mode2_Mpy_32_16_fix( b2, x2 ) ); /* b2*x0 */ L_sum = L_add( L_sum, HP50_Mpy_32_32_fix( mem[0], a2 ) ); /* y2*a2 */ L_sum = L_add( L_sum, HP50_Mpy_32_32_fix( mem[1], a1 ) ); /* y1*a1 */ @@ -213,16 +213,16 @@ static void filter_2nd_order( BASOP_SATURATE_ERROR_OFF_EVS BASOP_SATURATE_WARNING_OFF_EVS #ifdef BASOP_NOGLOB - signal[0 * stride] = round_fx_o( L_shl_o( L_y2, prescale, &Overflow ), &Overflow ); + signal[0] = round_fx_o( L_shl_o( L_y2, prescale, &Overflow ), &Overflow ); #else /* BASOP_NOGLOB */ - signal[0 * stride] = round_fx( L_shl( L_y2, prescale ) ); + signal[0] = round_fx( L_shl( L_y2, prescale ) ); #endif /* BASOP_NOGLOB */ BASOP_SATURATE_WARNING_ON_EVS BASOP_SATURATE_ERROR_ON_EVS L_sum = HP50_Mpy_32_32_fix( b2, mem[3] ); /* b2*x2 */ L_sum = L_add( L_sum, HP50_Mode2_Mpy_32_16_fix( b1, x2 ) ); /* b1*x1 */ - x1 = shr( signal[1 * stride], prescale ); + x1 = shr( signal[stride], prescale ); L_sum = L_add( L_sum, HP50_Mode2_Mpy_32_16_fix( b2, x1 ) ); /* b2*x0 */ L_sum = L_add( L_sum, HP50_Mpy_32_32_fix( mem[1], a2 ) ); /* y2*a2 */ L_sum = L_add( L_sum, HP50_Mpy_32_32_fix( L_y2, a1 ) ); /* y1*a1 */ @@ -235,11 +235,12 @@ static void filter_2nd_order( BASOP_SATURATE_ERROR_OFF_EVS BASOP_SATURATE_WARNING_OFF_EVS #ifdef BASOP_NOGLOB - signal[1 * stride] = round_fx_o( L_shl_o( L_y1, prescale, &Overflow ), &Overflow ); + signal[stride] = round_fx_o( L_shl_o( L_y1, prescale, &Overflow ), &Overflow ); #else /* BASOP_NOGLOB */ - signal[1 * stride] = round_fx( L_shl( L_y1, prescale ) ); + signal[stride] = round_fx( L_shl( L_y1, prescale ) ); #endif /* BASOP_NOGLOB */ BASOP_SATURATE_WARNING_ON_EVS + move16(); /* New we use a trick and toggle x1/x2 and L_y1/L_y2 to save a few cycles unrolling the loop by 2 */ FOR( i = 2; i < lg; i += 2 ) @@ -261,9 +262,10 @@ static void filter_2nd_order( BASOP_SATURATE_ERROR_OFF_EVS BASOP_SATURATE_WARNING_OFF_EVS #ifdef BASOP_NOGLOB - signal[i * stride] = round_fx_o( L_shl_o( L_y2, prescale, &Overflow ), &Overflow ); + signal[i_mult( i, stride )] = round_fx_o( L_shl_o( L_y2, prescale, &Overflow ), &Overflow ); #else /* BASOP_NOGLOB */ - signal[i * stride] = round_fx( L_shl( L_y2, prescale ) ); + signal[i_mult( i, stride )] = round_fx( L_shl( L_y2, prescale ) ); + move16(); #endif /* BASOP_NOGLOB */ BASOP_SATURATE_WARNING_ON_EVS /* y[i+1] = b2*x[i-1] + b1*x[i-0] + b2*x[i+1] + a2*y[i-1] + a1*y[i+0]; */ @@ -283,11 +285,12 @@ static void filter_2nd_order( BASOP_SATURATE_ERROR_OFF_EVS BASOP_SATURATE_WARNING_OFF_EVS #ifdef BASOP_NOGLOB - signal[( i + 1 ) * stride] = round_fx_o( L_shl_o( L_y1, prescale, &Overflow ), &Overflow ); + signal[i_mult( add( i, 1 ), stride )] = round_fx_o( L_shl_o( L_y1, prescale, &Overflow ), &Overflow ); #else /* BASOP_NOGLOB */ - signal[( i + 1 ) * stride] = round_fx( L_shl( L_y1, prescale ) ); + signal[i_mult( add( i, 1 ), stride )] = round_fx( L_shl( L_y1, prescale ) ); #endif BASOP_SATURATE_WARNING_ON_EVS + move16(); } /* update static filter memory from variables */ mem[0] = L_y2; @@ -295,7 +298,9 @@ static void filter_2nd_order( mem[1] = L_y1; move32(); mem[2] = L_deposit_h( x2 ); + move32(); mem[3] = L_deposit_h( x1 ); + move32(); return; diff --git a/lib_com/hq2_bit_alloc.c b/lib_com/hq2_bit_alloc.c index cd7f17aab..3653ffea8 100644 --- a/lib_com/hq2_bit_alloc.c +++ b/lib_com/hq2_bit_alloc.c @@ -216,7 +216,7 @@ static void Bits2indvsb_fx( th_5_fx = shl( 5, QRavg ); FOR( j = 0; j < be_cnt_fx; j++ ) { - IF( sub( abs_s( sub( Ravg_fx, shl( y_index_fx[j], QRavg ) ) ), th_5_fx ) > 0 ) + if ( sub( abs_s( sub( Ravg_fx, shl( y_index_fx[j], QRavg ) ) ), th_5_fx ) > 0 ) { enr_diffcnt_fx = add( enr_diffcnt_fx, 1 ); } @@ -461,10 +461,12 @@ void hq2_bit_alloc_har( } gmax_range_fx[i] = add( gmax_range_fx[i], temp_fx ); + move16(); } ELSE { gmax_range_fx[i] = add( gmax_range_fx[i], temp_fx ); + move16(); } } @@ -477,7 +479,7 @@ void hq2_bit_alloc_har( } grp_bound_fx[i] = harmonic_band_fx; move16(); - grp_bound_fx[i + 1] = N_fx; + grp_bound_fx[add( i, 1 )] = N_fx; move16(); @@ -575,7 +577,7 @@ void hq2_bit_alloc_har( FOR( i = 0; i < sub( N_fx, harmonic_band_fx ); i++ ) { - y_index_fx[i] = extract_h( L_shl( L_temp_band_energy[harmonic_band_fx + i], sub( 16, SWB_BWE_LR_Qbe ) ) ); + y_index_fx[i] = extract_h( L_shl( L_temp_band_energy[add( harmonic_band_fx, i )], sub( 16, SWB_BWE_LR_Qbe ) ) ); move16(); index_fx[i] = add( harmonic_band_fx, i ); move16(); @@ -643,6 +645,7 @@ void hq2_bit_alloc_har( #ifdef BASOP_NOGLOB lf_hf_ge_r_fx = round_fx_o( L_shl_o( L_temp, sub( 15 + 16, sub( add( SWB_BWE_LR_Qbe, QIns ), 30 ) ), &Overflow ), &Overflow ); Overflow = 0; /* reset BASOP Overflow */ + move16(); #else lf_hf_ge_r_fx = round_fx( L_shl( L_temp, sub( 15 + 16, sub( add( SWB_BWE_LR_Qbe, QIns ), 30 ) ) ) ); #endif @@ -694,24 +697,30 @@ void hq2_bit_alloc_har( L_temp = Mpy_32_16_1( L_Ravg_sub[1], extract_h( L_mult( bits_fact_fx, B_norm_fx ) ) ); L_temp = Mpy_32_16_1( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[1] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); L_temp = Mpy_32_16_1( L_Ravg_sub[2], extract_h( L_mult( bits_fact1_fx, B_norm_fx ) ) ); L_temp = Mpy_32_16_1( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[2] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); Bits_grp_fx[0] = sub( sub( B_fx, Bits_grp_fx[1] ), Bits_grp_fx[2] ); + move16(); } ELSE { L_temp = Mpy_32_16_1( L_Ravg_sub[0], extract_h( L_mult( bits_fact_fx, B_norm_fx ) ) ); L_temp = Mpy_32_16_1( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[0] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); L_temp = Mpy_32_16_1( L_Ravg_sub[2], extract_h( L_mult( bits_fact1_fx, B_norm_fx ) ) ); L_temp = Mpy_32_16_1( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[2] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); Bits_grp_fx[1] = sub( sub( B_fx, Bits_grp_fx[0] ), Bits_grp_fx[2] ); + move16(); } IF( sub( Bits_grp_fx[2], THR2 ) < 0 ) @@ -823,6 +832,7 @@ void hq2_bit_alloc( FOR( k = 0; k < bands; k++ ) { L_Rk[k] = L_shl( L_deposit_l( p2a_flags[k] ), SWB_BWE_LR_QRk ); + move32(); } } diff --git a/lib_com/hq2_bit_alloc_fx.c b/lib_com/hq2_bit_alloc_fx.c index eb3342b29..1f5a146c0 100644 --- a/lib_com/hq2_bit_alloc_fx.c +++ b/lib_com/hq2_bit_alloc_fx.c @@ -100,6 +100,7 @@ void Bits2indvsb_fx( FOR( i = 0; i < band_num_fx; i++ ) { y_index_fx[i] = extract_h( L_shr( L_y_ptr[i], sub( SWB_BWE_LR_Qbe, 16 ) ) ); + move16(); index_fx[i] = i; move16(); } @@ -120,6 +121,7 @@ void Bits2indvsb_fx( y_index_fx[j] = 0; move16(); L_R_temp[j] = L_deposit_l( 0 ); + move32(); } ELSE { @@ -370,14 +372,13 @@ void hq2_bit_alloc_har_fx( { FOR( temp_fx = 2; temp_fx <= k_fx; ) { - IF( LT_32( L_temp_band_energy[gmax_range_fx[i] + temp_fx - 1], L_temp_band_energy[gmax_range_fx[i] + temp_fx] ) ) + IF( LT_32( L_temp_band_energy[sub( add( gmax_range_fx[i], temp_fx ), 1 )], L_temp_band_energy[add( gmax_range_fx[i], temp_fx )] ) ) { BREAK; } - ELSE IF( GE_32( L_temp_band_energy[gmax_range_fx[i] + temp_fx - 1], L_temp_band_energy[gmax_range_fx[i] + temp_fx] ) ) + ELSE IF( GE_32( L_temp_band_energy[sub( add( gmax_range_fx[i], temp_fx ), 1 )], L_temp_band_energy[add( gmax_range_fx[i], temp_fx )] ) ) { temp_fx = add( temp_fx, 1 ); - ; IF( GT_16( temp_fx, k_fx ) ) { temp_fx = sub( temp_fx, 1 ); @@ -405,7 +406,7 @@ void hq2_bit_alloc_har_fx( } grp_bound_fx[i] = harmonic_band_fx; move16(); - grp_bound_fx[i + 1] = N_fx; + grp_bound_fx[add( i, 1 )] = N_fx; move16(); @@ -441,6 +442,7 @@ void hq2_bit_alloc_har_fx( G1_BE_DIFF_POS_fx = j; move16(); L_G1_BE_DIFF_VAL = L_temp_band_energydiff[j]; + move32(); } } @@ -502,6 +504,7 @@ void hq2_bit_alloc_har_fx( FOR( i = 0; i < j; i++ ) { y_index_fx[i] = extract_h( L_shl( L_temp_band_energy[harmonic_band_fx + i], sub( 16, SWB_BWE_LR_Qbe ) ) ); + move16(); index_fx[i] = add( harmonic_band_fx, i ); move16(); } @@ -592,6 +595,7 @@ void hq2_bit_alloc_har_fx( L_temp = L_add( L_shl( L_temp, SWB_BWE_LR_Qbe ), L_temp2 ); Bits_grp_fx[GRP_SB - 1] = extract_h( L_shl( L_temp, sub( 16, SWB_BWE_LR_Qbe ) ) ); + move16(); Bits_grp_fx[GRP_SB - 1] = s_min( Bits_grp_fx[GRP_SB - 1], 10 ); move16(); @@ -613,10 +617,12 @@ void hq2_bit_alloc_har_fx( L_temp = Mult_32_16( L_Ravg_sub[1], extract_h( L_mult( bits_fact_fx, B_norm_fx ) ) ); L_temp = Mult_32_16( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[1] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); L_temp = Mult_32_16( L_Ravg_sub[2], extract_h( L_mult( bits_fact1_fx, B_norm_fx ) ) ); L_temp = Mult_32_16( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[2] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); Bits_grp_fx[0] = sub( sub( B_fx, Bits_grp_fx[1] ), Bits_grp_fx[2] ); move16(); @@ -626,10 +632,12 @@ void hq2_bit_alloc_har_fx( L_temp = Mult_32_16( L_Ravg_sub[0], extract_h( L_mult( bits_fact_fx, B_norm_fx ) ) ); L_temp = Mult_32_16( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[0] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); L_temp = Mult_32_16( L_Ravg_sub[2], extract_h( L_mult( bits_fact1_fx, B_norm_fx ) ) ); L_temp = Mult_32_16( L_temp, Inv_norm_sum_fx ); Bits_grp_fx[2] = extract_h( L_shr( L_temp, exp_shift ) ); + move16(); Bits_grp_fx[1] = sub( sub( B_fx, Bits_grp_fx[0] ), Bits_grp_fx[2] ); move16(); @@ -721,13 +729,21 @@ Word32 hq2_bit_alloc_fx( IF( is_transient && EQ_16( bands, 32 ) ) { L_Rk[6] = L_deposit_l( 0 ); + move32(); L_Rk[7] = L_deposit_l( 0 ); + move32(); L_Rk[14] = L_deposit_l( 0 ); + move32(); L_Rk[15] = L_deposit_l( 0 ); + move32(); L_Rk[22] = L_deposit_l( 0 ); + move32(); L_Rk[23] = L_deposit_l( 0 ); + move32(); L_Rk[30] = L_deposit_l( 0 ); + move32(); L_Rk[31] = L_deposit_l( 0 ); + move32(); } } ELSE @@ -736,6 +752,7 @@ Word32 hq2_bit_alloc_fx( FOR( k = 0; k < bands; k++ ) { L_Rk[k] = L_shl( L_deposit_l( p2a_flags[k] ), SWB_BWE_LR_QRk ); + move32(); } } @@ -814,6 +831,7 @@ Word32 hq2_bit_alloc_fx( IF( LT_32( L_Rk[k], MIN_BITS_FIX ) ) { L_Rk[k] = L_deposit_l( 0 ); + move32(); negflag = 1; move16(); } @@ -846,9 +864,10 @@ Word32 hq2_bit_alloc_fx( } /* prune worst allocation and recalculate total allocation */ - if ( GT_16( maxdex_fx, -1 ) ) + IF( GT_16( maxdex_fx, -1 ) ) { L_Rk[maxdex_fx] = L_deposit_l( 0 ); + move32(); } FOR( k = 0; k < bands; k++ ) { @@ -875,14 +894,17 @@ Word32 hq2_bit_alloc_fx( IF( LT_16( k, 11 ) && LT_32( L_Rk[k], L_THR1 ) ) { L_Rk[k] = L_deposit_l( 0 ); + move32(); } ELSE IF( GE_16( k, 11 ) && LT_16( k, 16 ) && LT_32( L_Rk[k], L_THR2 ) ) { L_Rk[k] = L_deposit_l( 0 ); + move32(); } ELSE if ( GE_16( k, 16 ) && LT_16( k, bands ) && LT_32( L_Rk[k], L_THR3 ) ) { L_Rk[k] = L_deposit_l( 0 ); + move32(); } L_dummy = L_add( L_dummy, L_Rk[k] ); @@ -898,9 +920,9 @@ Word32 hq2_bit_alloc_fx( FOR( k = 0; k < NB_SWB_SUBBANDS; k++ ) { test(); - IF( p2a_flags[bands - NB_SWB_SUBBANDS + k] == 1 && L_Rk[bands - NB_SWB_SUBBANDS + k] == 0 ) + IF( EQ_16( p2a_flags[add( sub( bands, NB_SWB_SUBBANDS ), k )], 1 ) && L_Rk[add( sub( bands, NB_SWB_SUBBANDS ), k )] == 0 ) { - p2a_flags[bands - NB_SWB_SUBBANDS + k] = 0; + p2a_flags[add( sub( bands, NB_SWB_SUBBANDS ), k )] = 0; move16(); bit_budget_temp_fx = sub( bit_budget_temp_fx, bits_lagIndices_modeNormal[k] ); } diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index d085e06be..85556a4f4 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -822,7 +822,7 @@ void smooth_dft2td_transition( void smooth_dft2td_transition_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *output_fx[CPE_CHANNELS], /* i/o: synthesis @external Fs */ - const int16_t output_frame /* i : output frame lenght */ + const Word16 output_frame /* i : output frame lenght */ ); /*! r: flag indicating a valid bitrate */ int16_t is_IVAS_bitrate( @@ -862,8 +862,8 @@ void ivas_mdft_fx( const Word32 *pIn, /* i : input time-domain signal */ Word32 *pOut_re, /* o : Real part of MDFT signal */ Word32 *pOut_im, /* o : Imag. part of MDFT signal */ - const int16_t length, /* i : signal length */ - const int16_t mdft_length /* i : MDFT length */ + const Word16 length, /* i : signal length */ + const Word16 mdft_length /* i : MDFT length */ ); void ivas_imdft_fx( @@ -923,7 +923,7 @@ Word16 rand_triangular_signed_fx( #endif Word64 var_32_fx( const Word32 *x, /* i : input vector */ - const int16_t len, /* i : length of inputvector */ + const Word16 len, /* i : length of inputvector */ Word16 q /* q : q-factor for the array */ ); @@ -1368,9 +1368,9 @@ void ivas_param_ism_dec_render( #ifdef IVAS_FLOAT_FIXED void ivas_param_ism_dec_render_fx( Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - const uint16_t nSamplesAsked, /* i : number of CLDFB slots requested */ - uint16_t *nSamplesRendered, /* o : number of CLDFB slots rendered */ - uint16_t *nSamplesAvailable, /* o : number of CLDFB slots still to render */ + const UWord16 nSamplesAsked, /* i : number of CLDFB slots requested */ + UWord16 *nSamplesRendered, /* o : number of CLDFB slots rendered */ + UWord16 *nSamplesAvailable, /* o : number of CLDFB slots still to render */ Word32 *output_f_fx[] ); void ivas_param_ism_params_to_masa_param_mapping_fx( @@ -2207,7 +2207,7 @@ void stereo_tca_dec( void stereo_tca_dec_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *synth[CPE_CHANNELS], /* i/o: output synth */ - const int16_t output_frame /* i : length of a frame per channel */ + const Word16 output_frame /* i : length of a frame per channel */ ); void stereo_tca_scale_R_channel( @@ -2219,7 +2219,7 @@ void stereo_tca_scale_R_channel( void stereo_tca_scale_R_channel_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *output_fx, /* i/o: output synthesis, R channel */ - const int16_t output_frame /* i : frame length */ + const Word16 output_frame /* i : frame length */ ); void adjustTargetSignal( @@ -2232,10 +2232,10 @@ void adjustTargetSignal( void adjustTargetSignal_fx( Word32 *target_fx, - const int16_t prevShift, - const int16_t currShift, - const int16_t L_shift_adapt, - const int16_t method); + const Word16 prevShift, + const Word16 currShift, + const Word16 L_shift_adapt, + const Word16 method); /*----------------------------------------------------------------------------------* * IC-BWE Stereo prototypes *----------------------------------------------------------------------------------*/ @@ -2393,7 +2393,7 @@ void stereo_td_init_dec( void stereo_td_init_dec_fx( STEREO_TD_DEC_DATA_HANDLE hStereoTD, /* i/o: TD stereo decoder handle */ - const int16_t last_element_mode /* i : last element mode */ + const Word16 last_element_mode /* i : last element mode */ ); void tdm_configure_dec( @@ -2423,9 +2423,9 @@ void tdm_upmix_plain_fx( const Word32 SCh_2_R_fx[], /* i : secondary channel */ const Word32 LR_ratio_fx, /* i : mixing ratio */ const Word32 inv_den_LR_ratio_fx, /* i : inverse mixing ration */ - const int16_t start_index, /* i : start index */ - const int16_t end_index, /* i : end index */ - const int16_t plus_minus_flag /* i : plus/minus flag */ + const Word16 start_index, /* i : start index */ + const Word16 end_index, /* i : end index */ + const Word16 plus_minus_flag /* i : plus/minus flag */ ); void stereo_tdm_combine( @@ -2441,9 +2441,9 @@ void stereo_tdm_combine_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *PCh_2_L_fx, /* i/o: Primary channel -> output as left channel */ Word32 *SCh_2_R_fx, /* i/o: Secondary channel -> output as right channel*/ - const int16_t output_frame, /* i : Number of samples */ - const int16_t flag_HB, /* i : flag to distinguish between core (0) and HB (1) synthesis */ - const int16_t tdm_ratio_idx /* i : TDM ratio index */ + const Word16 output_frame, /* i : Number of samples */ + const Word16 flag_HB, /* i : flag to distinguish between core (0) and HB (1) synthesis */ + const Word16 tdm_ratio_idx /* i : TDM ratio index */ ); /*! r: replication decision; 1 = Use old LP */ @@ -2604,13 +2604,13 @@ void tdm_SCh_lsf_reuse_fx( void tdm_SCh_lsf_reuse_ivas_fx( - const int16_t enc_dec, /* i : encoder/decoder flag */ - const int32_t element_brate, /* i : element bitrate */ + const Word16 enc_dec, /* i : encoder/decoder flag */ + const Word32 element_brate, /* i : element bitrate */ Word16 lsf_new[M], /* i/o: LSFs at the end of the frame */ Word16 lsp_new[M], /* i/o: LSPs at the end of the frame */ const Word16 tdm_lsfQ_PCh[M], /* i : primary channel LSFs (log2(2.56)) */ const Word16 lsf_wgts[M], /* i : LSF weights Q15? */ - int16_t *beta_index /* i/o: quantization index */ + Word16 *beta_index /* i/o: quantization index */ ); void tdm_SCh_lsf_reuse( @@ -3214,7 +3214,7 @@ void applyDmxMdctStereo( void applyDmxMdctStereo_fx( const CPE_DEC_HANDLE hCPE, /* i : CPE handle */ Word32 *output_fx[CPE_CHANNELS], /* o : output from core decoder */ - const int16_t output_frame /* i : output frame length */ + const Word16 output_frame /* i : output frame length */ ); @@ -3305,8 +3305,8 @@ void stereo_cna_update_params( void stereo_cna_update_params_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *output_fx[CPE_CHANNELS], /* i : Output signal */ - const int16_t output_frame, /* i : Output frame length */ - const int16_t tdm_ratio_idx /* i : TDM ratio index */ + const Word16 output_frame, /* i : Output frame length */ + const Word16 tdm_ratio_idx /* i : TDM ratio index */ ); void dtx_enc_init( Encoder_State *st, /* i : Encoder state handle */ @@ -3588,11 +3588,11 @@ void stereo_td2dft_update( void stereo_td2dft_update_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ - const int16_t n, /* i : channel number */ + const Word16 n, /* i : channel number */ Word32 output_fx[], /* i/o: synthesis @internal Fs */ Word32 synth_fx[], /* i/o: synthesis @output Fs */ Word32 hb_synth_fx[], /* i/o: hb synthesis */ - const int16_t output_frame /* i : frame length */ + const Word16 output_frame /* i : frame length */ ); void stereo_mdct2dft_update( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ @@ -3831,7 +3831,7 @@ void deindex_sph_idx( float *phi /* o : Azimuth */ ); void deindex_sph_idx_fx( - const uint16_t sphIndex, /* i : Spherical index */ + const UWord16 sphIndex, /* i : Spherical index */ const SPHERICAL_GRID_DATA *gridData, /* i : Prepared spherical grid */ Word32 *theta_fx, /* o : Elevation */ Word32 *phi_fx /* o : Azimuth */ @@ -4757,7 +4757,7 @@ void ivas_param_mc_dec_digest_tc( void ivas_param_mc_dec_digest_tc_fx( Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - const uint8_t nCldfbSlots, /* i : number of CLFBS slots in the transport channels */ + const UWord8 nCldfbSlots, /* i : number of CLFBS slots in the transport channels */ Word32 *transport_channels_f_fx[], Word16 transport_f_e ); @@ -5224,11 +5224,19 @@ ivas_error ivas_spar_dec_open( const int16_t spar_reconfig_flag /* i : SPAR reconfiguration flag */ ); +#ifndef IVAS_FLOAT_FIXED void ivas_spar_dec_close( SPAR_DEC_HANDLE *hSpar, /* i/o: SPAR decoder handle */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t spar_reconfig_flag /* i : SPAR reconfiguration flag */ ); +#else +void ivas_spar_dec_close_fx( + SPAR_DEC_HANDLE *hSpar, /* i/o: SPAR decoder handle */ + const Word32 output_Fs, /* i : output sampling rate */ + const Word16 spar_reconfig_flag /* i : SPAR reconfiguration flag */ +); +#endif ivas_error ivas_spar_dec( Decoder_Struct *st_ivas, /* i/o: IVAS decoder struct */ @@ -5691,18 +5699,18 @@ void ivas_get_spar_md_from_dirac_fx( Word32 azi_dirac[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES], Word32 ele_dirac[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES], Word32 diffuseness[IVAS_MAX_NUM_BANDS], - const int16_t n_ts, + const Word16 n_ts, Word32 ***mixer_mat, ivas_spar_md_t *hSpar_md, ivas_spar_md_com_cfg *hSpar_md_cfg, - const int16_t start_band, - const int16_t end_band, - const int16_t order, - const int16_t dtx_vad, + const Word16 start_band, + const Word16 end_band, + const Word16 order, + const Word16 dtx_vad, Word32 Wscale_d[IVAS_MAX_NUM_BANDS], - const uint8_t useLowerRes, - const int16_t active_w_vlbr, - const int16_t dyn_active_w_flag + const UWord8 useLowerRes, + const Word16 active_w_vlbr, + const Word16 dyn_active_w_flag ); #endif void ivas_get_spar_md_from_dirac( @@ -5765,10 +5773,10 @@ void ivas_spar_md_dec_close( void ivas_spar_get_parameters_fx( SPAR_DEC_HANDLE hSpar, /* i/o: SPAR decoder handle */ const DECODER_CONFIG_HANDLE hDecoderConfig, /* i : configuration structure */ - const int16_t ts, /* i : time slot index */ - const int16_t num_ch_out, /* i : number of channels out */ - const int16_t num_ch_in, /* i : number of channels in */ - const int16_t num_spar_bands, /* i : number of SPAR bands */ + const Word16 ts, /* i : time slot index */ + const Word16 num_ch_out, /* i : number of channels out */ + const Word16 num_ch_in, /* i : number of channels in */ + const Word16 num_spar_bands, /* i : number of SPAR bands */ Word32 par_mat_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS] /* o : mixing matrix */ ); void ivas_spar_get_parameters( @@ -5806,10 +5814,10 @@ void ivas_spar_to_dirac( void ivas_spar_to_dirac_fx( Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ - const int16_t dtx_vad, /* i : DTX frame flag */ - const int16_t num_bands_out, /* i : number of output bands */ - const int16_t bw, /* i : band joining factor */ - const int16_t dyn_active_w_flag /* i : dynamic active W flag */ + const Word16 dtx_vad, /* i : DTX frame flag */ + const Word16 num_bands_out, /* i : number of output bands */ + const Word16 bw, /* i : band joining factor */ + const Word16 dyn_active_w_flag /* i : dynamic active W flag */ ); void ivas_spar_update_md_hist( ivas_spar_md_dec_state_t *hMdDec /* i/o: SPAR MD decoder handle */ @@ -6289,12 +6297,12 @@ void ivas_quantise_real_values( ); void ivas_quantise_real_values_fx( const Word32 *values_fx, - const int16_t q_levels, + const Word16 q_levels, const Word32 min_value_fx, const Word32 max_value_fx, - int16_t *index, + Word16 *index, Word32 *quant_fx, - const int16_t dim); + const Word16 dim); void ivas_spar_get_uniform_quant_strat( ivas_spar_md_com_cfg *pSpar_md_com_cfg, diff --git a/lib_com/ivas_prot_fx.h b/lib_com/ivas_prot_fx.h index 56709404a..14d5b40cf 100644 --- a/lib_com/ivas_prot_fx.h +++ b/lib_com/ivas_prot_fx.h @@ -2100,9 +2100,9 @@ void td_bwe_dec_init_ivas_fx( ); void ivas_dirac_dec_render_sf_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ - Word32 *output_fx[], /* i/o: synthesized core-coder transport channels/DirAC output */ - const int16_t nchan_transport, /* i : number of transport channels */ + Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ + Word32 *output_fx[], /* i/o: synthesized core-coder transport channels/DirAC output */ + const Word16 nchan_transport, /* i : number of transport channels */ Word32 *pppQMfFrame_ts_re_fx[IVAS_MAX_FB_MIXER_IN_CH][CLDFB_NO_COL_MAX], Word32 *pppQMfFrame_ts_im_fx[IVAS_MAX_FB_MIXER_IN_CH][CLDFB_NO_COL_MAX] ); @@ -2116,15 +2116,15 @@ void ivas_dirac_dec_render_fx( ); void ivas_dirac_dec_read_BS_fx( - const int32_t ivas_total_brate, /* i : IVAS total bitrate */ + const Word32 ivas_total_brate, /* i : IVAS total bitrate */ Decoder_State *st, /* i/o: decoder Core state structure */ DIRAC_DEC_HANDLE hDirAC, /* i/o: decoder DirAC handle */ SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom, /* i/o: common spatial rendering data handle */ IVAS_QMETADATA_HANDLE hQMetaData, /* i/o: q metadata */ - int16_t *nb_bits, /* o : number of bits read */ - const int16_t last_bit_pos, /* i : last read bitstream position */ - const int16_t hodirac_flag, /* i : flag to indicate HO-DirAC mode */ - int16_t *dirac_to_spar_md_bands /* o : DirAC->SPAR MD bands */ + Word16 *nb_bits, /* o : number of bits read */ + const Word16 last_bit_pos, /* i : last read bitstream position */ + const Word16 hodirac_flag, /* i : flag to indicate HO-DirAC mode */ + Word16 *dirac_to_spar_md_bands /* o : DirAC->SPAR MD bands */ ); ivas_error ivas_dirac_dec_config_fx( @@ -2166,23 +2166,23 @@ ivas_error ivas_ism_metadata_dec_create_fx( Word32 element_brate_tmp[] /* o : element bitrate per object */ ); ivas_error ivas_sba_dec_reconfigure_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ - uint16_t *nSamplesFlushed, /* o : number of samples flushed */ - int16_t *data /* o : output synthesis signal */ + Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ + UWord16 *nSamplesFlushed, /* o : number of samples flushed */ + Word16 *data /* o : output synthesis signal */ ); ivas_error ivas_spar_md_dec_matrix_open_fx( ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ - const int16_t num_channels, /* i : number of internal channels */ - const int16_t num_md_sub_frames /* i : number of MD subframes */ + const Word16 num_channels, /* i : number of internal channels */ + const Word16 num_md_sub_frames /* i : number of MD subframes */ ); void ivas_spar_md_dec_matrix_close_fx( ivas_spar_md_dec_state_t *hMdDecoder, /* i/o: SPAR MD decoder handle */ - const int16_t num_channels /* i : number of internal channels */ + const Word16 num_channels /* i : number of internal channels */ ); ivas_error ivas_spar_dec_open_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - const int16_t spar_reconfig_flag /* i : SPAR reconfiguration flag */ + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const Word16 spar_reconfig_flag /* i : SPAR reconfiguration flag */ ); void ivas_param_mc_dec_read_BS_fx( @@ -2226,8 +2226,8 @@ ivas_error ivas_FB_mixer_open_fx( void ivas_FB_mixer_close_fx( IVAS_FB_MIXER_HANDLE *hFbMixer_in, /* i/o: FB mixer handle */ - const int32_t sampling_rate, /* i : sampling rate in Hz */ - const int16_t spar_reconfig_flag /* i : SPAR reconfiguration flag */ + const Word32 sampling_rate, /* i : sampling rate in Hz */ + const Word16 spar_reconfig_flag /* i : SPAR reconfiguration flag */ ); void ivas_fb_mixer_cross_fading_fx( @@ -2235,9 +2235,9 @@ void ivas_fb_mixer_cross_fading_fx( Word32 **ppOut_pcm_fx, Word32 *pMdft_out_old_fx, Word32 *pMdft_out_new_fx, - const int16_t ch, - const int16_t frame_len, - const int16_t cf_offset ); + const Word16 ch, + const Word16 frame_len, + const Word16 cf_offset ); // ivas_omasa_dec.c ivas_error ivas_omasa_dirac_td_binaural_jbm_fx( diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 39de3240e..0e24db967 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -182,7 +182,9 @@ UWord32 ivas_syn_output_fx( { Word16 i, n; Word16 synth_loc[MAX_JBM_L_FRAME48k]; + UWord32 tmp; UWord32 noClipping = 0; + move32(); /*-----------------------------------------------------------------* * float to integer conversion with saturation control @@ -190,11 +192,13 @@ UWord32 ivas_syn_output_fx( FOR( n = 0; n < n_channels; n++ ) { - noClipping += mvl2s_r( synth[n], q_synth, synth_loc, output_frame ); + tmp = mvl2s_r( synth[n], q_synth, synth_loc, output_frame ); + noClipping = UL_addNsD( noClipping, tmp ); FOR( i = 0; i < output_frame; i++ ) { - synth_out[i * n_channels + n] = synth_loc[i]; + synth_out[L_add( imult1616( i, n_channels ), n )] = synth_loc[i]; + move16(); } } @@ -253,7 +257,8 @@ void ivas_syn_output_f_fx( { FOR( i = 0; i < output_frame; i++ ) { - synth_out[i * n_channels + n] = synth[n][i]; + synth_out[L_add( imult1616( i, n_channels ), n )] = synth[n][i]; + move16(); } } @@ -289,25 +294,29 @@ void mvr2r_inc_fixed_one( IF( y_fx < x_fx ) { ix = 0; + move16(); iy = 0; + move16(); FOR( i = 0; i < n; i++ ) { y_fx[iy] = x_fx[ix]; + move32(); - ix += x_inc; - iy += y_inc; + ix = add( ix, x_inc ); + iy = add( iy, y_inc ); } } ELSE { - ix = ( n - 1 ) * x_inc; - iy = ( n - 1 ) * y_inc; - FOR( i = n - 1; i >= 0; i-- ) + ix = imult1616( sub( n, 1 ), x_inc ); + iy = imult1616( sub( n, 1 ), y_inc ); + FOR( i = sub( n, 1 ); i >= 0; i-- ) { y_fx[iy] = x_fx[ix]; + move32(); - ix -= x_inc; - iy -= y_inc; + ix = sub( ix, x_inc ); + iy = sub( iy, y_inc ); } } @@ -351,7 +360,7 @@ void mvr2r_inc_fixed( { ix = i_mult( sub( n, 1 ), x_inc ); iy = i_mult( sub( n, 1 ), y_inc ); - FOR( i = n - 1; i >= 0; i-- ) + FOR( i = sub( n, 1 ); i >= 0; i-- ) { y_fx[iy] = x_fx[ix]; move32(); @@ -457,12 +466,16 @@ void v_add_inc_fx( Word16 ix1 = 0; Word16 ix2 = 0; Word16 iy = 0; + move16(); + move16(); + move16(); FOR( i = 0; i < N; i++ ) { y[iy] = L_add( x1[ix1], x2[ix2] ); - ix1 += x_inc; - ix2 += x2_inc; - iy += y_inc; + move32(); + ix1 = add( ix1, x_inc ); + ix2 = add( ix2, x2_inc ); + iy = add( iy, y_inc ); } return; } @@ -493,10 +506,16 @@ void v_mult_inc_fx( Word16 ix2 = 0; Word16 iy = 0; + move16(); + move16(); + move16(); + FOR( i = 0; i < N; i++ ) { y_fx[iy] = Mpy_32_32( x1_fx[ix1], x2_fx[ix2] ); + move32(); y_q_fx[iy] = sub( add( x1_q_fx[ix1], x2_q_fx[ix2] ), 31 ); + move16(); ix1 = add( ix1, x1_inc ); ix2 = add( ix2, x2_inc ); @@ -522,9 +541,14 @@ void v_mult_inc_fixed( Word16 ix2 = 0; Word16 iy = 0; + move16(); + move16(); + move16(); + FOR( i = 0; i < N; i++ ) { y_fx[iy] = Mpy_32_32( x1_fx[ix1], x2_fx[ix2] ); + move32(); ix1 = add( ix1, x1_inc ); ix2 = add( ix2, x2_inc ); @@ -556,44 +580,6 @@ void v_mult_inc( int16_t ix2 = 0; int16_t iy = 0; -#ifdef IVAS_FLOAT_FIXED - ///////////////// to be removed //////////////////////////////// - Word32 x1_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - Word32 x2_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - Word32 y_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - Word16 y_q_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - Word16 x1_q_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - Word16 x2_q_fx[2 * MAX_OUTPUT_CHANNELS * CLDFB_NO_CHANNELS_MAX]; - FOR( i = 0; i < N; i++ ) - { - x1_q_fx[ix1] = Q_factor_L( x1[ix1] ); - x1_fx[ix1] = (Word32) ( x1[ix1] * ( W_shl( 1, x1_q_fx[ix1] ) ) ); - x2_q_fx[ix2] = Q_factor_L( x2[ix2] ); - x2_fx[ix2] = (Word32) ( x2[ix2] * ( W_shl( 1, x2_q_fx[ix2] ) ) ); - ix1 = add( ix1, x1_inc ); - ix2 = add( ix2, x2_inc ); - iy = add( iy, y_inc ); - } - //////////////////////////////////////////////////////////////////// - - v_mult_inc_fx( x1_fx, x1_q_fx, x1_inc, x2_fx, x2_q_fx, x2_inc, y_fx, y_q_fx, y_inc, N ); - - /////////////////////// to be removed /////////////////////////////// - iy = 0; - FOR( i = 0; i < N; i++ ) - { - IF( LT_16( y_q_fx[iy], 0 ) ) - { - y[iy] = (Float32) y_fx[iy] * ( W_shl( 1, ( -y_q_fx[iy] ) ) ); - } - ELSE - { - y[iy] = (Float32) y_fx[iy] / ( W_shl( 1, y_q_fx[iy] ) ); - } - iy = add( iy, y_inc ); - } - /////////////////////////////////////////////////////////////////////// -#else for ( i = 0; i < N; i++ ) { y[iy] = x1[ix1] * x2[ix2]; @@ -601,7 +587,7 @@ void v_mult_inc( ix2 += x2_inc; iy += y_inc; } -#endif + return; } @@ -624,6 +610,7 @@ void v_addc_fx( FOR( i = 0; i < N; i++ ) { y_fx[i] = L_add( c_fx, x_fx[i] ); + move32(); } return; @@ -647,6 +634,7 @@ void v_addc_fixed( FOR( i = 0; i < N; i++ ) { y[i] = L_add( c, x[i] ); + move32(); } return; @@ -662,32 +650,11 @@ void v_addc( { int16_t i; -#ifdef IVAS_FLOAT_FIXED - Word32 x_fx[CLDFB_NO_CHANNELS_MAX]; - Word32 y_fx[CLDFB_NO_CHANNELS_MAX]; - const Word32 c_fx = (Word32) ( c * L_shl( 1, 29 ) ); - - //////////////////////////// to be removed ////////////////////// - FOR( i = 0; i < N; i++ ) - { - x_fx[i] = (Word32) ( x[i] * L_shl( 1, 29 ) ); - } - ///////////////////////////////////////////////////////////////// - - v_addc_fx( (const Word32 *) x_fx, c_fx, y_fx, N ); - - ///////////////////////////// to be removed ////////////////////// - FOR( i = 0; i < N; i++ ) - { - y[i] = (Float32) y_fx[i] / L_shl( 1, 29 ); - } - /////////////////////////////////////////////////////////////////// -#else for ( i = 0; i < N; i++ ) { y[i] = c + x[i]; } -#endif + return; } @@ -717,12 +684,16 @@ void v_min_fx( IF( LT_32( L_shr( x1_fx[i], sub( x1_q_fx[i], x2_q_fx[i] ) ), x2_fx[i] ) ) { y_fx[i] = x1_fx[i]; + move32(); y_q_fx[i] = x1_q_fx[i]; + move16(); } ELSE { y_fx[i] = x2_fx[i]; + move32(); y_q_fx[i] = x2_q_fx[i]; + move16(); } } ELSE @@ -730,12 +701,16 @@ void v_min_fx( IF( LT_32( x1_fx[i], L_shr( x2_fx[i], sub( x2_q_fx[i], x1_q_fx[i] ) ) ) ) { y_fx[i] = x1_fx[i]; + move32(); y_q_fx[i] = x1_q_fx[i]; + move16(); } ELSE { y_fx[i] = x2_fx[i]; + move32(); y_q_fx[i] = x2_q_fx[i]; + move16(); } } } @@ -758,37 +733,11 @@ void v_min( { int16_t i; -#ifdef IVAS_FLOAT_FIXED - //////////////////////// to be removed /////////////////////////////// - Word32 x1_fx[MASA_FREQUENCY_BANDS]; - Word32 x2_fx[MASA_FREQUENCY_BANDS]; - Word32 y_fx[MASA_FREQUENCY_BANDS]; - Word16 x1_q_fx[MASA_FREQUENCY_BANDS]; - Word16 x2_q_fx[MASA_FREQUENCY_BANDS]; - Word16 y_q_fx[MASA_FREQUENCY_BANDS]; - FOR( i = 0; i < N; i++ ) - { - x1_q_fx[i] = Q_factor_L( x1[i] ); - x1_fx[i] = (Word32) ( x1[i] * ( W_shl( 1, x1_q_fx[i] ) ) ); - x2_q_fx[i] = Q_factor_L( x2[i] ); - x2_fx[i] = (Word32) ( x2[i] * ( W_shl( 1, x2_q_fx[i] ) ) ); - } - ///////////////////////////////////////////////////////////////////////// - - v_min_fx( (const Word32 *) x1_fx, x1_q_fx, (const Word32 *) x2_fx, x2_q_fx, y_fx, y_q_fx, N ); - - //////////////////////// to be removed //////////////////////////////// - FOR( i = 0; i < N; i++ ) - { - y[i] = (Float32) y_fx[i] / ( W_shl( 1, y_q_fx[i] ) ); - } - /////////////////////////////////////////////////////////////////////// -#else for ( i = 0; i < N; i++ ) { y[i] = ( x1[i] < x2[i] ) ? x1[i] : x2[i]; } -#endif + return; } @@ -811,6 +760,7 @@ void v_sqrt_fx( FOR( i = 0; i < N; i++ ) { y[i] = Sqrt32( x[i], &exp[i] ); + move32(); } return; @@ -853,6 +803,7 @@ void v_sub_s16_fx( FOR( i = 0; LT_16( i, N ); i++ ) { y[i] = sub( x1[i], x2[i] ); + move16(); } return; @@ -981,7 +932,9 @@ void v_mult_mat_fx( { pt_x_fx = x_fx; *pt_y_fx = 0; + move32(); y_q_fx[i] = 0; + move32(); FOR( j = 0; j < Nr; j++ ) { temp = Mpy_32_32( *pt_x_fx++, *pt_A_fx++ ); @@ -989,18 +942,23 @@ void v_mult_mat_fx( IF( EQ_16( j, 0 ) ) { *pt_y_fx = temp; + move32(); y_q_fx[i] = temp_q; + move16(); } ELSE { IF( GT_16( y_q_fx[i], temp_q ) ) { *pt_y_fx = L_add( L_shr( *pt_y_fx, sub( y_q_fx[i], temp_q ) ), temp ); + move32(); y_q_fx[i] = temp_q; + move16(); } ELSE { *pt_y_fx = L_add( *pt_y_fx, L_shr( temp, sub( temp_q, y_q_fx[i] ) ) ); + move32(); } } } @@ -1030,55 +988,6 @@ void v_mult_mat( { int16_t i, j; -#ifdef IVAS_FLOAT_FIXED - //////////////////// to be removed ////////////////////// - Word32 y_fx[NB_MEL_BANDS]; - Word32 x_fx[NB_MEL_BANDS]; - Word32 A_fx[NB_MEL_BANDS * NB_MEL_COEF]; - Word16 y_q_fx[NB_MEL_BANDS]; - Word16 x_q_fx[NB_MEL_BANDS]; - Word16 A_q_fx[NB_MEL_BANDS * NB_MEL_COEF]; - Word32 *pt_x_fx, *pt_A_fx; - const Float32 *pt_x, *pt_A; - Word16 *pt_x_q_fx, *pt_A_q_fx; - - pt_A_fx = A_fx; - pt_A_q_fx = A_q_fx; - pt_A = A; - - FOR( i = 0; i < Nc; i++ ) - { - pt_x = x; - pt_x_fx = x_fx; - pt_x_q_fx = x_q_fx; - FOR( j = 0; j < Nr; j++ ) - { - IF( EQ_16( i, 0 ) ) - { - *pt_x_q_fx = sub( Q_factor_L( *pt_x ), 3 ); - *pt_x_fx++ = (Word32) ( *pt_x++ * ( W_shl( 1, *pt_x_q_fx++ ) ) ); - } - *pt_A_q_fx = sub( Q_factor_L( *pt_A ), 3 ); - *pt_A_fx++ = (Word32) ( *pt_A++ * ( W_shl( 1, *pt_A_q_fx++ ) ) ); - } - } - - v_mult_mat_fx( y_fx, y_q_fx, (const Word32 *) x_fx, x_q_fx, (const Word32 *) A_fx, A_q_fx, Nr, Nc ); - - ////////////////////////////// to be removed //////////////////////// - FOR( i = 0; i < Nc; i++ ) - { - IF( LT_16( y_q_fx[i], 0 ) ) - { - y[i] = (Float32) y_fx[i] * W_shl( 1, -y_q_fx[i] ); - } - ELSE - { - y[i] = (Float32) y_fx[i] / W_shl( 1, y_q_fx[i] ); - } - } - //////////////////////////////////////////////////////////////////// -#else const float *pt_x, *pt_A; float tmp_y[MAX_V_MULT_MAT]; float *pt_y; @@ -1098,7 +1007,7 @@ void v_mult_mat( } mvr2r( tmp_y, y, Nc ); -#endif + return; } @@ -1380,12 +1289,13 @@ Word16 matrix_product_mant_exp_fx( Word16 out_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS]; Word16 *Zp_fx_e = out_e; Word16 row, col; + Word16 x_idx, y_idx; /* Processing */ test(); test(); test(); - IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */ { IF( NE_16( rowsX, rowsY ) ) { @@ -1401,16 +1311,21 @@ Word16 matrix_product_mant_exp_fx( move16(); FOR( k = 0; k < rowsX; ++k ) { - ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + move32(); } Zp_fx++; Zp_fx_e++; } } row = colsY; + move16(); col = colsX; + move16(); } - ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */ { IF( NE_16( colsX, colsY ) ) { @@ -1426,14 +1341,19 @@ Word16 matrix_product_mant_exp_fx( move16(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + move32(); } Zp_fx++; Zp_fx_e++; } } row = rowsY; + move16(); col = rowsX; + move16(); } ELSE IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 1 ) ) /* We use both transpose */ { @@ -1451,7 +1371,10 @@ Word16 matrix_product_mant_exp_fx( move16(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + move32(); } Zp_fx++; @@ -1459,7 +1382,9 @@ Word16 matrix_product_mant_exp_fx( } } row = rowsY; + move16(); col = colsX; + move16(); } ELSE /* Regular case */ { @@ -1478,18 +1403,24 @@ Word16 matrix_product_mant_exp_fx( move16(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp_fx ) = BASOP_Util_Add_Mant32Exp( *Zp_fx, *Zp_fx_e, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ), add( X_fx_e, Y_fx_e ), Zp_fx_e ); + move32(); } Zp_fx++; Zp_fx_e++; } } row = colsY; + move16(); col = rowsX; + move16(); } Zp_fx = Z_fx; Zp_fx_e = out_e; Word16 max_exp = -31; + move16(); FOR( j = 0; j < row; ++j ) { FOR( i = 0; i < col; ++i ) @@ -1500,11 +1431,13 @@ Word16 matrix_product_mant_exp_fx( } Zp_fx_e = out_e; *Z_fx_e = max_exp; + move16(); FOR( j = 0; j < row; ++j ) { FOR( i = 0; i < col; ++i ) { *Zp_fx = L_shr_r( *Zp_fx, sub( *Z_fx_e, *Zp_fx_e ) ); + move32(); Zp_fx++; Zp_fx_e++; } @@ -1526,13 +1459,14 @@ Word16 matrix_product_fx( ) { Word16 i, j, k; + Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; /* Processing */ test(); test(); test(); - IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */ { IF( NE_16( rowsX, rowsY ) ) { @@ -1546,13 +1480,16 @@ Word16 matrix_product_fx( move32(); FOR( k = 0; k < rowsX; ++k ) { - ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); + move32(); } Zp_fx++; } } } - ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */ { IF( NE_16( colsX, colsY ) ) { @@ -1566,7 +1503,10 @@ Word16 matrix_product_fx( move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); + move32(); } Zp_fx++; } @@ -1586,7 +1526,10 @@ Word16 matrix_product_fx( move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); + move32(); } Zp_fx++; @@ -1608,7 +1551,10 @@ Word16 matrix_product_fx( move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); + move32(); } Zp_fx++; } @@ -1631,6 +1577,7 @@ Word16 matrix_product_q30_fx( ) { Word16 i, j, k; + Word16 x_idx, y_idx; Word32 *Zp_fx = Z_fx; Word64 W_tmp; @@ -1638,7 +1585,7 @@ Word16 matrix_product_q30_fx( test(); test(); test(); - IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */ { IF( NE_16( rowsX, rowsY ) ) { @@ -1654,15 +1601,18 @@ Word16 matrix_product_q30_fx( FOR( k = 0; k < rowsX; ++k ) { //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); - W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); // Q56 + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); // Q56 } W_tmp = W_shl( W_tmp, 6 ); ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision + move32(); Zp_fx++; } } } - ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */ { IF( NE_16( colsX, colsY ) ) { @@ -1678,10 +1628,13 @@ Word16 matrix_product_q30_fx( FOR( k = 0; k < colsX; ++k ) { //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); - W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); // Q56 + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); // Q56 } W_tmp = W_shl( W_tmp, 6 ); ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision + move32(); Zp_fx++; } } @@ -1702,11 +1655,14 @@ Word16 matrix_product_q30_fx( FOR( k = 0; k < colsX; ++k ) { //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); // Q56 } W_tmp = W_shl( W_tmp, 6 ); ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision + move32(); Zp_fx++; } } @@ -1728,10 +1684,13 @@ Word16 matrix_product_q30_fx( FOR( k = 0; k < colsX; ++k ) { //( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); - W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); // Q56 + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + W_tmp = W_add( W_tmp, W_mult0_32_32( X_fx[x_idx], Y_fx[y_idx] ) ); // Q56 } W_tmp = W_shl( W_tmp, 6 ); ( *Zp_fx ) = L_sub( W_round64_L( W_tmp ), 64 ); // adjusting for precision + move32(); Zp_fx++; } } @@ -1760,12 +1719,13 @@ Word16 matrix_product_mant_exp( Word16 *Zp_e = Z_e; Word32 L_tmp; Word16 tmp_e; + Word16 x_idx, y_idx; /* Processing */ test(); test(); test(); - IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */ { IF( NE_16( rowsX, rowsY ) ) { @@ -1781,11 +1741,14 @@ Word16 matrix_product_mant_exp( move16(); FOR( k = 0; k < rowsX; ++k ) { + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); //( *Zp ) += X[k + i * rowsX] * Y[k + j * rowsY]; - L_tmp = Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ); - tmp_e = add( X_e[k + i * rowsX], Y_e[k + j * rowsY] ); + L_tmp = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); + tmp_e = add( X_e[x_idx], Y_e[y_idx] ); ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e ); + move32(); ( *Zp_e ) = tmp_e; move16(); } @@ -1794,7 +1757,7 @@ Word16 matrix_product_mant_exp( } } } - ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */ { IF( NE_16( colsX, colsY ) ) { @@ -1810,9 +1773,11 @@ Word16 matrix_product_mant_exp( move16(); FOR( k = 0; k < colsX; ++k ) { + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); //( *Zp ) += X_fx[i + k * rowsX] * Y_fx[j + k * rowsY]; - L_tmp = Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ); - tmp_e = add( X_e[i + k * rowsX], Y_e[j + k * rowsY] ); + L_tmp = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); + tmp_e = add( X_e[x_idx], Y_e[y_idx] ); ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e ); ( *Zp_e ) = tmp_e; @@ -1839,11 +1804,14 @@ Word16 matrix_product_mant_exp( move16(); FOR( k = 0; k < colsX; ++k ) { + x_idx = add( k, imult1616( i, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); //( *Zp ) += X_fx[k + i * rowsX] * Y_fx[j + k * rowsY]; - L_tmp = Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ); - tmp_e = add( X_e[k + i * rowsX], Y_e[j + k * rowsY] ); + L_tmp = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); + tmp_e = add( X_e[x_idx], Y_e[y_idx] ); ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e ); + move32(); ( *Zp_e ) = tmp_e; move16(); } @@ -1870,11 +1838,14 @@ Word16 matrix_product_mant_exp( move16(); FOR( k = 0; k < colsX; ++k ) { + x_idx = add( i, imult1616( k, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); //( *Zp ) += X_fx[i + k * rowsX] * Y_fx[k + j * rowsY]; - L_tmp = Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ); - tmp_e = add( X_e[i + k * rowsX], Y_e[k + j * rowsY] ); + L_tmp = Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); + tmp_e = add( X_e[x_idx], Y_e[y_idx] ); ( *Zp ) = BASOP_Util_Add_Mant32Exp( *Zp, *Zp_e, L_tmp, tmp_e, &tmp_e ); + move32(); ( *Zp_e ) = tmp_e; move16(); } @@ -1959,6 +1930,7 @@ Word16 matrix_diag_product_fx( { Word16 i, j; Word32 *Zp = Z; + Word16 tmp; /* Processing */ IF( EQ_16( transpX, 1 ) ) /* We use X transpose */ @@ -1971,7 +1943,9 @@ Word16 matrix_diag_product_fx( { FOR( i = 0; i < colsX; ++i ) { - *( Zp ) = Mpy_32_32( X[j + i * rowsX], Y[j] ); + tmp = add( j, imult1616( i, rowsX ) ); + *( Zp ) = Mpy_32_32( X[tmp], Y[j] ); + move32(); Zp++; } } @@ -1988,6 +1962,7 @@ Word16 matrix_diag_product_fx( FOR( i = 0; i < rowsX; ++i ) { *( Zp ) = Mpy_32_32( *( X ), Y[j] ); + move32(); Zp++; X++; } @@ -1995,6 +1970,7 @@ Word16 matrix_diag_product_fx( } *Z_e = add( X_e, Y_e ); + move16(); return EXIT_SUCCESS; } @@ -2015,6 +1991,7 @@ Word16 diag_matrix_product_fx( { Word16 i, j; Word32 *Zp = Z; + Word16 tmp; /* Processing */ IF( EQ_16( transpX, 1 ) ) /* We use X transpose */ @@ -2027,7 +2004,9 @@ Word16 diag_matrix_product_fx( { FOR( j = 0; j < entriesY; ++j ) { - *( Zp ) = Mpy_32_32( X[i + j * rowsX], Y[j] ); + tmp = add( i, imult1616( j, rowsX ) ); + *( Zp ) = Mpy_32_32( X[tmp], Y[j] ); + move32(); Zp++; } } @@ -2043,6 +2022,7 @@ Word16 diag_matrix_product_fx( FOR( j = 0; j < entriesY; ++j ) { *( Zp ) = Mpy_32_32( *( X ), Y[j] ); + move32(); Zp++; X++; } @@ -2050,6 +2030,7 @@ Word16 diag_matrix_product_fx( } *Z_e = add( Y_e, X_e ); + move16(); return EXIT_SUCCESS; } @@ -2126,12 +2107,13 @@ Word16 matrix_product_diag_fx( { Word16 j, k; Word32 *Zp = Z; + Word16 y_idx, x_idx; /* Processing */ test(); test(); test(); - IF( EQ_16( transpX, 1 ) && EQ_16( transpY, 0 ) ) /* We use X transpose */ + IF( EQ_16( transpX, 1 ) && transpY == 0 ) /* We use X transpose */ { IF( NE_16( rowsX, rowsY ) ) { @@ -2141,14 +2123,18 @@ Word16 matrix_product_diag_fx( FOR( j = 0; j < colsY; ++j ) { ( *Zp ) = 0; + move32(); FOR( k = 0; k < rowsX; ++k ) { - ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[k + j * rowsX], Y[k + j * rowsY] ) ); + x_idx = add( k, imult1616( j, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[x_idx], Y[y_idx] ) ); + move32(); } Zp++; } } - ELSE IF( EQ_16( transpX, 0 ) && EQ_16( transpY, 1 ) ) /* We use Y transpose */ + ELSE IF( transpX == 0 && EQ_16( transpY, 1 ) ) /* We use Y transpose */ { IF( NE_16( colsX, colsY ) ) { @@ -2157,9 +2143,13 @@ Word16 matrix_product_diag_fx( FOR( j = 0; j < rowsY; ++j ) { ( *Zp ) = 0; + move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[j + k * rowsX], Y[j + k * rowsY] ) ); + x_idx = add( j, imult1616( k, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[x_idx], Y[y_idx] ) ); + move32(); } Zp++; } @@ -2175,9 +2165,13 @@ Word16 matrix_product_diag_fx( { ( *Zp ) = 0; + move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[k + j * rowsX], Y[j + k * rowsY] ) ); + x_idx = add( k, imult1616( j, rowsX ) ); + y_idx = add( j, imult1616( k, rowsY ) ); + ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[x_idx], Y[y_idx] ) ); + move32(); } Zp++; @@ -2193,15 +2187,20 @@ Word16 matrix_product_diag_fx( FOR( j = 0; j < colsY; ++j ) { ( *Zp ) = 0; + move32(); FOR( k = 0; k < colsX; ++k ) { - ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[j + k * rowsX], Y[k + j * rowsY] ) ); + x_idx = add( j, imult1616( k, rowsX ) ); + y_idx = add( k, imult1616( j, rowsY ) ); + ( *Zp ) = L_add( ( *Zp ), Mpy_32_32( X[x_idx], Y[y_idx] ) ); + move32(); } Zp++; } } *Z_e = add( X_e, Y_e ); + move16(); return EXIT_SUCCESS; } @@ -2317,6 +2316,7 @@ void cmplx_matrix_square_fx( Word16 i, j, k; Word32 *realZp, *imagZp; const Word32 *p_real1, *p_real2, *p_imag1, *p_imag2; + Word16 tmp1, tmp2; /* resulting matrix is hermitean, we only need to calc the upper triangle */ /* we assume transposition needed */ @@ -2326,12 +2326,12 @@ void cmplx_matrix_square_fx( { FOR( j = i; j < nCols; j++ ) { - p_real1 = realX + i * mRows; - p_imag1 = imagX + i * mRows; - p_real2 = realX + j * mRows; - p_imag2 = imagX + j * mRows; - realZp = realZ + ( i + nCols * j ); - imagZp = imagZ + ( i + nCols * j ); + p_real1 = realX + imult1616( i, mRows ); + p_imag1 = imagX + imult1616( i, mRows ); + p_real2 = realX + imult1616( j, mRows ); + p_imag2 = imagX + imult1616( j, mRows ); + realZp = realZ + add( i, imult1616( nCols, j ) ); + imagZp = imagZ + add( i, imult1616( nCols, j ) ); *( realZp ) = 0; move32(); *( imagZp ) = 0; @@ -2340,7 +2340,9 @@ void cmplx_matrix_square_fx( FOR( k = 0; k < mRows; k++ ) { *( imagZp ) = L_add( *( imagZp ), L_sub( Mpy_32_32( *( p_real1 ), *( p_imag2 ) ), Mpy_32_32( *( p_real2 ), *( p_imag1 ) ) ) ); + move32(); *( realZp ) = L_add( *( realZp ), L_add( Mpy_32_32( *( p_real1 ), *( p_real2 ) ), Mpy_32_32( *( p_imag1 ), *( p_imag2 ) ) ) ); + move32(); p_real1++; p_real2++; p_imag1++; @@ -2354,9 +2356,11 @@ void cmplx_matrix_square_fx( { FOR( j = 0; j < i; j++ ) { - realZ[i + nCols * j] = realZ[j + nCols * i]; + tmp1 = add( i, imult1616( nCols, j ) ); + tmp2 = add( j, imult1616( nCols, i ) ); + realZ[tmp1] = realZ[tmp2]; move32(); - imagZ[i + nCols * j] = imagZ[j + nCols * i]; + imagZ[tmp1] = imagZ[tmp2]; move32(); } } @@ -2463,6 +2467,7 @@ void v_multc_acc_32_16( FOR( i = 0; i < N; i++ ) { y[i] = L_add( y[i], Mpy_32_16_1( x[i], c ) ); + move32(); } return; @@ -2479,6 +2484,7 @@ void v_multc_acc_32_32( FOR( i = 0; i < N; i++ ) { y[i] = L_add( y[i], Mpy_32_32( x[i], c ) ); + move32(); } return; @@ -2549,54 +2555,66 @@ void lls_interp_n_fx( { Word16 i; const Word16 n_i_fx[11] = { 0, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, 20480 }; // Q11 + move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + const Word16 one_by_n_fx[11] = { 0, 32767, 16384, 10911, 8192, 6553, 5459, 4681, 4096, 3640, 3276 }; + move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + const Word16 sum_i_fx[12] = { 0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 }; + move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); // 1.0f/ ( N * sum_ii[N] - sum_i[N] * sum_i[N] ) const Word32 res_table[12] = { 0, 0, 0, 357913952, 107374184, 42949672, 20452226, 10956549, 6391320, 3976821, 2603010, 385 }; + move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); Word32 sum_x_fx, sum_ix_fx, slope_fx, offset_fx; Word16 dot_exp = 0, sum_ix_q = 0; - ; + move16(); move16(); + Word32 num; - assert( N > 0 && N <= 10 ); + assert( N > 0 && LE_16( N, 10 ) ); sum_x_fx = 0; + move32(); FOR( int idx = 0; idx < N; idx++ ) { - sum_x_fx = (Word32) sum_x_fx + x_fx[idx]; + sum_x_fx = L_add( sum_x_fx, x_fx[idx] ); } sum_ix_fx = dotp_fx( x_fx, n_i_fx, N, &dot_exp ); - sum_ix_q = 30 - ( dot_exp - ( 11 + 15 ) ); + sum_ix_q = sub( 30, sub( dot_exp, ( 11 + 15 ) ) ); - sum_ix_fx = L_shr( sum_ix_fx, sum_ix_q - 15 ); - num = L_sub( ( sum_ix_fx * N ), ( sum_x_fx * sum_i_fx[N] ) ); + sum_ix_fx = L_shr( sum_ix_fx, sub( sum_ix_q, 15 ) ); + num = L_sub( imult3216( sum_ix_fx, N ), imult3216( sum_x_fx, sum_i_fx[N] ) ); slope_fx = Mpy_32_32( num, res_table[N] ); - offset_fx = Mpy_32_16_1( L_sub( sum_x_fx, ( slope_fx * sum_i_fx[N] ) ), one_by_n_fx[N] ); + offset_fx = Mpy_32_16_1( L_sub( sum_x_fx, imult3216( slope_fx, sum_i_fx[N] ) ), one_by_n_fx[N] ); IF( upd ) { FOR( i = 0; i < N; i++ ) { - IF( slope_fx * i > MAX_WORD16 ) + IF( GT_32( imult3216( slope_fx, i ), MAX_WORD16 ) ) { x_fx[i] = MAX_WORD16; + move16(); } ELSE { - x_fx[i] = (Word16) L_add_sat( ( slope_fx * i ), offset_fx ); + x_fx[i] = extract_l( L_add_sat( imult3216( slope_fx, i ), offset_fx ) ); + move16(); } } } IF( a_fx != NULL ) { - *a_fx = (Word16) slope_fx; + *a_fx = extract_l( slope_fx ); + move16(); } IF( b_fx != NULL ) { - *b_fx = (Word16) offset_fx; + *b_fx = extract_l( offset_fx ); + move16(); } return; @@ -2628,6 +2646,7 @@ static Word32 wrap_azi_fixed( const Word32 azi_deg ) { Word32 azi = azi_deg; + move32(); /* Wrap azimuth value */ WHILE( GT_32( azi, ANGLE_180_DEG_Q22 ) ) @@ -2730,12 +2749,16 @@ void panning_wrap_angles_fixed( Word32 azi, ele; azi = azi_deg; + move32(); ele = ele_deg; + move32(); - IF( L_abs( ele ) < ANGLE_90_DEG_Q22 ) + IF( LT_32( L_abs( ele ), ANGLE_90_DEG_Q22 ) ) { *ele_wrapped = ele; + move32(); *azi_wrapped = wrap_azi_fixed( azi ); + move32(); return; } ELSE @@ -2744,36 +2767,40 @@ void panning_wrap_angles_fixed( IF( ( ( ele % ANGLE_90_DEG_Q22 ) == 0 ) && ( ( ele % ANGLE_180_DEG_Q22 ) != 0 ) ) { *azi_wrapped = 0; - WHILE( ele > ANGLE_90_DEG_Q22 ) + move32(); + WHILE( GT_32( ele, ANGLE_90_DEG_Q22 ) ) { - ele -= ANGLE_360_DEG_Q22; + ele = L_sub( ele, ANGLE_360_DEG_Q22 ); } - WHILE( ele < -ANGLE_90_DEG_Q22 ) + WHILE( LT_32( ele, -ANGLE_90_DEG_Q22 ) ) { - ele += ANGLE_360_DEG_Q22; + ele = L_add( ele, ANGLE_360_DEG_Q22 ); } *ele_wrapped = ele; + move32(); } ELSE { /* Wrap elevation and adjust azimuth accordingly */ - WHILE( L_abs( ele ) > ANGLE_90_DEG_Q22 ) + WHILE( GT_32( L_abs( ele ), ANGLE_90_DEG_Q22 ) ) { /* Flip to other hemisphere */ - azi += ANGLE_180_DEG_Q22; + azi = L_add( azi, ANGLE_180_DEG_Q22 ); /* Compensate elevation accordingly */ - IF( ele > ANGLE_90_DEG_Q22 ) + IF( GT_32( ele, ANGLE_90_DEG_Q22 ) ) { - ele = ANGLE_180_DEG_Q22 - ele; + ele = L_sub( ANGLE_180_DEG_Q22, ele ); } - ELSE IF( ele < -ANGLE_90_DEG_Q22 ) + ELSE IF( LT_32( ele, -ANGLE_90_DEG_Q22 ) ) { - ele = -ANGLE_180_DEG_Q22 - ele; + ele = L_sub( -ANGLE_180_DEG_Q22, ele ); } } *azi_wrapped = wrap_azi_fixed( azi ); + move32(); *ele_wrapped = ele; + move32(); } return; @@ -2840,19 +2867,28 @@ void v_sort_ind_fixed( FOR( i = 0; i < len; i++ ) { idx[i] = i; + move16(); } FOR( i = len - 2; i >= 0; i-- ) { tempr = x[i]; + move32(); tempi = idx[i]; - FOR( j = i + 1; ( j < len ) && ( tempr > x[j] ); j++ ) + move16(); + test(); + FOR( j = add( i, 1 ); LT_16( j, len ) && GT_32( tempr, x[j] ); j++ ) { + test(); x[j - 1] = x[j]; + move32(); idx[j - 1] = idx[j]; + move16(); } x[j - 1] = tempr; + move32(); idx[j - 1] = tempi; + move16(); } return; @@ -2893,12 +2929,16 @@ Word16 is_IVAS_bitrate_fx( Word16 j; j = SIZE_IVAS_BRATE_TBL - IVAS_NUM_ACTIVE_BRATES; /* skip NO_DATA and SID bitrates */ + move16(); + + test(); WHILE( LE_16( j, SIZE_IVAS_BRATE_TBL ) && NE_32( ivas_total_brate, ivas_brate_tbl[j] ) ) { - j++; + test(); + j = add( j, 1 ); } - IF( j >= SIZE_IVAS_BRATE_TBL ) + IF( GE_16( j, SIZE_IVAS_BRATE_TBL ) ) { return 0; } @@ -2961,7 +3001,7 @@ Word16 is_SIDrate( test(); test(); - IF( EQ_32( ivas_total_brate, SID_1k75 ) || + if ( EQ_32( ivas_total_brate, SID_1k75 ) || EQ_32( ivas_total_brate, SID_2k40 ) || EQ_32( ivas_total_brate, IVAS_SID_5k2 ) ) { @@ -3023,7 +3063,7 @@ Word16 rand_triangular_signed_fx( Word16 tmp1, tmp2; Word16 exp1, exp = 1; move16(); - IF( LE_16( rand_val, 0 ) ) + IF( rand_val <= 0 ) { /* rand_val in [-1, 0] */ /*0.5f * (sqrtf(rand_val + 1.0f) - 1)*/ @@ -3056,7 +3096,6 @@ Word16 rand_triangular_signed_fx( * * calculates ceil(log2(val)) *-------------------------------------------------------------------*/ - Word16 ceil_log_2( UWord64 val ) { @@ -3065,70 +3104,263 @@ Word16 ceil_log_2( { assert( 0 ); } - return val <= 1 ? 0 : val <= 2 ? 1 - : val <= 4 ? 2 - : val <= 8 ? 3 - : val <= 16 ? 4 - : val <= 32 ? 5 - : val <= 64 ? 6 - : val <= 128 ? 7 - : val <= 256 ? 8 - : val <= 512 ? 9 - : val <= 1024 ? 10 - : val <= 2048 ? 11 - : val <= 4096 ? 12 - : val <= 8192 ? 13 - : val <= 16384 ? 14 - : val <= 32768 ? 15 - : val <= 65536 ? 16 - : val <= 131072 ? 17 - : val <= 262144 ? 18 - : val <= 524288 ? 19 - : val <= 1048576 ? 20 - : val <= 2097152 ? 21 - : val <= 4194304 ? 22 - : val <= 8388608 ? 23 - : val <= 16777216 ? 24 - : val <= 33554432 ? 25 - : val <= 67108864 ? 26 - : val <= 134217728 ? 27 - : val <= 268435456 ? 28 - : val <= 536870912 ? 29 - : val <= 1073741824 ? 30 - : val <= 2147483648 ? 31 - : val <= 4294967296 ? 32 - : val <= 8589934592 ? 33 - : val <= 17179869184 ? 34 - : val <= 34359738368 ? 35 - : val <= 68719476736 ? 36 - : val <= 137438953472 ? 37 - : val <= 274877906944 ? 38 - : val <= 549755813888 ? 39 - : val <= 1099511627776 ? 40 - : val <= 2199023255552 ? 41 - : val <= 4398046511104 ? 42 - : val <= 8796093022208 ? 43 - : val <= 17592186044416 ? 44 - : val <= 35184372088832 ? 45 - : val <= 70368744177664 ? 46 - : val <= 140737488355328 ? 47 - : val <= 281474976710656 ? 48 - : val <= 562949953421312 ? 49 - : val <= 1125899906842624 ? 50 - : val <= 2251799813685248 ? 51 - : val <= 4503599627370496 ? 52 - : val <= 9007199254740992 ? 53 - : val <= 18014398509481984 ? 54 - : val <= 36028797018963968 ? 55 - : val <= 72057594037927936 ? 56 - : val <= 144115188075855872 ? 57 - : val <= 288230376151711744 ? 58 - : val <= 576460752303423488 ? 59 - : val <= 1152921504606846976 ? 60 - : val <= 2305843009213693952 ? 61 - : val <= 4611686018427387904 ? 62 - : val <= 9223372036854775807 ? 63 - : 64; + ELSE IF( LE_64( val, 1 ) ) + { + return 0; + } + ELSE IF( LE_64( val, 2 ) ) + { + return 1; + } + ELSE IF( LE_64( val, 4 ) ) + { + return 2; + } + ELSE IF( LE_64( val, 8 ) ) + { + return 3; + } + ELSE IF( LE_64( val, 16 ) ) + { + return 4; + } + ELSE IF( LE_64( val, 32 ) ) + { + return 5; + } + ELSE IF( LE_64( val, 64 ) ) + { + return 6; + } + ELSE IF( LE_64( val, 128 ) ) + { + return 7; + } + ELSE IF( LE_64( val, 256 ) ) + { + return 8; + } + ELSE IF( LE_64( val, 512 ) ) + { + return 9; + } + ELSE IF( LE_64( val, 1024 ) ) + { + return 10; + } + ELSE IF( LE_64( val, 2048 ) ) + { + return 11; + } + ELSE IF( LE_64( val, 4096 ) ) + { + return 12; + } + ELSE IF( LE_64( val, 8192 ) ) + { + return 13; + } + ELSE IF( LE_64( val, 16384 ) ) + { + return 14; + } + ELSE IF( LE_64( val, 32768 ) ) + { + return 15; + } + ELSE IF( LE_64( val, 65536 ) ) + { + return 16; + } + ELSE IF( LE_64( val, 131072 ) ) + { + return 17; + } + ELSE IF( LE_64( val, 262144 ) ) + { + return 18; + } + ELSE IF( LE_64( val, 524288 ) ) + { + return 19; + } + ELSE IF( LE_64( val, 1048576 ) ) + { + return 20; + } + ELSE IF( LE_64( val, 2097152 ) ) + { + return 21; + } + ELSE IF( LE_64( val, 4194304 ) ) + { + return 22; + } + ELSE IF( LE_64( val, 8388608 ) ) + { + return 23; + } + ELSE IF( LE_64( val, 16777216 ) ) + { + return 24; + } + ELSE IF( LE_64( val, 33554432 ) ) + { + return 25; + } + ELSE IF( LE_64( val, 67108864 ) ) + { + return 26; + } + ELSE IF( LE_64( val, 134217728 ) ) + { + return 27; + } + ELSE IF( LE_64( val, 268435456 ) ) + { + return 28; + } + ELSE IF( LE_64( val, 536870912 ) ) + { + return 29; + } + ELSE IF( LE_64( val, 1073741824 ) ) + { + return 30; + } + ELSE IF( LE_64( val, 2147483648 ) ) + { + return 31; + } + ELSE IF( LE_64( val, 4294967296 ) ) + { + return 32; + } + ELSE IF( LE_64( val, 8589934592 ) ) + { + return 33; + } + ELSE IF( LE_64( val, 17179869184 ) ) + { + return 34; + } + ELSE IF( LE_64( val, 34359738368 ) ) + { + return 35; + } + ELSE IF( LE_64( val, 68719476736 ) ) + { + return 36; + } + ELSE IF( LE_64( val, 137438953472 ) ) + { + return 37; + } + ELSE IF( LE_64( val, 274877906944 ) ) + { + return 38; + } + ELSE IF( LE_64( val, 549755813888 ) ) + { + return 39; + } + ELSE IF( LE_64( val, 1099511627776 ) ) + { + return 40; + } + ELSE IF( LE_64( val, 2199023255552 ) ) + { + return 41; + } + ELSE IF( LE_64( val, 4398046511104 ) ) + { + return 42; + } + ELSE IF( LE_64( val, 8796093022208 ) ) + { + return 43; + } + ELSE IF( LE_64( val, 17592186044416 ) ) + { + return 44; + } + ELSE IF( LE_64( val, 35184372088832 ) ) + { + return 45; + } + ELSE IF( LE_64( val, 70368744177664 ) ) + { + return 46; + } + ELSE IF( LE_64( val, 140737488355328 ) ) + { + return 47; + } + ELSE IF( LE_64( val, 281474976710656 ) ) + { + return 48; + } + ELSE IF( LE_64( val, 562949953421312 ) ) + { + return 49; + } + ELSE IF( LE_64( val, 1125899906842624 ) ) + { + return 50; + } + ELSE IF( LE_64( val, 2251799813685248 ) ) + { + return 51; + } + ELSE IF( LE_64( val, 4503599627370496 ) ) + { + return 52; + } + ELSE IF( LE_64( val, 9007199254740992 ) ) + { + return 53; + } + ELSE IF( LE_64( val, 18014398509481984 ) ) + { + return 54; + } + ELSE IF( LE_64( val, 36028797018963968 ) ) + { + return 55; + } + ELSE IF( LE_64( val, 72057594037927936 ) ) + { + return 56; + } + ELSE IF( LE_64( val, 144115188075855872 ) ) + { + return 57; + } + ELSE IF( LE_64( val, 288230376151711744 ) ) + { + return 58; + } + ELSE IF( LE_64( val, 576460752303423488 ) ) + { + return 59; + } + ELSE IF( LE_64( val, 1152921504606846976 ) ) + { + return 60; + } + ELSE IF( LE_64( val, 2305843009213693952 ) ) + { + return 61; + } + ELSE IF( LE_64( val, 4611686018427387904 ) ) + { + return 62; + } + ELSE IF( LE_64( val, 9223372036854775807 ) ) + { + return 63; + } + return 64; } @@ -3142,7 +3374,7 @@ Word16 ceil_log_2( Word64 var_32_fx( const Word32 *x, /* i : input vector */ - const int16_t len, /* i : length of inputvector */ + const Word16 len, /* i : length of inputvector */ Word16 q /* q : q-factor for the array */ ) { @@ -3150,23 +3382,25 @@ Word64 var_32_fx( Word64 mean, var; mean = 0; + move64(); var = 0; + move64(); FOR( int i = 0; i < len; i++ ) { - mean = mean + x[i]; + mean = W_add( mean, x[i] ); } - mean = mean / len; + mean = mean / len; /* NOTE: No BASOP for 64 bit division */ FOR( int i = 0; i < len; i++ ) { - var = var + Mpy_32_32( L_sub( x[i], (Word32) mean ), L_sub( x[i], (Word32) mean ) ); + var = W_add( var, Mpy_32_32( L_sub( x[i], W_extract_l( mean ) ), L_sub( x[i], W_extract_l( mean ) ) ) ); } - var = var << ( 31 - q ); + var = W_shl( var, sub( 31, q ) ); - var = var / len; + var = var / len; /* NOTE: No BASOP for 64 bit division */ return var; } diff --git a/lib_com/wi_fx.c b/lib_com/wi_fx.c index 602487f71..391f6d9e5 100644 --- a/lib_com/wi_fx.c +++ b/lib_com/wi_fx.c @@ -160,8 +160,11 @@ void DTFS_sub_fx( move16(); } tmp->lag_fx = s_max( X1.lag_fx, X2.lag_fx ); + move16(); tmp->nH_fx = s_max( X1.nH_fx, X2.nH_fx ); + move16(); tmp->nH_4kHz_fx = s_max( X1.nH_4kHz_fx, X2.nH_4kHz_fx ); + move16(); tmp->upper_cut_off_freq_of_interest_fx = X1.upper_cut_off_freq_of_interest_fx; move16(); tmp->upper_cut_off_freq_fx = X1.upper_cut_off_freq_fx; @@ -180,7 +183,6 @@ static void DTFS_fast_fs_inv_fx( DTFS_STRUCTURE *X_fx, Word16 *out_fx, Word16 N_ Word16 dbuf_fx[256 + 1]; M_2 = s_min( shr( X_fx->lag_fx, 1 ), X_fx->nH_fx ); - move16(); N_2 = shr( N_fx, 1 ); s = negate( X_fx->Q ); @@ -193,7 +195,7 @@ static void DTFS_fast_fs_inv_fx( DTFS_STRUCTURE *X_fx, Word16 *out_fx, Word16 N_ { dbuf_fx[2 * i] = shl_r( X_fx->a_fx[i], s ); move16(); - dbuf_fx[2 * i + 1] = shl_r( X_fx->b_fx[i], s ); + dbuf_fx[add( 2 * i, 1 )] = shl_r( X_fx->b_fx[i], s ); move16(); } @@ -201,7 +203,7 @@ static void DTFS_fast_fs_inv_fx( DTFS_STRUCTURE *X_fx, Word16 *out_fx, Word16 N_ { dbuf_fx[2 * i] = 0; move16(); - dbuf_fx[2 * i + 1] = 0; + dbuf_fx[add( 2 * i, 1 )] = 0; move16(); } @@ -265,6 +267,7 @@ static Word16 DTFS_alignment_weight_fx( Word32 L_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif diff_fx = 0; /* to avoid compilation warnings */ @@ -278,6 +281,7 @@ static Word16 DTFS_alignment_weight_fx( FOR( k = 0; k < M + 1; k++ ) { tmplpc_fx[k] = mult_r( LPC1[k], pwf_fx[k] ); /* Q12 */ + move16(); } DTFS_zeroFilter_fx( &X1, tmplpc_fx, M + 1, S_fx, C_fx ); @@ -287,6 +291,7 @@ static Word16 DTFS_alignment_weight_fx( FOR( k = 0; k < M + 1; k++ ) { tmplpc_fx[k] = mult_r( LPC2[k], pwf_fx[k] ); /* Q12 */ + move16(); } DTFS_zeroFilter_fx( &X2, tmplpc_fx, M + 1, S_fx, C_fx ); @@ -295,7 +300,7 @@ static Word16 DTFS_alignment_weight_fx( move16(); fshift_fx = Eshift; move16(); - Adiff_fx = (Word16) s_max( 768, mult_r( 4915, shl( X2.lag_fx, 7 ) ) ); /* Q7, 768=6*128, 4915 = 0.15*32768 */ + Adiff_fx = s_max( 768, mult_r( 4915, shl( X2.lag_fx, 7 ) ) ); /* Q7, 768=6*128, 4915 = 0.15*32768 */ if ( LT_16( X2.lag_fx, 60 ) ) { @@ -311,7 +316,7 @@ static Word16 DTFS_alignment_weight_fx( HalfLag = s_min( shr( X2.lag_fx, 1 ), X2.nH_4kHz_fx ); exp = norm_s( X2.lag_fx ); tmp = div_s( shl( 1, sub( 14, exp ) ), X2.lag_fx ); - L_tmp = L_shl( tmp, exp + 6 ); + L_tmp = L_shl( tmp, add( exp, 6 ) ); inv_lag = round_fx( L_tmp ); FOR( k = 0; k <= HalfLag; k++ ) @@ -323,6 +328,8 @@ static Word16 DTFS_alignment_weight_fx( ab1[k] = round_fx( L_mac0( L_mult0( X1.a_fx[k], X2.a_fx[k] ), X1.b_fx[k], X2.b_fx[k] ) ); /* Q(-15) */ ab2[k] = round_fx( L_msu0( L_mult0( X1.a_fx[k], X2.b_fx[k] ), X1.b_fx[k], X2.a_fx[k] ) ); /* Q(-15) */ #endif + move16(); + move16(); } start = sub( Eshift, Adiff_fx ); @@ -345,11 +352,13 @@ static Word16 DTFS_alignment_weight_fx( { #ifdef BASOP_NOGLOB corr_fx = L_mac0_o( corr_fx, ab1[k], cos_table[s_and( temp, 511 )], &Overflow ); - corr_fx = L_mac0_o( corr_fx, ab2[k], cos_table[s_and( ( temp + 128 ), 511 )], &Overflow ); + corr_fx = L_mac0_o( corr_fx, ab2[k], cos_table[s_and( add( temp, 128 ), 511 )], &Overflow ); #else corr_fx = L_mac0( corr_fx, ab1[k], cos_table[s_and( temp, 511 )] ); - corr_fx = L_mac0( corr_fx, ab2[k], cos_table[s_and( ( temp + 128 ), 511 )] ); + corr_fx = L_mac0( corr_fx, ab2[k], cos_table[s_and( add( temp, 128 ), 511 )] ); #endif + move32(); + move32(); temp = add( temp, temp1 ); } temp = sub( 8192, mult_r( 20972, abs_s( sub( n_fx, Eshift ) ) ) ); /* Q13, 20972 = Q21 of 0.01. */ @@ -384,7 +393,7 @@ static Word16 DTFS_alignment_weight_fx( #endif } - if ( diff_corr > 0 ) + IF( diff_corr > 0 ) { fshift_fx = n_fx; move16(); @@ -434,6 +443,7 @@ Word16 DTFS_alignment_full_fx( Word16 Eshift, Adiff_fx; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif /* Calculating the expected alignment shift */ @@ -570,7 +580,9 @@ void DTFS_phaseShift_fx( DTFS_STRUCTURE *X_fx, Word16 ph, Word16 Lag, Word16 *S_ temp = X_fx->a_fx[k]; L_temp = L_shr( L_temp2, 5 ); /* Q2 */ X_fx->a_fx[k] = round_fx( L_msu( L_mult( temp, C_fx[L_temp % ( 4 * Lag )] ), X_fx->b_fx[k], S_fx[L_temp % ( 4 * Lag )] ) ); /* X.Q */ + move16(); X_fx->b_fx[k] = round_fx( L_mac( L_mult( X_fx->b_fx[k], C_fx[L_temp % ( 4 * Lag )] ), temp, S_fx[L_temp % ( 4 * Lag )] ) ); + move16(); L_temp2 = L_add( L_temp2, ph ); } } @@ -582,7 +594,9 @@ void DTFS_phaseShift_fx( DTFS_STRUCTURE *X_fx, Word16 ph, Word16 Lag, Word16 *S_ temp = X_fx->a_fx[k]; L_temp = L_shr( L_negate( L_temp2 ), 5 ); /* Q2 */ X_fx->a_fx[k] = round_fx( L_mac( L_mult( temp, C_fx[L_temp % ( 4 * Lag )] ), X_fx->b_fx[k], S_fx[L_temp % ( 4 * Lag )] ) ); /* X.Q */ + move16(); X_fx->b_fx[k] = round_fx( L_msu( L_mult( X_fx->b_fx[k], C_fx[L_temp % ( 4 * Lag )] ), temp, S_fx[L_temp % ( 4 * Lag )] ) ); + move16(); L_temp2 = L_add( L_temp2, ph ); } } @@ -623,6 +637,7 @@ void Q2phaseShift_fx( Word32 temp2; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif temp2 = L_deposit_l( 0 ); @@ -643,6 +658,8 @@ void Q2phaseShift_fx( X_fx->b_fx[k] = round_fx( L_mac( L_mult( X_fx->b_fx[k], C_fx[temp2 % ( 4 * Lag )] ), temp, S_fx[temp2 % ( 4 * Lag )] ) ); temp2 = L_add( temp2, (Word32) ph ); #endif + move16(); + move16(); } } @@ -661,6 +678,8 @@ void Q2phaseShift_fx( X_fx->b_fx[k] = round_fx( L_msu( L_mult( X_fx->b_fx[k], C_fx[temp2 % ( 4 * Lag )] ), temp, S_fx[temp2 % ( 4 * Lag )] ) ); temp2 = add( (Word16) temp2, negate( ph ) ); #endif + move16(); + move16(); } } } @@ -712,10 +731,12 @@ void DTFS_zeroPadd_fx( /* recompute nH for new lag */ diff_fx = find_rem( 12800, X_fx->lag_fx, &rem_fx ); X_fx->nH_fx = find_rem( X_fx->upper_cut_off_freq_fx, diff_fx, &rem_fx ); + move16(); if ( GE_16( sub( X_fx->upper_cut_off_freq_fx, shr( (Word16) L_mult( diff_fx, X_fx->nH_fx ), 1 ) ), diff_fx ) ) { X_fx->nH_fx = add( X_fx->nH_fx, 1 ); + move16(); } } /*===================================================================*/ @@ -762,6 +783,7 @@ void DTFS_to_fs_fx( Word32 L_tmp1; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif IF( !FR_flag ) @@ -869,19 +891,23 @@ void DTFS_to_fs_fx( sum = add( sum, temp ); } La[k] = L_shr( L_a, 6 ); /* Q8 of a[k]*2.0 */ + move32(); Lb[k] = L_shr( L_b, 6 ); /* Q8 of b[k]*2.0 */ + move32(); L_temp = L_abs( La[k] ); if ( GT_32( L_temp, Labmax ) ) { Labmax = L_temp; + move32(); } L_temp = L_abs( Lb[k] ); if ( GT_32( L_temp, Labmax ) ) { Labmax = L_temp; + move32(); } } @@ -942,16 +968,20 @@ void DTFS_to_fs_fx( { #ifdef BASOP_NOGLOB X_fx->a_fx[k] = round_fx_o( L_shl_o( La[k], temp, &Overflow ), &Overflow ); /* Q(8+temp-16)=Q(temp-8) */ + move16(); X_fx->a_fx[k] = mult_ro( X_fx->a_fx[k], inv_lag, &Overflow ); move16(); /* Q(temp-8+19+1-16)=Q(temp-4) of a[k]*2.0/N */ X_fx->b_fx[k] = round_fx_o( L_shl_o( Lb[k], temp, &Overflow ), &Overflow ); /* Q(8+temp-16)=Q(temp-8) */ + move16(); X_fx->b_fx[k] = mult_ro( X_fx->b_fx[k], inv_lag, &Overflow ); move16(); /* Q(temp-8+19+1-16)=Q(temp-4) of b[k]*2.0/N */ #else X_fx->a_fx[k] = round_fx( L_shl( La[k], temp ) ); /* Q(8+temp-16)=Q(temp-8) */ + move16(); X_fx->a_fx[k] = mult_r( X_fx->a_fx[k], inv_lag ); move16(); /* Q(temp-8+19+1-16)=Q(temp-4) of a[k]*2.0/N */ X_fx->b_fx[k] = round_fx( L_shl( Lb[k], temp ) ); /* Q(8+temp-16)=Q(temp-8) */ + move16(); X_fx->b_fx[k] = mult_r( X_fx->b_fx[k], inv_lag ); move16(); /* Q(temp-8+19+1-16)=Q(temp-4) of b[k]*2.0/N */ #endif @@ -967,12 +997,14 @@ void DTFS_to_fs_fx( X_fx->a_fx[k] = round_fx( L_shl( La[k], temp ) ); /* Q(8+temp-16)=Q(temp-8) */ X_fx->a_fx[k] = mult_r( X_fx->a_fx[k], inv_lag ); #endif + move16(); move16(); /* Q(temp-8+19+1-16)=Q(temp-4) of a[k]*1.0/N */ X_fx->b_fx[k] = 0; move16(); } X_fx->Q = sub( temp, 4 ); + move16(); tmp = s_min( shr( X_fx->lag_fx, 1 ), sub( MAXLAG_WI, 1 ) ); FOR( k = add( nH_band, 1 ); k <= tmp; k++ ) @@ -1028,11 +1060,12 @@ static void DTFS_transform_fx( Word32 Lw_fx, temp32_fx; Word16 x2_256_fx[256], x1_256_fx[256]; Word16 k, m, l1; - Word32 x_r_fx[256] = { 0 }; + Word32 x_r_fx[256]; Word32 tmptmp3_40_fx; Word16 temp_w; Word16 N1; Word16 nrg_flag = 0; + move16(); Word32 L_tmp; Word16 tmp, tmp1, tmp2, frac, exp1, exp2; Word16 expa, expb, fraca, fracb, scale; @@ -1044,6 +1077,8 @@ static void DTFS_transform_fx( DTFS_STRUCTURE *tmp3_dtfs_fx; error = IVAS_ERR_OK; + move16(); + set32_fx( x_r_fx, 0, 256 ); IF( ( error = DTFS_new_fx( &tmp1_dtfs_fx ) ) != IVAS_ERR_OK ) { @@ -1062,7 +1097,7 @@ static void DTFS_transform_fx( DTFS_copy_fx( tmp2_dtfs_fx, X2_fx ); tmp2 = 0; /* to avoid compilation warnings */ - + move16(); DTFS_fast_fs_inv_fx( tmp1_dtfs_fx, x1_256_fx, 256, 8 ); DTFS_fast_fs_inv_fx( tmp2_dtfs_fx, x2_256_fx, 256, 8 ); @@ -1082,7 +1117,7 @@ static void DTFS_transform_fx( { tmp2 = negate( tmp1 ); } - tmp = div_s( shl( 1, ( 14 - exp2 ) ), tmp2 ); /* 29-exp2 */ + tmp = div_s( shl( 1, sub( 14, exp2 ) ), tmp2 ); /* 29-exp2 */ L_tmp = L_shl( tmp, 16 ); if ( tmp1 < 0 ) @@ -1169,11 +1204,12 @@ static void DTFS_transform_fx( w_fx = div_s( fracb, fraca ); exp1 = sub( expb, expa ); - w_fx = shl( w_fx, exp1 - 1 ); /*Q14*/ + w_fx = shl( w_fx, sub( exp1, 1 ) ); /*Q14*/ } ELSE { w_fx = 0; + move16(); } Lw_fx = L_deposit_h( w_fx ); @@ -1189,6 +1225,7 @@ static void DTFS_transform_fx( Lw_fx = L_add( Lw_fx, inv_fx ); /* (i+1)*inv */ /* mapping phase to 8x256 length signal */ temp32_fx = phase_fx[i]; /* Q(27-11)=Q16 due to multiplication by pow(2.0,11) */ + move32(); j = rint_new_fx( temp32_fx ); j = s_and( j, 0x07ff ); @@ -1201,15 +1238,16 @@ static void DTFS_transform_fx( FOR( j = 0; j < 12; j++ ) { - m = ( 1000 * LL + l1 - OSLENGTH / 2 + j ) % LL; /* use circular addressing */ + m = L_add( 1000 * LL - OSLENGTH / 2, add( l1, j ) ) % LL; /* use circular addressing */ x_r_fx[m] = L_mac( L_mult( x1_256_fx[m], temp_w ), x2_256_fx[m], w_fx ); + move32(); } tmptmp3_40_fx = L_deposit_l( 0 ); FOR( j = 0; j < 12; j++ ) { - m = ( 1000 * LL + l1 - OSLENGTH / 2 + j ) % LL; /* use circular addressing */ + m = L_add( 1000 * LL - OSLENGTH / 2, add( l1, j ) ) % LL; /* use circular addressing */ tmptmp3_40_fx = L_add( tmptmp3_40_fx, Mult_32_16( x_r_fx[m], sinc_fx[k][j] ) ); } #ifdef BASOP_NOGLOB @@ -1217,6 +1255,7 @@ static void DTFS_transform_fx( #else out_fx[i] = round_fx( L_shl( tmptmp3_40_fx, 2 ) ); #endif + move16(); } @@ -1264,6 +1303,7 @@ void DTFS_zeroFilter_fx( Word16 Qmin, Qab[MAXLAG_WI], na, nb; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif Qmin = 32767; move16(); @@ -1282,11 +1322,11 @@ void DTFS_zeroFilter_fx( FOR( n = 0; n < N; n++ ) { #ifdef BASOP_NOGLOB - sum1_fx = L_mac_o( sum1_fx, LPC[n], C_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )], &Overflow ); /* Q(12+15+1) */ + sum1_fx = L_mac_o( sum1_fx, LPC[n], C_fx[i_mult( 4, temp2 ) % i_mult( 4, X_fx->lag_fx )], &Overflow ); /* Q(12+15+1) */ sum2_fx = L_mac_o( sum2_fx, LPC[n], S_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )], &Overflow ); #else - sum1_fx = L_mac( sum1_fx, LPC[n], C_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )] ); /* Q(12+15+1) */ - sum2_fx = L_mac( sum2_fx, LPC[n], S_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )] ); + sum1_fx = L_mac( sum1_fx, LPC[n], C_fx[i_mult( 4, temp2 ) % i_mult( 4, X_fx->lag_fx )] ); /* Q(12+15+1) */ + sum2_fx = L_mac( sum2_fx, LPC[n], S_fx[i_mult( 4, temp2 ) % i_mult( 4, X_fx->lag_fx )] ); #endif temp2 = add( temp2, temp ); } @@ -1336,8 +1376,11 @@ void DTFS_zeroFilter_fx( X_fx->a_fx[k] = round_fx( (Word32) L_shl( L_temp1, nb ) ); /* Q(13+Q+nb-16)=Q(Q+nb-3) */ X_fx->b_fx[k] = round_fx( (Word32) L_shl( L_temp2, nb ) ); /* Q(Q+nb-3) */ #endif + move32(); + move32(); Qab[k] = sub( nb, 3 ); + move16(); if ( LT_16( Qab[k], Qmin ) ) { @@ -1455,9 +1498,12 @@ void DTFS_poleFilter_fx_9( } nb = sub( nb, 1 ); /* leave one more sign bit */ X_fx->a_fx[k] = round_fx( (Word32) L_shl( sum1_fx, nb ) ); /* Q(-3+n2+Q+exp+nb ) */ + move16(); X_fx->b_fx[k] = round_fx( (Word32) L_shl( sum2_fx, nb ) ); + move16(); Qab[k] = add( sub( nb, 3 ), n2_temp1 ); + move16(); if ( LT_16( Qab[k], Qmin ) ) { @@ -1466,7 +1512,7 @@ void DTFS_poleFilter_fx_9( } } /* bring to the same Q */ - move16(); + FOR( k = 0; k <= HalfLag; k++ ) { X_fx->a_fx[k] = shl( X_fx->a_fx[k], sub( Qmin, Qab[k] ) ); @@ -1476,6 +1522,7 @@ void DTFS_poleFilter_fx_9( } X_fx->Q = add( X_fx->Q, Qmin ); + move16(); return; } /*===================================================================*/ @@ -1539,20 +1586,21 @@ void DTFS_adjustLag_fx( tmp = div_s( shl( 1, sub( 14, exp ) ), X_DTFS_FX->lag_fx ); /* 29 - exp */ L_tmp = L_mult0( 12800, tmp ); temp32_fx = L_shl( L_tmp, sub( exp, 23 ) ); - diff_fx = (Word16) L_shl( L_tmp, sub( exp, 29 ) ); + diff_fx = extract_l( L_shl( L_tmp, sub( exp, 29 ) ) ); exp = norm_s( diff_fx ); tmp = div_s( shl( 1, sub( 14, exp ) ), diff_fx ); /* 29 - exp */ L_tmp = L_mult0( X_DTFS_FX->upper_cut_off_freq_fx, tmp ); - X_DTFS_FX->nH_fx = (Word16) L_shl( L_tmp, sub( exp, 29 ) ); + X_DTFS_FX->nH_fx = extract_l( L_shl( L_tmp, sub( exp, 29 ) ) ); + move16(); L_tmp = L_mult0( 4000, tmp ); tempnH_fx = L_shl( L_tmp, sub( exp, 23 ) ); X_DTFS_FX->nH_4kHz_fx = extract_l( L_shl( L_tmp, sub( exp, 29 ) ) ); + move16(); - - if ( GE_16( sub( X_DTFS_FX->upper_cut_off_freq_fx, shr( (Word16) L_mult( diff_fx, X_DTFS_FX->nH_fx ), 1 ) ), diff_fx ) ) + if ( GE_16( sub( X_DTFS_FX->upper_cut_off_freq_fx, shr( extract_l( L_mult( diff_fx, X_DTFS_FX->nH_fx ) ), 1 ) ), diff_fx ) ) { X_DTFS_FX->nH_fx = add( X_DTFS_FX->nH_fx, 1 ); move16(); @@ -1595,9 +1643,11 @@ Word32 DTFS_getEngy_fx( { Word16 k, HalfLag_fx; Word32 en_fx = 0; + move32(); Word16 temp_a_fx, temp_b_fx; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif HalfLag_fx = shr( sub( X_fx->lag_fx, 1 ), 1 ); HalfLag_fx = s_min( HalfLag_fx, X_fx->nH_fx ); @@ -1619,6 +1669,7 @@ Word32 DTFS_getEngy_fx( en_fx = L_shr( en_fx, 1 ); temp_a_fx = X_fx->a_fx[0]; + move16(); #ifdef BASOP_NOGLOB en_fx = L_mac0_o( en_fx, temp_a_fx, temp_a_fx, &Overflow ); #else @@ -1668,8 +1719,10 @@ Word32 DTFS_getEngy_P2A_fx( { Word16 k, HalfLag_fx; Word32 en_fx = 0; + move32(); #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif HalfLag_fx = shr( sub( X_fx->lag_fx, 1 ), 1 ); HalfLag_fx = s_min( HalfLag_fx, X_fx->nH_fx ); @@ -1740,7 +1793,7 @@ static Word32 DTFS_setEngy_fx( en1_fx = DTFS_getEngy_fx( X_DTFS_FX ); move16(); - if ( en1_fx == 0 ) + IF( en1_fx == 0 ) { return 0; } @@ -1748,6 +1801,7 @@ static Word32 DTFS_setEngy_fx( IF( en2_fx == 0 ) { factor_fx = 0; + move32(); } ELSE { @@ -1780,9 +1834,11 @@ static Word32 DTFS_setEngy_fx( { L_temp_fx = Mult_32_16( factor_fx, X_DTFS_FX->a_fx[k] ); /* Q(temp+X1.Q-15) */ X_DTFS_FX->a_fx[k] = round_fx( L_temp_fx ); /* Q(temp+X1.Q-15-16)=Q(temp+X1.Q-31); */ + move16(); L_temp_fx = Mult_32_16( factor_fx, X_DTFS_FX->b_fx[k] ); /* Q(temp+X1.Q-15) */ X_DTFS_FX->b_fx[k] = round_fx( L_temp_fx ); /* Q(temp+X1.Q-15-16)=Q(temp+X1.Q-31); */ + move16(); } @@ -1830,6 +1886,7 @@ void DTFS_car2pol_fx( Word16 exp, tmp, frac; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif HalfLag_fx = s_min( shr( sub( X_fx->lag_fx, 1 ), 1 ), X_fx->nH_fx ); FOR( k = 1; k <= HalfLag_fx; k++ ) @@ -1853,6 +1910,7 @@ void DTFS_car2pol_fx( Ltemp_fx = Isqrt_lc( L_deposit_h( tmp ), &exp ); /* Q(31-exp) */ X_fx->a_fx[k] = extract_h( L_shl( Ltemp_fx, sub( add( X_fx->Q, exp ), 15 ) ) ); /* Q */ + move16(); } ELSE { @@ -1880,6 +1938,7 @@ void DTFS_car2pol_fx( Ltemp_fx = Isqrt_lc( L_deposit_h( tmp ), &exp ); /* Q(31-exp) */ X_fx->a_fx[k] = extract_h( L_shl( Ltemp_fx, sub( add( X_fx->Q, exp ), 15 ) ) ); /* Q */ + move16(); } ELSE { @@ -1943,9 +2002,12 @@ Word32 DTFS_setEngyHarm_fx( Word16 exp, tmp, expa, expb, fraca, fracb, scale; Word32 L_tmp; Word32 Lacc_max = 0; + move32(); Word16 expp = 0; + move16(); #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif @@ -2058,15 +2120,18 @@ Word32 DTFS_setEngyHarm_fx( #else X_fx->a_fx[k] = round_fx( L_temp_fx ); /* Q(temp+X1.Q-15-16)=Q(temp+X1.Q-31); */ #endif + move16(); } if ( GE_32( Lacc_max, 2147483647 ) ) { *Qa_fx = sub( sub( X_fx->Q, add( 1, s_min( 2, expp ) ) ), exp ); + move16(); } else { *Qa_fx = sub( sub( X_fx->Q, 1 ), exp ); + move16(); } return en1_fx; /* Q(2*X1.Q) */ @@ -2120,20 +2185,23 @@ static void cubicPhase_fx( Word16 dbgshft; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif num_flag = 0; + move16(); den_flag = 0; + move16(); N = sub( N, L2 ); exp = norm_s( L1 ); tmp = div_s( shl( 1, sub( 14, exp ) ), L1 ); - L_tmp = L_shl( tmp, exp + 6 ); + L_tmp = L_shl( tmp, add( exp, 6 ) ); f1 = round_fx( L_tmp ); exp = norm_s( L2 ); tmp = div_s( shl( 1, sub( 14, exp ) ), L2 ); - L_tmp = L_shl( tmp, exp + 6 ); + L_tmp = L_shl( tmp, add( exp, 6 ) ); Ltemp4 = inverse_table[L2]; f2 = round_fx( L_tmp ); @@ -2173,7 +2241,7 @@ static void cubicPhase_fx( Ltemp2 = L_shl( Mult_32_16( Ltemp2, N ), 1 ); /* IF(N%2) */ - if ( s_and( N, 1 ) == 1 ) + if ( EQ_16( s_and( N, 1 ), 1 ) ) { Ltemp2 = L_add( Ltemp2, 1 ); } @@ -2326,7 +2394,7 @@ static void cubicPhase_fx( tmp = negate( tmp ); } - Lacc = L_shl( tmp, exp + 27 ); + Lacc = L_shl( tmp, add( exp, 27 ) ); Lacc = L_add( Lacc, 0x08000 ); c1 = extract_h( Lacc ); /* c1 in Q27 */ @@ -2384,7 +2452,7 @@ static void cubicPhase_fx( N2 = L_shl( L_mult0( n, n ), 14 ); Ltemp3 = L_shl( Mult_32_16( N2, n ), 1 ); - if ( s_and( N, 1 ) == 1 ) + if ( EQ_16( s_and( N, 1 ), 1 ) ) { Ltemp3 = L_add( Ltemp3, 1 ); } @@ -2453,6 +2521,7 @@ void DTFS_to_erb_fx( Word16 expa, expb, fraca, fracb, scale; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif const Word16 *erb_fx = NULL; num_erb_fx = NUM_ERB_NB; @@ -2556,6 +2625,7 @@ void DTFS_to_erb_fx( out_fx[i] = round_fx( L_tmp ); #endif + move16(); } } return; @@ -2599,6 +2669,7 @@ void erb_slot_fx( Word16 fraca, fracb, expa, expb, scale; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif const Word16 *erb_fx = NULL; /*move16(); */ @@ -2610,14 +2681,12 @@ void erb_slot_fx( upper_cut_off_freq_fx = 4000; move16(); erb_fx = &( erb_NB_fx[0] ); - move16(); } ELSE IF( EQ_16( num_erb_fx, NUM_ERB_WB ) ) { upper_cut_off_freq_fx = 6400; move16(); erb_fx = &( erb_WB_fx[0] ); - move16(); } exp = norm_s( lag_fx ); @@ -2688,6 +2757,7 @@ void erb_slot_fx( #else mfreq_fx[j] = round_fx( L_shl( mf_fx[j], 11 ) ); /* Q15 */ #endif + move16(); IF( GT_16( out_fx[j], 1 ) ) { expb = norm_l( mf_fx[j] ); @@ -2771,6 +2841,7 @@ void DTFS_erb_inv_fx( Word16 exp, tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif IF( EQ_16( num_erb_fx, NUM_ERB_NB ) ) @@ -2809,7 +2880,7 @@ void DTFS_erb_inv_fx( exp = norm_s( X_fx->lag_fx ); tmp = div_s( shl( 1, sub( 14, exp ) ), X_fx->lag_fx ); /* 29-exp */ - diff_fx = shl( tmp, exp - 10 ); /* Q19 */ + diff_fx = shl( tmp, sub( exp, 10 ) ); /* Q19 */ min_q = EVS_SW_MAX; move16(); @@ -2861,7 +2932,9 @@ void DTFS_erb_inv_fx( #else X_fx->a_fx[i] = round_fx( L_shl( Ltemp_fx, d2h ) ); /* Q(28-n+d2h) */ #endif + move16(); q[i] = add( sub( 28, n ), d2h ); + move16(); min_q = s_min( min_q, q[i] ); BREAK; @@ -2876,6 +2949,7 @@ void DTFS_erb_inv_fx( } X_fx->Q = min_q; + move16(); return; } @@ -2987,8 +3061,10 @@ void erb_add_fx( move16(); } ELSE + { curr_erb_fx[i] = 0; - move16(); + move16(); + } } tmp_fx = add( shl( index_fx[1], 3 ), index_fx[1] ); /* tmp=9*index[1] */ @@ -3002,19 +3078,23 @@ void erb_add_fx( IF( EQ_16( num_erb_fx, NUM_ERB_NB ) ) { curr_erb_fx[i] = add( AmpCB2_NB_fx[sub( add( tmp_fx, i ), 11 )], t_prev_erb_fx[i] ); /* Q13+Q13=Q13 */ + move16(); curr_erb_fx[i] = s_max( 0, curr_erb_fx[i] ); move16(); } ELSE IF( EQ_16( num_erb_fx, NUM_ERB_WB ) ) { curr_erb_fx[i] = add( AmpCB2_WB_fx[sub( add( tmp2_fx, i ), 11 )], t_prev_erb_fx[i] ); /* Q13 */ + move16(); curr_erb_fx[i] = s_max( 0, curr_erb_fx[i] ); move16(); } } ELSE + { curr_erb_fx[i] = 0; - move16(); + move16(); + } } return; } @@ -3075,17 +3155,24 @@ void quant_target_fx( DTFS_STRUCTURE *X_fx, const Word16 *curr_lpc, Word16 *w, W tmp = round_fx( L_shl( Lacc, exp ) ); exp = sub( sub( 30, exp ), 23 ); IF( tmp ) - tmp = div_s( 16384, tmp ); /* 15+exp1 */ + { + tmp = div_s( 16384, tmp ); /* 15+exp1 */ + } ELSE + { tmp = 0; + move16(); + } Ltemp = L_deposit_h( tmp ); tmp = round_fx( Ltemp ); /* tmp in Q(22-n) */ Ltemp1 = Mult_32_16( Ltemp1, tmp ); /* Q(30-n) */ n = sub( 8, exp ); w[0] = round_fx( L_shl( Ltemp1, n ) ); /* w[0] in Q15 */ + move16(); Ltemp2 = Mult_32_16( Ltemp2, tmp ); w[1] = round_fx( L_shl( Ltemp2, n ) ); /* w[1] in Q15 */ + move16(); logLag = log10_fx( X_fx->lag_fx ); /* logLag=10*log10(lag), Q23 */ Ltemp3 = L_shl( L_mult( shl( X_fx->Q, 1 ), 24660 ), 9 ); /* Ltemp3=2Q*10log10(2), Q23 */ @@ -3096,20 +3183,28 @@ void quant_target_fx( DTFS_STRUCTURE *X_fx, const Word16 *curr_lpc, Word16 *w, W Ltemp = L_add( L_sub( Ltemp, Ltemp3 ), logLag ); /* Ltemp=10*log10(lag*eng), Q23 */ target[0] = round_fx( L_shl( Mult_32_16( Ltemp, 0x6666 ), 1 ) ); /* Q11 */ + move16(); /* Process high band */ Ltemp = DTFS_setEngyHarm_fx( 2828, X_fx->upper_cut_off_freq_of_interest_fx, 2828, X_fx->upper_cut_off_freq_fx, 1, 0, &Qh, X_fx ); Ltemp = log10_fx( Ltemp ); Ltemp = L_add( L_sub( Ltemp, Ltemp3 ), logLag ); /* Ltemp=10*log10(lag*eng), Q23 */ target[1] = round_fx( L_shl( Mult_32_16( Ltemp, 0x6666 ), 1 ) ); /* Q11 */ + move16(); /* Need to unify the Q factors of both bands */ X_fx->Q = s_min( Ql, Qh ); /* set Q factor to be the smaller one */ - n = sub( Ql, Qh ); /* compare band Q factors */ + move16(); + n = sub( Ql, Qh ); /* compare band Q factors */ IF( n < 0 ) - rshiftHarmBand_fx( X_fx, 2828, X_fx->upper_cut_off_freq_fx, n ); - ELSE IF( n > 0 ) rshiftHarmBand_fx( X_fx, 0, 2828, sub( Qh, Ql ) ); + { + rshiftHarmBand_fx( X_fx, 2828, X_fx->upper_cut_off_freq_fx, n ); + } + ELSE IF( n > 0 ) + { + rshiftHarmBand_fx( X_fx, 0, 2828, sub( Qh, Ql ) ); + } } /*==========================================================================*/ @@ -3152,6 +3247,8 @@ ivas_error WIsyn_fx( { Word16 i; UWord16 I = 1, flag = 0; + move16(); + move16(); /* Word16 *phase_fx = (Word16*)malloc(sizeof(Word16) * (2*N)); new operator used size 2*N */ Word32 phase_fx[WI_FX_phase_fx]; Word16 alignment_fx; @@ -3166,6 +3263,7 @@ ivas_error WIsyn_fx( ivas_error error; error = IVAS_ERR_OK; + move16(); IF( ( error = DTFS_new_fx( &CURRCW_DTFS_FX ) ) != IVAS_ERR_OK ) { @@ -3217,7 +3315,9 @@ ivas_error WIsyn_fx( IF( temp < 0 ) - temp = add( temp, shl( CURRCW_DTFS_FX->lag_fx, 7 ) ); /* Q7 */ + { + temp = add( temp, shl( CURRCW_DTFS_FX->lag_fx, 7 ) ); /* Q7 */ + } find_rem( temp, shl( CURRCW_DTFS_FX->lag_fx, 7 ), &tmp_fx ); /* Q7 */ IF( FR_flag == 0 ) @@ -3241,7 +3341,7 @@ ivas_error WIsyn_fx( tmp = shl( CURRCW_DTFS_FX->lag_fx, 7 ); exp = norm_s( tmp ); tmp = div_s( shl( 1, sub( 14, exp ) ), tmp ); /* 22-exp */ - L_tmp = L_shl( L_mult( temp, tmp ), exp + 1 ); + L_tmp = L_shl( L_mult( temp, tmp ), add( exp, 1 ) ); tmp_fx = round_fx( L_tmp ); } ELSE IF( alignment_fx < 0 ) @@ -3250,7 +3350,7 @@ ivas_error WIsyn_fx( tmp = shl( CURRCW_DTFS_FX->lag_fx, 7 ); exp = norm_s( tmp ); tmp = div_s( shl( 1, sub( 14, exp ) ), tmp ); /* 22-exp */ - L_tmp = L_shl( L_mult( temp, tmp ), exp + 1 ); + L_tmp = L_shl( L_mult( temp, tmp ), add( exp, 1 ) ); tmp_fx = negate( round_fx( L_tmp ) ); } ELSE @@ -3260,7 +3360,7 @@ ivas_error WIsyn_fx( tmp = shl( CURRCW_DTFS_FX->lag_fx, 7 ); exp = norm_s( tmp ); tmp = div_s( shl( 1, sub( 14, exp ) ), tmp ); /* 22-exp */ - L_tmp = L_shl( L_mult( temp, tmp ), exp + 1 ); + L_tmp = L_shl( L_mult( temp, tmp ), add( exp, 1 ) ); tmp_fx = round_fx( L_tmp ); } @@ -3301,9 +3401,10 @@ ivas_error WIsyn_fx( { /* empty loop */ } - L_temp = L_temp & 0x7fff; + L_temp = L_and( L_temp, 0x7fff ); move16(); /* fraction part */ *ph_offset_fx = extract_l( L_temp ); + move16(); /* free(phase_fx) ; */ free( CURRCW_DTFS_FX ); @@ -3342,12 +3443,18 @@ Word16 ppp_extract_pitch_period_fx( { Word16 i, j, k; Word16 spike = 0, range; + move16(); Word16 max = 0; + move16(); const Word16 *ptr = in + L_FRAME - l; Word32 en1 = 0, Lacc, L_tmp; + move32(); Word16 spike_near_edge = 0, scale; + move16(); Word16 pos_max, neg_max; Word16 spike_pos = 0, spike_neg = 0; + move16(); + move16(); Word16 x, tmp, expa, fraca, expb, fracb, scale1, exp; pos_max = -0x8000L; @@ -3362,7 +3469,7 @@ Word16 ppp_extract_pitch_period_fx( { x = abs_s( ptr[i] ); - if ( GT_16( x, max ) ) + IF( GT_16( x, max ) ) { max = x; move16(); @@ -3383,9 +3490,10 @@ Word16 ppp_extract_pitch_period_fx( /* search for neg spike around the pos spike */ FOR( j = spike - 10; j < spike + 10; j++ ) { - k = ( j + l ) % l; + k = add( j, l ) % l; + move16(); - if ( LT_16( ptr[k], neg_max ) ) + IF( LT_16( ptr[k], neg_max ) ) { neg_max = ptr[k]; move16(); @@ -3401,9 +3509,10 @@ Word16 ppp_extract_pitch_period_fx( /* search for pos spike around the neg spike */ FOR( j = spike - 10; j < spike + 10; j++ ) { - k = ( j + l ) % l; + k = add( j, l ) % l; + move16(); - if ( GT_16( ptr[k], pos_max ) ) + IF( GT_16( ptr[k], pos_max ) ) { pos_max = ptr[k]; move16(); @@ -3414,7 +3523,7 @@ Word16 ppp_extract_pitch_period_fx( } test(); - IF( ( LE_16( ( l - 1 - s_max( spike_pos, spike_neg ) ), 2 ) ) || ( LE_16( s_min( spike_pos, spike_neg ), 2 ) ) ) + IF( ( LE_16( sub( sub( l, 1 ), s_max( spike_pos, spike_neg ) ), 2 ) ) || ( LE_16( s_min( spike_pos, spike_neg ), 2 ) ) ) { *out_of_bound = 1; move16(); @@ -3425,10 +3534,14 @@ Word16 ppp_extract_pitch_period_fx( tmp = (Word16) ( L_max( L_mult( CUTFREE_REL_RANGE_Q2, l ), CUTFREE_ABS_RANGE_Q3 ) ); /* Q3 */ IF( tmp > 0 ) - tmp = add( tmp, 4 ); /* Q3 */ + { + tmp = add( tmp, 4 ); /* Q3 */ + } ELSE + { tmp = sub( tmp, 4 ); /* Q3 */ - range = shr( tmp, 3 ); /* Q0 */ + } + range = shr( tmp, 3 ); /* Q0 */ test(); IF( ( LT_16( spike, range ) ) || ( GE_16( add( spike, range ), l ) ) ) @@ -3566,6 +3679,8 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W { Word32 L_sum; Word32 maxPosEn_fx = 0, maxNegEn_fx = 0, L_temp; + move32(); + move32(); Word16 i; Word16 time_fx[256]; Word16 expa, expb, fraca, fracb, scale; @@ -3573,6 +3688,7 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W Word32 L_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif L_sum = DTFS_getEngy_P2A_fx( &X_fx ); /* 2Q */ DTFS_fast_fs_inv_fx( &X_fx, time_fx, 256, 8 ); @@ -3589,6 +3705,7 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W if ( GT_32( L_temp, maxPosEn_fx ) ) { maxPosEn_fx = L_temp; /* Q(1) */ + move32(); } } ELSE @@ -3596,6 +3713,7 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W if ( GT_32( L_temp, maxNegEn_fx ) ) { maxNegEn_fx = L_temp; /* Q(1) */ + move32(); } } } @@ -3604,15 +3722,17 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W IF( L_sum == 0 ) { *pos_fx = *neg_fx = 0; - move16(); - move16(); + move32(); + move32(); } ELSE { IF( maxPosEn_fx == 0 ) { *pos_fx = 0; + move32(); *Qpos = 31; + move16(); } ELSE { @@ -3647,13 +3767,17 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W L_tmp = Isqrt_lc( L_deposit_h( tmp ), &exp ); /* Q(31-exp) */ *pos_fx = L_tmp; + move32(); *Qpos = sub( 31, exp ); + move16(); } IF( maxNegEn_fx == 0 ) { *neg_fx = 0; + move32(); *Qneg = 31; + move16(); } ELSE { @@ -3664,7 +3788,7 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W #else fraca = extract_h( L_shl( maxNegEn_fx, expa ) ); #endif - expa = 30 - expa - 1; + expa = sub( Q29, expa ); // 30 - expa - 1; expb = norm_l( L_sum ); @@ -3673,7 +3797,7 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W #else fracb = round_fx( L_shl( L_sum, expb ) ); #endif - expb = 30 - expb - ( 2 * X_fx.Q ); + expb = sub( 30, add( expb, shl( X_fx.Q, 1 ) ) ); scale = shr( sub( fraca, fracb ), 15 ); @@ -3689,7 +3813,9 @@ void DTFS_peaktoaverage_fx( DTFS_STRUCTURE X_fx, Word32 *pos_fx, Word16 *Qpos, W L_tmp = Isqrt_lc( L_deposit_h( tmp ), &exp ); /* Q(31-exp) */ *neg_fx = L_tmp; - *Qneg = 31 - exp; + move32(); + *Qneg = sub( Q31, exp ); + move16(); } } } @@ -3777,19 +3903,19 @@ void GetSinCosTab_fx( Word16 L, Word16 *sinTab, Word16 *cosTab ) w = extract_h( Lw ); /* w in Q9 */ dl = extract_l( Lw ); /* dl has 16 bits left-over */ w = s_and( w, 511 ); - move16(); - move16(); /* t1=cos_table[w%512]; */ t1 = cos_table[w]; + move16(); /* t2=cos_table[add(w,1)%512]; */ dt = cos_diff_table[w]; + move16(); /*t2=cos_table[s_and(add(w,1),511)]; */ /*dt = sub(t2,t1); */ /* Q15 */ IF( dl < 0 ) { L_tmp = L_add( 65536, dl ); - Ltemp = ( L_tmp * dt ); + Ltemp = imult3216( L_tmp, dt ); } ELSE { @@ -3809,13 +3935,14 @@ void GetSinCosTab_fx( Word16 L, Word16 *sinTab, Word16 *cosTab ) move16(); /* t2=cos_table[add(w,1)%512]; */ dt = cos_diff_table[w]; + move16(); /*t2=cos_table[s_and(add(w,1),511)];move16(); */ /*dt = sub(t2,t1); */ /* dt=t2-t1, Q15 */ IF( dl < 0 ) { L_tmp = L_add( 65536, dl ); - Ltemp = ( L_tmp * dt ); + Ltemp = imult3216( L_tmp, dt ); } ELSE { @@ -3844,11 +3971,8 @@ static void c_fft_wi_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word1 Word16 n_2, K; Word16 ii_table[8]; - - move16(); - move16(); - K = 0; + move16(); FOR( k = 256; k > 0; k -= size ) { @@ -3856,45 +3980,47 @@ static void c_fft_wi_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word1 } n_2 = shr( size, 1 ); FOR( i = 1; i <= stage; i++ ) - ii_table[i - 1] = shr( size, i ); + { + ii_table[i - 1] = shr( size, i ); + } /* Rearrange the input array in bit reversed order */ j = 0; move16(); FOR( i = 0; i < size - 2; i = i + 2 ) { - move16(); - move16(); - move16(); - move16(); - move16(); - move16(); IF( GT_16( j, i ) ) { ftmp_fx = *( farray_ptr_fx + i ); *( farray_ptr_fx + i ) = *( farray_ptr_fx + j ); *( farray_ptr_fx + j ) = (Word16) ftmp_fx; + move32(); + move16(); + move16(); - ftmp_fx = *( farray_ptr_fx + i + 1 ); - *( farray_ptr_fx + i + 1 ) = *( farray_ptr_fx + j + 1 ); - *( farray_ptr_fx + j + 1 ) = (Word16) ftmp_fx; + ftmp_fx = *( farray_ptr_fx + add( i, 1 ) ); + *( farray_ptr_fx + add( i, 1 ) ) = *( farray_ptr_fx + add( j, 1 ) ); + *( farray_ptr_fx + add( j, 1 ) ) = (Word16) ftmp_fx; + move32(); + move16(); + move16(); } k = n_2; move16(); - WHILE( j >= k ) + WHILE( GE_16( j, k ) ) { j = sub( j, k ); k = shr( k, 1 ); } - j += k; + j = add( j, k ); } /* The FFT part */ - IF( isign == 1 ) + IF( EQ_16( isign, 1 ) ) { FOR( i = 0; i < stage; i++ ) /* i is stage counter */ { @@ -3907,21 +4033,21 @@ static void c_fft_wi_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word1 FOR( j = 0; j < jj; j = j + 2 ) /* j is sample counter */ { - ji = j * ii; /* ji is phase table index */ + ji = i_mult( j, ii ); /* ji is phase table index */ FOR( k = j; k < size; k = k + kk ) /* k is butterfly top */ { kj = add( k, jj ); /* kj is butterfly bottom */ - temp_sand = s_and( ( ji * K + 384 ), 511 ); + temp_sand = s_and( ( add( i_mult( ji, K ), 384 ) ), 511 ); /* Butterfly computations */ /* ftmp_real_fx = L_sub(L_mult(*(farray_ptr_fx + kj), cos_table[ji*K]), */ /* L_mult(*(farray_ptr_fx + kj + 1), cos_table[(ji*K+384)%512])); */ - ftmp_real_fx = L_msu( L_mult( *( farray_ptr_fx + kj ), cos_table[ji * K] ), - *( farray_ptr_fx + kj + 1 ), cos_table[temp_sand] ); + ftmp_real_fx = L_msu( L_mult( *( farray_ptr_fx + kj ), cos_table[i_mult( ji, K )] ), + *( farray_ptr_fx + add( kj, 1 ) ), cos_table[temp_sand] ); /* ftmp_imag_fx = L_add(L_mult(*(farray_ptr_fx + kj + 1), cos_table[ji*K]), */ /* L_mult(*(farray_ptr_fx + kj), cos_table[(ji*K+384)%512])); */ - ftmp_imag_fx = L_mac( L_mult( *( farray_ptr_fx + kj + 1 ), cos_table[ji * K] ), + ftmp_imag_fx = L_mac( L_mult( *( farray_ptr_fx + add( kj, 1 ) ), cos_table[i_mult( ji, K )] ), *( farray_ptr_fx + kj ), cos_table[temp_sand] ); tmp1 = round_fx( ftmp_real_fx ); @@ -3931,16 +4057,16 @@ static void c_fft_wi_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word1 *( farray_ptr_fx + kj ) = shr( tmp, 1 ); move16(); - tmp = sub( *( farray_ptr_fx + k + 1 ), tmp2 ); - *( farray_ptr_fx + kj + 1 ) = shr( tmp, 1 ); + tmp = sub( *( farray_ptr_fx + add( k, 1 ) ), tmp2 ); + *( farray_ptr_fx + add( kj, 1 ) ) = shr( tmp, 1 ); move16(); tmp = add( *( farray_ptr_fx + k ), tmp1 ); *( farray_ptr_fx + k ) = shr( tmp, 1 ); move16(); - tmp = add( *( farray_ptr_fx + k + 1 ), tmp2 ); - *( farray_ptr_fx + k + 1 ) = shr( tmp, 1 ); + tmp = add( *( farray_ptr_fx + add( k, 1 ) ), tmp2 ); + *( farray_ptr_fx + add( k, 1 ) ) = shr( tmp, 1 ); move16(); } } @@ -3958,21 +4084,21 @@ static void c_fft_wi_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word1 FOR( j = 0; j < jj; j = j + 2 ) /* j is sample counter */ { - ji = j * ii; /* ji is phase table index */ + ji = i_mult( j, ii ); /* ji is phase table index */ FOR( k = j; k < size; k = k + kk ) /* k is butterfly top */ { kj = add( k, jj ); /* kj is butterfly bottom */ - temp_sand = s_and( ( ji * K + 384 ), 511 ); + temp_sand = s_and( ( add( i_mult( ji, K ), 384 ) ), 511 ); /* Butterfly computations */ /* ftmp_real_fx = L_add(L_mult(*(farray_ptr_fx + kj), cos_table[ji*K]), */ /* L_mult(*(farray_ptr_fx + kj + 1), cos_table[(ji*K+384)%512])); */ - ftmp_real_fx = L_mac( L_mult( *( farray_ptr_fx + kj ), cos_table[ji * K] ), + ftmp_real_fx = L_mac( L_mult( *( farray_ptr_fx + kj ), cos_table[i_mult( ji, K )] ), *( farray_ptr_fx + kj + 1 ), cos_table[temp_sand] ); /* ftmp_imag_fx = L_sub(L_mult(*(farray_ptr_fx + kj + 1), cos_table[ji*K]), */ /* L_mult(*(farray_ptr_fx + kj), cos_table[(ji*K+384)%512])); */ - ftmp_imag_fx = L_msu( L_mult( *( farray_ptr_fx + kj + 1 ), cos_table[ji * K] ), + ftmp_imag_fx = L_msu( L_mult( *( farray_ptr_fx + add( kj, 1 ) ), cos_table[i_mult( ji, K )] ), *( farray_ptr_fx + kj ), cos_table[temp_sand] ); tmp1 = round_fx( ftmp_real_fx ); @@ -4024,7 +4150,7 @@ void r_fft_4_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word16 isign } /* The FFT part */ - IF( isign == 1 ) + IF( EQ_16( isign, 1 ) ) { /* Perform the complex FFT */ c_fft_wi_fx( farray_ptr_fx, size, stage, isign ); @@ -4033,10 +4159,12 @@ void r_fft_4_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word16 isign ftmp1_real_fx = *farray_ptr_fx; ftmp2_real_fx = *( farray_ptr_fx + 1 ); *farray_ptr_fx = add( ftmp1_real_fx, ftmp2_real_fx ); + move16(); *( farray_ptr_fx + 1 ) = sub( ftmp1_real_fx, ftmp2_real_fx ); + move16(); /* Now, handle the remaining positive frequencies */ - j = size - 2; + j = sub( size, 2 ); FOR( i = 2; i <= n_2; i = i + 2 ) { ftmp1_real_fx = add( *( farray_ptr_fx + i ), *( farray_ptr_fx + j ) ); @@ -4046,24 +4174,28 @@ void r_fft_4_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word16 isign Lftmp1_real_fx = L_deposit_h( ftmp1_real_fx ); Lftmp1_imag_fx = L_deposit_h( ftmp1_imag_fx ); - temp_sand = s_and( ( i * K + 384 ), 511 ); + temp_sand = s_and( add( i_mult( i, K ), 384 ), 511 ); /* Ltmp1_fx = L_sub(L_mult(ftmp2_real_fx, cos_table[i*K]), L_mult(ftmp2_imag_fx, cos_table[(i*K+384)%512])); */ - Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[i * K] ), ftmp2_imag_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[i_mult( i, K )] ), ftmp2_imag_fx, cos_table[temp_sand] ); *( farray_ptr_fx + i ) = round_fx( L_shr( L_add( Lftmp1_real_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_add(L_mult(ftmp2_imag_fx, cos_table[i*K]), L_mult(ftmp2_real_fx, cos_table[(i*K+384)%512])); */ - Ltmp1_fx = L_mac( L_mult( ftmp2_imag_fx, cos_table[i * K] ), ftmp2_real_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_mac( L_mult( ftmp2_imag_fx, cos_table[i_mult( i, K )] ), ftmp2_real_fx, cos_table[temp_sand] ); *( farray_ptr_fx + i + 1 ) = round_fx( L_shr( L_add( Lftmp1_imag_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_add(L_mult(ftmp2_real_fx, cos_table[j*K]), L_mult(ftmp2_imag_fx, cos_table[(j*K+384)%512])); */ - Ltmp1_fx = L_mac( L_mult( ftmp2_real_fx, cos_table[j * K] ), ftmp2_imag_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_mac( L_mult( ftmp2_real_fx, cos_table[i_mult( j, K )] ), ftmp2_imag_fx, cos_table[temp_sand] ); *( farray_ptr_fx + j ) = round_fx( L_shr( L_add( Lftmp1_real_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_add(L_negate(L_mult(ftmp2_imag_fx, cos_table[j*K])), L_mult(ftmp2_real_fx, cos_table[(j*K+384)%512])); */ - Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[temp_sand] ), ftmp2_imag_fx, cos_table[j * K] ); + Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[temp_sand] ), ftmp2_imag_fx, cos_table[i_mult( j, K )] ); Ltmp2_fx = L_sub( Ltmp1_fx, Lftmp1_imag_fx ); *( farray_ptr_fx + j + 1 ) = round_fx( L_shr( Ltmp2_fx, 1 ) ); - j = size - i; + move16(); + j = sub( size, i ); } } ELSE @@ -4093,23 +4225,27 @@ void r_fft_4_fx( Word16 *farray_ptr_fx, Word16 size, Word16 stage, Word16 isign Lftmp1_real_fx = L_deposit_h( ftmp1_real_fx ); Lftmp1_imag_fx = L_deposit_h( ftmp1_imag_fx ); - temp_sand = s_and( ( i * K + 384 ), 511 ); + temp_sand = s_and( ( add( i_mult( i, K ), 384 ) ), 511 ); /* Ltmp1_fx = L_add(L_mult(ftmp2_real_fx, cos_table[i*K]), L_mult(ftmp2_imag_fx, cos_table[(i*K+384)%512])); */ - Ltmp1_fx = L_mac( L_mult( ftmp2_real_fx, cos_table[i * K] ), ftmp2_imag_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_mac( L_mult( ftmp2_real_fx, cos_table[i_mult( i, K )] ), ftmp2_imag_fx, cos_table[temp_sand] ); *( farray_ptr_fx + i ) = round_fx( L_shr( L_add( Lftmp1_real_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_sub(L_mult(ftmp2_imag_fx, cos_table[i*K]), L_mult(ftmp2_real_fx, cos_table[(i*K+384)%512])); */ - Ltmp1_fx = L_msu( L_mult( ftmp2_imag_fx, cos_table[i * K] ), ftmp2_real_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_msu( L_mult( ftmp2_imag_fx, cos_table[i_mult( i, K )] ), ftmp2_real_fx, cos_table[temp_sand] ); *( farray_ptr_fx + i + 1 ) = round_fx( L_shr( L_add( Lftmp1_imag_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_sub(L_mult(ftmp2_real_fx, cos_table[j*K]), L_mult(ftmp2_imag_fx, cos_table[(j*K+384)%512])); */ - Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[j * K] ), ftmp2_imag_fx, cos_table[temp_sand] ); + Ltmp1_fx = L_msu( L_mult( ftmp2_real_fx, cos_table[i_mult( j, K )] ), ftmp2_imag_fx, cos_table[temp_sand] ); *( farray_ptr_fx + j ) = round_fx( L_shr( L_add( Lftmp1_real_fx, Ltmp1_fx ), 1 ) ); + move16(); /* Ltmp1_fx = L_negate(L_add(L_mult(ftmp2_imag_fx, cos_table[j*K]), L_mult(ftmp2_real_fx, cos_table[(j*K+384)%512]))); */ - Ltmp1_fx = L_negate( L_mac( L_mult( ftmp2_imag_fx, cos_table[j * K] ), ftmp2_real_fx, cos_table[temp_sand] ) ); + Ltmp1_fx = L_negate( L_mac( L_mult( ftmp2_imag_fx, cos_table[i_mult( j, K )] ), ftmp2_real_fx, cos_table[temp_sand] ) ); Ltmp2_fx = L_sub( Ltmp1_fx, Lftmp1_imag_fx ); *( farray_ptr_fx + j + 1 ) = round_fx( L_shr( Ltmp2_fx, 1 ) ); + move16(); } /* Perform the complex IFFT */ @@ -4162,13 +4298,15 @@ void copy_phase_fx( DTFS_STRUCTURE *X1_fx, DTFS_STRUCTURE X2_fx, DTFS_STRUCTURE Word16 exp, tmp, exp1; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif - move16(); retX_fx->lag_fx = X1_fx->lag_fx; + move16(); retX_fx->Q = sub( X2_fx.Q, 1 ); /* equivalent to 2x MIN_FX(shr(sub(X_fx->lag_fx,1),1),X_fx->nH_fx) */ - HalfLag = s_min( shr( X1_fx->lag_fx, 1 ), X1_fx->nH_fx ); move16(); + HalfLag = s_min( shr( X1_fx->lag_fx, 1 ), X1_fx->nH_fx ); + FOR( k = 1; k <= HalfLag; k++ ) { #ifdef BASOP_NOGLOB @@ -4184,11 +4322,15 @@ void copy_phase_fx( DTFS_STRUCTURE *X1_fx, DTFS_STRUCTURE X2_fx, DTFS_STRUCTURE exp1 = sub( sub( 30, exp ), 2 * X1_fx->Q + 1 ); IF( tmp ) - tmp = div_s( 16384, tmp ); /* 15+exp1 */ + { + tmp = div_s( 16384, tmp ); /* 15+exp1 */ + } ELSE + { tmp = 0; + } tmp = shr( tmp, 1 ); - q = 15 + exp1 + 16 - 1; + q = sub( add( add( 15, exp1 ), 16 ), 1 ); IF( tmp ) { @@ -4199,10 +4341,15 @@ void copy_phase_fx( DTFS_STRUCTURE *X1_fx, DTFS_STRUCTURE X2_fx, DTFS_STRUCTURE Ltemp_fx = Isqrt_lc( L_tmp, &exp ); /* Q(31-exp) */ } ELSE + { Ltemp_fx = 0; + move32(); + } - if ( s_and( q, 1 ) ) + IF( s_and( q, 1 ) ) + { Ltemp_fx = Mult_32_16( Ltemp_fx, 23170 ); /* 23170 is 1/sqrt(2) in Q15 */ + } q = shr( q, 1 ); /* Ltemp_fx in Q(q+16) */ @@ -4213,21 +4360,25 @@ void copy_phase_fx( DTFS_STRUCTURE *X1_fx, DTFS_STRUCTURE X2_fx, DTFS_STRUCTURE Ltemp_fx = L_add_o( L_shr( Ltemp_fx, 15 ), L_mult_o( X1_fx->b_fx[k], d1h, &Overflow ), &Overflow ); /* sin(w) in Q(q+16+Q-15) */ sn = round_fx_o( L_shl_o( Ltemp_fx, sub( 30, add( q, X1_fx->Q ) ), &Overflow ), &Overflow ); /* Q15 */ retX_fx->b_fx[k] = mult_ro( X2_fx.a_fx[k], sn, &Overflow ); /* X2_fx.Q */ + move16(); Ltemp_fx = L_mult0( X1_fx->a_fx[k], d1l ); Ltemp_fx = L_add_o( L_shr( Ltemp_fx, 15 ), L_mult_o( X1_fx->a_fx[k], d1h, &Overflow ), &Overflow ); /* cos(w) in Q(q+Q+1) */ cn = round_fx_o( L_shl_o( Ltemp_fx, sub( 30, add( q, X1_fx->Q ) ), &Overflow ), &Overflow ); /* Q15 */ retX_fx->a_fx[k] = mult_ro( X2_fx.a_fx[k], cn, &Overflow ); /* X2_fx.Q */ + move16(); #else Ltemp_fx = L_mult0( X1_fx->b_fx[k], d1l ); Ltemp_fx = L_add( L_shr( Ltemp_fx, 15 ), L_mult( X1_fx->b_fx[k], d1h ) ); /* sin(w) in Q(q+16+Q-15) */ sn = round_fx( L_shl( Ltemp_fx, sub( 30, add( q, X1_fx->Q ) ) ) ); /* Q15 */ retX_fx->b_fx[k] = mult_r( X2_fx.a_fx[k], sn ); /* X2_fx.Q */ + move16(); Ltemp_fx = L_mult0( X1_fx->a_fx[k], d1l ); Ltemp_fx = L_add( L_shr( Ltemp_fx, 15 ), L_mult( X1_fx->a_fx[k], d1h ) ); /* cos(w) in Q(q+Q+1) */ cn = round_fx( L_shl( Ltemp_fx, sub( 30, add( q, X1_fx->Q ) ) ) ); /* Q15 */ retX_fx->a_fx[k] = mult_r( X2_fx.a_fx[k], cn ); /* X2_fx.Q */ + move16(); #endif } k = sub( k, 1 ); @@ -4283,6 +4434,7 @@ Word32 getSpEngyFromResAmp_fx( DTFS_STRUCTURE *X_fx, Word16 lband, Word16 hband, Word32 L_tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif en = L_deposit_l( 0 ); @@ -4321,7 +4473,7 @@ Word32 getSpEngyFromResAmp_fx( DTFS_STRUCTURE *X_fx, Word16 lband, Word16 hband, #ifdef BASOP_NOGLOB Lacc = L_mac_o( Lacc, curr_lpc[i], cos_tab[n % M_fx], &Overflow ); /* Q28 */ #else - Lacc = L_mac( Lacc, curr_lpc[i], cos_tab[n % M_fx] ); /* Q28 */ + Lacc = L_mac( Lacc, curr_lpc[i], cos_tab[n % M_fx] ); /* Q28 */ #endif n = add( n, k4 ); /* n=4*i*k */ } @@ -4336,7 +4488,7 @@ Word32 getSpEngyFromResAmp_fx( DTFS_STRUCTURE *X_fx, Word16 lband, Word16 hband, #ifdef BASOP_NOGLOB Lacc = L_msu_o( Lacc, curr_lpc[i], sin_tab[n % M_fx], &Overflow ); /* Q28 */ #else - Lacc = L_msu( Lacc, curr_lpc[i], sin_tab[n % M_fx] ); /* Q28 */ + Lacc = L_msu( Lacc, curr_lpc[i], sin_tab[n % M_fx] ); /* Q28 */ #endif n = add( n, k4 ); /* n=4*i*k */ } @@ -4345,7 +4497,7 @@ Word32 getSpEngyFromResAmp_fx( DTFS_STRUCTURE *X_fx, Word16 lband, Word16 hband, #ifdef BASOP_NOGLOB Lacc = L_add_o( Mult_32_32( Re, Re ), Mult_32_32( Im, Im ), &Overflow ); /* Lacc=Re^2+Im^2 in Q23 */ #else - Lacc = L_add( Mult_32_32( Re, Re ), Mult_32_32( Im, Im ) ); /* Lacc=Re^2+Im^2 in Q23 */ + Lacc = L_add( Mult_32_32( Re, Re ), Mult_32_32( Im, Im ) ); /* Lacc=Re^2+Im^2 in Q23 */ #endif Ltemp = L_mult0( X_fx->a_fx[k], X_fx->a_fx[k] ); /* 2*a[k]^2 in 2Q */ /* Ltemp=(Word32)L_sat32_40(divide_dp(Ltemp,Lacc,-19,1)); : Ltemp in Q(2Q-13) */ @@ -4428,6 +4580,7 @@ void DTFS_poleFilter_fx( DTFS_STRUCTURE *X_fx, Word16 *LPC, Word16 N, Word16 *S_ Word16 exp, tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif Qmin = 32767; move16(); @@ -4443,8 +4596,8 @@ void DTFS_poleFilter_fx( DTFS_STRUCTURE *X_fx, Word16 *LPC, Word16 N, Word16 *S_ FOR( n = 0; n < N; n++ ) { #ifdef BASOP_NOGLOB - sum1_fx = L_mac_o( sum1_fx, LPC[n], C_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )], &Overflow ); /* Q(12+15+1) */ - sum2_fx = L_mac_o( sum2_fx, LPC[n], S_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )], &Overflow ); /* Q(12+15+1) */ + sum1_fx = L_mac_o( sum1_fx, LPC[n], C_fx[shl( temp2, 2 ) % shl( X_fx->lag_fx, 2 )], &Overflow ); /* Q(12+15+1) */ + sum2_fx = L_mac_o( sum2_fx, LPC[n], S_fx[shl( temp2, 2 ) % shl( X_fx->lag_fx, 2 )], &Overflow ); /* Q(12+15+1) */ #else sum1_fx = L_mac( sum1_fx, LPC[n], C_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )] ); /* Q(12+15+1) */ sum2_fx = L_mac( sum2_fx, LPC[n], S_fx[( 4 * temp2 ) % ( 4 * X_fx->lag_fx )] ); /* Q(12+15+1) */ @@ -4520,7 +4673,9 @@ void DTFS_poleFilter_fx( DTFS_STRUCTURE *X_fx, Word16 *LPC, Word16 N, Word16 *S_ } nb = sub( nb, 1 ); /* leave one more sign bit */ X_fx->a_fx[k] = round_fx( L_shl( sum1_fx, nb ) ); /* Q(Q-temp1+22+nb-16)=Q(Q-temp1+nb+2) */ + move16(); X_fx->b_fx[k] = round_fx( L_shl( sum2_fx, nb ) ); /* Q(Q-temp1+nb+2) */ + move16(); Qab[k] = add( sub( add( nb, 2 ), temp1 ), X_fx->Q ); @@ -4540,6 +4695,7 @@ void DTFS_poleFilter_fx( DTFS_STRUCTURE *X_fx, Word16 *LPC, Word16 N, Word16 *S_ } X_fx->Q = Qmin; + move16(); } /*===================================================================*/ /* FUNCTION : poleFilter_setup_fx() */ @@ -4574,6 +4730,7 @@ void poleFilter_setup_fx( const Word16 *LPC, Word16 N, DTFS_STRUCTURE X_fx, Word Word16 exp, tmp; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move32(); #endif HalfLag = s_min( shr( X_fx.lag_fx, 1 ), X_fx.nH_fx ); @@ -4618,7 +4775,11 @@ void poleFilter_setup_fx( const Word16 *LPC, Word16 N, DTFS_STRUCTURE X_fx, Word } n2 = sub( n2, 1 ); temp1 = pf_temp1[k] = round_fx( (Word32) L_shl( sum1_fx, n2 ) ); /* Q(12+15+1+n2-16)=Q(12+n2) */ + move16(); + move16(); temp2 = pf_temp2[k] = round_fx( (Word32) L_shl( sum2_fx, n2 ) ); /* Q(12+n2) */ + move16(); + move16(); /* Calculate the circular convolution */ sum = L_mac( L_mult( temp1, temp1 ), temp2, temp2 ); /* Q(12+n2+12+n2+1)=Q(25+2*n2) */ @@ -4677,7 +4838,7 @@ Word32 DTFS_getEngy_band_wb_fx( DTFS_STRUCTURE X_fx, Word16 lband, Word16 hband { freq_fx = L_mult( k, 12800 ); - if ( GE_32( freq_fx, L_lband ) ) + IF( GE_32( freq_fx, L_lband ) ) { BREAK; } @@ -4687,7 +4848,7 @@ Word32 DTFS_getEngy_band_wb_fx( DTFS_STRUCTURE X_fx, Word16 lband, Word16 hband FOR( k = 1; k <= HalfLag; k++ ) { freq_fx = L_mult( k, 12800 ); - if ( GE_32( freq_fx, L_hband ) ) + IF( GE_32( freq_fx, L_hband ) ) { BREAK; } @@ -4702,14 +4863,14 @@ Word32 DTFS_getEngy_band_wb_fx( DTFS_STRUCTURE X_fx, Word16 lband, Word16 hband } en_fx = L_shr( en_fx, 1 ); /* 2*X1.Q+1 */ - if ( lband == 0 ) + IF( lband == 0 ) { en_fx = L_mac0( en_fx, X_fx.a_fx[0], X_fx.a_fx[0] ); /* 2*X1.Q+1 */ } /* IF ((X_fx.lag_fx%2 == 0) && (hband == X_fx.upper_cut_off_freq_fx)) */ test(); - IF( ( s_and( X_fx.lag_fx, 1 ) == 0 ) && ( hband == X_fx.upper_cut_off_freq_fx ) ) + IF( ( s_and( X_fx.lag_fx, 1 ) == 0 ) && EQ_16( hband, X_fx.upper_cut_off_freq_fx ) ) { en_fx = L_mac0( en_fx, X_fx.a_fx[k], X_fx.a_fx[k] ); en_fx = L_mac0( en_fx, X_fx.b_fx[k], X_fx.b_fx[k] ); diff --git a/lib_com/window_fx.c b/lib_com/window_fx.c index d848b2dd3..6ef8081fd 100644 --- a/lib_com/window_fx.c +++ b/lib_com/window_fx.c @@ -32,16 +32,17 @@ void ham_cos_window( Word32 cte, cc; #ifdef BASOP_NOGLOB_DECLARE_LOCAL Flag Overflow = 0; + move16(); #endif assert( n1 >= 102 ); /* if n1 is too low -> overflow in div_l */ /* cte = PI2/(Float32)(2*n1 - 1); */ BASOP_SATURATE_WARNING_OFF_EVS - move16(); cte = L_deposit_l( div_l( PI2_10Q21, sub( shl( n1, 1 ), 1 ) ) ); /*0Q15*/ BASOP_SATURATE_WARNING_ON_EVS cc = 0; + move32(); FOR( i = 0; i < n1; i++ ) { /* fh_f[i] = 0.54f - 0.46f * (Float32)cos(cc); */ @@ -61,7 +62,7 @@ void ham_cos_window( /* cte = PI2/(Float32)(4*n2 - 1); */ cte = L_deposit_l( div_l( PI2_11Q20, sub( shl( n2, 2 ), 1 ) ) ); /*0Q15*/ cc = 0; - move16(); + move32(); add( n1, n2 ); BASOP_SATURATE_WARNING_OFF_EVS @@ -96,9 +97,10 @@ void ham_cos_window_ivas( // cte_fx = div_s(1, sub(shl(n1, 1), 1)); cc_fx = 0; move16(); - for ( i = 0; i < n1; i++ ) + FOR( i = 0; i < n1; i++ ) { *fh++ = sub( 17694, mult( getCosWord16R2( cc_fx ), 15073 ) ); + move16(); cc_fx = div_s( add( i, 1 ), sub( shl( n1, 1 ), 1 ) ); // add(cc_fx, cte_fx); } @@ -109,8 +111,8 @@ void ham_cos_window_ivas( for ( i = 0; i < n2; i++ ) { *fh++ = getCosWord16R2( cc_fx ); + move16(); cc_fx = div_s( add( i, 1 ), sub( shl( n1, 2 ), 1 ) ); - ; } return; diff --git a/lib_com/window_ola_fx.c b/lib_com/window_ola_fx.c index 32ae48d50..ac7e63d8a 100644 --- a/lib_com/window_ola_fx.c +++ b/lib_com/window_ola_fx.c @@ -32,7 +32,7 @@ void sinq_fx( #ifdef BASOP_NOGLOB tmp1 = add_sat( shl_sat( tmp, 1 ), phi ); /*Q15 */ #else - tmp1 = add( shl( tmp, 1 ), phi ); /*Q15 */ + tmp1 = add( shl( tmp, 1 ), phi ); /*Q15 */ #endif L_tmp = L_mult( tmp1, tmp1 ); /*Q31 */ L_tmp = Mult_32_16( L_tmp, tmp1 ); /*Q31 */ @@ -67,9 +67,10 @@ void sinq_fx( tmp_old = L_shl_sat( L_tmp, 1 ); /*Q31 */ x[i] = round_fx_sat( tmp_old ); /*Q15 */ #else - tmp_old = L_shl( L_tmp, 1 ); /*Q31 */ - x[i] = round_fx( tmp_old ); /*Q15 */ + tmp_old = L_shl( L_tmp, 1 ); /*Q31 */ + x[i] = round_fx( tmp_old ); /*Q15 */ #endif + move16(); } return; @@ -130,6 +131,7 @@ void window_ola_fx( move32(); } *Q_sig = add( *Q_sig, temp ); + move16(); /* rescaling for overlapp add */ @@ -137,14 +139,17 @@ void window_ola_fx( { Copy_Scale_sig( OldauOut, OldauOut, L, sub( *Q_sig, add( *Q_old, 15 ) ) ); *Q_old = sub( *Q_sig, 15 ); + move16(); } ELSE IF( LT_16( add( *Q_old, 15 ), *Q_sig ) ) { Scale_sig32( ImdctOut, L, sub( add( *Q_old, 15 ), *Q_sig ) ); *Q_sig = add( *Q_old, 15 ); + move16(); } *Q_sig = *Q_old; /*fixing output to new Q_old */ + move16(); decimate = 1; @@ -285,6 +290,7 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i++ ) { *p1++ = round_fx( L_shl( *pa++, 1 ) ); + move16(); } /*p1=paout+shr(L,1);*/ @@ -293,6 +299,7 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i++ ) { *p1++ = round_fx( L_negate( L_shl( *pa--, 1 ) ) ); + move16(); } @@ -323,11 +330,10 @@ void window_ola_fx( { #ifdef BASOP_NOGLOB *p1 = add_sat( mult( *p1, *p2 ), shl_sat( mult( *p4, *p3 ), 5 ) ); /*auOut[i]*SS2[i]+ OldauOut[i+n]*(SS2[L-n-i-1])/(wret2[i]+0.01f);;*/ - move16(); #else - *p1 = add( mult( *p1, *p2 ), shl( mult( *p4, *p3 ), 5 ) ); /*auOut[i]*SS2[i]+ OldauOut[i+n]*(SS2[L-n-i-1])/(wret2[i]+0.01f);;*/ - move16(); + *p1 = add( mult( *p1, *p2 ), shl( mult( *p4, *p3 ), 5 ) ); /*auOut[i]*SS2[i]+ OldauOut[i+n]*(SS2[L-n-i-1])/(wret2[i]+0.01f);;*/ #endif + move16(); p1++; p2++; p3++; @@ -362,6 +368,7 @@ void window_ola_fx( p3 -= decimate; *p1++ = round_fx( L_add( L_shl( Mult_32_16( *pa++, *p5-- ), 1 ), L_deposit_h( *p4++ ) ) ); /* (( Qin + Q15 -15)+1 + ( Qin - 15 + 16))-1 */ #endif + move16(); /* paout[i] = ImdctOut[L/2 + i] * win_right[(2*L_FRAME16k-(n16+(i-n)/2))*decimate-1-decay-windecay48]+OldauOut[i]; paout[i+1] = ImdctOut[L/2 + i +1] * win_int_right[2*L_FRAME16k-(n16+(i-n)/2)-1-windecay16]+OldauOut[i+1];*/ } @@ -380,6 +387,7 @@ void window_ola_fx( *p1++ = round_fx( L_sub( L_deposit_h( *p4++ ), L_shl( Mult_32_16( *pa--, *p5-- ), 1 ) ) ); /* (( Qin + Q15 -15)+1 + ( Qin - 15 + 16))-1 */ *p1++ = round_fx( L_sub( L_deposit_h( *p4++ ), L_shl( Mult_32_16( *pa--, *p3 ), 1 ) ) ); /* (( Qin + Q15 -15)+1 + ( Qin - 15 + 16))-1 */ #endif + move16(); p3 -= decimate; /* paout[L/2 + i ] = -ImdctOut[L - 1 - i] * win_int_right[(3*L_FRAME16k/2-1-i/2)-windecay16]+OldauOut[i+L/2]; paout[L/2 + i +1] = -ImdctOut[L - 1 - (i+1)] * win_right[(3*L_FRAME16k/2-1-i/2)*decimate+decay-windecay48]+OldauOut[i+L/2+1]; */ @@ -387,7 +395,9 @@ void window_ola_fx( FOR( i = 0; i < n; i += 2 ) { *p1++ = round_fx_sat( L_sub_sat( L_deposit_h( *p4++ ), L_shl( *pa--, 1 ) ) ); + move16(); *p1++ = round_fx_sat( L_sub_sat( L_deposit_h( *p4++ ), L_shl( *pa--, 1 ) ) ); + move16(); /* paout[L/2 + i +1] = -ImdctOut[L - 1 - (i+1)]+OldauOut[i+L/2+1] ; paout[L/2 + i ] = -ImdctOut[L - 1 - i]+OldauOut[i+L/2]; */ } @@ -402,7 +412,9 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i += 2 ) { *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa++, *p2-- ), 1 ) ) ); + move16(); *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa++, *p3 ), 1 ) ) ); + move16(); p3 -= decimate; /* OldauOut[L/2 + i] = -ImdctOut[i] * win_int_left[(L_FRAME16k/2-i/2-1)]; OldauOut[L/2 + i +1] = -ImdctOut[i+1] * win_left[(L_FRAME16k/2-i/2-1)*decimate+decay]*/ @@ -417,7 +429,9 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i += 2 ) { *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa--, *p2 ), 1 ) ) ); + move16(); *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa--, *p3-- ), 1 ) ) ); + move16(); p2 -= decimate; /* OldauOut[ i] = -ImdctOut[L/2 - 1 - i] *win_left[(L_FRAME16k-i/2)*decimate-decay-1]; OldauOut[ i +1] = -ImdctOut[L/2 - 1 - (i +1)] * win_int_left[L_FRAME16k-(i/2)-1];; */ @@ -441,6 +455,7 @@ void window_ola_fx( #else *p1++ = round_fx( L_add( L_shl( Mult_32_16( *pa++, *p3 ), 1 ), L_deposit_h( *p4++ ) ) ); /* (( Qin + Q15 -15)+1 + ( Qin - 15 + 16))-1 */ #endif + move16(); p3 -= decimate; /*paout[i] = ImdctOut[L/2 + i] * win_right[(2*L-i)*decimate-1-decay-windecay48]+OldauOut[i];*/ } @@ -456,12 +471,14 @@ void window_ola_fx( #else *p1++ = round_fx( L_sub( L_deposit_h( *p4++ ), L_shl( Mult_32_16( *pa--, *p3 ), 1 ) ) ); /* (( Qin + Q15 -15)+1 + ( Qin - 15 + 16))-1 */ #endif + move16(); p3 -= decimate; /* paout[L/2 + i] = -ImdctOut[L - 1 - i] * win_right[(3*L/2-1-i)*decimate+decay-windecay48]+OldauOut[i+L/2]; */ } FOR( i = 0; i < n; i++ ) { *p1++ = round_fx_sat( L_sub_sat( L_deposit_h( *p4++ ), L_shl( *pa--, 1 ) ) ); + move16(); /* paout[L/2 + i] = -ImdctOut[L - 1 - i] + OldauOut[i+L/2]; */ } } @@ -475,6 +492,7 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i++ ) { *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa++, *p2 ), 1 ) ) ); + move16(); p2 -= decimate; /*OldauOut[L/2 + i] = -ImdctOut[i] * win_left[(L/2-i-1)*decimate+decay]; */ } @@ -486,6 +504,7 @@ void window_ola_fx( FOR( i = 0; i < temp_len; i++ ) { *p1++ = round_fx( L_negate( L_shl( Mult_32_16( *pa--, *p2 ), 1 ) ) ); + move16(); p2 -= decimate; /* OldauOut[ i] = -ImdctOut[L/2 - 1 - i] * win_left[(L-i)*decimate-decay-1]; */ } @@ -498,7 +517,9 @@ void window_ola_fx( FOR( i = 0; i < n; i++ ) { *p1 = round_fx( L_negate( L_shl( *pa--, 1 ) ) ); + move16(); *p2++ = *p1++; + move16(); } return; @@ -532,35 +553,51 @@ void window_ola_ext_fx( n = shr( mult( shl( L, Q5 ), N_ZERO_BY_FS ), Q5 ); n16 = N16_CORE_SW; + move16(); windecay48 = WINDECAY48; + move16(); windecay16 = WINDECAY16; + move16(); decimate = 1; + move16(); decay = 0; + move16(); tcx_get_windows_mode1( left_mode, right_mode, win_left, win_right, win_int_left, win_int_right, L ); + test(); IF( EQ_16( L, L_FRAME32k ) || EQ_16( L, L_FRAME16k ) ) { decimate = 3; + move16(); decay = 1; + move16(); } ELSE IF( EQ_16( L, L_FRAME8k ) ) { decimate = 6; + move16(); decay = 2; + move16(); } ELSE IF( EQ_16( L, 512 ) ) { windecay48 = WINDECAY48_256; + move16(); decimate = 1; + move16(); decay = 0; + move16(); } ELSE IF( EQ_16( L, 256 ) ) { windecay48 = WINDECAY48_256; + move16(); decimate = 2; + move16(); decay = 0; + move16(); } paout = auOut - n; @@ -569,71 +606,88 @@ void window_ola_ext_fx( { FOR( i = n; i < shr( L, 1 ); i += 2 ) { - paout[i] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[shr( L, 1 ) + i], sign_right ), win_right[( 2 * L_FRAME16k - ( n16 + ( i - n ) / 2 ) ) * decimate - 1 - decay - windecay48] ), OldauOut[i] ); - paout[i + 1] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[shr( L, 1 ) + i + 1], sign_right ), win_int_right[2 * L_FRAME16k - ( n16 + ( i - n ) / 2 ) - 1 - windecay16] ), OldauOut[i + 1] ); + paout[i] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[add( shr( L, 1 ), i )], sign_right ), win_right[sub( sub( sub( i_mult( ( sub( 2 * L_FRAME16k, add( n16, shr( sub( i, n ), 1 ) ) ) ), decimate ), 1 ), decay ), windecay48 )] ), OldauOut[i] ); + move32(); + paout[i + 1] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[add( add( shr( L, 1 ), i ), 1 )], sign_right ), win_int_right[sub( sub( sub( 2 * L_FRAME16k, add( n16, shr( sub( i, n ), 1 ) ) ), 1 ), windecay16 )] ), OldauOut[i + 1] ); + move32(); } FOR( i = 0; i < shr( L, 1 ) - n; i += 2 ) { - paout[shr( L, 1 ) + i + 1] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[L - 1 - ( i + 1 )] ), win_right[( 3 * L_FRAME16k / 2 - 1 - i / 2 ) * decimate + decay - windecay48] ), OldauOut[i + shr( L, 1 ) + 1] ); - paout[shr( L, 1 ) + i] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[L - 1 - i] ), win_int_right[( 3 * L_FRAME16k / 2 - 1 - i / 2 ) - windecay16] ), OldauOut[i + shr( L, 1 )] ); + paout[add( add( shr( L, 1 ), i ), 1 )] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[sub( sub( L, 1 ), add( i, 1 ) )] ), win_right[sub( add( i_mult( ( sub( 3 * L_FRAME16k / 2 - 1, shr( i, 1 ) ) ), decimate ), decay ), windecay48 )] ), OldauOut[add( add( i, shr( L, 1 ) ), 1 )] ); + move32(); + paout[add( shr( L, 1 ), i )] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[sub( sub( L, 1 ), i )] ), win_int_right[sub( sub( 3 * L_FRAME16k / 2 - 1, shr( i, 1 ) ), windecay16 )] ), OldauOut[add( i, shr( L, 1 ) )] ); + move32(); } FOR( i = sub( shr( L, 1 ), n ); i < shr( L, 1 ); i += 2 ) { - paout[shr( L, 1 ) + i + 1] = L_add_sat( L_negate( ImdstOut[L - 1 - ( i + 1 )] ), OldauOut[i + shr( L, 1 ) + 1] ); - paout[shr( L, 1 ) + i] = L_add_sat( L_negate( ImdstOut[L - 1 - i] ), OldauOut[i + shr( L, 1 )] ); + paout[add( add( shr( L, 1 ), i ), 1 )] = L_add_sat( L_negate( ImdstOut[sub( sub( L, 1 ), add( i, 1 ) )] ), OldauOut[add( add( i, shr( L, 1 ) ), 1 )] ); + move32(); + paout[add( shr( L, 1 ), i )] = L_add_sat( L_negate( ImdstOut[sub( L, add( 1, i ) )] ), OldauOut[add( i, shr( L, 1 ) )] ); + move32(); } FOR( i = 0; i < shr( L, 1 ); i += 2 ) { - OldauOut[shr( L, 1 ) + i + 1] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i + 1], sign_left ), win_left[( L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay] ); - OldauOut[shr( L, 1 ) + i] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i], sign_left ), win_int_left[( L_FRAME16k / 2 - i / 2 - 1 )] ); + OldauOut[add( add( shr( L, 1 ), i ), 1 )] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i + 1], sign_left ), win_left[add( i_mult( sub( L_FRAME16k / 2 - 1, shr( i, 1 ) ), decimate ), decay )] ); + move32(); + OldauOut[add( shr( L, 1 ), i )] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i], sign_left ), win_int_left[sub( L_FRAME16k / 2 - 1, shr( i, 1 ) )] ); + move32(); } FOR( i = n; i < shr( L, 1 ); i += 2 ) { - OldauOut[i] = Mpy_32_16_1( L_negate( ImdstOut[shr( L, 1 ) - 1 - i] ), win_left[( L_FRAME16k - i / 2 ) * decimate - decay - 1] ); - OldauOut[i + 1] = Mpy_32_16_1( L_negate( ImdstOut[shr( L, 1 ) - 1 - ( i + 1 )] ), win_int_left[L_FRAME16k - i / 2 - 1] ); + OldauOut[i] = Mpy_32_16_1( L_negate( ImdstOut[sub( sub( shr( L, 1 ), 1 ), i )] ), win_left[sub( sub( i_mult( sub( L_FRAME16k, shr( i, 1 ) ), decimate ), decay ), 1 )] ); + move32(); + OldauOut[i + 1] = Mpy_32_16_1( L_negate( ImdstOut[sub( sub( shr( L, 1 ), 1 ), add( i, 1 ) )] ), win_int_left[sub( L_FRAME16k - 1, shr( i, 1 ) )] ); + move32(); } } ELSE { FOR( i = n; i < shr( L, 1 ); i++ ) { - paout[i] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[shr( L, 1 ) + i], sign_right ), win_right[( 2 * L - i ) * decimate - 1 - decay - windecay48] ), OldauOut[i] ); + paout[i] = L_add_sat( Mpy_32_16_1( Mpy_32_16_1( ImdstOut[add( shr( L, 1 ), i )], sign_right ), win_right[sub( sub( sub( i_mult( ( sub( shl( L, 1 ), i ) ), decimate ), 1 ), decay ), windecay48 )] ), OldauOut[i] ); + move32(); } FOR( i = 0; i < shr( L, 1 ) - n; i++ ) { - paout[shr( L, 1 ) + i] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[L - 1 - i] ), win_right[( 3 * shr( L, 1 ) - 1 - i ) * decimate + decay - windecay48] ), OldauOut[i + shr( L, 1 )] ); + paout[add( shr( L, 1 ), i )] = L_add_sat( Mpy_32_16_1( L_negate( ImdstOut[sub( L, add( 1, i ) )] ), win_right[sub( add( i_mult( ( sub( sub( i_mult( 3, shr( L, 1 ) ), 1 ), i ) ), decimate ), decay ), windecay48 )] ), OldauOut[add( i, shr( L, 1 ) )] ); + move32(); } FOR( i = sub( shr( L, 1 ), n ); i < shr( L, 1 ); i++ ) { - paout[shr( L, 1 ) + i] = L_add_sat( L_negate( ImdstOut[L - 1 - i] ), OldauOut[i + shr( L, 1 )] ); + paout[add( shr( L, 1 ), i )] = L_add_sat( L_negate( ImdstOut[sub( L, add( 1, i ) )] ), OldauOut[add( i, shr( L, 1 ) )] ); + move32(); } FOR( i = 0; i < shr( L, 1 ); i++ ) { - OldauOut[shr( L, 1 ) + i] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i], sign_left ), win_left[( shr( L, 1 ) - i - 1 ) * decimate + decay] ); + OldauOut[add( shr( L, 1 ), i )] = Mpy_32_16_1( Mpy_32_16_1( ImdstOut[i], sign_left ), win_left[add( i_mult( ( sub( shr( L, 1 ), add( i, 1 ) ) ), decimate ), decay )] ); + move32(); } FOR( i = n; i < shr( L, 1 ); i++ ) { - OldauOut[i] = Mpy_32_16_1( L_negate( ImdstOut[shr( L, 1 ) - 1 - i] ), win_left[( L - i ) * decimate - decay - 1] ); + OldauOut[i] = Mpy_32_16_1( L_negate( ImdstOut[sub( shr( L, 1 ), add( 1, i ) )] ), win_left[sub( sub( i_mult( sub( L, i ), decimate ), decay ), 1 )] ); + move32(); } } FOR( i = 0; i < n; i++ ) { - OldauOut[i] = L_negate( ImdstOut[shr( L, 1 ) - 1 - i] ); + OldauOut[i] = L_negate( ImdstOut[sub( shr( L, 1 ), add( 1, i ) )] ); + move32(); } FOR( i = 0; i < n; i++ ) { - paout[L + i] = OldauOut[i]; + paout[add( L, i )] = OldauOut[i]; + move32(); } return; @@ -665,6 +719,8 @@ void core_switching_OLA_fx( const Word16 *pt4, *pt5; Word16 tmp, tmp2, temp_len; Word16 decimate = 0, delta = 0; /* initialize just to avoid compiler warnings */ + move16(); + move16(); const Word16 *on_win, *on_win_int; Word16 a, b, divisor, buf_offset; Word32 L_tmp; diff --git a/lib_com/wtda.c b/lib_com/wtda.c index 053e892f1..868cd7057 100644 --- a/lib_com/wtda.c +++ b/lib_com/wtda.c @@ -374,127 +374,153 @@ void wtda_fx32( tcx_get_windows_mode1( left_mode, right_mode, win_left, win_right, win_int_left, win_int_right, L ); decimate = 1; /* L_FRAME 48k */ + move16(); decay = 0; + move16(); windecay48 = (Word16) WINDECAY48; // (int16_t)(2 * ((float)L_FRAME48k * N_ZERO_MDCT_NS / FRAME_SIZE_NS)) + R1_48 + move16(); IF( EQ_16( L, L_FRAME32k ) || EQ_16( L, L_FRAME16k ) ) { decimate = 3; + move16(); decay = 1; + move16(); } ELSE IF( EQ_16( L, L_FRAME8k ) ) { decimate = 6; + move16(); decay = 2; + move16(); } - switch ( L ) // (int16_t)((float)L * N_ZERO_MDCT_NS / FRAME_SIZE_NS) + SWITCH( L ) // (int16_t)((float)L * N_ZERO_MDCT_NS / FRAME_SIZE_NS) { case L_FRAME16k: n = 90; - break; + move16(); + BREAK; case L_FRAME32k: n = 180; - break; + move16(); + BREAK; case L_FRAME48k: n = 270; - break; + move16(); + BREAK; default: n = (Word16) ( ( L * N_ZERO_MDCT_NS ) / FRAME_SIZE_NS ); - break; + move16(); + BREAK; } windecay16 = (Word16) WINDECAY16; // (int16_t)(2 * ((float)L_FRAME16k * N_ZERO_MDCT_NS / FRAME_SIZE_NS)) + R1_16; /* algorithmic delay reduction */ i = 0; + move16(); - if ( old_wtda == NULL ) + IF( old_wtda == NULL ) { allsig_r = new_audio + n; allsig_l = new_audio + n - L; } - else + ELSE { allsig_r = new_audio + n; allsig_l = old_wtda + n; } - if ( EQ_16( L, L_FRAME32k ) ) + IF( EQ_16( L, L_FRAME32k ) ) { - for ( i = 0; i < L / 2 - n; i += 2 ) + FOR( i = 0; i < sub( L / 2, n ); i += 2 ) { - idx1 = L / 2 - i - 1; - idx2 = 3 * L_FRAME16k / 2 - i / 2 - 1 - windecay16; - idx3 = L / 2 + i; - idx4 = 3 * L_FRAME16k / 2 + i / 2 - windecay16; + idx1 = sub( sub( shr( L, 1 ), i ), 1 ); + idx2 = sub( sub( 3 * L_FRAME16k / 2 - 1, shr( i, 1 ) ), windecay16 ); + idx3 = add( shr( L, 1 ), i ); + idx4 = sub( add( 3 * L_FRAME16k / 2, shr( i, 1 ) ), windecay16 ); wtda_audio[i] = L_sub_sat( Mpy_32_16_1( -allsig_r[idx1], win_int_right[idx2] ), Mpy_32_16_1( allsig_r[idx3], win_int_right[idx4] ) ); + move32(); - idx1 = L / 2 - ( i + 1 ) - 1; - idx2 = ( 3 * L_FRAME16k / 2 - i / 2 - 1 ) * decimate + decay - windecay48; - idx3 = L / 2 + i + 1; - idx4 = ( 3 * L_FRAME16k / 2 + 1 + i / 2 ) * decimate - decay - 1 - windecay48; + idx1 = sub( sub( shr( L, 1 ), add( i, 1 ) ), 1 ); + idx2 = sub( add( i_mult( ( sub( sub( 3 * L_FRAME16k / 2, shr( i, 1 ) ), 1 ) ), decimate ), decay ), windecay48 ); + idx3 = add( add( shr( L, 1 ), i ), 1 ); + idx4 = sub( sub( sub( i_mult( ( add( 3 * L_FRAME16k / 2 + 1, shr( i, 1 ) ) ), decimate ), decay ), 1 ), windecay48 ); wtda_audio[i + 1] = L_sub_sat( Mpy_32_16_1( -allsig_r[idx1], win_right[idx2] ), Mpy_32_16_1( allsig_r[idx3], win_right[idx4] ) ); + move32(); } - for ( i = L / 2 - n; i < L / 2; i += 2 ) + FOR( i = L / 2 - n; i < L / 2; i += 2 ) { - wtda_audio[i] = -allsig_r[L / 2 - i - 1]; - wtda_audio[i + 1] = -allsig_r[L / 2 - ( i + 1 ) - 1]; + wtda_audio[i] = L_negate( allsig_r[sub( sub( shr( L, 1 ), i ), 1 )] ); + move32(); + wtda_audio[i + 1] = L_negate( allsig_r[sub( sub( shr( L, 1 ), add( i, 1 ) ), 1 )] ); + move32(); } - for ( i = 0; i < n; i += 2 ) + FOR( i = 0; i < n; i += 2 ) { - wtda_audio[i + L / 2] = L_sub_sat( Mpy_32_16_1( allsig_l[i], win_left[( i / 2 ) * decimate + decay] ), new_audio[n - i - 1] ); - wtda_audio[i + L / 2 + 1] = L_sub_sat( Mpy_32_16_1( allsig_l[i + 1], win_int_left[i / 2] ), new_audio[n - ( i + 1 ) - 1] ); + wtda_audio[add( i, shr( L, 1 ) )] = L_sub_sat( Mpy_32_16_1( allsig_l[i], win_left[add( i_mult( shr( i, 1 ), decimate ), decay )] ), new_audio[sub( sub( n, i ), 1 )] ); + move32(); + wtda_audio[add( add( i, shr( L, 1 ) ), 1 )] = L_sub_sat( Mpy_32_16_1( allsig_l[i + 1], win_int_left[i / 2] ), new_audio[sub( sub( n, add( i, 1 ) ), 1 )] ); + move32(); } - for ( i = n; i < L / 2; i += 2 ) + FOR( i = n; i < L / 2; i += 2 ) { idx1 = i; - idx2 = ( i / 2 ) * decimate + decay; - idx3 = L - i - 1; - idx4 = ( L / 2 - i / 2 ) * decimate - 1 - decay; - wtda_audio[i + L / 2] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_left[idx4] ) ); - - idx1 = i + 1; - idx2 = i / 2; - idx3 = L - ( i + 1 ) - 1; - idx4 = L / 2 - i / 2 - 1; - wtda_audio[i + L / 2 + 1] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_int_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_int_left[idx4] ) ); + move16(); + idx2 = add( i_mult( shr( i, 1 ), decimate ), decay ); + idx3 = sub( sub( L, i ), 1 ); + idx4 = sub( sub( i_mult( sub( shr( L, 1 ), shr( i, 1 ) ), decimate ), 1 ), decay ); + wtda_audio[add( i, shr( L, 1 ) )] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_left[idx4] ) ); + move32(); + + idx1 = add( i, 1 ); + idx2 = shr( i, 1 ); + idx3 = sub( sub( L, add( i, 1 ) ), 1 ); + idx4 = sub( sub( shr( L, 1 ), shr( i, 1 ) ), 1 ); + wtda_audio[add( add( i, shr( L, 1 ) ), 1 )] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_int_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_int_left[idx4] ) ); + move32(); } } - else + ELSE { - for ( i = 0; i < L / 2 - n; i++ ) + FOR( i = 0; i < L / 2 - n; i++ ) { - idx1 = L / 2 - i - 1; - idx2 = 3 * L / 2 * decimate - ( i + 1 ) * decimate + decay - windecay48; - idx3 = L / 2 + i; - idx4 = 3 * L / 2 * decimate - 1 + ( i + 1 ) * decimate - decay - windecay48; + idx1 = sub( sub( shr( L, 1 ), i ), 1 ); + idx2 = sub( add( sub( i_mult( i_mult( 3, shr( L, 1 ) ), decimate ), i_mult( add( i, 1 ), decimate ) ), decay ), windecay48 ); + idx3 = add( shr( L, 1 ), i ); + idx4 = sub( sub( add( sub( i_mult( i_mult( 3, shr( L, 1 ) ), decimate ), 1 ), i_mult( add( i, 1 ), decimate ) ), decay ), windecay48 ); wtda_audio[i] = L_sub_sat( Mpy_32_16_1( -allsig_r[idx1], win_right[idx2] ), Mpy_32_16_1( allsig_r[idx3], win_right[idx4] ) ); + move32(); } - for ( i = L / 2 - n; i < L / 2; i++ ) + FOR( i = L / 2 - n; i < L / 2; i++ ) { - wtda_audio[i] = -allsig_r[L / 2 - i - 1]; + wtda_audio[i] = L_negate( allsig_r[sub( sub( shr( L, 1 ), i ), 1 )] ); + move32(); } - for ( i = 0; i < n; i++ ) + FOR( i = 0; i < n; i++ ) { - wtda_audio[i + L / 2] = L_sub_sat( Mpy_32_16_1( allsig_l[i], win_left[i * decimate + decay] ), new_audio[n - i - 1] ); + wtda_audio[add( i, shr( L, 1 ) )] = L_sub_sat( Mpy_32_16_1( allsig_l[i], win_left[add( i_mult( i, decimate ), decay )] ), new_audio[sub( sub( n, i ), 1 )] ); + move32(); } - for ( i = n; i < L / 2; i++ ) + FOR( i = n; i < L / 2; i++ ) { idx1 = i; - idx2 = i * decimate + decay; - idx3 = L - i - 1; - idx4 = L * decimate - i * decimate - 1 - decay; - wtda_audio[i + L / 2] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_left[idx4] ) ); + move16(); + idx2 = add( i_mult( i, decimate ), decay ); + idx3 = sub( sub( L, i ), 1 ); + idx4 = sub( sub( sub( i_mult( L, decimate ), i_mult( i, decimate ) ), 1 ), decay ); + wtda_audio[add( i, shr( L, 1 ) )] = L_sub_sat( Mpy_32_16_1( allsig_l[idx1], win_left[idx2] ), Mpy_32_16_1( allsig_l[idx3], win_left[idx4] ) ); + move32(); } } - if ( old_wtda != NULL ) + IF( old_wtda != NULL ) { Copy32( new_audio, old_wtda, L ); } diff --git a/lib_com/wtda_fx.c b/lib_com/wtda_fx.c index dcc9b8229..07699e408 100644 --- a/lib_com/wtda_fx.c +++ b/lib_com/wtda_fx.c @@ -284,7 +284,7 @@ void wtda_fx( } - IF( L == L_FRAME32k ) + IF( EQ_16( L, L_FRAME32k ) ) { /* decimate = 3 */ @@ -403,8 +403,9 @@ void wtda_fx( } *Q = add( *Q, 15 ); /* output Q */ + move16(); - if ( old_wtda != NULL ) + IF( old_wtda != NULL ) { Copy( new_audio, old_wtda, L ); diff --git a/lib_dec/fd_cng_dec.c b/lib_dec/fd_cng_dec.c index 3eca9decd..d1fb87d05 100644 --- a/lib_dec/fd_cng_dec.c +++ b/lib_dec/fd_cng_dec.c @@ -552,12 +552,7 @@ void configureFdCngDec_ivas_fx( hsCom->startBand = 2; move16(); hsCom->stopBand = add( hsCom->FdCngSetup.sidPartitions[sub( hsCom->FdCngSetup.numPartitions, 1 )], 1 ); -#ifndef IVAS_FLOAT_FIXED - /* remove floating point dependency */ - initPartitions_flt( hsCom->FdCngSetup.sidPartitions, hsCom->FdCngSetup.numPartitions, hsCom->startBand, hsCom->stopBand, hsCom->part, &hsCom->npart, hsCom->midband, hsCom->psize_flt, hsCom->psize_inv_flt, 0 ); -#else // IVAS_FLOAT_FIXED initPartitions( hsCom->FdCngSetup.sidPartitions, hsCom->FdCngSetup.numPartitions, hsCom->startBand, hsCom->stopBand, hsCom->part, &hsCom->npart, hsCom->midband, hsCom->psize, hsCom->psize_norm, &hsCom->psize_norm_exp, hsCom->psize_inv, 0 ); -#endif // IVAS_FLOAT_FIXED IF( EQ_16( hsCom->stopFFTbin, 160 ) ) { @@ -578,9 +573,6 @@ void configureFdCngDec_ivas_fx( FOR( j = 0; j < hsCom->nCLDFBpart; j++ ) { hsCom->CLDFBpart[j] = sub( hsCom->part[add( j, hsCom->nFFTpart )], sub( hsCom->stopFFTbin, hsCom->startBand ) ); -#ifndef IVAS_FLOAT_FIXED - hsCom->CLDFBpsize_inv_flt[j] = hsCom->psize_inv_flt[j + hsCom->nFFTpart]; // TODO remove floating point dependency -#endif move16(); hsCom->CLDFBpsize_inv[j] = hsCom->psize_inv[add( j, hsCom->nFFTpart )]; move16(); @@ -594,12 +586,7 @@ void configureFdCngDec_ivas_fx( move16(); } -#ifdef IVAS_FLOAT_FIXED initPartitions( hsCom->FdCngSetup.shapingPartitions, hsCom->FdCngSetup.numShapingPartitions, hsCom->startBand, hsCom->stopFFTbin, hFdCngDec->part_shaping, &hFdCngDec->npart_shaping, hFdCngDec->midband_shaping, hFdCngDec->psize_shaping, hFdCngDec->psize_shaping_norm, &hFdCngDec->psize_shaping_norm_exp, hFdCngDec->psize_inv_shaping, stopBandFR ); -#else // IVAS_FLOAT_FIXED - /* remove floating point dependency */ - initPartitions_flt( hsCom->FdCngSetup.shapingPartitions, hsCom->FdCngSetup.numShapingPartitions, hsCom->startBand, hsCom->stopFFTbin, hFdCngDec->part_shaping, &hFdCngDec->npart_shaping, hFdCngDec->midband_shaping, hFdCngDec->psize_shaping_float, hFdCngDec->psize_inv_shaping_float, stopBandFR ); -#endif // IVAS_FLOAT_FIXED hFdCngDec->nFFTpart_shaping = hFdCngDec->npart_shaping; move16(); @@ -1338,7 +1325,7 @@ void FdCng_decodeSID_ivas_fx( Decoder_State *st /* i/o: decoder state structure */ ) { - int16_t N; + Word16 N; Word32 *sidNoiseEst; Word32 gain; Word16 i, index; @@ -1370,6 +1357,7 @@ void FdCng_decodeSID_ivas_fx( sidNoiseEst = hFdCngCom->sidNoiseEst; N = hFdCngCom->npart; + move16(); gain = 0; move32(); hFdCngCom->sid_frame_counter = add( hFdCngCom->sid_frame_counter, 1 ); @@ -1993,6 +1981,7 @@ void generate_masking_noise_ivas_fx( Word32 *ptr_r_fx; Word32 *ptr_i_fx; Word16 startBand = hFdCngCom->startBand; + move16(); Word16 *seed = &( hFdCngCom->seed ); Word32 scale_fx = 0x40000000; // 1.0 in Q30 move32(); @@ -2004,6 +1993,7 @@ void generate_masking_noise_ivas_fx( } scale_sig32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN, shift ); hFdCngCom->cngNoiseLevelExp = sub( hFdCngCom->cngNoiseLevelExp, shift ); + move16(); /* skip noise generating if level is very low, to avoid problems with possibly running into denormals */ *exp_out = Q15; @@ -2054,7 +2044,7 @@ void generate_masking_noise_ivas_fx( /* Generate Gaussian random noise in real and imaginary parts of the FFT bins Amplitudes are adjusted to the estimated noise level cngNoiseLevel_flt in each bin */ - IF( EQ_16( startBand, 0 ) ) + IF( startBand == 0 ) { rand_gauss_fx( &fftBuffer_fx[0], seed, *exp_out ); // Q15 ptr_r_fx = fftBuffer_fx + 2; @@ -2111,6 +2101,7 @@ void generate_masking_noise_ivas_fx( SynthesisSTFT_fx( fftBuffer_fx, *exp_out, maskingNoise_fx, hFdCngCom->olapBufferSynth2_fx, hFdCngCom->olapWinSyn_fx, 0, hFdCngCom, element_mode, nchan_out ); } *exp_out = sub( *exp_out, Q9 ); + move16(); /* Add some comfort noise on top of decoded signal */ IF( return_noise ) @@ -2390,10 +2381,10 @@ void generate_stereo_masking_noise_fx( Word16 Q_syn, Decoder_State *st, /* i/o: decoder state structure */ STEREO_TD_DEC_DATA_HANDLE hStereoTD, /* i : TD stereo structure */ - const int16_t flag_sec_CNA, /* i : CNA flag for secondary channel */ - const int16_t fadeOut, /* i : only fade out of previous state */ + const Word16 flag_sec_CNA, /* i : CNA flag for secondary channel */ + const Word16 fadeOut, /* i : only fade out of previous state */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i : Stereo CNG handle */ - const int16_t nchan_out /* i : number of output channels */ + const Word16 nchan_out /* i : number of output channels */ ) { HANDLE_FD_CNG_COM hFdCngCom; @@ -2403,7 +2394,7 @@ void generate_stereo_masking_noise_fx( Word32 N1_fx[L_FRAME16k]; Word32 N2_fx[L_FRAME16k]; Word16 N1_fx_exp, N2_fx_exp; - int16_t i; + Word16 i; IF( st->idchan == 0 ) { @@ -2556,6 +2547,7 @@ void generate_stereo_masking_noise_fx( move16(); } hStereoTD->prevSP_ratio_fx = extract_h( hStereoTD->SP_ratio_LT_fx ); + move16(); } return; @@ -2708,6 +2700,7 @@ void generate_masking_noise_lb_dirac_fx( n_samples_out = i_mult( shr( hFdCngCom->frameSize, 4 ), nCldfbTs ); n_samples_start = 0; Word16 exp_out = Q15; + move16(); /*LB CLDFB - CNA from STFT*/ IF( cna_flag ) { @@ -2799,7 +2792,7 @@ void generate_masking_noise_lb_dirac_fx( /* Perform STFT synthesis */ SynthesisSTFT_dirac_fx( fftBuffer, tdBuffer + n_samples_start, hFdCngCom->olapBufferSynth2_fx, hFdCngCom->olapWinSyn_fx, n_samples_out_loop, hFdCngCom ); } - hFdCngCom->fftBuffer_exp = sub( 31, 15 ); + hFdCngCom->fftBuffer_exp = 31 - 15; n_samples_out = sub( n_samples_out, hFdCngCom->frameSize ); n_samples_start = add( n_samples_start, hFdCngCom->frameSize ); } @@ -3417,7 +3410,7 @@ void FdCngDecodeDiracMDCTStereoSID_fx( exp_diff = sub( sts[0]->hFdCngDec->hFdCngCom->sidNoiseEstExp, sts[1]->hFdCngDec->hFdCngCom->sidNoiseEstExp ); FOR( p = 0; p < N; p++ ) { - IF( GT_16( exp_diff, 0 ) ) + IF( exp_diff > 0 ) { sts[0]->hFdCngDec->hFdCngCom->sidNoiseEst[p] = L_add( L_shr( sts[0]->hFdCngDec->hFdCngCom->sidNoiseEst[p], 1 ), L_shr( sts[1]->hFdCngDec->hFdCngCom->sidNoiseEst[p], add( exp_diff, 1 ) ) ); move32(); @@ -3428,7 +3421,7 @@ void FdCngDecodeDiracMDCTStereoSID_fx( move32(); } } - IF( LT_16( exp_diff, 0 ) ) + IF( exp_diff < 0 ) { sts[0]->hFdCngDec->hFdCngCom->sidNoiseEstExp = add( sts[0]->hFdCngDec->hFdCngCom->sidNoiseEstExp, negate( exp_diff ) ); } diff --git a/lib_dec/ivas_init_dec.c b/lib_dec/ivas_init_dec.c index bb9894dfd..5840e553d 100644 --- a/lib_dec/ivas_init_dec.c +++ b/lib_dec/ivas_init_dec.c @@ -4862,7 +4862,7 @@ void ivas_destroy_dec_fx( } /* SPAR handle */ - ivas_spar_dec_close( &( st_ivas->hSpar ), st_ivas->hDecoderConfig->output_Fs, 0 ); + ivas_spar_dec_close_fx( &( st_ivas->hSpar ), st_ivas->hDecoderConfig->output_Fs, 0 ); /* HOA decoder matrix */ IF( st_ivas->hoa_dec_mtx != NULL ) diff --git a/lib_dec/ivas_sba_dec.c b/lib_dec/ivas_sba_dec.c index 8933057bd..336155665 100644 --- a/lib_dec/ivas_sba_dec.c +++ b/lib_dec/ivas_sba_dec.c @@ -926,7 +926,7 @@ ivas_error ivas_sba_dec_reconfigure_fx( test(); IF( NE_16( nchan_transport_old, ivas_get_sba_num_TCs( ivas_total_brate, sba_order_internal ) ) || ( GE_32( last_ivas_total_brate, IVAS_512k ) && LT_32( ivas_total_brate, IVAS_512k ) ) || ( LT_32( last_ivas_total_brate, IVAS_512k ) && GE_32( ivas_total_brate, IVAS_512k ) ) ) { - ivas_spar_dec_close( &( st_ivas->hSpar ), hDecoderConfig->output_Fs, 1 ); + ivas_spar_dec_close_fx( &( st_ivas->hSpar ), hDecoderConfig->output_Fs, 1 ); if ( NE_32( ( error = ivas_spar_dec_open_fx( st_ivas, 1 ) ), IVAS_ERR_OK ) ) { diff --git a/lib_dec/ivas_spar_decoder.c b/lib_dec/ivas_spar_decoder.c index f18e474e4..e3d43dd5a 100644 --- a/lib_dec/ivas_spar_decoder.c +++ b/lib_dec/ivas_spar_decoder.c @@ -280,137 +280,137 @@ ivas_error ivas_spar_dec_open( #else ivas_error ivas_spar_dec_open_fx( - Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ - const int16_t spar_reconfig_flag /* i : SPAR reconfiguration flag */ + Decoder_Struct *st_ivas, /* i/o: IVAS decoder handle */ + const Word16 spar_reconfig_flag /* i : SPAR reconfiguration flag */ ) { SPAR_DEC_HANDLE hSpar; ivas_error error; - int16_t sba_order_internal, num_channels_internal; + Word16 sba_order_internal, num_channels_internal; IVAS_FB_CFG *fb_cfg; - int16_t i, j, b, active_w_mixing; - int32_t output_Fs; - int16_t num_decor_chs, map_idx; + Word16 i, j, b, active_w_mixing; + Word32 output_Fs; + Word16 num_decor_chs, map_idx; error = IVAS_ERR_OK; + move32(); sba_order_internal = min( st_ivas->sba_analysis_order, IVAS_MAX_SBA_ORDER ); + move16(); num_channels_internal = ivas_sba_get_nchan_metadata_fx( sba_order_internal, st_ivas->hDecoderConfig->ivas_total_brate ); hSpar = st_ivas->hSpar; - if ( !spar_reconfig_flag ) + IF( !spar_reconfig_flag ) { /* SPAR decoder handle */ - if ( ( hSpar = (SPAR_DEC_HANDLE) malloc( sizeof( SPAR_DEC_DATA ) ) ) == NULL ) + IF( ( hSpar = (SPAR_DEC_HANDLE) malloc( sizeof( SPAR_DEC_DATA ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR decoder" ); } } output_Fs = st_ivas->hDecoderConfig->output_Fs; - if ( num_channels_internal > ( SBA_HOA2_ORDER + 1 ) * ( SBA_HOA2_ORDER + 1 ) ) + move32(); + IF( GT_16( num_channels_internal, ( SBA_HOA2_ORDER + 1 ) * ( SBA_HOA2_ORDER + 1 ) ) ) { num_decor_chs = IVAS_HBR_MAX_DECOR_CHS; + move16(); } - else + ELSE { - num_decor_chs = num_channels_internal - 1; + num_decor_chs = sub( num_channels_internal, 1 ); } /* TD decorr. */ - if ( ( st_ivas->ivas_format == SBA_FORMAT ) && ( ( st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_MONO || st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_STEREO ) || ( st_ivas->hDecoderConfig->ivas_total_brate >= IVAS_256k && st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_FOA ) ) ) + test(); + test(); + test(); + test(); + IF( EQ_32( st_ivas->ivas_format, SBA_FORMAT ) && ( ( EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_MONO ) || EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_STEREO ) ) || ( GE_32( st_ivas->hDecoderConfig->ivas_total_brate, IVAS_256k ) && EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) ) ) ) { hSpar->hTdDecorr = NULL; } - else + ELSE { -#ifdef IVAS_FLOAT_FIXED - if ( ( error = ivas_td_decorr_dec_open_fx( &hSpar->hTdDecorr, output_Fs, num_decor_chs + 1, 1 ) ) != IVAS_ERR_OK ) -#else - if ( ( error = ivas_td_decorr_dec_open( &hSpar->hTdDecorr, output_Fs, num_decor_chs + 1, 1 ) ) != IVAS_ERR_OK ) -#endif + IF( NE_32( ( error = ivas_td_decorr_dec_open_fx( &hSpar->hTdDecorr, output_Fs, add( num_decor_chs, 1 ), 1 ) ), IVAS_ERR_OK ) ) { return error; } } /* MD handle */ - if ( ( error = ivas_spar_md_dec_open( &hSpar->hMdDec, st_ivas->hDecoderConfig, num_channels_internal, sba_order_internal, st_ivas->sid_format, st_ivas->last_active_ivas_total_brate ) ) != IVAS_ERR_OK ) + IF( NE_32( ( error = ivas_spar_md_dec_open( &hSpar->hMdDec, st_ivas->hDecoderConfig, num_channels_internal, sba_order_internal, st_ivas->sid_format, st_ivas->last_active_ivas_total_brate ) ), IVAS_ERR_OK ) ) { return error; } hSpar->hMdDec->td_decorr_flag = 1; + move16(); if ( hSpar->hTdDecorr ) { hSpar->hTdDecorr->ducking_flag = ivas_spar_br_table_consts[hSpar->hMdDec->table_idx].td_ducking; + move16(); } /* set FB config. */ active_w_mixing = -1; - if ( ( error = ivas_fb_set_cfg( &fb_cfg, SBA_FORMAT, num_channels_internal, num_channels_internal, active_w_mixing, output_Fs, 0 ) ) != IVAS_ERR_OK ) + move16(); + IF( NE_32( ( error = ivas_fb_set_cfg( &fb_cfg, SBA_FORMAT, num_channels_internal, num_channels_internal, active_w_mixing, output_Fs, 0 ) ), IVAS_ERR_OK ) ) { return error; } fb_cfg->pcm_offset = NS2SA( output_Fs, DELAY_FB_1_NS + IVAS_ENC_DELAY_NS + IVAS_DEC_DELAY_NS ); + move16(); fb_cfg->remix_order = remix_order_set[hSpar->hMdDec->spar_md_cfg.remix_unmix_order]; + move16(); /* FB mixer handle */ -#ifdef IVAS_FLOAT_FIXED - IF( ( error = ivas_FB_mixer_open_fx( &hSpar->hFbMixer, output_Fs, fb_cfg, spar_reconfig_flag ) ) != IVAS_ERR_OK ) - { - return error; - } -#else - if ( ( error = ivas_FB_mixer_open( &hSpar->hFbMixer, output_Fs, fb_cfg, spar_reconfig_flag ) ) != IVAS_ERR_OK ) + IF( NE_32( ( error = ivas_FB_mixer_open_fx( &hSpar->hFbMixer, output_Fs, fb_cfg, spar_reconfig_flag ) ), IVAS_ERR_OK ) ) { return error; } -#endif /* AGC handle */ -#ifdef IVAS_FLOAT_FIXED - if ( ( error = ivas_spar_agc_dec_open_fx( &hSpar->hAgcDec, output_Fs ) ) != IVAS_ERR_OK ) -#else - if ( ( error = ivas_spar_agc_dec_open( &hSpar->hAgcDec, output_Fs ) ) != IVAS_ERR_OK ) -#endif // IVAS_FLOAT_FIXED + IF( NE_32( ( error = ivas_spar_agc_dec_open_fx( &hSpar->hAgcDec, output_Fs ) ), IVAS_ERR_OK ) ) { return error; } /* PCA handle */ hSpar->hPCA = NULL; - if ( st_ivas->hDecoderConfig->ivas_total_brate == PCA_BRATE && sba_order_internal == 1 ) + test(); + IF( EQ_32( st_ivas->hDecoderConfig->ivas_total_brate, PCA_BRATE ) && EQ_16( sba_order_internal, 1 ) ) { - if ( ( hSpar->hPCA = (PCA_DEC_STATE *) malloc( sizeof( PCA_DEC_STATE ) ) ) == NULL ) + IF( ( hSpar->hPCA = (PCA_DEC_STATE *) malloc( sizeof( PCA_DEC_STATE ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for PCA decoder" ); } - // ivas_pca_dec_init( hSpar->hPCA ); -#ifdef IVAS_FLOAT_FIXED ivas_pca_dec_init_fx( hSpar->hPCA ); -#endif } /* mixer_mat intitialization */ - for ( i = 0; i < num_channels_internal; i++ ) + FOR( i = 0; i < num_channels_internal; i++ ) { - for ( j = 0; j < num_channels_internal; j++ ) + FOR( j = 0; j < num_channels_internal; j++ ) { - for ( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) + FOR( b = 0; b < IVAS_MAX_NUM_BANDS; b++ ) { hSpar->hMdDec->mixer_mat_fx[i][j][b] = 0; - for ( int16_t i_ts = 0; i_ts < ( MAX_PARAM_SPATIAL_SUBFRAMES + 1 ); i_ts++ ) + move32(); + FOR( Word16 i_ts = 0; i_ts < ( MAX_PARAM_SPATIAL_SUBFRAMES + 1 ); i_ts++ ) { hSpar->hMdDec->mixer_mat_prev_fx[i_ts][i][j][b] = 0; + move32(); } } } } hSpar->i_subframe = 0; + move16(); hSpar->AGC_flag = 0; + move16(); /*-----------------------------------------------------------------* * Configuration - set SPAR high-level parameters @@ -418,84 +418,108 @@ ivas_error ivas_spar_dec_open_fx( ivas_spar_config( st_ivas->hDecoderConfig->ivas_total_brate, sba_order_internal, &st_ivas->nchan_transport, &st_ivas->nSCE, &st_ivas->nCPE, &hSpar->core_nominal_brate, st_ivas->sid_format ); - switch ( sba_order_internal ) + SWITCH( sba_order_internal ) { case 1: st_ivas->transport_config = IVAS_AUDIO_CONFIG_FOA; - break; + move32(); + BREAK; case 2: st_ivas->transport_config = IVAS_AUDIO_CONFIG_HOA2; - break; + move32(); + BREAK; case 3: st_ivas->transport_config = IVAS_AUDIO_CONFIG_HOA3; - break; + move32(); + BREAK; } ivas_output_init( &( st_ivas->hTransSetup ), st_ivas->transport_config ); - set_s( hSpar->subframe_nbslots, 0, MAX_JBM_SUBFRAMES_5MS ); - set_s( hSpar->subframe_nbslots, JBM_CLDFB_SLOTS_IN_SUBFRAME, DEFAULT_JBM_SUBFRAMES_5MS ); + set16_fx( hSpar->subframe_nbslots, 0, MAX_JBM_SUBFRAMES_5MS ); + set16_fx( hSpar->subframe_nbslots, JBM_CLDFB_SLOTS_IN_SUBFRAME, DEFAULT_JBM_SUBFRAMES_5MS ); hSpar->nb_subframes = DEFAULT_JBM_SUBFRAMES_5MS; + move16(); hSpar->subframes_rendered = 0; + move16(); hSpar->slots_rendered = 0; + move16(); hSpar->num_slots = DEFAULT_JBM_SUBFRAMES_5MS * JBM_CLDFB_SLOTS_IN_SUBFRAME; + move16(); /* init render timeslot mapping */ - set_s( hSpar->render_to_md_map, 0, MAX_JBM_SUBFRAMES_5MS * JBM_CLDFB_SLOTS_IN_SUBFRAME ); - for ( map_idx = 0; map_idx < DEFAULT_JBM_CLDFB_TIMESLOTS; map_idx++ ) + set16_fx( hSpar->render_to_md_map, 0, MAX_JBM_SUBFRAMES_5MS * JBM_CLDFB_SLOTS_IN_SUBFRAME ); + FOR( map_idx = 0; map_idx < DEFAULT_JBM_CLDFB_TIMESLOTS; map_idx++ ) { hSpar->render_to_md_map[map_idx] = map_idx; + move16(); } /* allocate transport channels*/ - if ( st_ivas->hTcBuffer == NULL ) + IF( st_ivas->hTcBuffer == NULL ) { - int16_t nchan_to_allocate; - int16_t nchan_tc; + Word16 nchan_to_allocate; + Word16 nchan_tc; TC_BUFFER_MODE buffer_mode; - int16_t granularity; + Word16 granularity; buffer_mode = TC_BUFFER_MODE_RENDERER; + move32(); nchan_tc = ivas_jbm_dec_get_num_tc_channels( st_ivas ); nchan_to_allocate = num_channels_internal; + move16(); - if ( st_ivas->ivas_format == SBA_ISM_FORMAT && st_ivas->ism_mode == ISM_SBA_MODE_DISC ) + test(); + if ( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) { - nchan_to_allocate += st_ivas->nchan_ism; + nchan_to_allocate = add( nchan_to_allocate, st_ivas->nchan_ism ); } granularity = NS2SA( st_ivas->hDecoderConfig->output_Fs, CLDFB_SLOT_NS ); + move16(); - if ( ( st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_STEREO || st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_MONO ) ) + test(); + test(); + test(); + IF( ( EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_STEREO ) || EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_MONO ) ) ) { - if ( ( st_ivas->ivas_format == SBA_ISM_FORMAT && st_ivas->ism_mode == ISM_SBA_MODE_DISC && st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_STEREO ) ) + test(); + test(); + IF( ( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_STEREO ) ) ) { - nchan_tc = st_ivas->hDecoderConfig->nchan_out + st_ivas->nchan_ism; + nchan_tc = add( st_ivas->hDecoderConfig->nchan_out, st_ivas->nchan_ism ); nchan_to_allocate = nchan_tc; + move16(); } - else + ELSE { buffer_mode = TC_BUFFER_MODE_BUFFER; + move32(); nchan_tc = st_ivas->hDecoderConfig->nchan_out; + move16(); nchan_to_allocate = nchan_tc; + move16(); } } - else if ( st_ivas->renderer_type == RENDERER_BINAURAL_PARAMETRIC || st_ivas->renderer_type == RENDERER_BINAURAL_PARAMETRIC_ROOM || st_ivas->renderer_type == RENDERER_STEREO_PARAMETRIC ) + ELSE IF( EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC ) || EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC_ROOM ) || EQ_32( st_ivas->renderer_type, RENDERER_STEREO_PARAMETRIC ) ) { nchan_to_allocate = 2 * BINAURAL_CHANNELS; + move16(); } - if ( st_ivas->ivas_format == SBA_ISM_FORMAT && ( st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_BINAURAL || st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB ) && st_ivas->ism_mode == ISM_SBA_MODE_DISC ) + test(); + test(); + test(); + IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && ( EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_BINAURAL ) || EQ_32( st_ivas->hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB ) ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) { /* get correct granularity in case of binaural rendering of the discrete objects with the td obj renderer */ - granularity = (int16_t) ( st_ivas->hDecoderConfig->output_Fs / ( FRAMES_PER_SEC * MAX_PARAM_SPATIAL_SUBFRAMES ) ); + Word32 quo, rem; + iDiv_and_mod_32( st_ivas->hDecoderConfig->output_Fs, FRAMES_PER_SEC * MAX_PARAM_SPATIAL_SUBFRAMES, &quo, &rem, 0 ); + granularity = extract_l( quo ); + move16(); } -#ifdef IVAS_FLOAT_FIXED - if ( ( error = ivas_jbm_dec_tc_buffer_open_fx( st_ivas, buffer_mode, nchan_tc, nchan_to_allocate, nchan_to_allocate, granularity ) ) != IVAS_ERR_OK ) -#else - if ( ( error = ivas_jbm_dec_tc_buffer_open( st_ivas, buffer_mode, nchan_tc, nchan_to_allocate, nchan_to_allocate, granularity ) ) != IVAS_ERR_OK ) -#endif + IF( NE_32( ( error = ivas_jbm_dec_tc_buffer_open_fx( st_ivas, buffer_mode, nchan_tc, nchan_to_allocate, nchan_to_allocate, granularity ) ), IVAS_ERR_OK ) ) { return error; } @@ -513,6 +537,7 @@ ivas_error ivas_spar_dec_open_fx( * Deallocate SPAR handle *------------------------------------------------------------------------*/ +#ifndef IVAS_FLOAT_FIXED void ivas_spar_dec_close( SPAR_DEC_HANDLE *hSpar, /* i/o: SPAR decoder handle */ const int32_t output_Fs, /* i : output sampling rate */ @@ -531,18 +556,11 @@ void ivas_spar_dec_close( ivas_td_decorr_dec_close( &( *hSpar )->hTdDecorr ); /* FB mixer handle */ -#ifdef IVAS_FLOAT_FIXED - ivas_FB_mixer_close_fx( &( *hSpar )->hFbMixer, output_Fs, spar_reconfig_flag ); -#else + ivas_FB_mixer_close( &( *hSpar )->hFbMixer, output_Fs, spar_reconfig_flag ); -#endif /* AGC */ -#ifdef IVAS_FLOAT_FIXED - ivas_spar_agc_dec_close_fx( &( *hSpar )->hAgcDec ); -#else ivas_spar_agc_dec_close( &( *hSpar )->hAgcDec ); -#endif // IVAS_FLOAT_FIXED /* PCA */ if ( ( *hSpar )->hPCA != NULL ) @@ -559,6 +577,47 @@ void ivas_spar_dec_close( return; } +#else +void ivas_spar_dec_close_fx( + SPAR_DEC_HANDLE *hSpar, /* i/o: SPAR decoder handle */ + const Word32 output_Fs, /* i : output sampling rate */ + const Word16 spar_reconfig_flag /* i : SPAR reconfiguration flag */ +) +{ + test(); + IF( hSpar == NULL || *hSpar == NULL ) + { + return; + } + + /* MD handle */ + ivas_spar_md_dec_close( &( *hSpar )->hMdDec ); + + /* TD decorrelator handle */ + ivas_td_decorr_dec_close( &( *hSpar )->hTdDecorr ); + + /* FB mixer handle */ + ivas_FB_mixer_close_fx( &( *hSpar )->hFbMixer, output_Fs, spar_reconfig_flag ); + + /* AGC */ + ivas_spar_agc_dec_close_fx( &( *hSpar )->hAgcDec ); + + /* PCA */ + IF( ( *hSpar )->hPCA != NULL ) + { + free( ( *hSpar )->hPCA ); + ( *hSpar )->hPCA = NULL; + } + + IF( !spar_reconfig_flag ) + { + free( ( *hSpar ) ); + ( *hSpar ) = NULL; + } + + return; +} +#endif /*-------------------------------------------------------------------* @@ -579,20 +638,31 @@ ivas_error ivas_spar_dec_fx( Word16 next_bit_pos_orig, last_bit_pos; UWord16 bstr_meta[MAX_BITS_METADATA], *bit_stream_orig; ivas_error error; + Word32 quo, rem; push_wmops( "ivas_spar_decode" ); error = IVAS_ERR_OK; + move32(); hDecoderConfig = st_ivas->hDecoderConfig; - st0 = GT_16( st_ivas->nSCE, 0 ) ? st_ivas->hSCE[0]->hCoreCoder[0] : st_ivas->hCPE[0]->hCoreCoder[0]; + st0 = NULL; + IF( st_ivas->nSCE > 0 ) + { + st0 = st_ivas->hSCE[0]->hCoreCoder[0]; + } + ELSE + { + st0 = st_ivas->hCPE[0]->hCoreCoder[0]; + } + bit_stream_orig = st0->bit_stream; - move16(); next_bit_pos_orig = st0->next_bit_pos; move16(); IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) { - last_bit_pos = sub( extract_l( L_sub( ( hDecoderConfig->ivas_total_brate / FRAMES_PER_SEC ), 1 ) ), nb_bits_read[1] ); + iDiv_and_mod_32( hDecoderConfig->ivas_total_brate, FRAMES_PER_SEC, &quo, &rem, 0 ); + last_bit_pos = sub( extract_l( L_sub( quo, 1 ) ), nb_bits_read[1] ); } ELSE { @@ -610,15 +680,17 @@ ivas_error ivas_spar_dec_fx( IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) { - last_bit_pos = sub( extract_l( L_sub( ( hDecoderConfig->ivas_total_brate / FRAMES_PER_SEC ), 1 ) ), nb_bits_read[1] ); + iDiv_and_mod_32( hDecoderConfig->ivas_total_brate, FRAMES_PER_SEC, &quo, &rem, 0 ); + last_bit_pos = sub( extract_l( L_sub( quo, 1 ) ), nb_bits_read[1] ); } ELSE { - last_bit_pos = extract_l( L_sub( ( hDecoderConfig->ivas_total_brate / FRAMES_PER_SEC ), 1 ) ); + iDiv_and_mod_32( hDecoderConfig->ivas_total_brate, FRAMES_PER_SEC, &quo, &rem, 0 ); + last_bit_pos = extract_l( L_sub( quo, 1 ) ); } test(); - IF( !st0->bfi && EQ_32( hDecoderConfig->ivas_total_brate, IVAS_SID_5k2 ) ) + if ( !st0->bfi && EQ_32( hDecoderConfig->ivas_total_brate, IVAS_SID_5k2 ) ) { last_bit_pos = sub( last_bit_pos, SID_FORMAT_NBITS ); } @@ -629,16 +701,16 @@ ivas_error ivas_spar_dec_fx( /* reverse the bitstream for easier reading of indices */ FOR( i = 0; i < s_min( MAX_BITS_METADATA, last_bit_pos ); i++ ) { - bstr_meta[i] = st_ivas->bit_stream[last_bit_pos - i]; + bstr_meta[i] = st_ivas->bit_stream[sub( last_bit_pos, i )]; move16(); } st0->bit_stream = bstr_meta; - move16(); st0->next_bit_pos = 0; move16(); st0->bits_frame = s_min( MAX_BITS_METADATA, add( last_bit_pos, 1 ) ); + move16(); - IF( !st0->bfi ) + if ( !st0->bfi ) { st0->total_brate = hDecoderConfig->ivas_total_brate; /* to avoid BER detect */ move32(); @@ -648,22 +720,27 @@ ivas_error ivas_spar_dec_fx( * Decode SPAR metadata *---------------------------------------------------------------------*/ - IF( ( error = ivas_spar_dec_MD_fx( st_ivas, st0 ) ) != IVAS_ERR_OK ) + IF( NE_32( ( error = ivas_spar_dec_MD_fx( st_ivas, st0 ) ), IVAS_ERR_OK ) ) { return error; } - *nb_bits_read = st0->next_bit_pos + nb_bits_read_orig; + *nb_bits_read = add( st0->next_bit_pos, nb_bits_read_orig ); + move16(); st0->bit_stream = bit_stream_orig; st0->next_bit_pos = next_bit_pos_orig; + move16(); - IF( !st0->bfi && hDecoderConfig->ivas_total_brate == IVAS_SID_5k2 ) + test(); + IF( !st0->bfi && EQ_32( hDecoderConfig->ivas_total_brate, IVAS_SID_5k2 ) ) { Word16 zero_pad_bits; - *nb_bits_read += SID_FORMAT_NBITS; - zero_pad_bits = (Word16) ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC - *nb_bits_read; + *nb_bits_read = add( *nb_bits_read, SID_FORMAT_NBITS ); + move16(); + zero_pad_bits = sub( ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC, *nb_bits_read ); assert( zero_pad_bits <= 1 ); - *nb_bits_read += zero_pad_bits; + *nb_bits_read = add( *nb_bits_read, zero_pad_bits ); + move16(); } pop_wmops(); @@ -792,13 +869,15 @@ static Word16 ivas_get_spar_table_idx_from_coded_idx( test(); IF( EQ_32( ivas_spar_br_table_consts[i].ivas_total_brate, ivas_total_brate ) && EQ_16( ivas_spar_br_table_consts[i].sba_order, sba_order ) ) { - ind1[j++] = i; + ind1[j] = i; move16(); + j = add( j, 1 ); } } assert( j > 0 ); - *bitlen = ivas_get_bits_to_encode( j - 1 ); + *bitlen = ivas_get_bits_to_encode( sub( j, 1 ) ); + move16(); ind2 = get_next_indice_fx( st0, *bitlen ); @@ -856,6 +935,7 @@ static Word16 ivas_parse_spar_header( Word16 bitlen, bwidth; *table_idx = ivas_get_spar_table_idx_from_coded_idx( ivas_total_brate, sba_order, st0, &bitlen ); + move16(); bwidth = ivas_spar_br_table_consts[( *table_idx )].bwidth; move16(); @@ -938,6 +1018,7 @@ static void matrix_inverse_fx( out[0][0] = L_shl( tmp_32, shift ); move32(); *out_q = add( shift, sub( Q15, tmp_e ) ); + move16(); return; } @@ -960,6 +1041,7 @@ static void matrix_inverse_fx( move32(); *out_q = add( Q27, sub( q_fac, 31 ) ); + move16(); return; } @@ -992,6 +1074,7 @@ static void matrix_inverse_fx( move32(); *out_q = add( Q23, sub( q_fac, 31 ) ); + move16(); return; } @@ -1077,7 +1160,9 @@ void ivas_spar_get_cldfb_gains_fx( Word16 seed, split_band, slot_row, slot_col, slot, tmp_idx; pt_len = cldfbAnaDec0->p_filter_length; + move16(); num_cldfb_bands = cldfbAnaDec0->no_channels; + move16(); stride = NS2SA_fx2( output_Fs_fx, DELAY_CLDFB_NS ); encfb_delay = NS2SA_fx2( output_Fs_fx, IVAS_FB_ENC_DELAY_NS ); @@ -1087,28 +1172,37 @@ void ivas_spar_get_cldfb_gains_fx( cf_end = add( sub( hSpar->hFbMixer->cross_fade_end_offset, encfb_delay ), decfb_delay ); cf_len = sub( cf_end, cf_start ); weights_fx = hSpar->hFbMixer->cldfb_cross_fade_fx; - cf_cldfb_start = shr( (Word16) ceil_fixed( sub( divide1616( sub( cf_start, shr( decfb_delay, 1 ) ), shl( stride, 9 ) ), 32 ), 6 ), 6 ); + cf_cldfb_start = shr( extract_l( ceil_fixed( sub( divide1616( sub( cf_start, shr( decfb_delay, 1 ) ), shl( stride, 9 ) ), 32 ), 6 ) ), 6 ); cf_cldfb_end = shr( divide1616( add( sub( cf_start, shr( decfb_delay, 1 ) ), cf_len ), shl( stride, 9 ) ), 6 ); /*q-factor of stride is 9(as max value is 60)*/ num_cf_slots = add( sub( cf_cldfb_end, cf_cldfb_start ), 1 ); num_samples = add( imult1616( num_cf_slots, stride ), sub( pt_len, stride ) ); seed = RANDOM_INITSEED; + move16(); split_band = SPAR_DIRAC_SPLIT_START_BAND; + move16(); pp_ts_im_fx[0] = ts_im_fx; pp_ts_re_fx[0] = ts_re_fx; set32_fx( tgt_fx, 0, ( 3 - 1 ) * CLDFB_NO_CHANNELS_MAX + 10 * CLDFB_NO_CHANNELS_MAX ); cf_start_s_fx = divide3232( ( sub( cf_start, shr( decfb_delay, 1 ) ) ), output_Fs_fx ); cf_len_s_fx = divide3232( sub( hSpar->hFbMixer->cross_fade_end_offset, hSpar->hFbMixer->cross_fade_start_offset ), output_Fs_fx ); - Word16 Q_cf_start_s = norm_s( cf_start_s_fx ) - 1; + Word16 Q_cf_start_s = sub( norm_s( cf_start_s_fx ), 1 ); Word16 Q_cf_len_s = norm_s( cf_len_s_fx ); - Word16 Q_weights = 15 + Q_cf_start_s - Q_cf_len_s; + Word16 Q_weights = add( 15, sub( Q_cf_start_s, Q_cf_len_s ) ); FOR( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ ) { weights_fx[ts] = divide1616( shl( sub( divide3232( L_mult0( add( shl( ts, 1 ), 1 ), shr( stride, 1 ) ), output_Fs_fx ), cf_start_s_fx ), Q_cf_start_s ), shl( cf_len_s_fx, Q_cf_len_s ) ); + move16(); weights_fx[ts] = s_max( s_min( weights_fx[ts], shl( 1, Q_weights ) ), 0 ); + move16(); } hSpar->hFbMixer->cldfb_cross_fade_start = cf_cldfb_start; + move16(); hSpar->hFbMixer->cldfb_cross_fade_end = cf_cldfb_end; + move16(); + test(); + test(); + test(); IF( GT_16( num_cf_slots, 3 ) || GT_16( pt_len, 10 * CLDFB_NO_CHANNELS_MAX ) || GT_16( stride, CLDFB_NO_CHANNELS_MAX ) || EQ_16( split_band, IVAS_MAX_NUM_BANDS ) ) { return; @@ -1121,34 +1215,44 @@ void ivas_spar_get_cldfb_gains_fx( // floatToFixed_arrL( (float *)hSpar->hFbMixer->pFilterbank_cross_fade, pFilterbank_cross_fade_fx, Q31, cf_len ); FOR( sample = 0; sample < cf_len; sample++ ) { - tgt_fx[tmp_idx++] = L_deposit_h( hSpar->hFbMixer->pFilterbank_cross_fade_fx[sample] ); + tgt_fx[tmp_idx] = L_deposit_h( hSpar->hFbMixer->pFilterbank_cross_fade_fx[sample] ); + move32(); + tmp_idx = add( tmp_idx, 1 ); /* increasing window function */ } FOR( ; tmp_idx < num_samples; tmp_idx++ ) { /* fill up with ones*/ tgt_fx[tmp_idx] = MAX_32; + move32(); } FOR( sample = 0; sample < num_samples; sample++ ) { /* initialize trasnform matrix with zeros*/ T_fx[sample][0] = T_fx[sample][1] = T_fx[sample][2] = 0; + move32(); + move32(); + move32(); } - FOR( sample = 0; sample < pt_len - stride; sample++ ) + FOR( sample = 0; sample < sub( pt_len, stride ); sample++ ) { /* fill internal CLDFB analysis time buffer with data*/ Word16 x_fx = get_random_number_fx( &seed ); cldfbAnaDec0->cldfb_state_fx[sample] = L_shl( x_fx, 12 ); + move32(); } Word16 q_cldfb = 27; + move16(); cldfbAnaDec0->Q_cldfb_state = q_cldfb; + move16(); FOR( slot = 0; slot < num_cf_slots; slot++ ) { FOR( sample = 0; sample < stride; sample++ ) { Word16 x_fx = get_random_number_fx( &seed ); ts_inout_fx[sample] = L_shl( x_fx, 12 ); /*Q-27*/ + move32(); } cldfbAnalysis_ts_fx_fixed_q( ts_inout_fx, ts_re_fx, ts_im_fx, num_cldfb_bands, cldfbAnaDec0, &q_cldfb ); @@ -1157,12 +1261,15 @@ void ivas_spar_get_cldfb_gains_fx( FOR( sample = 0; sample < stride; sample++ ) { - T_fx[slot * stride + sample][slot] = ts_inout_fx[sample]; + T_fx[add( imult1616( slot, stride ), sample )][slot] = ts_inout_fx[sample]; + move32(); } tmp_idx = sub( pt_len, 1 ); FOR( sample = stride; sample < pt_len; sample++ ) { - T_fx[add( imult1616( slot, stride ), sample )][slot] = cldfbSynDec0->cldfb_state_fx[tmp_idx--]; + T_fx[add( imult1616( slot, stride ), sample )][slot] = cldfbSynDec0->cldfb_state_fx[tmp_idx]; + move32(); + tmp_idx = sub( tmp_idx, 1 ); } } @@ -1170,6 +1277,7 @@ void ivas_spar_get_cldfb_gains_fx( FOR( sample = 0; sample < num_samples; sample++ ) { tgt_fx[sample] = L_shl( Mult_32_32( tgt_fx[sample], L_add( T_fx[sample][0], L_add( T_fx[sample][1], T_fx[sample][2] ) ) ), 10 ); /*Q31*/ + move32(); } /* compute matrices */ FOR( slot_row = 0; slot_row < num_cf_slots; slot_row++ ) @@ -1177,37 +1285,48 @@ void ivas_spar_get_cldfb_gains_fx( FOR( slot_col = slot_row; slot_col < num_cf_slots; slot_col++ ) { Tt_T_fx[slot_row][slot_col] = 0; + move32(); FOR( sample = 0; sample < num_samples; sample++ ) { Tt_T_fx[slot_row][slot_col] = L_add( Tt_T_fx[slot_row][slot_col], Mult_32_32( L_shl( T_fx[sample][slot_row], 8 ), L_shl( T_fx[sample][slot_col], 8 ) ) ); /*Q58-Q31*/ + move32(); } } } Tt_T_fx[1][0] = Tt_T_fx[0][1]; + move32(); Tt_T_fx[2][0] = Tt_T_fx[0][2]; + move32(); Tt_T_fx[2][1] = Tt_T_fx[1][2]; + move32(); FOR( slot_row = 0; slot_row < num_cf_slots; slot_row++ ) { Tt_tgt_fx[slot_row] = 0; + move32(); FOR( sample = 0; sample < num_samples; sample++ ) { Tt_tgt_fx[slot_row] = L_add( Tt_tgt_fx[slot_row], Mult_32_32( T_fx[sample][slot_row], tgt_fx[sample] ) ); + move32(); } } Word16 output_q = 27; + move16(); matrix_inverse_fx( Tt_T_fx, Tt_T_inv_fx, num_cf_slots, &output_q ); /* compute the optimal coefficients */ FOR( slot_row = 0; slot_row < num_cf_slots; slot_row++ ) { Word32 tmp = 0; + move32(); FOR( slot_col = 0; slot_col < num_cf_slots; slot_col++ ) { tmp = L_add( tmp, Mult_32_32( Tt_T_inv_fx[slot_row][slot_col], Tt_tgt_fx[slot_col] ) ); } - weights_fx[add( cf_cldfb_start, slot_row )] = extract_l( L_shr( L_max( L_min( tmp, L_shl( 1, ( output_q - 10 ) ) ), 0 ), output_q - 10 - Q_weights ) ); /*Q_weights*/ + weights_fx[add( cf_cldfb_start, slot_row )] = extract_l( L_shr( L_max( L_min( tmp, L_shl( 1, sub( output_q, 10 ) ) ), 0 ), sub( sub( output_q, 10 ), Q_weights ) ) ); /*Q_weights*/ + move16(); } hSpar->hFbMixer->cldfb_cross_fade_q = Q_weights; + move16(); cldfb_reset_memory_fx( cldfbSynDec0 ); cldfb_reset_memory_fx( cldfbAnaDec0 ); @@ -1465,7 +1584,7 @@ static ivas_error ivas_spar_dec_MD_fx( move16(); test(); - IF( GT_32( ivas_total_brate, FRAME_NO_DATA ) && !bfi ) + IF( ( ivas_total_brate > FRAME_NO_DATA ) && !bfi ) { IF( GT_32( ivas_total_brate, IVAS_SID_5k2 ) ) { @@ -1479,19 +1598,20 @@ static ivas_error ivas_spar_dec_MD_fx( ELSE { hSpar->hMdDec->spar_md.num_bands = s_min( SPAR_DIRAC_SPLIT_START_BAND, IVAS_MAX_NUM_BANDS ); + move16(); } IF( NE_16( hSpar->hMdDec->table_idx, table_idx ) ) { hSpar->hMdDec->table_idx = table_idx; move16(); - IF( hSpar->hTdDecorr ) + if ( hSpar->hTdDecorr ) { hSpar->hTdDecorr->ducking_flag = ivas_spar_br_table_consts[table_idx].td_ducking; move16(); } - IF( ( error = ivas_spar_md_dec_init( hSpar->hMdDec, hDecoderConfig, num_channels, sba_order ) ) != IVAS_ERR_OK ) + IF( NE_32( ( error = ivas_spar_md_dec_init( hSpar->hMdDec, hDecoderConfig, num_channels, sba_order ) ), IVAS_ERR_OK ) ) { return error; } @@ -1524,6 +1644,7 @@ static ivas_error ivas_spar_dec_MD_fx( IF( EQ_16( hSpar->hMdDec->spar_md_cfg.nchan_transport, 1 ) ) { hSpar->AGC_flag = get_next_indice_fx( st0, 1 ); + move16(); } ivas_agc_read_bits_fx( hSpar->hAgcDec, st0, hSpar->hMdDec->spar_md_cfg.nchan_transport, hSpar->AGC_flag ); @@ -1536,7 +1657,7 @@ static ivas_error ivas_spar_dec_MD_fx( test(); test(); test(); - IF( EQ_16( st0->m_old_frame_type, ZERO_FRAME ) && EQ_32( ivas_total_brate, IVAS_SID_5k2 ) && EQ_16( st0->prev_bfi, 0 ) && EQ_16( hSpar->hMdDec->spar_md_cfg.nchan_transport, 1 ) ) + IF( EQ_16( st0->m_old_frame_type, ZERO_FRAME ) && EQ_32( ivas_total_brate, IVAS_SID_5k2 ) && ( st0->prev_bfi == 0 ) && EQ_16( hSpar->hMdDec->spar_md_cfg.nchan_transport, 1 ) ) { ivas_spar_setup_md_smoothing_fx( hSpar->hMdDec, num_bands_out, num_md_sub_frames ); } @@ -1552,7 +1673,7 @@ static ivas_error ivas_spar_dec_MD_fx( ivas_spar_smooth_md_dtx_fx( hSpar->hMdDec, num_bands_out, num_md_sub_frames ); } - set_s( hSpar->hMdDec->valid_bands, 0, IVAS_MAX_NUM_BANDS ); + set16_fx( hSpar->hMdDec->valid_bands, 0, IVAS_MAX_NUM_BANDS ); } pop_wmops(); @@ -1682,83 +1803,104 @@ static ivas_error ivas_spar_dec_MD( static Word16 ivas_spar_get_cldfb_slot_gain_fx( SPAR_DEC_HANDLE hSpar, /* i/o: SPAR decoder handle */ const DECODER_CONFIG_HANDLE hDecoderConfig, /* i : configuration structure */ - const int16_t time_slot_idx, - int16_t *time_slot_idx0, - int16_t *time_slot_idx1, + const Word16 time_slot_idx, + Word16 *time_slot_idx0, + Word16 *time_slot_idx1, Word16 *weight_lowfreq_fx ) { Word16 weight_fx; Word32 encfb_delay_fx, decfb_delay_fx; Word32 xfade_start_ns_fx; - int16_t xfade_delay_subframes; - int16_t i_hist; - int16_t split_band; + Word16 xfade_delay_subframes; + Word16 i_hist; + Word16 split_band; *weight_lowfreq_fx = hSpar->hFbMixer->cldfb_cross_fade_fx[time_slot_idx]; + move16(); encfb_delay_fx = IVAS_FB_ENC_DELAY_NS; + move32(); decfb_delay_fx = IVAS_FB_DEC_DELAY_NS; + move32(); Word32 one_by_outfs = 0; // q15 - switch ( hDecoderConfig->output_Fs ) + move32(); + SWITCH( hDecoderConfig->output_Fs ) { case 16000: one_by_outfs = 2048000000; - break; + move32(); + BREAK; case 32000: one_by_outfs = 1024000000; - break; + move32(); + BREAK; case 48000: one_by_outfs = 682666688; - break; + move32(); + BREAK; default: assert( 0 ); } - Word64 fade_start = (Word64) one_by_outfs * hSpar->hFbMixer->cross_fade_start_offset; + Word64 fade_start = W_mult0_32_32( one_by_outfs, hSpar->hFbMixer->cross_fade_start_offset ); + move64(); fade_start = W_shr( fade_start, 15 ); - xfade_start_ns_fx = ( (Word32) fade_start - encfb_delay_fx + L_shr( decfb_delay_fx, 1 ) ); - xfade_delay_subframes = (int16_t) ( xfade_start_ns_fx / ( FRAME_SIZE_NS / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + xfade_start_ns_fx = L_add( L_sub( W_extract_l( fade_start ), encfb_delay_fx ), L_shr( decfb_delay_fx, 1 ) ); + xfade_delay_subframes = extract_l( Mpy_32_32( xfade_start_ns_fx, 429 /* 1 / ( FRAME_SIZE_NS / MAX_PARAM_SPATIAL_SUBFRAMES ) in Q31 -> 429 */ ) ); - i_hist = 4 - xfade_delay_subframes; + i_hist = sub( 4, xfade_delay_subframes ); split_band = SPAR_DIRAC_SPLIT_START_BAND; + move16(); - if ( split_band < IVAS_MAX_NUM_BANDS ) + IF( LT_16( split_band, IVAS_MAX_NUM_BANDS ) ) { - if ( hSpar->i_subframe > 3 ) + IF( GT_16( hSpar->i_subframe, 3 ) ) { Word16 mod_res = time_slot_idx % MAX_PARAM_SPATIAL_SUBFRAMES; - switch ( mod_res ) + move16(); + SWITCH( mod_res ) { case 0: weight_fx = 0; - break; + move16(); + BREAK; case 1: weight_fx = 8191; - break; + move16(); + BREAK; case 2: weight_fx = 16383; - break; + move16(); + BREAK; case 3: weight_fx = 24575; - break; + move16(); + BREAK; default: weight_fx = 0; - break; + move16(); + BREAK; } } - else + ELSE { weight_fx = 0; + move16(); } *time_slot_idx0 = i_hist; - *time_slot_idx1 = i_hist + 1; + move16(); + *time_slot_idx1 = add( i_hist, 1 ); + move16(); } - else + ELSE { /* determine cross-fade gain for current frame Parameters*/ *time_slot_idx0 = hSpar->hFbMixer->cldfb_cross_fade_start; + move16(); *time_slot_idx1 = hSpar->hFbMixer->cldfb_cross_fade_end; + move16(); weight_fx = *weight_lowfreq_fx; + move16(); } return weight_fx; @@ -1826,50 +1968,56 @@ static float ivas_spar_get_cldfb_slot_gain( void ivas_spar_get_parameters_fx( SPAR_DEC_HANDLE hSpar, /* i/o: SPAR decoder handle */ const DECODER_CONFIG_HANDLE hDecoderConfig, /* i : configuration structure */ - const int16_t ts, - const int16_t num_ch_out, - const int16_t num_ch_in, - const int16_t num_spar_bands, + const Word16 ts, + const Word16 num_ch_out, + const Word16 num_ch_in, + const Word16 num_spar_bands, Word32 par_mat_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS] ) { - int16_t spar_band, out_ch, in_ch; + Word16 spar_band, out_ch, in_ch; Word16 weight_fx, weight_20ms_fx; - int16_t ts0, ts1, split_band; + Word16 ts0, ts1, split_band; // weight = ivas_spar_get_cldfb_slot_gain(hSpar, hDecoderConfig, ts, &ts0, &ts1, &weight_20ms); weight_fx = ivas_spar_get_cldfb_slot_gain_fx( hSpar, hDecoderConfig, ts, &ts0, &ts1, &weight_20ms_fx ); split_band = SPAR_DIRAC_SPLIT_START_BAND; - for ( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) + move16(); + FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { - for ( out_ch = 0; out_ch < num_ch_out; out_ch++ ) + FOR( out_ch = 0; out_ch < num_ch_out; out_ch++ ) { - if ( split_band < IVAS_MAX_NUM_BANDS - /* 20ms cross-fade for Transport channels in all frequency bands */ - && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) ) /* sub-frame processing for missing channels in all frequency bands*/ + test(); + IF( LT_16( split_band, IVAS_MAX_NUM_BANDS ) + /* 20ms cross-fade for Transport channels in all frequency bands */ + && ( 0 == ivas_is_res_channel( out_ch, hSpar->hMdDec->spar_md_cfg.nchan_transport ) ) /* sub-frame processing for missing channels in all frequency bands*/ ) { - for ( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) { - if ( hSpar->i_subframe > 3 ) + IF( GT_16( hSpar->i_subframe, 3 ) ) { par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts0][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_fx ) ), Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[ts1][out_ch][in_ch][spar_band], weight_fx ) ); + move32(); } - else + ELSE { par_mat_fx[out_ch][in_ch][spar_band] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band]; + move32(); } } } - else + ELSE { - for ( in_ch = 0; in_ch < num_ch_in; in_ch++ ) + FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) { /* 20ms Transport channel reconstruction with matching encoder/decoder processing */ - int16_t prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */ + Word16 prev_idx = SPAR_DIRAC_SPLIT_START_BAND < IVAS_MAX_NUM_BANDS ? 1 : 0; /* if SPAR_DIRAC_SPLIT_START_BAND == IVAS_MAX_NUM_BANDS, then the sub-frame mixer_mat delay line is not active */ + move16(); par_mat_fx[out_ch][in_ch][spar_band] = L_add_sat( Mpy_32_16_1( hSpar->hMdDec->mixer_mat_prev_fx[prev_idx][out_ch][in_ch][spar_band], sub( MAX_WORD16, weight_20ms_fx ) ), Mpy_32_16_1( hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band], weight_20ms_fx ) ); + move32(); } } } @@ -2018,7 +2166,9 @@ static void ivas_spar_get_skip_mat_fx( FOR( in_ch = 0; in_ch < num_ch_in; in_ch++ ) { skip_mat[out_ch][in_ch] = 1; + move16(); skip_flag = 1; + move16(); FOR( i_ts = 0; i_ts < MAX_PARAM_SPATIAL_SUBFRAMES; i_ts++ ) { FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) @@ -2026,6 +2176,7 @@ static void ivas_spar_get_skip_mat_fx( IF( hSpar->hMdDec->mixer_mat_prev_fx[1 + i_ts][out_ch][in_ch][spar_band] != 0 ) { skip_flag = 0; + move16(); BREAK; } } @@ -2033,19 +2184,21 @@ static void ivas_spar_get_skip_mat_fx( IF( skip_flag == 0 ) { skip_mat[out_ch][in_ch] = 0; + move16(); BREAK; } } - IF( skip_mat[out_ch][in_ch] == 1 ) + IF( EQ_16( skip_mat[out_ch][in_ch], 1 ) ) { FOR( i_ts = 0; i_ts < num_md_sub_frames; i_ts++ ) { FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { - IF( hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][spar_band + i_ts * IVAS_MAX_NUM_BANDS] != 0 ) + IF( hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][add( spar_band, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] != 0 ) { skip_flag = 0; + move16(); BREAK; } } @@ -2053,6 +2206,7 @@ static void ivas_spar_get_skip_mat_fx( IF( skip_flag == 0 ) { skip_mat[out_ch][in_ch] = 0; + move16(); BREAK; } } @@ -2083,89 +2237,118 @@ static void ivas_spar_calc_smooth_facs_fx( Word32 L_temp; Word16 exp_tmp, q_tmp; bin = 0; + move16(); FOR( b = 0; b < nbands_spar; b++ ) { - IF( GE_16( bin, CLDFB_NO_CHANNELS_MAX ) || ( GT_16( b, 0 ) && LT_16( bin2band->p_cldfb_map_to_spar_band[bin], bin2band->p_cldfb_map_to_spar_band[bin - 1] ) ) ) + test(); + test(); + IF( GE_16( bin, CLDFB_NO_CHANNELS_MAX ) || ( ( b > 0 ) && LT_16( bin2band->p_cldfb_map_to_spar_band[bin], bin2band->p_cldfb_map_to_spar_band[bin - 1] ) ) ) { BREAK; } /* calculate band-wise subframe energies */ subframe_band_nrg_fx[b] = 0; + move32(); WHILE( LT_16( bin, CLDFB_NO_CHANNELS_MAX ) && EQ_16( b, bin2band->p_cldfb_map_to_spar_band[bin] ) ) { + test(); FOR( ts = 0; ts < nSlots; ts++ ) { L_temp = L_add( L_shr( Mpy_32_32( cldfb_in_ts_re_fx[ts][bin], cldfb_in_ts_re_fx[ts][bin] ), 4 ), L_shr( Mpy_32_32( cldfb_in_ts_im_fx[ts][bin], cldfb_in_ts_im_fx[ts][bin] ), 4 ) ); // 2*q_cldfb - 35 subframe_band_nrg_fx[b] = L_add_sat( subframe_band_nrg_fx[b], L_temp ); // 2*q_cldfb - 35 (saturation reached in 1 orig pytest) + move32(); } - bin++; + bin = add( bin, 1 ); } - exp_tmp = sub( 66, i_mult( 2, q_cldfb ) ); + exp_tmp = sub( 66, shl( q_cldfb, 1 ) ); subframe_band_nrg_fx[b] = Sqrt32( subframe_band_nrg_fx[b], &exp_tmp ); - q_tmp = 31 - exp_tmp; + move32(); + q_tmp = sub( 31, exp_tmp ); + test(); IF( isFirstSubframe && LT_16( nSlots, MAX_PARAM_SPATIAL_SUBFRAMES ) ) { /* fill up to full 5ms subframe */ smooth_buf_fx[b][0] = L_add( smooth_buf_fx[b][0], L_shr( subframe_band_nrg_fx[b], q_tmp ) ); // Q0 + move32(); } ELSE { smooth_buf_fx[b][0] = L_shr( subframe_band_nrg_fx[b], q_tmp ); // Q0 + move32(); } /* calculate short and long energy averages */ smooth_short_avg_fx[b] = 0; + move32(); FOR( i = 0; i < 2 * SBA_DIRAC_NRG_SMOOTH_SHORT; i++ ) { smooth_short_avg_fx[b] = L_add( smooth_short_avg_fx[b], smooth_buf_fx[b][i] ); // Q0 + move32(); } smooth_long_avg_fx[b] = smooth_short_avg_fx[b]; // Q0 + move32(); FOR( i = 2 * SBA_DIRAC_NRG_SMOOTH_SHORT; i < 2 * SBA_DIRAC_NRG_SMOOTH_LONG; i++ ) { smooth_long_avg_fx[b] = L_add( smooth_long_avg_fx[b], smooth_buf_fx[b][i] ); // Q0 + move32(); } smooth_short_avg_fx[b] = Mpy_32_16_1( smooth_short_avg_fx[b], 5461 /*(1/6 in Q15)*/ ); // Q0 - smooth_long_avg_fx[b] = Mpy_32_16_1( smooth_long_avg_fx[b], 1639 /*(1/20 in Q15)*/ ); // Q0 + move32(); + smooth_long_avg_fx[b] = Mpy_32_16_1( smooth_long_avg_fx[b], 1639 /*(1/20 in Q15)*/ ); // Q0 + move32(); /* calculate smoothing factor based on energy averages */ /* reduce factor for higher short-term energy */ IF( smooth_long_avg_fx[b] <= 0 ) { smooth_fac_fx[b] = 0; + move16(); } - ELSE IF( smooth_long_avg_fx[b] >= smooth_short_avg_fx[b] ) + ELSE IF( GE_32( smooth_long_avg_fx[b], smooth_short_avg_fx[b] ) ) { smooth_fac_fx[b] = MAX_16; // 1.0f in Q15 + move16(); } ELSE { smooth_fac_fx[b] = divide3232( smooth_long_avg_fx[b], smooth_short_avg_fx[b] ); // Q15 + move16(); } /* map factor to range [0;1] */ smooth_fac_fx[b] = shl( mult_r( s_max( 0, sub( smooth_fac_fx[b], 9830 ) ), 23405 /*Q14*/ ), 1 ); // Q15 + move16(); /* compress factor (higher compression in lowest bands) */ IF( LT_16( b, 2 ) ) { exp_tmp = 0; + move16(); smooth_fac_fx[b] = Sqrt16( smooth_fac_fx[b], &exp_tmp ); + move16(); smooth_fac_fx[b] = Sqrt16( smooth_fac_fx[b], &exp_tmp ); + move16(); smooth_fac_fx[b] = shl( smooth_fac_fx[b], exp_tmp ); // Q15 + move16(); } ELSE { exp_tmp = 0; + move16(); smooth_fac_fx[b] = Sqrt16( smooth_fac_fx[b], &exp_tmp ); + move16(); smooth_fac_fx[b] = shl( smooth_fac_fx[b], exp_tmp ); // Q15 + move16(); } /* apply upper bounds depending on band */ smooth_fac_fx[b] = s_max( min_smooth_gains1_fx[b], s_min( max_smooth_gains2_fx[b], smooth_fac_fx[b] ) ); + move16(); } /* only update if we collected a full 5ms worth of energies for the buffer */ + test(); IF( isFirstSubframe || EQ_16( nSlots, MAX_PARAM_SPATIAL_SUBFRAMES ) ) { FOR( b = 0; b < nbands_spar; b++ ) @@ -2173,6 +2356,7 @@ static void ivas_spar_calc_smooth_facs_fx( FOR( i = 2 * SBA_DIRAC_NRG_SMOOTH_LONG; i > 0; i-- ) { smooth_buf_fx[b][i] = smooth_buf_fx[b][i - 1]; + move32(); } } } @@ -2372,7 +2556,6 @@ void ivas_spar_dec_agc_pca_fx( IF( hSpar->hMdDec->td_decorr_flag ) { num_in_ingest = ivas_sba_get_nchan_metadata_fx( st_ivas->sba_analysis_order, st_ivas->hDecoderConfig->ivas_total_brate ); - move16(); } ELSE { @@ -2454,7 +2637,8 @@ void ivas_spar_dec_set_render_map_fx( /* copy also to tc buffer */ /* only for non-combined formats and combinded formats w/o discrete objects */ - IF( !( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) ) + test(); + IF( !( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) ) { st_ivas->hTcBuffer->nb_subframes = hSpar->nb_subframes; move16(); @@ -2504,7 +2688,9 @@ void ivas_spar_dec_set_render_params_fx( hSpar = st_ivas->hSpar; nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; + move16(); num_bands_out = hSpar->hFbMixer->pFb->filterbank_num_bands; + move16(); ivas_spar_dec_gen_umx_mat_fx( hSpar->hMdDec, nchan_transport, num_bands_out, st_ivas->bfi, ivas_get_spar_dec_md_num_subframes( st_ivas->sba_order, st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->last_active_ivas_total_brate ) ); ivas_spar_dec_set_render_map_fx( st_ivas, n_cldfb_slots ); @@ -2610,7 +2796,7 @@ void ivas_spar_dec_digest_tc_fx( hSpar = st_ivas->hSpar; test(); test(); - IF( hSpar->hMdDec->td_decorr_flag && !( EQ_16( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC ) || EQ_16( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC_ROOM ) ) ) + IF( hSpar->hMdDec->td_decorr_flag && !( EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC ) || EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_PARAMETRIC_ROOM ) ) ) { Word16 nchan_internal, ch; Word16 ch_sba_idx; @@ -2619,12 +2805,13 @@ void ivas_spar_dec_digest_tc_fx( Word32 *p_tc[MAX_SPAR_INTERNAL_CHANNELS]; Word32 Pcm_tmp[MAX_SPAR_INTERNAL_CHANNELS][L_FRAME48k]; Word16 q_format = Q11; + move16(); ch_sba_idx = 0; move16(); - IF( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) + IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) ) { - IF( EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) + if ( EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) { ch_sba_idx = st_ivas->nchan_ism; move16(); @@ -2632,15 +2819,17 @@ void ivas_spar_dec_digest_tc_fx( } /* TD decorrelator */ - default_frame = (Word16) ( st_ivas->hDecoderConfig->output_Fs / FRAMES_PER_SEC ); + Word32 quo, rem; + iDiv_and_mod_32( st_ivas->hDecoderConfig->output_Fs, FRAMES_PER_SEC, &quo, &rem, 0 ); + default_frame = extract_l( quo ); nSamplesLeftForTD = nSamplesForRendering; + move16(); nchan_internal = ivas_sba_get_nchan_metadata_fx( st_ivas->sba_analysis_order, st_ivas->hDecoderConfig->ivas_total_brate ); FOR( ch = 0; ch < nchan_internal; ch++ ) { pPcm_tmp[ch] = Pcm_tmp[ch]; - p_tc[ch] = st_ivas->hTcBuffer->tc_fx[ch + ch_sba_idx]; - move32(); + p_tc[ch] = st_ivas->hTcBuffer->tc_fx[add( ch, ch_sba_idx )]; } WHILE( nSamplesLeftForTD ) @@ -2652,22 +2841,23 @@ void ivas_spar_dec_digest_tc_fx( ivas_td_decorr_process_fx( hSpar->hTdDecorr, p_tc, pPcm_tmp, nSamplesToDecorr, &q_format ); st_ivas->hTcBuffer->q_tc_fx = s_min( st_ivas->hTcBuffer->q_tc_fx, q_format ); + move16(); IF( GE_16( hSpar->hTdDecorr->num_apd_outputs, sub( nchan_internal, nchan_transport ) ) ) { FOR( ch = 0; ch < sub( nchan_internal, nchan_transport ); ch++ ) { - Copy32( pPcm_tmp[hSpar->hTdDecorr->num_apd_outputs - 1 - ch], p_tc[nchan_internal - 1 - ch], nSamplesToDecorr ); + Copy32( pPcm_tmp[sub( sub( hSpar->hTdDecorr->num_apd_outputs, 1 ), ch )], p_tc[sub( sub( nchan_internal, 1 ), ch )], nSamplesToDecorr ); } } ELSE { FOR( ch = 0; ch < sub( nchan_internal, nchan_transport ); ch++ ) { - set32_fx( p_tc[nchan_internal - 1 - ch], 0, nSamplesToDecorr ); + set32_fx( p_tc[sub( sub( nchan_internal, 1 ), ch )], 0, nSamplesToDecorr ); } FOR( ch = 0; ch < hSpar->hTdDecorr->num_apd_outputs; ch++ ) { - Copy32( pPcm_tmp[hSpar->hTdDecorr->num_apd_outputs - 1 - ch], p_tc[nchan_internal - 1 - ch], nSamplesToDecorr ); + Copy32( pPcm_tmp[sub( sub( hSpar->hTdDecorr->num_apd_outputs, 1 ), ch )], p_tc[sub( sub( nchan_internal, 1 ), ch )], nSamplesToDecorr ); } } } @@ -2828,7 +3018,7 @@ void ivas_spar_dec_upmixer_sf_fx( Word16 slot_size, slot_idx_start; Word16 md_idx; Word32 *p_tc_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS]; - Word32 Pcm_tmp_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS][L_FRAME48k] = { 0 }; + Word32 Pcm_tmp_fx[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS][L_FRAME48k]; Word16 numch_out_dirac; Word32 mixer_mat_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH][IVAS_MAX_NUM_BANDS]; Word16 b_skip_mat[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; @@ -2836,34 +3026,45 @@ void ivas_spar_dec_upmixer_sf_fx( SPAR_DEC_HANDLE hSpar; Word16 num_md_sub_frames; Word16 q1 = 30; + move16(); push_wmops( "ivas_spar_dec_upmixer_sf" ); hSpar = st_ivas->hSpar; hDecoderConfig = st_ivas->hDecoderConfig; nchan_transport = hSpar->hMdDec->spar_md_cfg.nchan_transport; + move16(); num_cldfb_bands = hSpar->hFbMixer->pFb->fb_bin_to_band.num_cldfb_bands; + move16(); numch_in = hSpar->hFbMixer->fb_cfg->num_in_chans; + move16(); numch_out = hSpar->hFbMixer->fb_cfg->num_out_chans; + move16(); num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( st_ivas->sba_order, hDecoderConfig->ivas_total_brate, st_ivas->last_active_ivas_total_brate ); slot_size = NS2SA( st_ivas->hDecoderConfig->output_Fs, CLDFB_SLOT_NS ); + move16(); slot_idx_start = hSpar->slots_rendered; + move16(); - IF( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) + test(); + IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) ) { Word16 nchan_ism; nchan_ism = st_ivas->nchan_ism; + move16(); FOR( i = 0; i < nchan_internal; i++ ) { - p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i + nchan_ism] + slot_idx_start * slot_size; + p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[add( i, nchan_ism )] + i_mult( slot_idx_start, slot_size ); } - IF( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_16( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) + test(); + test(); + IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) { FOR( i = 0; i < nchan_ism; i++ ) { - p_tc_fx[i + nchan_internal] = st_ivas->hTcBuffer->tc_fx[i] + slot_idx_start * slot_size; + p_tc_fx[add( i, nchan_internal )] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); } } } @@ -2871,7 +3072,7 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( i = 0; i < nchan_internal; i++ ) { - p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + slot_idx_start * slot_size; + p_tc_fx[i] = st_ivas->hTcBuffer->tc_fx[i] + i_mult( slot_idx_start, slot_size ); } } @@ -2883,10 +3084,12 @@ void ivas_spar_dec_upmixer_sf_fx( IF( hSpar->hMdDec->td_decorr_flag ) { num_in_ingest = nchan_internal; + move16(); } ELSE { num_in_ingest = nchan_transport; + move16(); } /*---------------------------------------------------------------------* @@ -2894,6 +3097,7 @@ void ivas_spar_dec_upmixer_sf_fx( *---------------------------------------------------------------------*/ hSpar->hFbMixer->fb_cfg->num_in_chans = num_in_ingest; + move16(); /*---------------------------------------------------------------------* @@ -2901,15 +3105,15 @@ void ivas_spar_dec_upmixer_sf_fx( *---------------------------------------------------------------------*/ /* set-up pointers */ - IF( NE_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) ) + IF( NE_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) ) { /* at this point, output channels are used as intermediate procesing buffers */ FOR( in_ch = 0; in_ch < MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS; in_ch++ ) { FOR( ts = 0; ts < MAX_PARAM_SPATIAL_SUBFRAMES; ts++ ) { - cldfb_in_ts_re_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][ts * num_cldfb_bands]; - cldfb_in_ts_im_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][ts * num_cldfb_bands + 4 * num_cldfb_bands]; + cldfb_in_ts_re_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][i_mult( ts, num_cldfb_bands )]; + cldfb_in_ts_im_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][add( i_mult( ts, num_cldfb_bands ), ( 4 * num_cldfb_bands ) )]; } } } @@ -2919,8 +3123,8 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( ts = 0; ts < MAX_PARAM_SPATIAL_SUBFRAMES; ts++ ) { - cldfb_in_ts_re_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][ts * num_cldfb_bands]; - cldfb_in_ts_im_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][ts * num_cldfb_bands + 4 * num_cldfb_bands]; + cldfb_in_ts_re_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][i_mult( ts, num_cldfb_bands )]; + cldfb_in_ts_im_fx[in_ch][ts] = &Pcm_tmp_fx[in_ch][add( i_mult( ts, num_cldfb_bands ), ( 4 * num_cldfb_bands ) )]; } } } @@ -2930,12 +3134,14 @@ void ivas_spar_dec_upmixer_sf_fx( *---------------------------------------------------------------------*/ num_spar_bands = hSpar->hFbMixer->pFb->filterbank_num_bands; + move16(); /* apply parameters */ /* determine if we can skip certain data */ ivas_spar_get_skip_mat_fx( hSpar, numch_out, numch_in, num_spar_bands, b_skip_mat, num_md_sub_frames ); /* this can be precomputed based on bitrate and format*/ numch_out_dirac = hDecoderConfig->nchan_out; + move16(); /* CLDFB analysis of incoming frame */ @@ -2944,35 +3150,45 @@ void ivas_spar_dec_upmixer_sf_fx( FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { Word16 q_cldfb = 11; - cldfbAnalysis_ts_fx_fixed_q( &p_tc_fx[in_ch][ts * num_cldfb_bands], cldfb_in_ts_re_fx[in_ch][ts], cldfb_in_ts_im_fx[in_ch][ts], num_cldfb_bands, st_ivas->cldfbAnaDec[in_ch], &q_cldfb ); + move16(); + cldfbAnalysis_ts_fx_fixed_q( &p_tc_fx[in_ch][i_mult( ts, num_cldfb_bands )], cldfb_in_ts_re_fx[in_ch][ts], cldfb_in_ts_im_fx[in_ch][ts], num_cldfb_bands, st_ivas->cldfbAnaDec[in_ch], &q_cldfb ); } } - IF( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_16( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) + test(); + test(); + IF( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) { - FOR( ; in_ch < st_ivas->nchan_ism + numch_in; in_ch++ ) + FOR( ; in_ch < add( st_ivas->nchan_ism, numch_in ); in_ch++ ) { FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { Word16 q_cldfb = 11; - cldfbAnalysis_ts_fx_fixed_q( &p_tc_fx[in_ch][ts * num_cldfb_bands], cldfb_in_ts_re_fx[in_ch][ts], cldfb_in_ts_im_fx[in_ch][ts], num_cldfb_bands, st_ivas->cldfbAnaDec[in_ch], &q_cldfb ); + move16(); + cldfbAnalysis_ts_fx_fixed_q( &p_tc_fx[in_ch][i_mult( ts, num_cldfb_bands )], cldfb_in_ts_re_fx[in_ch][ts], cldfb_in_ts_im_fx[in_ch][ts], num_cldfb_bands, st_ivas->cldfbAnaDec[in_ch], &q_cldfb ); } } } - IF( ( LT_32( hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) && ( ( EQ_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA2 ) ) || ( EQ_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA3 ) ) ) ) + test(); + test(); + IF( ( LT_32( hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) && ( ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA2 ) ) || ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA3 ) ) ) ) { Word16 q_cldfb = 6; + move16(); ivas_spar_calc_smooth_facs_fx( cldfb_in_ts_re_fx[0], cldfb_in_ts_im_fx[0], q_cldfb, num_spar_bands, hSpar->subframe_nbslots[hSpar->subframes_rendered], hSpar->subframes_rendered == 0, &hSpar->hFbMixer->pFb->fb_bin_to_band, hSpar->hMdDec->smooth_fac_fx, hSpar->hMdDec->smooth_buf_fx ); } FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { - md_idx = hSpar->render_to_md_map[ts + slot_idx_start]; + md_idx = hSpar->render_to_md_map[add( ts, slot_idx_start )]; + move16(); floatToFixed_arr( hSpar->hFbMixer->cldfb_cross_fade, hSpar->hFbMixer->cldfb_cross_fade_fx, Q15, CLDFB_NO_COL_MAX ); ivas_spar_get_parameters_fx( hSpar, hDecoderConfig, md_idx, numch_out, numch_in, num_spar_bands, mixer_mat_fx ); - IF( ( LT_32( hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) && ( ( EQ_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA2 ) ) || ( EQ_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA3 ) ) ) ) + test(); + test(); + IF( ( LT_32( hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) && ( ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA2 ) ) || ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_HOA3 ) ) ) ) { FOR( spar_band = 0; spar_band < num_spar_bands; spar_band++ ) { @@ -2981,7 +3197,9 @@ void ivas_spar_dec_upmixer_sf_fx( FOR( in_ch = 0; in_ch < numch_in; in_ch++ ) { mixer_mat_fx[out_ch][in_ch][spar_band] = L_add( Mult_32_16( mixer_mat_fx[out_ch][in_ch][spar_band], sub( 32767, hSpar->hMdDec->smooth_fac_fx[spar_band] ) ), Mult_32_16( hSpar->hMdDec->mixer_mat_prev2_fx[out_ch][in_ch][spar_band], hSpar->hMdDec->smooth_fac_fx[spar_band] ) ); /*q1*/ + move32(); hSpar->hMdDec->mixer_mat_prev2_fx[out_ch][in_ch][spar_band] = mixer_mat_fx[out_ch][in_ch][spar_band]; + move32(); } } } @@ -2997,19 +3215,24 @@ void ivas_spar_dec_upmixer_sf_fx( FOR( out_ch = 0; out_ch < numch_out; out_ch++ ) { out_re_fx[out_ch] = 0; + move32(); out_im_fx[out_ch] = 0; + move32(); FOR( in_ch = 0; in_ch < numch_in; in_ch++ ) { - IF( EQ_16( b_skip_mat[out_ch][in_ch], 0 ) ) + IF( b_skip_mat[out_ch][in_ch] == 0 ) { IF( LT_16( cldfb_band, CLDFB_PAR_WEIGHT_START_BAND ) ) /* tuning parameter, depends on how much SPAR Filters overlap for the CLDFB bands */ { spar_band = bin2band->p_cldfb_map_to_spar_band[cldfb_band]; + move16(); cldfb_par_fx = mixer_mat_fx[out_ch][in_ch][spar_band]; + move32(); } ELSE { cldfb_par_fx = 0; + move32(); FOR( spar_band = bin2band->p_spar_start_bands[cldfb_band]; spar_band < num_spar_bands; spar_band++ ) { /* accumulate contributions from all SPAR bands */ @@ -3019,7 +3242,9 @@ void ivas_spar_dec_upmixer_sf_fx( } out_re_fx[out_ch] = L_add( out_re_fx[out_ch], Mult_32_32( cldfb_in_ts_re_fx[in_ch][ts][cldfb_band], cldfb_par_fx ) ); /*q1-25*/ + move32(); out_im_fx[out_ch] = L_add( out_im_fx[out_ch], Mult_32_32( cldfb_in_ts_im_fx[in_ch][ts][cldfb_band], cldfb_par_fx ) ); /*q1-25*/ + move32(); } } } @@ -3027,16 +3252,25 @@ void ivas_spar_dec_upmixer_sf_fx( /*update CLDFB data with the parameter-modified data*/ FOR( out_ch = 0; out_ch < numch_out; out_ch++ ) { - cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], 31 - q1 ); /*Q=6*/ - cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], 31 - q1 ); /*Q=6*/ + cldfb_in_ts_re_fx[out_ch][ts][cldfb_band] = L_shl( out_re_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/ + move32(); + cldfb_in_ts_im_fx[out_ch][ts][cldfb_band] = L_shl( out_im_fx[out_ch], sub( 31, q1 ) ); /*Q=6*/ + move32(); } } - IF( ( EQ_16( ( add( add( slot_idx_start, ts ), 1 ) ), hSpar->num_slots ) ) || ( NE_16( ( md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME ), ( hSpar->render_to_md_map[ts + slot_idx_start + 1] / JBM_CLDFB_SLOTS_IN_SUBFRAME ) ) ) ) + + test(); + IF( ( EQ_16( ( add( add( slot_idx_start, ts ), 1 ) ), hSpar->num_slots ) ) || ( NE_16( ( shr( md_idx, 2 ) /* md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME */ ), ( hSpar->render_to_md_map[add( add( slot_idx_start, ts ), 1 )] / JBM_CLDFB_SLOTS_IN_SUBFRAME /*It's value is 4*/ ) ) ) ) { /* we have crossed an unadapted parameter sf border, update previous mixing matrices */ - Word16 md_sf = md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME; + Word16 md_sf = shr( md_idx, 2 ) /* md_idx / JBM_CLDFB_SLOTS_IN_SUBFRAME */; split_band = SPAR_DIRAC_SPLIT_START_BAND; - md_sf = ( num_md_sub_frames == MAX_PARAM_SPATIAL_SUBFRAMES ) ? md_sf : 0; + move16(); + if ( NE_16( num_md_sub_frames, MAX_PARAM_SPATIAL_SUBFRAMES ) ) + { + md_sf = 0; + move16(); + } IF( LT_16( split_band, IVAS_MAX_NUM_BANDS ) ) { Copy32( hSpar->hMdDec->mixer_mat_prev_fx[1][0][0], hSpar->hMdDec->mixer_mat_prev_fx[0][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); @@ -3050,17 +3284,22 @@ void ivas_spar_dec_upmixer_sf_fx( { FOR( b = 0; b < num_spar_bands; b++ ) { - hSpar->hMdDec->mixer_mat_prev_fx[4][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][b + md_sf * IVAS_MAX_NUM_BANDS]; + hSpar->hMdDec->mixer_mat_prev_fx[4][out_ch][in_ch][b] = hSpar->hMdDec->mixer_mat_fx[out_ch][in_ch][add( b, i_mult( md_sf, IVAS_MAX_NUM_BANDS ) )]; + move32(); } } } - hSpar->i_subframe++; + hSpar->i_subframe = add( hSpar->i_subframe, 1 ); + move16(); hSpar->i_subframe = s_min( hSpar->i_subframe, MAX_PARAM_SPATIAL_SUBFRAMES ); + move16(); } } } - IF( NE_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) && NE_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_STEREO ) && NE_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_MONO ) ) + test(); + test(); + IF( NE_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) && NE_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_STEREO ) && NE_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_MONO ) ) { ivas_dirac_dec_render_sf_fx( st_ivas, output_fx, nchan_internal, cldfb_in_ts_re_fx, cldfb_in_ts_im_fx ); } @@ -3069,38 +3308,49 @@ void ivas_spar_dec_upmixer_sf_fx( { Word16 outchannels, idx_in, idx_lfe, ch; idx_in = 0; + move16(); idx_lfe = 0; + move16(); outchannels = add( st_ivas->hOutSetup.nchan_out_woLFE, st_ivas->hOutSetup.num_lfe ); FOR( ch = 0; ch < outchannels; ch++ ) { - IF( ( GT_16( st_ivas->hOutSetup.num_lfe, 0 ) ) && ( EQ_16( st_ivas->hOutSetup.index_lfe[idx_lfe], ch ) ) ) + test(); + IF( ( st_ivas->hOutSetup.num_lfe > 0 ) && ( EQ_16( st_ivas->hOutSetup.index_lfe[idx_lfe], ch ) ) ) { - set_zero_fx( output_fx[ch], hSpar->subframe_nbslots[hSpar->subframes_rendered] * num_cldfb_bands ); - IF( LT_16( idx_lfe, ( sub( st_ivas->hDirACRend->hOutSetup.num_lfe, 1 ) ) ) ) + set_zero_fx( output_fx[ch], i_mult( hSpar->subframe_nbslots[hSpar->subframes_rendered], num_cldfb_bands ) ); + if ( LT_16( idx_lfe, ( sub( st_ivas->hDirACRend->hOutSetup.num_lfe, 1 ) ) ) ) { - idx_lfe++; + idx_lfe = add( idx_lfe, 1 ); } } ELSE { - IF( ( EQ_16( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) || !( EQ_16( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL ) || EQ_16( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR ) || EQ_16( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB ) ) ) && - !( EQ_16( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_16( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_16( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) ) + test(); + test(); + test(); + test(); + test(); + test(); + IF( ( EQ_32( hDecoderConfig->output_config, IVAS_AUDIO_CONFIG_FOA ) || !( EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL ) || EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_IR ) || EQ_32( st_ivas->hOutSetup.output_config, IVAS_AUDIO_CONFIG_BINAURAL_ROOM_REVERB ) ) ) && + !( EQ_32( st_ivas->ivas_format, SBA_ISM_FORMAT ) && EQ_32( st_ivas->ism_mode, ISM_SBA_MODE_DISC ) && EQ_32( st_ivas->renderer_type, RENDERER_BINAURAL_FASTCONV_ROOM ) ) ) { Scale_sig32( st_ivas->cldfbSynDec[idx_in]->cldfb_state_fx, st_ivas->cldfbSynDec[idx_in]->p_filter_length, -6 ); st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state = sub( st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state, 6 ); + move16(); Scale_sig32( output_fx[ch], out_len, -6 ); FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { - cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[idx_in][ts], &cldfb_in_ts_im_fx[idx_in][ts], &output_fx[ch][ts * num_cldfb_bands], num_cldfb_bands, st_ivas->cldfbSynDec[idx_in] ); + cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[idx_in][ts], &cldfb_in_ts_im_fx[idx_in][ts], &output_fx[ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, st_ivas->cldfbSynDec[idx_in] ); } Scale_sig32( output_fx[ch], out_len, 6 ); Scale_sig32( st_ivas->cldfbSynDec[idx_in]->cldfb_state_fx, st_ivas->cldfbSynDec[idx_in]->p_filter_length, 6 ); st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state = add( st_ivas->cldfbSynDec[idx_in]->Q_cldfb_state, 6 ); + move16(); } - idx_in++; + idx_in = add( idx_in, 1 ); } } } @@ -3111,19 +3361,23 @@ void ivas_spar_dec_upmixer_sf_fx( { Scale_sig32( st_ivas->cldfbSynDec[out_ch]->cldfb_state_fx, st_ivas->cldfbSynDec[out_ch]->p_filter_length, -6 ); st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state = sub( st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state, 6 ); + move16(); Scale_sig32( output_fx[out_ch], out_len, -6 ); FOR( ts = 0; ts < hSpar->subframe_nbslots[hSpar->subframes_rendered]; ts++ ) { - cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[out_ch][ts], &cldfb_in_ts_im_fx[out_ch][ts], &output_fx[out_ch][ts * num_cldfb_bands], num_cldfb_bands, st_ivas->cldfbSynDec[out_ch] ); + cldfbSynthesis_ivas_fx( &cldfb_in_ts_re_fx[out_ch][ts], &cldfb_in_ts_im_fx[out_ch][ts], &output_fx[out_ch][i_mult( ts, num_cldfb_bands )], num_cldfb_bands, st_ivas->cldfbSynDec[out_ch] ); } Scale_sig32( output_fx[out_ch], out_len, 6 ); Scale_sig32( st_ivas->cldfbSynDec[out_ch]->cldfb_state_fx, st_ivas->cldfbSynDec[out_ch]->p_filter_length, 6 ); st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state = add( st_ivas->cldfbSynDec[out_ch]->Q_cldfb_state, 6 ); + move16(); } } hSpar->slots_rendered = add( hSpar->slots_rendered, hSpar->subframe_nbslots[hSpar->subframes_rendered] ); - hSpar->subframes_rendered++; + move16(); + hSpar->subframes_rendered = add( hSpar->subframes_rendered, 1 ); + move16(); pop_wmops(); diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index 6e7f24878..48cbad61d 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -69,7 +69,7 @@ static const int16_t ivas_spar_dec_plc_spatial_target[IVAS_SPAR_MAX_CH] = { 1, 0 *------------------------------------------------------------------------------------------*/ #ifdef IVAS_FLOAT_FIXED -static void ivas_get_spar_matrices_fx( ivas_spar_md_dec_state_t *hMdDec, const int16_t num_bands_out, const int16_t n_ts, const int16_t bw, const int16_t dtx_vad, const int16_t nB, const int16_t numch_out, const int16_t active_w_vlbr, const int16_t dyn_active_w_flag ); +static void ivas_get_spar_matrices_fx( ivas_spar_md_dec_state_t *hMdDec, const Word16 num_bands_out, const Word16 n_ts, const Word16 bw, const Word16 dtx_vad, const Word16 nB, const Word16 numch_out, const Word16 active_w_vlbr, const Word16 dyn_active_w_flag ); #else static void ivas_get_spar_matrices( ivas_spar_md_dec_state_t *hMdDec, const int16_t num_bands_out, const int16_t n_ts, const int16_t bw, const int16_t dtx_vad, const int16_t nB, const int16_t numch_out, const int16_t active_w_vlbr, const int16_t dyn_active_w_flag ); #endif @@ -83,7 +83,7 @@ static void ivas_fill_band_coeffs_idx( ivas_band_coeffs_ind_t *pBands_idx, const #ifndef IVAS_FLOAT_FIXED static void ivas_mat_col_rearrange( float in_re[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH], const int16_t order[IVAS_SPAR_MAX_CH], const int16_t i_ts, float ***mixer_mat, const int16_t bands, const int16_t num_ch ); #else -static void ivas_mat_col_rearrange_fx( Word32 in_re[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH], const int16_t order[IVAS_SPAR_MAX_CH], const int16_t i_ts, Word32 ***mixer_mat, const int16_t bands, const int16_t num_ch ); +static void ivas_mat_col_rearrange_fx( Word32 in_re[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH], const Word16 order[IVAS_SPAR_MAX_CH], const Word16 i_ts, Word32 ***mixer_mat, const Word16 bands, const Word16 num_ch ); #endif #ifndef IVAS_FLOAT_FIXED @@ -101,24 +101,33 @@ static void ivas_spar_md_fill_invalid_bands_fx( ivas_spar_dec_matrices_t *pSpar_ static void ivas_spar_md_fill_invalid_bands( ivas_spar_dec_matrices_t *pSpar_coeffs, ivas_spar_dec_matrices_t *pSpar_coeffs_prev, const int16_t *valid_bands, int16_t *base_band_age, const int16_t num_bands, const int16_t numch_out, const int16_t num_md_sub_frames ); #endif +#ifdef IVAS_FLOAT_FIXED +static void ivas_spar_md_fill_invalid_bandcoeffs( ivas_band_coeffs_t *pBand_coeffs, ivas_band_coeffs_t *pBand_coeffs_prev, const Word16 *valid_bands, Word16 *base_band_age, Word16 *first_valid_frame, const Word16 num_bands ); +#else static void ivas_spar_md_fill_invalid_bandcoeffs( ivas_band_coeffs_t *pBand_coeffs, ivas_band_coeffs_t *pBand_coeffs_prev, const int16_t *valid_bands, int16_t *base_band_age, int16_t *first_valid_frame, const int16_t num_bands ); +#endif + #ifdef IVAS_FLOAT_FIXED -static ivas_error ivas_spar_set_dec_config( ivas_spar_md_dec_state_t *hMdDec, const int16_t nchan_transport, Word32 *pFC ); +static ivas_error ivas_spar_set_dec_config( ivas_spar_md_dec_state_t *hMdDec, const Word16 nchan_transport, Word32 *pFC ); #else static ivas_error ivas_spar_set_dec_config( ivas_spar_md_dec_state_t *hMdDec, const int16_t nchan_transport, float *pFC ); #endif +#ifdef IVAS_FLOAT_FIXED +static void ivas_parse_parameter_bitstream_dtx( ivas_spar_md_t *pSpar_md, Decoder_State *st, const Word16 bw, const Word16 num_bands, Word16 *num_dmx_per_band, Word16 *num_dec_per_band ); +#else static void ivas_parse_parameter_bitstream_dtx( ivas_spar_md_t *pSpar_md, Decoder_State *st, const int16_t bw, const int16_t num_bands, int16_t *num_dmx_per_band, int16_t *num_dec_per_band ); +#endif #ifndef IVAS_FLOAT_FIXED static ivas_error ivas_deindex_real_index( const int16_t *index, const int16_t q_levels, const float min_value, const float max_value, float *quant, const int16_t num_ch_dim2 ); #endif #ifdef IVAS_FLOAT_FIXED -static ivas_error ivas_deindex_real_index_fx( const int16_t *index, const int16_t q_levels, const Word32 min_value, const Word32 max_value, Word32 *quant, const int16_t num_ch_dim2 ); +static ivas_error ivas_deindex_real_index_fx( const Word16 *index, const Word16 q_levels, const Word32 min_value, const Word32 max_value, Word32 *quant, const Word16 num_ch_dim2 ); #endif #ifdef IVAS_FLOAT_FIXED -static void ivas_spar_dec_parse_md_bs_fx( ivas_spar_md_dec_state_t *hMdDec, Decoder_State *st, int16_t *nB, int16_t *bands_bw, int16_t *dtx_vad, const int32_t ivas_total_brate, const int16_t sba_inactive_mode ); +static void ivas_spar_dec_parse_md_bs_fx( ivas_spar_md_dec_state_t *hMdDec, Decoder_State *st, Word16 *nB, Word16 *bands_bw, Word16 *dtx_vad, const Word32 ivas_total_brate, const Word16 sba_inactive_mode ); #else static void ivas_spar_dec_parse_md_bs( ivas_spar_md_dec_state_t *hMdDec, Decoder_State *st, int16_t *nB, int16_t *bands_bw, int16_t *dtx_vad, const int32_t ivas_total_brate, const int16_t sba_inactive_mode ); #endif @@ -299,90 +308,90 @@ ivas_error ivas_spar_md_dec_matrix_open( #else ivas_error ivas_spar_md_dec_matrix_open_fx( ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ - const int16_t num_channels, /* i : number of internal channels */ - const int16_t num_md_sub_frames /* i : number of MD subframes */ + const Word16 num_channels, /* i : number of internal channels */ + const Word16 num_md_sub_frames /* i : number of MD subframes */ ) { - int16_t i, j; - int16_t k; - if ( ( hMdDec->spar_md.band_coeffs = (ivas_band_coeffs_t *) malloc( IVAS_MAX_NUM_BANDS * num_md_sub_frames * sizeof( ivas_band_coeffs_t ) ) ) == NULL ) + Word16 i, j; + Word16 k; + IF( ( hMdDec->spar_md.band_coeffs = (ivas_band_coeffs_t *) malloc( i_mult( i_mult( IVAS_MAX_NUM_BANDS, num_md_sub_frames ), sizeof( ivas_band_coeffs_t ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for band_coeffs in SPAR MD" ); } - if ( ( hMdDec->band_coeffs_prev = (ivas_band_coeffs_t *) malloc( IVAS_MAX_NUM_BANDS * sizeof( ivas_band_coeffs_t ) ) ) == NULL ) + IF( ( hMdDec->band_coeffs_prev = (ivas_band_coeffs_t *) malloc( IVAS_MAX_NUM_BANDS * sizeof( ivas_band_coeffs_t ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for band_coeffs in SPAR MD" ); } - if ( ( hMdDec->mixer_mat_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->mixer_mat_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->mixer_mat_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->mixer_mat_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->mixer_mat_fx[i][j] = (Word32 *) malloc( num_md_sub_frames * IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->mixer_mat_fx[i][j] = (Word32 *) malloc( i_mult( i_mult( num_md_sub_frames, IVAS_MAX_NUM_BANDS ), sizeof( Word32 ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } - if ( ( hMdDec->spar_coeffs.C_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.C_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs.C_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.C_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs.C_re_fx[i][j] = (Word32 *) malloc( num_md_sub_frames * IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.C_re_fx[i][j] = (Word32 *) malloc( i_mult( i_mult( num_md_sub_frames, IVAS_MAX_NUM_BANDS ), sizeof( Word32 ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } - if ( ( hMdDec->spar_coeffs.P_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.P_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs.P_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.P_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs.P_re_fx[i][j] = (Word32 *) malloc( num_md_sub_frames * IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs.P_re_fx[i][j] = (Word32 *) malloc( i_mult( num_md_sub_frames, IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } - if ( ( hMdDec->spar_coeffs_prev.C_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.C_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs_prev.C_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.C_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs_prev.C_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.C_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } @@ -390,73 +399,77 @@ ivas_error ivas_spar_md_dec_matrix_open_fx( } // Fix Memory - if ( ( hMdDec->spar_coeffs_prev.P_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.P_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs_prev.P_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.P_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs_prev.P_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_prev.P_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } // Fix Memory - if ( ( hMdDec->spar_coeffs_tar.C_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.C_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs_tar.C_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.C_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs_tar.C_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.C_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } // Fix Memory - if ( ( hMdDec->spar_coeffs_tar.P_re_fx = (Word32 ***) malloc( num_channels * sizeof( Word32 ** ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.P_re_fx = (Word32 ***) malloc( i_mult( num_channels, sizeof( Word32 ** ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - if ( ( hMdDec->spar_coeffs_tar.P_re_fx[i] = (Word32 **) malloc( num_channels * sizeof( Word32 * ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.P_re_fx[i] = (Word32 **) malloc( i_mult( num_channels, sizeof( Word32 * ) ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - if ( ( hMdDec->spar_coeffs_tar.P_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) + IF( ( hMdDec->spar_coeffs_tar.P_re_fx[i][j] = (Word32 *) malloc( IVAS_MAX_NUM_BANDS * sizeof( Word32 ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD matrix" ); } } } - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { - for ( k = 0; k < IVAS_MAX_NUM_BANDS; k++ ) + FOR( k = 0; k < IVAS_MAX_NUM_BANDS; k++ ) { hMdDec->spar_coeffs_prev.C_re_fx[i][j][k] = 0; + move32(); hMdDec->spar_coeffs_prev.P_re_fx[i][j][k] = 0; + move32(); hMdDec->spar_coeffs_tar.C_re_fx[i][j][k] = 0; + move32(); hMdDec->spar_coeffs_tar.P_re_fx[i][j][k] = 0; + move32(); } } } @@ -485,7 +498,7 @@ Word16 ivas_get_spar_dec_md_num_subframes( move16(); IF( GT_16( sba_order, SBA_FOA_ORDER ) ) { - IF( GE_32( ivas_total_brate, IVAS_512k ) ) + if ( GE_32( ivas_total_brate, IVAS_512k ) ) { num_subframes = 1; move16(); @@ -495,7 +508,7 @@ Word16 ivas_get_spar_dec_md_num_subframes( test(); test(); test(); - IF( ( LE_32( ivas_total_brate, IVAS_SID_5k2 ) && LT_32( ivas_last_active_brate, IVAS_24k4 ) ) || ( GT_32( ivas_total_brate, IVAS_SID_5k2 ) && LT_32( ivas_total_brate, IVAS_24k4 ) ) ) + if ( ( LE_32( ivas_total_brate, IVAS_SID_5k2 ) && LT_32( ivas_last_active_brate, IVAS_24k4 ) ) || ( GT_32( ivas_total_brate, IVAS_SID_5k2 ) && LT_32( ivas_total_brate, IVAS_24k4 ) ) ) { num_subframes = 1; move16(); @@ -541,51 +554,51 @@ int16_t ivas_get_spar_dec_md_num_subframes( ivas_error ivas_spar_md_dec_open( ivas_spar_md_dec_state_t **hMdDec_out, /* i/o: SPAR MD decoder handle */ const DECODER_CONFIG_HANDLE hDecoderConfig, /* i : configuration structure */ - const int16_t num_channels, /* i : number of internal channels */ - const int16_t sba_order, /* i : SBA order */ - const int16_t sid_format, /* i : SID format */ - const int32_t last_active_ivas_total_brate /* i : IVAS last active bitrate */ + const Word16 num_channels, /* i : number of internal channels */ + const Word16 sba_order, /* i : SBA order */ + const Word16 sid_format, /* i : SID format */ + const Word32 last_active_ivas_total_brate /* i : IVAS last active bitrate */ ) { ivas_spar_md_dec_state_t *hMdDec; ivas_error error; - int16_t num_md_sub_frames; + Word16 num_md_sub_frames; error = IVAS_ERR_OK; + move32(); - if ( ( hMdDec = (ivas_spar_md_dec_state_t *) malloc( sizeof( ivas_spar_md_dec_state_t ) ) ) == NULL ) + IF( ( hMdDec = (ivas_spar_md_dec_state_t *) malloc( sizeof( ivas_spar_md_dec_state_t ) ) ) == NULL ) { return IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for SPAR MD decoder" ); } num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( sba_order, hDecoderConfig->ivas_total_brate, last_active_ivas_total_brate ); -#ifndef IVAS_FLOAT_FIXED - if ( ( error = ivas_spar_md_dec_matrix_open( hMdDec, num_channels, num_md_sub_frames ) ) != IVAS_ERR_OK ) -#else - if ( ( error = ivas_spar_md_dec_matrix_open_fx( hMdDec, num_channels, num_md_sub_frames ) ) != IVAS_ERR_OK ) -#endif + IF( NE_32( ( error = ivas_spar_md_dec_matrix_open_fx( hMdDec, num_channels, num_md_sub_frames ) ), IVAS_ERR_OK ) ) { return error; } - if ( hDecoderConfig->ivas_total_brate == IVAS_SID_5k2 ) + IF( EQ_32( hDecoderConfig->ivas_total_brate, IVAS_SID_5k2 ) ) { - if ( sid_format == SID_SBA_2TC ) + IF( EQ_16( sid_format, SID_SBA_2TC ) ) { hMdDec->table_idx = ivas_get_spar_table_idx( IVAS_48k, sba_order, SPAR_CONFIG_BW, NULL, NULL ); + move16(); } - else + ELSE { hMdDec->table_idx = ivas_get_spar_table_idx( IVAS_24k4, sba_order, SPAR_CONFIG_BW, NULL, NULL ); + move16(); } } - else + ELSE { hMdDec->table_idx = ivas_get_spar_table_idx( hDecoderConfig->ivas_total_brate, sba_order, SPAR_CONFIG_BW, NULL, NULL ); + move16(); } - if ( ( error = ivas_spar_md_dec_init( hMdDec, hDecoderConfig, num_channels, sba_order ) ) != IVAS_ERR_OK ) + IF( NE_32( ( error = ivas_spar_md_dec_init( hMdDec, hDecoderConfig, num_channels, sba_order ) ), IVAS_ERR_OK ) ) { return error; } @@ -718,27 +731,27 @@ void ivas_spar_md_dec_matrix_close( #else void ivas_spar_md_dec_matrix_close_fx( ivas_spar_md_dec_state_t *hMdDecoder, /* i/o: SPAR MD decoder handle */ - const int16_t num_channels /* i : number of internal channels */ + const Word16 num_channels /* i : number of internal channels */ ) { - int16_t i, j; + Word16 i, j; - if ( hMdDecoder->spar_md.band_coeffs != NULL ) + IF( hMdDecoder->spar_md.band_coeffs != NULL ) { free( hMdDecoder->spar_md.band_coeffs ); hMdDecoder->spar_md.band_coeffs = NULL; } - if ( hMdDecoder->band_coeffs_prev != NULL ) + IF( hMdDecoder->band_coeffs_prev != NULL ) { free( hMdDecoder->band_coeffs_prev ); hMdDecoder->band_coeffs_prev = NULL; } - if ( hMdDecoder->mixer_mat_fx != NULL ) + IF( hMdDecoder->mixer_mat_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->mixer_mat_fx[i][j] ); } @@ -746,11 +759,11 @@ void ivas_spar_md_dec_matrix_close_fx( } free( hMdDecoder->mixer_mat_fx ); } - if ( hMdDecoder->spar_coeffs.C_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs.C_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs.C_re_fx[i][j] ); } @@ -758,11 +771,11 @@ void ivas_spar_md_dec_matrix_close_fx( } free( hMdDecoder->spar_coeffs.C_re_fx ); } - if ( hMdDecoder->spar_coeffs.P_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs.P_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs.P_re_fx[i][j] ); } @@ -771,11 +784,11 @@ void ivas_spar_md_dec_matrix_close_fx( free( hMdDecoder->spar_coeffs.P_re_fx ); } - if ( hMdDecoder->spar_coeffs_prev.C_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs_prev.C_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs_prev.C_re_fx[i][j] ); } @@ -784,11 +797,11 @@ void ivas_spar_md_dec_matrix_close_fx( free( hMdDecoder->spar_coeffs_prev.C_re_fx ); } - if ( hMdDecoder->spar_coeffs_prev.P_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs_prev.P_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs_prev.P_re_fx[i][j] ); } @@ -797,11 +810,11 @@ void ivas_spar_md_dec_matrix_close_fx( free( hMdDecoder->spar_coeffs_prev.P_re_fx ); } - if ( hMdDecoder->spar_coeffs_tar.C_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs_tar.C_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs_tar.C_re_fx[i][j] ); } @@ -809,11 +822,11 @@ void ivas_spar_md_dec_matrix_close_fx( } free( hMdDecoder->spar_coeffs_tar.C_re_fx ); } - if ( hMdDecoder->spar_coeffs_tar.P_re_fx != NULL ) + IF( hMdDecoder->spar_coeffs_tar.P_re_fx != NULL ) { - for ( i = 0; i < num_channels; i++ ) + FOR( i = 0; i < num_channels; i++ ) { - for ( j = 0; j < num_channels; j++ ) + FOR( j = 0; j < num_channels; j++ ) { free( hMdDecoder->spar_coeffs_tar.P_re_fx[i][j] ); } @@ -837,16 +850,13 @@ void ivas_spar_md_dec_close( ) { ivas_spar_md_dec_state_t *hMdDecoder; - int16_t num_channels; + Word16 num_channels; hMdDecoder = *hMdDec; num_channels = hMdDecoder->spar_md_cfg.num_umx_chs; + move16(); -#ifndef IVAS_FLOAT_FIXED - ivas_spar_md_dec_matrix_close( hMdDecoder, num_channels ); -#else ivas_spar_md_dec_matrix_close_fx( hMdDecoder, num_channels ); -#endif free( *hMdDec ); *hMdDec = NULL; @@ -961,15 +971,12 @@ ivas_error ivas_spar_md_dec_init( ivas_error ivas_spar_md_dec_init( ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ const DECODER_CONFIG_HANDLE hDecoderConfig, /* i : configuration structure */ - const int16_t num_channels, /* i : number of internal channels */ - const int16_t sba_order /* i : SBA order */ + const Word16 num_channels, /* i : number of internal channels */ + const Word16 sba_order /* i : SBA order */ ) { - int16_t i, j; - int16_t nchan_transport; -#ifndef IVAS_FLOAT_FIXED - Word32 pFC[IVAS_MAX_NUM_BANDS]; -#endif + Word16 i, j; + Word16 nchan_transport; Word32 *pFC_fx = NULL, PR_minmax_fx[2]; ivas_error error; @@ -977,20 +984,21 @@ ivas_error ivas_spar_md_dec_init( ivas_sba_get_spar_hoa_ch_ind_fx( num_channels, hDecoderConfig->ivas_total_brate, hMdDec->HOA_md_ind ); - hMdDec->spar_md.num_bands = ( hMdDec->spar_hoa_md_flag ) ? IVAS_MAX_NUM_BANDS : min( IVAS_MAX_NUM_BANDS, SPAR_DIRAC_SPLIT_START_BAND ); + IF( hMdDec->spar_hoa_md_flag ) + { + hMdDec->spar_md.num_bands = IVAS_MAX_NUM_BANDS; + move16(); + } + ELSE + { + hMdDec->spar_md.num_bands = min( IVAS_MAX_NUM_BANDS, SPAR_DIRAC_SPLIT_START_BAND ); + move16(); + } ivas_spar_set_bitrate_config_fx( &hMdDec->spar_md_cfg, hMdDec->table_idx, hMdDec->spar_md.num_bands, hMdDec->spar_hoa_dirac2spar_md_flag, 0, 0, 0 ); nchan_transport = hMdDec->spar_md_cfg.nchan_transport; - - -#ifndef IVAS_FLOAT_FIXED - /* get FB coefficients */ - FOR( i = 0; i < IVAS_MAX_NUM_BANDS; i++ ) - { - pFC[i] = L_shr( Mpy_32_32( ivas_fb_fcs_12band_1ms_fx[i], hDecoderConfig->output_Fs ), 1 ); // Q0 - } -#endif + move16(); IF( EQ_32( hDecoderConfig->output_Fs, 8000 ) ) { @@ -1012,86 +1020,78 @@ ivas_error ivas_spar_md_dec_init( { pFC_fx = pFC_48k; } - else + ELSE { assert( 0 ); // update sample rate } -#ifdef IVAS_FLOAT_FIXED - if ( ( error = ivas_spar_set_dec_config( hMdDec, nchan_transport, pFC_fx ) ) != IVAS_ERR_OK ) -#else - if ( ( error = ivas_spar_set_dec_config( hMdDec, nchan_transport, pFC ) ) != IVAS_ERR_OK ) -#endif + IF( NE_32( ( error = ivas_spar_set_dec_config( hMdDec, nchan_transport, pFC_fx ) ), IVAS_ERR_OK ) ) { return error; } - if ( nchan_transport != 2 && ( ( hMdDec->spar_md_cfg.remix_unmix_order == 2 ) || ( hMdDec->spar_md_cfg.remix_unmix_order == 1 ) ) ) + test(); + test(); + IF( NE_16( nchan_transport, 2 ) && ( EQ_16( hMdDec->spar_md_cfg.remix_unmix_order, 2 ) || EQ_16( hMdDec->spar_md_cfg.remix_unmix_order, 1 ) ) ) { return IVAS_ERR_INTERNAL; } /* DTX quant init */ PR_minmax_fx[0] = hMdDec->spar_md_cfg.quant_strat[0].PR.min_fx; + move32(); PR_minmax_fx[1] = hMdDec->spar_md_cfg.quant_strat[0].PR.max_fx; + move32(); ivas_spar_quant_dtx_init_fx( &hMdDec->spar_md, PR_minmax_fx ); ivas_spar_arith_coeffs_com_init_fx( &hMdDec->arith_coeffs, &hMdDec->spar_md_cfg, hMdDec->table_idx, DEC ); ivas_spar_huff_coeffs_com_init_fx( &hMdDec->huff_coeffs, &hMdDec->spar_md_cfg, hMdDec->table_idx, DEC ); hMdDec->spar_md_cfg.prev_quant_idx = -1; + move16(); /* initialize PLC state */ - set_s( hMdDec->valid_bands, 0, IVAS_MAX_NUM_BANDS ); - set_s( hMdDec->base_band_age, 0, IVAS_MAX_NUM_BANDS ); - set_s( hMdDec->base_band_coeffs_age, 0, IVAS_MAX_NUM_BANDS ); + set16_fx( hMdDec->valid_bands, 0, IVAS_MAX_NUM_BANDS ); + set16_fx( hMdDec->base_band_age, 0, IVAS_MAX_NUM_BANDS ); + set16_fx( hMdDec->base_band_coeffs_age, 0, IVAS_MAX_NUM_BANDS ); hMdDec->spar_plc_num_lost_frames = 0; + move16(); hMdDec->spar_plc_enable_fadeout_flag = 1; + move16(); hMdDec->dtx_md_smoothing_cntr = 1; + move16(); -#ifndef IVAS_FLOAT_FIXED - ivas_clear_band_coeffs( hMdDec->spar_md.band_coeffs, IVAS_MAX_NUM_BANDS ); - ivas_clear_band_coeffs( hMdDec->band_coeffs_prev, IVAS_MAX_NUM_BANDS ); -#else ivas_clear_band_coeffs_fx( hMdDec->spar_md.band_coeffs, IVAS_MAX_NUM_BANDS ); ivas_clear_band_coeffs_fx( hMdDec->band_coeffs_prev, IVAS_MAX_NUM_BANDS ); -#endif + ivas_clear_band_coeff_idx( hMdDec->spar_md.band_coeffs_idx, IVAS_MAX_NUM_BANDS ); ivas_clear_band_coeff_idx( hMdDec->spar_md_prev.band_coeffs_idx, IVAS_MAX_NUM_BANDS ); ivas_clear_band_coeff_idx( hMdDec->spar_md_prev.band_coeffs_idx_mapped, IVAS_MAX_NUM_BANDS ); hMdDec->spar_md.dtx_vad = 0; + move16(); hMdDec->td_decorr_flag = 1; + move16(); set32_fx( hMdDec->spar_md.en_ratio_slow_fx, 0, IVAS_MAX_NUM_BANDS ); set32_fx( hMdDec->spar_md.ref_pow_slow_fx, 0, IVAS_MAX_NUM_BANDS ); set16_fx( hMdDec->smooth_fac_fx, 0, IVAS_MAX_NUM_BANDS ); -#ifndef IVAS_FLOAT_FIXED - set_f( hMdDec->spar_md.en_ratio_slow, 0.0f, IVAS_MAX_NUM_BANDS ); - set_f( hMdDec->spar_md.ref_pow_slow, 0.0f, IVAS_MAX_NUM_BANDS ); - set_zero( hMdDec->smooth_fac, IVAS_MAX_NUM_BANDS ); -#endif - for ( i = 0; i < IVAS_MAX_NUM_BANDS; i++ ) + FOR( i = 0; i < IVAS_MAX_NUM_BANDS; i++ ) { set32_fx( hMdDec->smooth_buf_fx[i], 0, 2 * SBA_DIRAC_NRG_SMOOTH_LONG + 1 ); -#ifndef IVAS_FLOAT_FIXED - set_zero( hMdDec->smooth_buf[i], 2 * SBA_DIRAC_NRG_SMOOTH_LONG + 1 ); -#endif } - for ( i = 0; i < IVAS_SPAR_MAX_CH; i++ ) + FOR( i = 0; i < IVAS_SPAR_MAX_CH; i++ ) { - for ( j = 0; j < IVAS_SPAR_MAX_CH; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH; j++ ) { set32_fx( hMdDec->mixer_mat_prev2_fx[i][j], 0, IVAS_MAX_NUM_BANDS ); -#ifndef IVAS_FLOAT_FIXED - set_zero( hMdDec->mixer_mat_prev2[i][j], IVAS_MAX_NUM_BANDS ); -#endif } } hMdDec->first_valid_frame = 1; + move16(); return IVAS_ERR_OK; } @@ -1107,53 +1107,64 @@ ivas_error ivas_spar_md_dec_init( #ifdef IVAS_FLOAT_FIXED static ivas_error ivas_spar_set_dec_config( ivas_spar_md_dec_state_t *hMdDec, - const int16_t nchan_transport, + const Word16 nchan_transport, Word32 *pFC ) { - int16_t i, j, nchan, dmx_ch; + Word16 i, j, nchan, dmx_ch; - for ( i = 0; i < nchan_transport; i++ ) + FOR( i = 0; i < nchan_transport; i++ ) { hMdDec->spar_md_cfg.max_freq_per_chan[i] = ivas_spar_br_table_consts[hMdDec->table_idx].fpcs; + move16(); } nchan = ivas_sba_get_nchan_metadata_fx( ivas_spar_br_table_consts[hMdDec->table_idx].sba_order, ivas_spar_br_table_consts[hMdDec->table_idx].ivas_total_brate ); - switch ( nchan ) + SWITCH( nchan ) { case 4: /* FOA_CHANNELS */ hMdDec->num_decorr = IVAS_TD_DECORR_OUT_3CH; - break; + move16(); + BREAK; case 9: /* IVAS_HOA_2_CH */ hMdDec->num_decorr = IVAS_TD_DECORR_OUT_5CH; - break; + move16(); + BREAK; case 6: /* IVAS_HOA_2_CH */ hMdDec->num_decorr = IVAS_TD_DECORR_OUT_2CH; - break; + move16(); + BREAK; case 8: /* IVAS_HOA_3_CH */ hMdDec->num_decorr = IVAS_TD_DECORR_OUT_4CH; - break; + move16(); + BREAK; } hMdDec->spar_md_cfg.num_umx_chs = nchan; + move16(); dmx_ch = 0; - for ( i = 0; i < IVAS_MAX_NUM_BANDS; i++ ) + move16(); + FOR( i = 0; i < IVAS_MAX_NUM_BANDS; i++ ) { dmx_ch = 0; - for ( j = 0; j < nchan_transport; j++ ) + move16(); + FOR( j = 0; j < nchan_transport; j++ ) { - if ( pFC[i] < hMdDec->spar_md_cfg.max_freq_per_chan[j] ) + if ( LT_32( pFC[i], hMdDec->spar_md_cfg.max_freq_per_chan[j] ) ) { - dmx_ch += 1; + dmx_ch = add( dmx_ch, 1 ); } } hMdDec->spar_md_cfg.num_dmx_chans_per_band[i] = hMdDec->spar_md_cfg.nchan_transport; - hMdDec->spar_md_cfg.num_decorr_per_band[i] = nchan - hMdDec->spar_md_cfg.nchan_transport; + move16(); + hMdDec->spar_md_cfg.num_decorr_per_band[i] = sub( nchan, hMdDec->spar_md_cfg.nchan_transport ); + move16(); } hMdDec->spar_md_cfg.nchan_transport = dmx_ch; + move16(); return IVAS_ERR_OK; } @@ -1296,9 +1307,9 @@ static void ivas_dec_mono_sba_handling_fx( test(); test(); - IF( - ( NE_32( azimuth_fx, 0 ) ) || - ( NE_32( elevation_fx, 0 ) ) || + if ( + ( ( azimuth_fx != 0 ) ) || + ( ( elevation_fx != 0 ) ) || ( GT_32( energy_ratio_fx, 161061274 /* 0.15f in Q30 */ ) ) ) /* 0.15f is just above the lowest quantised value. */ { mono_flag = 0; @@ -1308,6 +1319,7 @@ static void ivas_dec_mono_sba_handling_fx( } /* Combine the SPAR prediction coefs flag with the azimuth, elevation and energy ratio flag.*/ + test(); mono_flag = mono_flag && ivas_spar_chk_zero_coefs_fx( st_ivas ); IF( mono_flag ) @@ -1361,7 +1373,16 @@ void ivas_spar_md_dec_process_fx( hMdDec = st_ivas->hSpar->hMdDec; - active_w_vlbr = ( st_ivas->hDecoderConfig->ivas_total_brate < IVAS_24k4 ) ? 1 : 0; + IF( LT_32( st_ivas->hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) + { + active_w_vlbr = 1; + move16(); + } + ELSE + { + active_w_vlbr = 0; + move16(); + } num_md_chs = ivas_sba_get_nchan_metadata_fx( sba_order, st_ivas->hDecoderConfig->ivas_total_brate ); @@ -1389,7 +1410,7 @@ void ivas_spar_md_dec_process_fx( } } } - else + ELSE { dyn_active_w_flag = 0; move16(); @@ -1399,23 +1420,6 @@ void ivas_spar_md_dec_process_fx( } } -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - FOR (Word16 i = 0; i < IVAS_MAX_NUM_BANDS; i++) - { - FOR (Word16 ii = 0; ii < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; ii++) - { - FOR (Word16 jj = 0; jj < IVAS_SPAR_MAX_DMX_CHS - 1; jj++) - { - hMdDec->spar_md.band_coeffs[i].C_re_fx[ii][jj] = (Word32)(hMdDec->spar_md.band_coeffs[i].C_re[ii][jj] * (1 << 22)); - } - } - FOR (Word16 jj = 0; jj < IVAS_SPAR_MAX_DMX_CHS - 1; jj++) - { - hMdDec->spar_md.band_coeffs[i].pred_re_fx[jj] = (Word32)(hMdDec->spar_md.band_coeffs[i].pred_re[jj] * (1 << 22)); - hMdDec->spar_md.band_coeffs[i].P_re_fx[jj] = (Word32)(hMdDec->spar_md.band_coeffs[i].P_re[jj] * (1 << 22)); - } - } -#endif ivas_spar_dec_parse_md_bs_fx( hMdDec, st0, &nB, &bw, &dtx_vad, st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->hQMetaData->sba_inactive_mode ); @@ -1428,23 +1432,7 @@ void ivas_spar_md_dec_process_fx( &hMdDec->base_band_coeffs_age[0], &hMdDec->first_valid_frame, nB ); -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - for (int i = 0; i < IVAS_MAX_NUM_BANDS; i++) - { - for (int ii = 0; ii < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; ii++) - { - for (int jj = 0; jj < IVAS_SPAR_MAX_DMX_CHS - 1; jj++) - { - hMdDec->spar_md.band_coeffs[i].C_re[ii][jj] = (float)hMdDec->spar_md.band_coeffs[i].C_re_fx[ii][jj] / (1 << 22); - } - } - for (int jj = 0; jj < IVAS_SPAR_MAX_DMX_CHS - 1; jj++) - { - hMdDec->spar_md.band_coeffs[i].pred_re[jj] = (float)hMdDec->spar_md.band_coeffs[i].pred_re_fx[jj] / (1 << 22); - hMdDec->spar_md.band_coeffs[i].P_re[jj] = (float)hMdDec->spar_md.band_coeffs[i].P_re_fx[jj] / (1 << 22); - } - } -#endif + ivas_dec_mono_sba_handling_fx( st_ivas ); /* SPAR to DirAC conversion */ @@ -1458,37 +1446,28 @@ void ivas_spar_md_dec_process_fx( move16(); /* expand DirAC MD to all time slots */ - for ( i_ts = 1; i_ts < num_md_sub_frames; i_ts++ ) + FOR( i_ts = 1; i_ts < num_md_sub_frames; i_ts++ ) { - for ( b = 0; b < hMdDec->spar_md.num_bands; b++ ) + FOR( b = 0; b < hMdDec->spar_md.num_bands; b++ ) { - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re[j] = hMdDec->spar_md.band_coeffs[b].pred_re[j]; -#endif - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re_fx[j] = hMdDec->spar_md.band_coeffs[b].pred_re_fx[j]; + hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j] = hMdDec->spar_md.band_coeffs[b].pred_re_fx[j]; move32(); } - for ( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) { - for ( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) + FOR( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re[j][k] = hMdDec->spar_md.band_coeffs[b].C_re[j][k]; -#endif - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re_fx[j][k] = hMdDec->spar_md.band_coeffs[b].C_re_fx[j][k]; + hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].C_re_fx[j][k] = hMdDec->spar_md.band_coeffs[b].C_re_fx[j][k]; move32(); } } - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re[j] = hMdDec->spar_md.band_coeffs[b].P_re[j]; -#endif - hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re_fx[j] = hMdDec->spar_md.band_coeffs[b].P_re_fx[j]; + hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[j] = hMdDec->spar_md.band_coeffs[b].P_re_fx[j]; move32(); } } @@ -1511,6 +1490,7 @@ void ivas_spar_md_dec_process_fx( hMdDec->dtx_md_smoothing_cntr = 1; + move16(); return; } @@ -1700,36 +1680,42 @@ Word16 ivas_spar_chk_zero_coefs_fx( Word16 j, k, b; ivas_spar_md_dec_state_t *hMdDec; Word16 mono = 1; + move16(); Word16 ndec, ndm; hMdDec = st_ivas->hSpar->hMdDec; ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + move16(); ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; + move16(); FOR( b = 0; b < min( hMdDec->spar_md.num_bands, SPAR_DIRAC_SPLIT_START_BAND ); b++ ) { - FOR( j = 0; j < ndm + ndec - 1; j++ ) + FOR( j = 0; j < sub( add( ndm, ndec ), 1 ); j++ ) { - IF( NE_32( hMdDec->spar_md.band_coeffs[b].pred_re_fx[j], 0 ) ) + if ( hMdDec->spar_md.band_coeffs[b].pred_re_fx[j] != 0 ) { mono = 0; + move16(); } } FOR( j = 0; j < ndec; j++ ) { - FOR( k = 0; k < ndm - 1; k++ ) + FOR( k = 0; k < sub( ndm, 1 ); k++ ) { - IF( NE_32( hMdDec->spar_md.band_coeffs[b].C_re_fx[j][k], 0 ) ) + if ( hMdDec->spar_md.band_coeffs[b].C_re_fx[j][k] != 0 ) { mono = 0; + move16(); } } } FOR( j = 0; j < ndec; j++ ) { - IF( NE_32( hMdDec->spar_md.band_coeffs[b].P_re_fx[j], 0 ) ) + if ( hMdDec->spar_md.band_coeffs[b].P_re_fx[j] != 0 ) { mono = 0; + move16(); } } } @@ -1805,7 +1791,7 @@ void ivas_spar_smooth_md_dtx_fx( { FOR( k = dmx_ch; k < FOA_CHANNELS; k++ ) { - hMdDec->spar_coeffs.P_re_fx[j][k][b + i_ts * IVAS_MAX_NUM_BANDS] = hMdDec->spar_coeffs.P_re_fx[j][k][b]; /* Q22 */ + hMdDec->spar_coeffs.P_re_fx[j][k][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = hMdDec->spar_coeffs.P_re_fx[j][k][b]; /* Q22 */ move32(); } } @@ -1814,7 +1800,7 @@ void ivas_spar_smooth_md_dtx_fx( { FOR( k = 0; k < dmx_ch; k++ ) { - hMdDec->spar_coeffs.C_re_fx[j][k][b + i_ts * IVAS_MAX_NUM_BANDS] = hMdDec->spar_coeffs.C_re_fx[j][k][b]; /* Q22 */ + hMdDec->spar_coeffs.C_re_fx[j][k][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = hMdDec->spar_coeffs.C_re_fx[j][k][b]; /* Q22 */ move32(); } } @@ -1822,6 +1808,7 @@ void ivas_spar_smooth_md_dtx_fx( } hMdDec->dtx_md_smoothing_cntr = s_min( add( hMdDec->dtx_md_smoothing_cntr, 1 ), IVAS_DEFAULT_DTX_CNG_RAMP ); + move16(); return; } @@ -2390,14 +2377,14 @@ static void ivas_get_spar_matrices( static void ivas_get_spar_matrices_fx( ivas_spar_md_dec_state_t *hMdDec, - const int16_t num_bands_out, - const int16_t n_ts, - const int16_t bw, - const int16_t dtx_vad, - const int16_t nB, - const int16_t numch_out, - const int16_t active_w_vlbr, - const int16_t dyn_active_w_flag ) + const Word16 num_bands_out, + const Word16 n_ts, + const Word16 bw, + const Word16 dtx_vad, + const Word16 nB, + const Word16 numch_out, + const Word16 active_w_vlbr, + const Word16 dyn_active_w_flag ) { Word16 num_bands, dmx_ch, split_band; Word16 i, j, k, m, b, i_ts, active_w; @@ -2405,9 +2392,11 @@ static void ivas_get_spar_matrices_fx( Word32 active_w_dm_fac_fx, re_fx, re_fx1; num_bands = num_bands_out; + move16(); order = remix_order_set[hMdDec->spar_md_cfg.remix_unmix_order]; split_band = SPAR_DIRAC_SPLIT_START_BAND; + move16(); // Dead code as SPAR_DIRAC_SPLIT_START_BAND = 8 and IVAS_MAX_NUM_BANDS = 12 IF( GE_16( split_band, IVAS_MAX_NUM_BANDS ) ) @@ -2427,15 +2416,30 @@ static void ivas_get_spar_matrices_fx( } } - IF( bw == IVAS_RED_BAND_FACT ) + if ( EQ_16( bw, IVAS_RED_BAND_FACT ) ) { num_bands = shl( num_bands, 1 ); } + test(); active_w = EQ_16( dyn_active_w_flag, 1 ) || EQ_16( hMdDec->spar_md_cfg.active_w, 1 ); - active_w_dm_fac_fx = EQ_16( dtx_vad, 0 ) ? IVAS_ACTIVEW_DM_F_SCALE_DTX_FX : ( EQ_16( active_w_vlbr, 1 ) ? IVAS_ACTIVEW_DM_F_SCALE_VLBR_FX : IVAS_ACTIVEW_DM_F_SCALE_FX ); - move16(); + IF( dtx_vad == 0 ) + { + active_w_dm_fac_fx = IVAS_ACTIVEW_DM_F_SCALE_DTX_FX; + move32(); + } + ELSE IF( EQ_16( active_w_vlbr, 1 ) ) + { + active_w_dm_fac_fx = IVAS_ACTIVEW_DM_F_SCALE_VLBR_FX; + move32(); + } + ELSE + { + active_w_dm_fac_fx = IVAS_ACTIVEW_DM_F_SCALE_FX; + move32(); + } + FOR( i_ts = 0; i_ts < n_ts; i_ts++ ) { FOR( i = 0; i < numch_out; i++ ) @@ -2443,18 +2447,20 @@ static void ivas_get_spar_matrices_fx( FOR( j = 0; j < numch_out; j++ ) { - set32_fx( &hMdDec->spar_coeffs.C_re_fx[i][j][i_ts * IVAS_MAX_NUM_BANDS], 0, IVAS_MAX_NUM_BANDS ); - set32_fx( &hMdDec->spar_coeffs.P_re_fx[i][j][i_ts * IVAS_MAX_NUM_BANDS], 0, IVAS_MAX_NUM_BANDS ); + set32_fx( &hMdDec->spar_coeffs.C_re_fx[i][j][i_mult( i_ts, IVAS_MAX_NUM_BANDS )], 0, IVAS_MAX_NUM_BANDS ); + set32_fx( &hMdDec->spar_coeffs.P_re_fx[i][j][i_mult( i_ts, IVAS_MAX_NUM_BANDS )], 0, IVAS_MAX_NUM_BANDS ); } } num_bands = min( num_bands, nB ); + move16(); FOR( b = 0; b < num_bands; b++ ) { Word32 tmp_C1_re_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; Word32 tmp_C2_re_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; Word32 tmp_dm_re_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; - dmx_ch = hMdDec->spar_md_cfg.num_dmx_chans_per_band[bw * b]; + dmx_ch = hMdDec->spar_md_cfg.num_dmx_chans_per_band[i_mult( bw, b )]; + move16(); FOR( j = 0; j < numch_out; j++ ) { @@ -2472,7 +2478,7 @@ static void ivas_get_spar_matrices_fx( FOR( j = 1; j < numch_out; j++ ) { - tmp_C1_re_fx[j][0] = hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re_fx[j - 1]; // Q.22 + tmp_C1_re_fx[j][0] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1]; // Q.22 move32(); } @@ -2481,7 +2487,7 @@ static void ivas_get_spar_matrices_fx( FOR( j = 1; j < numch_out; j++ ) { - tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, -hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].pred_re_fx[j - 1] ); // Q31 *Q22=Q22 + tmp_C2_re_fx[0][j] = Mpy_32_32( active_w_dm_fac_fx, L_negate( hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].pred_re_fx[j - 1] ) ); // Q31 *Q22=Q22 move32(); } re_fx = Mpy_32_32( tmp_C2_re_fx[0][1], tmp_C1_re_fx[1][0] ); // Q22 *Q22 =Q13 @@ -2494,23 +2500,29 @@ static void ivas_get_spar_matrices_fx( tmp_dm_re_fx[0][0] = L_shl( L_add( re_fx1, re_fx ), Q9 ); // (Q13+Q13) << Q9 = Q22; move32(); - if ( EQ_16( dyn_active_w_flag, 1 ) ) + IF( EQ_16( dyn_active_w_flag, 1 ) ) { tmp_dm_re_fx[0][0] = L_shl( Mpy_32_32( tmp_dm_re_fx[0][0], IVAS_SPAR_DYN_ACTIVEW_THRESH_FX ), Q9 ); // Q13 *Q31 =Q13 << Q9=Q.22 move32(); } tmp_dm_re_fx[0][1] = tmp_C2_re_fx[0][1]; + move32(); tmp_dm_re_fx[0][2] = tmp_C2_re_fx[0][2]; + move32(); tmp_dm_re_fx[0][3] = tmp_C2_re_fx[0][3]; + move32(); tmp_dm_re_fx[1][0] = tmp_C1_re_fx[1][0]; + move32(); tmp_dm_re_fx[2][0] = tmp_C1_re_fx[2][0]; + move32(); tmp_dm_re_fx[3][0] = tmp_C1_re_fx[3][0]; + move32(); IF( NE_16( hMdDec->spar_md_cfg.remix_unmix_order, 3 ) ) { @@ -2521,12 +2533,11 @@ static void ivas_get_spar_matrices_fx( { IF( NE_16( hMdDec->spar_md_cfg.remix_unmix_order, 3 ) ) { - ivas_mat_col_rearrange_fx( tmp_C1_re_fx, order, i_ts, hMdDec->mixer_mat_fx, b, numch_out ); } } - IF( GT_16( dmx_ch, 0 ) ) + IF( dmx_ch > 0 ) { Word32 tmpC_re_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; Word32 tmpP_re_fx[IVAS_SPAR_MAX_CH][IVAS_SPAR_MAX_CH]; @@ -2552,7 +2563,7 @@ static void ivas_get_spar_matrices_fx( { FOR( k = 1; k < dmx_ch; k++ ) { - tmpC_re_fx[j][k] = hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].C_re_fx[j - dmx_ch][k - 1]; // Q22 + tmpC_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].C_re_fx[sub( j, dmx_ch )][k - 1]; // Q22 move32(); } } @@ -2563,7 +2574,7 @@ static void ivas_get_spar_matrices_fx( { IF( EQ_16( sub( j, dmx_ch ), sub( k, dmx_ch ) ) ) { - tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[b + i_ts * IVAS_MAX_NUM_BANDS].P_re_fx[k - dmx_ch]; // Q22 + tmpP_re_fx[j][k] = hMdDec->spar_md.band_coeffs[add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )].P_re_fx[sub( k, dmx_ch )]; // Q22 move32(); } ELSE @@ -2581,9 +2592,9 @@ static void ivas_get_spar_matrices_fx( { FOR( m = 0; m < numch_out; m++ ) { - re_fx = Mpy_32_32( hMdDec->mixer_mat_fx[j][m][b + i_ts * IVAS_MAX_NUM_BANDS], tmpP_re_fx[m][k] ); // Q30*Q22 + re_fx = Mpy_32_32( hMdDec->mixer_mat_fx[j][m][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], tmpP_re_fx[m][k] ); // Q30*Q22 re_fx = L_shl( re_fx, 1 ); - hMdDec->spar_coeffs.P_re_fx[j][k][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS] = L_add( hMdDec->spar_coeffs.P_re_fx[j][k][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS], re_fx ); + hMdDec->spar_coeffs.P_re_fx[j][k][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_add( hMdDec->spar_coeffs.P_re_fx[j][k][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], re_fx ); move32(); } } @@ -2596,23 +2607,24 @@ static void ivas_get_spar_matrices_fx( { FOR( m = 0; m < numch_out; m++ ) { - re_fx = Mpy_32_32( hMdDec->mixer_mat_fx[j][m][b + i_ts * IVAS_MAX_NUM_BANDS], tmpC_re_fx[m][k] ); // Q30* Q22 + re_fx = Mpy_32_32( hMdDec->mixer_mat_fx[j][m][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], tmpC_re_fx[m][k] ); // Q30* Q22 re_fx = L_shl( re_fx, 1 ); - hMdDec->spar_coeffs.C_re_fx[j][k][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS] = L_add( hMdDec->spar_coeffs.C_re_fx[j][k][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS], re_fx ); + hMdDec->spar_coeffs.C_re_fx[j][k][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_add( hMdDec->spar_coeffs.C_re_fx[j][k][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], re_fx ); move32(); } } } - hMdDec->spar_coeffs.C_re_fx[0][0][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS] = - max( 0, hMdDec->spar_coeffs.C_re_fx[0][0][( b * bw ) + i_ts * IVAS_MAX_NUM_BANDS] ); + hMdDec->spar_coeffs.C_re_fx[0][0][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = + max( 0, hMdDec->spar_coeffs.C_re_fx[0][0][add( i_mult( b, bw ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] ); + move32(); } } /* band mixing */ IF( EQ_16( bw, IVAS_RED_BAND_FACT ) ) { - FOR( b = 0; b < num_bands_out; b = b + bw ) + FOR( b = 0; b < num_bands_out; b = add( b, bw ) ) { dmx_ch = hMdDec->spar_md_cfg.num_dmx_chans_per_band[b]; move16(); @@ -2620,8 +2632,7 @@ static void ivas_get_spar_matrices_fx( { FOR( k = dmx_ch; k < numch_out; k++ ) { - - hMdDec->spar_coeffs.P_re_fx[j][k][( b + 1 ) + i_ts * IVAS_MAX_NUM_BANDS] = hMdDec->spar_coeffs.P_re_fx[j][k][b + i_ts * IVAS_MAX_NUM_BANDS]; + hMdDec->spar_coeffs.P_re_fx[j][k][add( add( b, 1 ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = hMdDec->spar_coeffs.P_re_fx[j][k][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )]; move32(); } } @@ -2630,9 +2641,7 @@ static void ivas_get_spar_matrices_fx( { FOR( k = 0; k < dmx_ch; k++ ) { - - hMdDec->spar_coeffs.C_re_fx[j][k][( b + 1 ) + i_ts * IVAS_MAX_NUM_BANDS] = hMdDec->spar_coeffs.C_re_fx[j][k][b + i_ts * IVAS_MAX_NUM_BANDS]; - + hMdDec->spar_coeffs.C_re_fx[j][k][add( add( b, 1 ), i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = hMdDec->spar_coeffs.C_re_fx[j][k][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )]; move32(); } } @@ -2697,7 +2706,7 @@ static void ivas_mat_col_rearrange_fx( FOR( j = 0; j < num_ch; j++ ) { - mixer_mat[j][i][bands + i_ts * IVAS_MAX_NUM_BANDS] = L_shl_sat( in_re[j][idx], Q8 ); + mixer_mat[j][i][add( bands, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_shl_sat( in_re[j][idx], Q8 ); move32(); } } @@ -2732,7 +2741,7 @@ void ivas_spar_dec_gen_umx_mat_fx( { FOR( b = 0; b < num_bands_out; b++ ) { - hMdDec->mixer_mat_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS] = L_shl_sat( hMdDec->spar_coeffs.C_re_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS], Q8 ); + hMdDec->mixer_mat_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_shl_sat( hMdDec->spar_coeffs.C_re_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], Q8 ); move32(); } } @@ -2743,7 +2752,7 @@ void ivas_spar_dec_gen_umx_mat_fx( { FOR( b = 0; b < num_bands_out; b++ ) { - hMdDec->mixer_mat_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS] = L_shl_sat( hMdDec->spar_coeffs.P_re_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS], Q8 ); + hMdDec->mixer_mat_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_shl_sat( hMdDec->spar_coeffs.P_re_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], Q8 ); move32(); } } @@ -2757,7 +2766,7 @@ void ivas_spar_dec_gen_umx_mat_fx( { FOR( b = 0; b < num_bands_out; b++ ) { - hMdDec->mixer_mat_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS] = L_shl_sat( hMdDec->spar_coeffs.C_re_fx[i][j][b + i_ts * IVAS_MAX_NUM_BANDS], Q8 ); + hMdDec->mixer_mat_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = L_shl_sat( hMdDec->spar_coeffs.C_re_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], Q8 ); move32(); } } @@ -2829,51 +2838,48 @@ void ivas_spar_dec_gen_umx_mat( static void ivas_spar_md_band_upmix( ivas_band_coeffs_t *band_coeffs, - int16_t *nB, - int16_t *bands_bw, - int16_t *valid_bands, - int16_t bw_final, - int16_t ndec, - int16_t ndm ) + Word16 *nB, + Word16 *bands_bw, + Word16 *valid_bands, + Word16 bw_final, + Word16 ndec, + Word16 ndm ) { - int16_t i, ii, jj, b, idx, bw_fact; + Word16 i, ii, jj, b, idx, bw_fact; - bw_fact = *bands_bw / bw_final; - for ( i = *nB - 1; i >= 0; i-- ) + bw_fact = idiv1616( *bands_bw, bw_final ); + FOR( i = sub( *nB, 1 ); i >= 0; i-- ) { - for ( b = bw_fact - 1; b >= 0; b-- ) + FOR( b = sub( bw_fact, 1 ); b >= 0; b-- ) { - idx = i * bw_fact + b; - for ( ii = 0; ii < ndec + ndm - 1; ii++ ) + idx = add( i_mult( i, bw_fact ), b ); + FOR( ii = 0; ii < sub( add( ndec, ndm ), 1 ); ii++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - band_coeffs[idx].pred_re[ii] = band_coeffs[i].pred_re[ii]; -#endif band_coeffs[idx].pred_re_fx[ii] = band_coeffs[i].pred_re_fx[ii]; + move32(); } - for ( ii = 0; ii < ndec; ii++ ) + FOR( ii = 0; ii < ndec; ii++ ) { - for ( jj = 0; jj < ndm - 1; jj++ ) + FOR( jj = 0; jj < sub( ndm, 1 ); jj++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - band_coeffs[idx].C_re[ii][jj] = band_coeffs[i].C_re[ii][jj]; -#endif band_coeffs[idx].C_re_fx[ii][jj] = band_coeffs[i].C_re_fx[ii][jj]; + move32(); } } - for ( jj = 0; jj < ndec; jj++ ) + FOR( jj = 0; jj < ndec; jj++ ) { -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - band_coeffs[idx].P_re[jj] = band_coeffs[i].P_re[jj]; -#endif band_coeffs[idx].P_re_fx[jj] = band_coeffs[i].P_re_fx[jj]; + move32(); } valid_bands[idx] = valid_bands[i]; + move16(); } } - *nB = ( *nB ) * ( *bands_bw ) / bw_final; + *nB = idiv1616( i_mult( ( *nB ), ( *bands_bw ) ), bw_final ); + move16(); *bands_bw = bw_final; + move16(); return; } @@ -3122,99 +3128,112 @@ static void ivas_spar_dec_parse_md_bs( static void ivas_spar_dec_parse_md_bs_fx( ivas_spar_md_dec_state_t *hMdDec, Decoder_State *st0, - int16_t *nB, - int16_t *bands_bw, - int16_t *dtx_vad, - const int32_t ivas_total_brate, - const int16_t sba_inactive_mode ) + Word16 *nB, + Word16 *bands_bw, + Word16 *dtx_vad, + const Word32 ivas_total_brate, + const Word16 sba_inactive_mode ) { - int16_t i, j, k, num_bands; - int16_t ii, jj, ndec, ndm; - uint16_t qsi; + Word16 i, j, k, num_bands; + Word16 ii, jj, ndec, ndm; + UWord16 qsi; ivas_quant_strat_t qs; - int16_t strat, no_ec; - int16_t do_diff[IVAS_MAX_NUM_BANDS]; - // float quant[IVAS_SPAR_MAX_C_COEFF]; + Word16 strat, no_ec; + Word16 do_diff[IVAS_MAX_NUM_BANDS]; Word32 quant_fx[IVAS_SPAR_MAX_C_COEFF]; - int16_t do_repeat[IVAS_MAX_NUM_BANDS]; + Word16 do_repeat[IVAS_MAX_NUM_BANDS]; *dtx_vad = 1; + move16(); *bands_bw = 1; + move16(); qsi = 0; + move16(); num_bands = hMdDec->spar_md.num_bands; + move16(); - if ( ivas_total_brate > IVAS_SID_5k2 ) + IF( GT_32( ivas_total_brate, IVAS_SID_5k2 ) ) { - if ( hMdDec->spar_md_cfg.quant_strat_bits > 0 ) + IF( hMdDec->spar_md_cfg.quant_strat_bits > 0 ) { - if ( ivas_total_brate >= BRATE_SPAR_Q_STRAT ) + IF( GE_32( ivas_total_brate, BRATE_SPAR_Q_STRAT ) ) { /*only one bit written for quantization strategy to indicate either a fixed quantization strategy or dtx_vad==0 */ qsi = get_next_indice( st0, 1 ); - if ( qsi == 1 ) + if ( EQ_32( qsi, 1 ) ) { *dtx_vad = 0; + move16(); } } - else + ELSE { - if ( sba_inactive_mode == 1 ) + IF( EQ_16( sba_inactive_mode, 1 ) ) { *dtx_vad = 0; - qsi = hMdDec->spar_md_cfg.quant_strat_bits + 1; + move16(); + qsi = add( hMdDec->spar_md_cfg.quant_strat_bits, 1 ); } - else + ELSE { qsi = get_next_indice( st0, hMdDec->spar_md_cfg.quant_strat_bits ); } } } - else + ELSE { qsi = 0; + move16(); } } - else + ELSE { *dtx_vad = 0; + move16(); } hMdDec->dtx_vad = *dtx_vad; + move16(); - if ( *dtx_vad == 0 ) + IF( *dtx_vad == 0 ) { *nB = SPAR_DTX_BANDS; - *bands_bw = num_bands / *nB; + move16(); + *bands_bw = idiv1616( num_bands, *nB ); + move16(); - for ( i = 0; i < *nB; i++ ) + FOR( i = 0; i < *nB; i++ ) { - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { - // hMdDec->spar_md.band_coeffs[i].pred_re[j] = 0; hMdDec->spar_md.band_coeffs[i].pred_re_fx[j] = 0; - // hMdDec->spar_md.band_coeffs[i].P_re[j] = 0; + move32(); hMdDec->spar_md.band_coeffs[i].P_re_fx[j] = 0; + move32(); } hMdDec->valid_bands[i] = 1; + move16(); } - for ( i = 0; i < num_bands; i++ ) + FOR( i = 0; i < num_bands; i++ ) { - for ( j = 0; j < ( IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS ); j++ ) + FOR( j = 0; j < ( IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS ); j++ ) { - for ( k = 0; k < ( IVAS_SPAR_MAX_DMX_CHS - 1 ); k++ ) + FOR( k = 0; k < ( IVAS_SPAR_MAX_DMX_CHS - 1 ); k++ ) { - // hMdDec->spar_md.band_coeffs[i].C_re[j][k] = 0; hMdDec->spar_md.band_coeffs[i].C_re_fx[j][k] = 0; + move32(); } } } ivas_parse_parameter_bitstream_dtx( &hMdDec->spar_md, st0, *bands_bw, *nB, hMdDec->spar_md_cfg.num_dmx_chans_per_band, hMdDec->spar_md_cfg.num_decorr_per_band ); - if ( *bands_bw != 1 ) + IF( NE_16( *bands_bw, 1 ) ) { ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + move16(); ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; + move16(); ivas_spar_md_band_upmix( hMdDec->spar_md.band_coeffs, nB, @@ -3229,124 +3248,175 @@ static void ivas_spar_dec_parse_md_bs_fx( } qs = hMdDec->spar_md_cfg.quant_strat[qsi]; + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); strat = get_next_indice( st0, 3 ); no_ec = 0; + move16(); - if ( strat < 2 ) + IF( LT_16( strat, 2 ) ) { - *bands_bw = strat + 1; - *nB = num_bands / *bands_bw; - for ( i = 0; i < *nB; i++ ) + *bands_bw = add( strat, 1 ); + move16(); + *nB = idiv1616( num_bands, *bands_bw ); + move16(); + FOR( i = 0; i < *nB; i++ ) { do_diff[i] = 0; + move16(); do_repeat[i] = 0; + move16(); } } - else if ( strat < 4 ) + ELSE IF( LT_16( strat, 4 ) ) { - *bands_bw = strat - 1; - *nB = num_bands / *bands_bw; - for ( i = 0; i < *nB; i++ ) + *bands_bw = sub( strat, 1 ); + move16(); + *nB = idiv1616( num_bands, *bands_bw ); + move16(); + FOR( i = 0; i < *nB; i++ ) { do_diff[i] = 0; + move16(); do_repeat[i] = 0; + move16(); } no_ec = 1; + move16(); } - else if ( ivas_total_brate < IVAS_24k4 ) + ELSE IF( LT_32( ivas_total_brate, IVAS_24k4 ) ) { *bands_bw = 2; - *nB = num_bands / *bands_bw; + move16(); + *nB = idiv1616( num_bands, *bands_bw ); + move16(); - for ( i = 0; i < *nB; i++ ) + FOR( i = 0; i < *nB; i++ ) { do_diff[i] = 0; - do_repeat[i] = ( ( strat % 2 ) == ( ( i + 1 ) % 2 ) ); + move16(); + do_repeat[i] = extract_l( EQ_16( ( strat % 2 ), ( add( i, 1 ) % 2 ) ) ); + move16(); } } - else + ELSE { *bands_bw = 1; + move16(); *nB = num_bands; + move16(); - for ( i = 0; i < *nB; i++ ) + FOR( i = 0; i < *nB; i++ ) { - do_diff[i] = ( ( ( i + 1 ) & 3 ) != strat - 4 ); + do_diff[i] = extract_l( NE_16( s_and( add( i, 1 ), 3 ), sub( strat, 4 ) ) ); + move16(); do_repeat[i] = 0; + move16(); } - if ( hMdDec->spar_md_cfg.prev_quant_idx >= 0 ) + IF( hMdDec->spar_md_cfg.prev_quant_idx >= 0 ) { ivas_map_prior_coeffs_quant( &hMdDec->spar_md_prev, &hMdDec->spar_md_cfg, qsi, *nB ); } } hMdDec->spar_md_cfg.prev_quant_idx = qsi; + move16(); - if ( no_ec == 0 ) + IF( no_ec == 0 ) { ivas_decode_arith_bs( hMdDec, st0, qsi, *nB, *bands_bw, do_diff, strat, ivas_total_brate ); } - else + ELSE { ivas_decode_huffman_bs( hMdDec, st0, qsi, *nB, *bands_bw ); } - for ( i = 0; i < *nB; i++ ) + FOR( i = 0; i < *nB; i++ ) { - ndec = hMdDec->spar_md_cfg.num_decorr_per_band[( *bands_bw ) * i]; - ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[( *bands_bw ) * i]; + ndec = hMdDec->spar_md_cfg.num_decorr_per_band[i_mult( ( *bands_bw ), i )]; + move16(); + ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[i_mult( ( *bands_bw ), i )]; + move16(); - // ivas_deindex_real_index( hMdDec->spar_md.band_coeffs_idx[i].pred_index_re, qs.PR.q_levels[0], qs.PR.min, qs.PR.max, hMdDec->spar_md.band_coeffs[i].pred_re, ndm + ndec - 1 ); - ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].pred_index_re, qs.PR.q_levels[0], qs.PR.min_fx, qs.PR.max_fx, hMdDec->spar_md.band_coeffs[i].pred_re_fx, ndm + ndec - 1 ); + ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].pred_index_re, qs.PR.q_levels[0], qs.PR.min_fx, qs.PR.max_fx, hMdDec->spar_md.band_coeffs[i].pred_re_fx, sub( add( ndm, ndec ), 1 ) ); j = 0; - for ( ii = 0; ii < ndec; ii++ ) + move16(); + FOR( ii = 0; ii < ndec; ii++ ) { - for ( jj = 0; jj < ndm - 1; jj++ ) + FOR( jj = 0; jj < sub( ndm, 1 ); jj++ ) { - // quant[j] = hMdDec->spar_md.band_coeffs[i].C_re[ii][jj]; quant_fx[j] = hMdDec->spar_md.band_coeffs[i].C_re_fx[ii][jj]; - j++; + move32(); + j = add( j, 1 ); } } - // ivas_deindex_real_index( hMdDec->spar_md.band_coeffs_idx[i].drct_index_re, qs.C.q_levels[0], qs.C.min, qs.C.max, quant, ndec * ( ndm - 1 ) ); - ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].drct_index_re, qs.C.q_levels[0], qs.C.min_fx, qs.C.max_fx, quant_fx, ndec * ( ndm - 1 ) ); + ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].drct_index_re, qs.C.q_levels[0], qs.C.min_fx, qs.C.max_fx, quant_fx, i_mult( ndec, sub( ndm, 1 ) ) ); j = 0; - for ( ii = 0; ii < ndec; ii++ ) + move16(); + FOR( ii = 0; ii < ndec; ii++ ) { - for ( jj = 0; jj < ndm - 1; jj++ ) + FOR( jj = 0; jj < sub( ndm, 1 ); jj++ ) { - // hMdDec->spar_md.band_coeffs[i].C_re[ii][jj] = quant[j]; hMdDec->spar_md.band_coeffs[i].C_re_fx[ii][jj] = quant_fx[j]; - j++; + move32(); + j = add( j, 1 ); } } - // ivas_deindex_real_index( hMdDec->spar_md.band_coeffs_idx[i].decd_index_re, qs.P_r.q_levels[0], qs.P_r.min, qs.P_r.max, hMdDec->spar_md.band_coeffs[i].P_re, ndm + ndec - 1 ); - ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].decd_index_re, qs.P_r.q_levels[0], qs.P_r.min_fx, qs.P_r.max_fx, hMdDec->spar_md.band_coeffs[i].P_re_fx, ndm + ndec - 1 ); + ivas_deindex_real_index_fx( hMdDec->spar_md.band_coeffs_idx[i].decd_index_re, qs.P_r.q_levels[0], qs.P_r.min_fx, qs.P_r.max_fx, hMdDec->spar_md.band_coeffs[i].P_re_fx, sub( add( ndm, ndec ), 1 ) ); /* Store prior coefficient indices */ - for ( j = 0; j < ndm + ndec - 1; j++ ) + FOR( j = 0; j < sub( add( ndm, ndec ), 1 ); j++ ) { hMdDec->spar_md_prev.band_coeffs_idx[i].pred_index_re[j] = hMdDec->spar_md.band_coeffs_idx[i].pred_index_re[j]; + move16(); } - for ( j = 0; j < ndec * ( ndm - 1 ); j++ ) + FOR( j = 0; j < i_mult( ndec, sub( ndm, 1 ) ); j++ ) { hMdDec->spar_md_prev.band_coeffs_idx[i].drct_index_re[j] = hMdDec->spar_md.band_coeffs_idx[i].drct_index_re[j]; + move16(); } - for ( j = 0; j < ndec; j++ ) + FOR( j = 0; j < ndec; j++ ) { hMdDec->spar_md_prev.band_coeffs_idx[i].decd_index_re[j] = hMdDec->spar_md.band_coeffs_idx[i].decd_index_re[j]; + move16(); + } + test(); + IF( ( do_diff[i] == 0 ) && ( do_repeat[i] == 0 ) ) + { + hMdDec->valid_bands[i] = s_or( hMdDec->valid_bands[i], 1 ); + move16(); + } + ELSE + { + hMdDec->valid_bands[i] = s_or( hMdDec->valid_bands[i], 0 ); + move16(); } - hMdDec->valid_bands[i] |= ( do_diff[i] == 0 && do_repeat[i] == 0 ) ? 1 : 0; } ndec = hMdDec->spar_md_cfg.num_decorr_per_band[0]; + move16(); ndm = hMdDec->spar_md_cfg.num_dmx_chans_per_band[0]; - if ( *bands_bw != 1 ) + move16(); + IF( NE_16( *bands_bw, 1 ) ) { ivas_spar_md_band_upmix( hMdDec->spar_md.band_coeffs, @@ -3702,37 +3772,47 @@ static void ivas_spar_get_plc_interp_weights( #endif static void ivas_spar_get_plc_interp_weights_fx( - int16_t valid_band_idx[IVAS_MAX_NUM_BANDS], - int16_t last_valid_band_idx, - int16_t idx, - int16_t b, + Word16 valid_band_idx[IVAS_MAX_NUM_BANDS], + Word16 last_valid_band_idx, + Word16 idx, + Word16 b, Word16 *w, - int16_t *id0, - int16_t *id1 ) + Word16 *id0, + Word16 *id1 ) { - if ( last_valid_band_idx < 0 ) /* Extrapolation */ + IF( last_valid_band_idx < 0 ) /* Extrapolation */ { *id1 = valid_band_idx[0]; + move16(); *id0 = 0; + move16(); *w = MAX_WORD16; + move16(); } - else if ( last_valid_band_idx == idx ) /* Extrapolation */ + ELSE IF( EQ_16( last_valid_band_idx, idx ) ) /* Extrapolation */ { *id1 = valid_band_idx[last_valid_band_idx]; + move16(); *id0 = valid_band_idx[last_valid_band_idx]; + move16(); *w = 0; + move16(); } - else /* Interpolation */ + ELSE /* Interpolation */ { *id0 = valid_band_idx[last_valid_band_idx]; + move16(); *id1 = valid_band_idx[last_valid_band_idx + 1]; - if ( ( b - *id0 ) == 0 ) + move16(); + IF( sub( b, *id0 ) == 0 ) { *w = 0; + move16(); } - else + ELSE { - *w = divide3232( ( b - *id0 ), ( *id1 - *id0 ) ); + *w = divide3232( sub( b, *id0 ), sub( *id1, *id0 ) ); + move16(); } } return; @@ -3756,13 +3836,15 @@ static void ivas_spar_md_fill_invalid_bands_fx( { Word16 i, j, b, all_valid; Word16 valid_band_idx[IVAS_MAX_NUM_BANDS], idx = -1; + move16(); Word16 last_valid_band_idx[IVAS_MAX_NUM_BANDS]; Word16 w_fx = 0; + move16(); ivas_spar_plc_get_band_age( valid_bands, base_band_age, num_bands, last_valid_band_idx, valid_band_idx, &all_valid, &idx ); assert( idx > 0 ); /* some bands should be valid */ - IF( EQ_16( all_valid, 0 ) ) + IF( all_valid == 0 ) { FOR( b = 0; b < num_bands; b++ ) { @@ -3786,7 +3868,7 @@ static void ivas_spar_md_fill_invalid_bands_fx( } ELSE /* young invalid bands */ { - IF( EQ_16( valid_bands[b], 0 ) ) + IF( valid_bands[b] == 0 ) { FOR( i = 0; i < num_channels; i++ ) { @@ -3801,7 +3883,7 @@ static void ivas_spar_md_fill_invalid_bands_fx( } } - IF( EQ_16( valid_bands[b], 0 ) ) + IF( valid_bands[b] == 0 ) { Word16 i_ts; FOR( i = 0; i < num_channels; i++ ) @@ -3810,9 +3892,9 @@ static void ivas_spar_md_fill_invalid_bands_fx( { FOR( i_ts = 1; i_ts < num_md_sub_frames; i_ts++ ) { - pSpar_coeffs->C_re_fx[i][j][add( b, i_ts * IVAS_MAX_NUM_BANDS )] = pSpar_coeffs->C_re_fx[i][j][b]; + pSpar_coeffs->C_re_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = pSpar_coeffs->C_re_fx[i][j][b]; move32(); - pSpar_coeffs->P_re_fx[i][j][add( b, i_ts * IVAS_MAX_NUM_BANDS )] = pSpar_coeffs->P_re_fx[i][j][b]; + pSpar_coeffs->P_re_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = pSpar_coeffs->P_re_fx[i][j][b]; move32(); } } @@ -3907,87 +3989,89 @@ static void ivas_spar_md_fill_invalid_bands( static void ivas_spar_md_fill_invalid_bandcoeffs( ivas_band_coeffs_t *pBand_coeffs, ivas_band_coeffs_t *pBand_coeffs_prev, - const int16_t *valid_bands, - int16_t *base_band_age, - int16_t *first_valid_frame, - const int16_t num_bands ) + const Word16 *valid_bands, + Word16 *base_band_age, + Word16 *first_valid_frame, + const Word16 num_bands ) { - int16_t j, k, b, all_valid; - int16_t valid_band_idx[IVAS_MAX_NUM_BANDS], idx = -1; - int16_t last_valid_band_idx[IVAS_MAX_NUM_BANDS]; - // float w = 0; + Word16 j, k, b, all_valid; + Word16 valid_band_idx[IVAS_MAX_NUM_BANDS], idx = -1; + move16(); + Word16 last_valid_band_idx[IVAS_MAX_NUM_BANDS]; Word16 w_fx = 0; + move16(); ivas_spar_plc_get_band_age( valid_bands, base_band_age, num_bands, last_valid_band_idx, valid_band_idx, &all_valid, &idx ); assert( idx > 0 ); /* some bands should be valid */ - if ( all_valid == 0 ) + IF( all_valid == 0 ) { - for ( b = 0; b < num_bands; b++ ) + FOR( b = 0; b < num_bands; b++ ) { /* check against non zero in if and else if */ - if ( ( base_band_age[b] > 3 ) || ( *first_valid_frame == 0 ) ) /* old invalid bands */ + test(); + IF( GT_16( base_band_age[b], 3 ) || ( *first_valid_frame == 0 ) ) /* old invalid bands */ { - int16_t id0, id1; - // ivas_spar_get_plc_interp_weights( valid_band_idx, last_valid_band_idx[b], - // idx, b, &w, &id0, &id1 ); + Word16 id0, id1; + ivas_spar_get_plc_interp_weights_fx( valid_band_idx, last_valid_band_idx[b], idx, b, &w_fx, &id0, &id1 ); - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { - // pBand_coeffs[b].pred_re[j] = ( 1 - w ) * pBand_coeffs[id0].pred_re[j] + w * pBand_coeffs[id1].pred_re[j]; pBand_coeffs[b].pred_re_fx[j] = L_add( Mpy_32_16_1( pBand_coeffs[id0].pred_re_fx[j], sub( MAX_WORD16, w_fx ) ), Mpy_32_16_1( pBand_coeffs[id1].pred_re_fx[j], w_fx ) ); + move32(); } - for ( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) { - for ( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) + FOR( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) { - // pBand_coeffs[b].C_re[j][k] = ( 1 - w ) * pBand_coeffs[id0].C_re[j][k] + w * pBand_coeffs[id1].C_re[j][k]; pBand_coeffs[b].C_re_fx[j][k] = L_add( Mpy_32_16_1( pBand_coeffs[id0].C_re_fx[j][k], sub( MAX_WORD16, w_fx ) ), Mpy_32_16_1( pBand_coeffs[id1].C_re_fx[j][k], w_fx ) ); + move32(); } } - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { - // pBand_coeffs[b].P_re[j] = ( 1 - w ) * pBand_coeffs[id0].P_re[j] + w * pBand_coeffs[id1].P_re[j]; pBand_coeffs[b].P_re_fx[j] = L_add( Mpy_32_16_1( pBand_coeffs[id0].P_re_fx[j], sub( MAX_WORD16, w_fx ) ), Mpy_32_16_1( pBand_coeffs[id1].P_re_fx[j], w_fx ) ); + move32(); } } - else /* young invalid bands */ + ELSE /* young invalid bands */ { - if ( valid_bands[b] == 0 ) + IF( valid_bands[b] == 0 ) { - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { - // pBand_coeffs[b].pred_re[j] = pBand_coeffs_prev[b].pred_re[j]; pBand_coeffs[b].pred_re_fx[j] = pBand_coeffs_prev[b].pred_re_fx[j]; + move32(); } - for ( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - IVAS_SPAR_MAX_DMX_CHS; j++ ) { - for ( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) + FOR( k = 0; k < IVAS_SPAR_MAX_DMX_CHS - 1; k++ ) { - // pBand_coeffs[b].C_re[j][k] = pBand_coeffs_prev[b].C_re[j][k]; pBand_coeffs[b].C_re_fx[j][k] = pBand_coeffs_prev[b].C_re_fx[j][k]; + move32(); } } - for ( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) + FOR( j = 0; j < IVAS_SPAR_MAX_CH - 1; j++ ) { - // pBand_coeffs[b].P_re[j] = pBand_coeffs_prev[b].P_re[j]; pBand_coeffs[b].P_re_fx[j] = pBand_coeffs_prev[b].P_re_fx[j]; + move32(); } } } } } - else + ELSE { *first_valid_frame = 1; + move16(); } return; @@ -4120,8 +4204,9 @@ static void ivas_spar_dec_compute_ramp_down_post_matrix_fx( Word16 norm_nff; /*norm of num_fade_frames*/ num_fade_frames = s_max( sub( hMdDec->spar_plc_num_lost_frames, ivas_spar_dec_plc_num_frames_keep ), 0 ); norm_nff = norm_s( num_fade_frames ); - gain_dB = -imult1616( s_min( num_fade_frames, ivas_spar_dec_plc_max_num_frames_ramp_down ), ivas_spar_dec_plc_per_frame_ramp_down_gain_dB ); /*abs(gain_dB)<99*/ /*Q(gain_dB)=7Q24*/ - Word16 exp_gain; /*stores exponent for gain_fx*/ + gain_dB = negate( imult1616( s_min( num_fade_frames, ivas_spar_dec_plc_max_num_frames_ramp_down ), ivas_spar_dec_plc_per_frame_ramp_down_gain_dB ) ); /*abs(gain_dB)<99*/ /*Q(gain_dB)=7Q24*/ + Word16 exp_gain = 0; + move16(); /*stores exponent for gain_fx*/ gain_fx = BASOP_util_Pow2( Mult_32_16( imult3216( 13421773 /*=2^28/20*/, gain_dB ), 27213 /*=log2(10)*2^13*/ ), 5, &exp_gain ); Q_gain = sub( 31, exp_gain ); FOR( i = 0; i < IVAS_SPAR_MAX_CH; i++ ) @@ -4141,7 +4226,7 @@ static void ivas_spar_dec_compute_ramp_down_post_matrix_fx( { FOR( b = 0; b < num_bands_out; b++ ) { - hMdDec->mixer_mat_fx[i][j][add( b, imult1616( i_ts, IVAS_MAX_NUM_BANDS ) )] = Mult_32_32( hMdDec->mixer_mat_fx[i][j][add( b, imult1616( i_ts, IVAS_MAX_NUM_BANDS ) )], post_matrix_fx[i] ); + hMdDec->mixer_mat_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )] = Mult_32_32( hMdDec->mixer_mat_fx[i][j][add( b, i_mult( i_ts, IVAS_MAX_NUM_BANDS ) )], post_matrix_fx[i] ); move32(); } } @@ -4227,56 +4312,47 @@ static void ivas_spar_dec_compute_ramp_down_post_matrix( #ifdef IVAS_FLOAT_FIXED static void ivas_spar_unquant_dtx_indicies( ivas_spar_md_t *pSpar_md, - const int16_t nB, - const int16_t bw, - int16_t *ndm_per_band ) + const Word16 nB, + const Word16 bw, + Word16 *ndm_per_band ) { - int16_t i, b; - int16_t q_lvl; + Word16 i, b; + Word16 q_lvl; // float val; Word32 val_fx; - int16_t idx; + Word16 idx; // float pr_min_max[2]; Word32 pr_min_max_fx[2]; - // pr_min_max[0] = pSpar_md->min_max[0]; - // pr_min_max[1] = pSpar_md->min_max[1]; - pr_min_max_fx[0] = pSpar_md->min_max_fx[0]; + move32(); pr_min_max_fx[1] = pSpar_md->min_max_fx[1]; + move32(); - for ( b = 0; b < nB; b++ ) + FOR( b = 0; b < nB; b++ ) { - for ( i = 0; i < FOA_CHANNELS - 1; i++ ) + FOR( i = 0; i < FOA_CHANNELS - 1; i++ ) { - q_lvl = dtx_pr_real_q_levels[ndm_per_band[bw * b] - 1][i]; - // idx = pSpar_md->band_coeffs_idx[b].pred_index_re[i]; - // ivas_deindex_real_index( &idx, q_lvl, pr_min_max[0], pr_min_max[1], &val, 1 ); - // pSpar_md->band_coeffs[b].pred_re[i] = val; - // pSpar_md->band_coeffs[b].pred_re_fx[i] = val * (1 << 22); + q_lvl = dtx_pr_real_q_levels[sub( ndm_per_band[i_mult( bw, b )], 1 )][i]; + move16(); idx = pSpar_md->band_coeffs_idx[b].pred_index_re[i]; + move16(); ivas_deindex_real_index_fx( &idx, q_lvl, pr_min_max_fx[0], pr_min_max_fx[1], &val_fx, 1 ); pSpar_md->band_coeffs[b].pred_re_fx[i] = val_fx; -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - pSpar_md->band_coeffs[b].pred_re[i] = (float)val_fx / (1 << 22); -#endif + move32(); } - for ( i = 0; i < FOA_CHANNELS - ndm_per_band[bw * b]; i++ ) + FOR( i = 0; i < sub( FOA_CHANNELS, ndm_per_band[i_mult( bw, b )] ); i++ ) { - q_lvl = dtx_pd_real_q_levels[ndm_per_band[bw * b] - 1][i]; - // idx = pSpar_md->band_coeffs_idx[b].decd_index_re[i]; - // ivas_deindex_real_index( &idx, q_lvl, dtx_pd_real_min_max[0], dtx_pd_real_min_max[1], &val, 1 ); - // pSpar_md->band_coeffs[b].P_re[i] = val; - // pSpar_md->band_coeffs[b].P_re_fx[i] = val*(1<<22); + q_lvl = dtx_pd_real_q_levels[sub( ndm_per_band[i_mult( bw, b )], 1 )][i]; + move16(); idx = pSpar_md->band_coeffs_idx[b].decd_index_re[i]; + move16(); ivas_deindex_real_index_fx( &idx, q_lvl, dtx_pd_real_min_max_fx[0], dtx_pd_real_min_max_fx[1], &val_fx, 1 ); pSpar_md->band_coeffs[b].P_re_fx[i] = val_fx; -#if 0 // ndef IVAS_FLOAT_FIXED_TO_BE_REMOVED - pSpar_md->band_coeffs[b].P_re[i] = (float)val_fx / (1 << 22); -#endif + move32(); } } @@ -4330,91 +4406,102 @@ static void ivas_spar_unquant_dtx_indicies( static void ivas_parse_parameter_bitstream_dtx( ivas_spar_md_t *pSpar_md, Decoder_State *st0, /* i/o: decoder state structure - for bitstream handling*/ - const int16_t bw, - const int16_t num_bands, - int16_t *num_dmx_per_band, - int16_t *num_dec_per_band ) + const Word16 bw, + const Word16 num_bands, + Word16 *num_dmx_per_band, + Word16 *num_dec_per_band ) { - int16_t i, j, ndec, ndm; - // float val; + Word16 i, j, ndec, ndm; Word32 val_fx; - int16_t idx; - // float pr_min_max[2]; + Word16 idx; Word32 pr_min_max_fx[2]; - int16_t pr_q_lvls, pr, pd, pd_q_lvls, pr_pd_bits; - int16_t zero_pad_bits, sid_bits_len; + Word16 pr_q_lvls, pr, pd, pd_q_lvls, pr_pd_bits; + Word16 zero_pad_bits, sid_bits_len; sid_bits_len = st0->next_bit_pos; - // pr_min_max[0] = pSpar_md->min_max[0]; + move16(); pr_min_max_fx[0] = pSpar_md->min_max_fx[0]; - // pr_min_max[1] = pSpar_md->min_max[1]; + move32(); pr_min_max_fx[1] = pSpar_md->min_max_fx[1]; + move32(); - for ( i = 0; i < num_bands; i++ ) + FOR( i = 0; i < num_bands; i++ ) { - ndec = num_dec_per_band[bw * i]; - ndm = num_dmx_per_band[bw * i]; + ndec = num_dec_per_band[i_mult( bw, i )]; + move16(); + ndm = num_dmx_per_band[i_mult( bw, i )]; + move16(); - for ( j = 0; j < FOA_CHANNELS - 1; j++ ) + FOR( j = 0; j < FOA_CHANNELS - 1; j++ ) { - int16_t pr_idx_1, pr_idx_2, pd_idx_1, pd_idx_2; - uint16_t value; + Word16 pr_idx_1, pr_idx_2, pd_idx_1, pd_idx_2; + UWord16 value; pr_idx_1 = pr_pr_idx_pairs[ndm - 1][j][0]; + move16(); pr_idx_2 = pr_pr_idx_pairs[ndm - 1][j][1]; + move16(); pd_idx_1 = pr_pd_idx_pairs[ndm - 1][j][0]; + move16(); pd_idx_2 = pr_pd_idx_pairs[ndm - 1][j][1]; + move16(); - if ( pr_idx_1 != 0 || pd_idx_1 != 0 || pr_idx_2 != 0 || pd_idx_2 != 0 ) + test(); + test(); + test(); + IF( pr_idx_1 != 0 || pd_idx_1 != 0 || pr_idx_2 != 0 || pd_idx_2 != 0 ) { pr_q_lvls = dtx_pr_real_q_levels[ndm - 1][pd_idx_1 - 1]; + move16(); - if ( ( j + 1 ) > ndec ) + IF( GT_16( add( j, 1 ), ndec ) ) { pd_q_lvls = 1; + move16(); } - else + ELSE { pd_q_lvls = dtx_pd_real_q_levels[ndm - 1][pd_idx_2 - 1]; + move16(); } - pr_pd_bits = ivas_get_bits_to_encode( pd_q_lvls * pr_q_lvls ); + pr_pd_bits = ivas_get_bits_to_encode( L_mult0( pd_q_lvls, pr_q_lvls ) ); value = get_next_indice( st0, pr_pd_bits ); - pr = (int16_t) floor( value / pd_q_lvls ); - pd = value - pr * pd_q_lvls; - // val = dtx_pd_real_min_max[0]; + pr = idiv1616( value, pd_q_lvls ); + pd = extract_l( L_sub( value, i_mult( pr, pd_q_lvls ) ) ); val_fx = dtx_pd_real_min_max_fx[0]; - // ivas_quantise_real_values( &val, pd_q_lvls, dtx_pd_real_min_max[0], dtx_pd_real_min_max[1], &idx, &val, 1 ); + move32(); ivas_quantise_real_values_fx( &val_fx, pd_q_lvls, dtx_pd_real_min_max_fx[0], dtx_pd_real_min_max_fx[1], &idx, &val_fx, 1 ); - pd = pd + idx; + pd = add( pd, idx ); - // val = pr_min_max[0]; val_fx = pr_min_max_fx[0]; - // ivas_quantise_real_values( &val, pr_q_lvls, pr_min_max[0], pr_min_max[1], &idx, &val, 1 ); + move32(); ivas_quantise_real_values_fx( &val_fx, pr_q_lvls, pr_min_max_fx[0], pr_min_max_fx[1], &idx, &val_fx, 1 ); - pr = pr + idx; + pr = add( pr, idx ); - if ( ( j + 1 ) <= ndec ) + if ( LE_16( add( j, 1 ), ndec ) ) { pSpar_md->band_coeffs_idx[i].decd_index_re[pd_idx_2 - 1] = pd; + move16(); } pSpar_md->band_coeffs_idx[i].pred_index_re[pd_idx_1 - 1] = pr; + move16(); } } } - sid_bits_len = st0->next_bit_pos - sid_bits_len; - zero_pad_bits = ( SPAR_DTX_BANDS * SPAR_SID_BITS_TAR_PER_BAND ) - sid_bits_len; + sid_bits_len = sub( st0->next_bit_pos, sid_bits_len ); + zero_pad_bits = sub( ( SPAR_DTX_BANDS * SPAR_SID_BITS_TAR_PER_BAND ), sid_bits_len ); assert( zero_pad_bits >= 0 ); - if ( num_dmx_per_band[0] == 2 ) + if ( EQ_16( num_dmx_per_band[0], 2 ) ) { - zero_pad_bits -= 1; + zero_pad_bits = sub( zero_pad_bits, 1 ); } - for ( j = 0; j < zero_pad_bits; j++ ) + FOR( j = 0; j < zero_pad_bits; j++ ) { get_next_indice( st0, 1 ); } @@ -4558,34 +4645,36 @@ static ivas_error ivas_deindex_real_index( #else static ivas_error ivas_deindex_real_index_fx( - const int16_t *index, - const int16_t q_levels, + const Word16 *index, + const Word16 q_levels, const Word32 min_value, const Word32 max_value, Word32 *quant, - const int16_t dim ) + const Word16 dim ) { - int16_t i; + Word16 i; Word32 q_step_fx; - if ( q_levels == 0 ) + IF( q_levels == 0 ) { return IVAS_ERR_INTERNAL; } - if ( q_levels == 1 ) + IF( EQ_16( q_levels, 1 ) ) { - for ( i = 0; i < dim; i++ ) + FOR( i = 0; i < dim; i++ ) { quant[i] = 0; + move32(); } } - else + ELSE { q_step_fx = L_sub( max_value, min_value ); - q_step_fx = Mpy_32_32( q_step_fx, one_by_q_level[( q_levels - 1 )] ); - for ( i = 0; i < dim; i++ ) + q_step_fx = Mpy_32_32( q_step_fx, one_by_q_level[q_levels - 1] ); + FOR( i = 0; i < dim; i++ ) { quant[i] = Mpy_32_32( L_shl( index[i], 31 - 6 ), q_step_fx ); //(25+28)-31 = 22 + move32(); } } @@ -4873,122 +4962,157 @@ void ivas_spar_to_dirac( void ivas_spar_to_dirac_fx( Decoder_Struct *st_ivas, ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD decoder handle */ - const int16_t dtx_vad, /* i : DTX frame flag */ - const int16_t num_bands_out, /* i : number of output bands */ - const int16_t bw, /* i : band joining factor */ - const int16_t dyn_active_w_flag ) + const Word16 dtx_vad, /* i : DTX frame flag */ + const Word16 num_bands_out, /* i : number of output bands */ + const Word16 bw, /* i : band joining factor */ + const Word16 dyn_active_w_flag ) { DIRAC_DEC_HANDLE hDirAC; - int16_t start_band, end_band, band, qmf_band_start, qmf_band_end; - int16_t block, b; - int16_t *band_grouping; - // float diffuseness[IVAS_MAX_NUM_BANDS]; + Word16 start_band, end_band, band, qmf_band_start, qmf_band_end; + Word16 block, b; + Word16 *band_grouping; + Word32 diffuseness_fx[IVAS_MAX_NUM_BANDS]; - int16_t sba_order_internal; - // float azi_dirac[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES]; + Word16 sba_order_internal; + Word32 azi_dirac_fx[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES]; - // float ele_dirac[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES]; + Word32 ele_dirac_fx[IVAS_MAX_NUM_BANDS][MAX_PARAM_SPATIAL_SUBFRAMES]; - int16_t azi[IVAS_MAX_NUM_BANDS]; - int16_t ele[IVAS_MAX_NUM_BANDS]; - // float dvx[IVAS_MAX_NUM_BANDS], dvy[IVAS_MAX_NUM_BANDS], dvz[IVAS_MAX_NUM_BANDS]; + Word16 azi[IVAS_MAX_NUM_BANDS]; + Word16 ele[IVAS_MAX_NUM_BANDS]; + Word32 dvx_fx[IVAS_MAX_NUM_BANDS], dvy_fx[IVAS_MAX_NUM_BANDS], dvz_fx[IVAS_MAX_NUM_BANDS]; - // float radius; + Word32 radius_fx; - // float en_ratio, res_pow; + Word32 en_ratio_fx, res_pow_fx; Word16 en_ratio_q; - int16_t num_slots_in_subfr; - int16_t tmp_write_idx_param_band; - int16_t tmp_write_idx_band; - // float pred_re_20ms[IVAS_MAX_NUM_BANDS][IVAS_SPAR_MAX_CH - 1]; + Word16 num_slots_in_subfr; + Word16 tmp_write_idx_param_band; + Word16 tmp_write_idx_band; + Word32 pred_re_20ms_fx[IVAS_MAX_NUM_BANDS][IVAS_SPAR_MAX_CH - 1]; - int16_t pred_idx; - int16_t *dirac_to_spar_md_bands; - int16_t enc_param_start_band; - int16_t active_w_vlbr; - int16_t i, num_subframes; - int16_t active_w; + Word16 pred_idx; + Word16 *dirac_to_spar_md_bands; + Word16 enc_param_start_band; + Word16 active_w_vlbr; + Word16 i, num_subframes; + Word16 active_w; SPAT_PARAM_REND_COMMON_DATA_HANDLE hSpatParamRendCom; - active_w = ( dyn_active_w_flag == 1 ) || ( hMdDec->spar_md_cfg.active_w == 1 ); + test(); + active_w = EQ_16( dyn_active_w_flag, 1 ) || EQ_16( hMdDec->spar_md_cfg.active_w, 1 ); sba_order_internal = min( st_ivas->sba_analysis_order, IVAS_MAX_SBA_ORDER ); + move16(); start_band = 0; - end_band = min( num_bands_out, SPAR_DIRAC_SPLIT_START_BAND ) / bw; + move16(); + end_band = idiv1616( min( num_bands_out, SPAR_DIRAC_SPLIT_START_BAND ), bw ); hDirAC = st_ivas->hDirAC; hSpatParamRendCom = st_ivas->hSpatParamRendCom; dirac_to_spar_md_bands = st_ivas->hSpar->dirac_to_spar_md_bands; - enc_param_start_band = st_ivas->hSpar->enc_param_start_band / bw; - active_w_vlbr = ( st_ivas->hDecoderConfig->ivas_total_brate < IVAS_24k4 ) ? 1 : 0; - if ( hDirAC != NULL && ivas_get_hodirac_flag_fx( st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->sba_analysis_order ) == 0 ) + IF( st_ivas->hSpar->enc_param_start_band > 0 ) + { + enc_param_start_band = idiv1616( st_ivas->hSpar->enc_param_start_band, bw ); + } + ELSE + { + enc_param_start_band = 0; + move16(); + } + + IF( LT_32( st_ivas->hDecoderConfig->ivas_total_brate, IVAS_24k4 ) ) + { + active_w_vlbr = 1; + move16(); + } + ELSE + { + active_w_vlbr = 0; + move16(); + } + + test(); + IF( hDirAC != NULL && ivas_get_hodirac_flag_fx( st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->sba_analysis_order ) == 0 ) { band_grouping = hDirAC->band_grouping; - num_slots_in_subfr = st_ivas->hDirAC->hConfig->dec_param_estim ? CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES : 1; - for ( band = start_band; band < end_band; band++ ) + IF( st_ivas->hDirAC->hConfig->dec_param_estim ) + { + num_slots_in_subfr = CLDFB_NO_COL_MAX / MAX_PARAM_SPATIAL_SUBFRAMES; + move16(); + } + ELSE + { + num_slots_in_subfr = 1; + move16(); + } + + FOR( band = start_band; band < end_band; band++ ) { - // float PR[3], Pd[3], dvnorm, g_pred; Word32 PR_fx[3], Pd_fx[3], dvnorm_fx, g_pred_fx; Word16 q_g_pred; Word16 q_dvnorm; - // PR[0] = hMdDec->spar_md.band_coeffs[band].pred_re[2]; PR_fx[0] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[2]; - // PR[1] = hMdDec->spar_md.band_coeffs[band].pred_re[0]; + move32(); PR_fx[1] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[0]; - // PR[2] = hMdDec->spar_md.band_coeffs[band].pred_re[1]; + move32(); PR_fx[2] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[1]; - // g_pred = PR[0] * PR[0] + PR[1] * PR[1] + PR[2] * PR[2]; - g_pred_fx = Mpy_32_32( PR_fx[0], PR_fx[0] ) + Mpy_32_32( PR_fx[1], PR_fx[1] ) + Mpy_32_32( PR_fx[2], PR_fx[2] ); + move32(); + g_pred_fx = L_add( L_add( Mpy_32_32( PR_fx[0], PR_fx[0] ), Mpy_32_32( PR_fx[1], PR_fx[1] ) ), Mpy_32_32( PR_fx[2], PR_fx[2] ) ); q_g_pred = Q22 + Q22 - Q31; - q_dvnorm = Q31 - q_g_pred; + move16(); + q_dvnorm = sub( Q31, q_g_pred ); q_g_pred = q_dvnorm; + move16(); IF( LE_32( g_pred_fx, EPSILON_FIX ) ) { dvx_fx[band] = ONE_IN_Q22; + move32(); dvy_fx[band] = 0; + move32(); dvz_fx[band] = 0; + move32(); azi[band] = 0; + move16(); ele[band] = 0; + move16(); q_g_pred = Q22; + move16(); q_dvnorm = Q22; + move16(); } - else + ELSE { - // g_pred = Sqrt32( g_pred, ); - // dvnorm = 1.0f / g_pred; dvnorm_fx = ISqrt32( g_pred_fx, &q_dvnorm ); g_pred_fx = Sqrt32( g_pred_fx, &q_g_pred ); - if ( q_g_pred < 0 ) + IF( q_g_pred < 0 ) { - g_pred_fx = L_shr( g_pred_fx, ( -1 * q_g_pred ) ); + g_pred_fx = L_shr( g_pred_fx, negate( q_g_pred ) ); q_g_pred = 0; + move16(); } - /*dvx[band] = PR[0] * dvnorm; -dvy[band] = PR[1] * dvnorm; - dvz[band] = PR[2] * dvnorm;*/ - dvx_fx[band] = Mpy_32_32( PR_fx[0], dvnorm_fx ); + move32(); dvy_fx[band] = Mpy_32_32( PR_fx[1], dvnorm_fx ); + move32(); dvz_fx[band] = Mpy_32_32( PR_fx[2], dvnorm_fx ); - Word16 q_1 = ( 22 ) + ( 31 - q_dvnorm ) - 31; + move32(); + Word16 q_1 = sub( add( 22, sub( 31, q_dvnorm ) ), 31 ); - Word32 temp = Mpy_32_32( dvx_fx[band], dvx_fx[band] ) + Mpy_32_32( dvy_fx[band], dvy_fx[band] ); - Word16 q2 = q_1 + q_1 - 31; - Word16 q_temp = 31 - q2; + Word32 temp = L_add( Mpy_32_32( dvx_fx[band], dvx_fx[band] ), Mpy_32_32( dvy_fx[band], dvy_fx[band] ) ); + Word16 q2 = sub( add( q_1, q_1 ), 31 ); + Word16 q_temp = sub( 31, q2 ); radius_fx = Sqrt32( temp, &q_temp ); - // radius = sqrtf(dvx[band] * dvx[band] + dvy[band] * dvy[band]); - - // float check_qzi = atan2f(dvy[band], dvx[band]); Word16 check_azi_fx = BASOP_util_atan2( dvy_fx[band], dvx_fx[band], 0 ); Word32 check_azi_fx_32 = L_shl( check_azi_fx, 16 ); Word16 check_azi_fx_res; - IF( LT_32( check_azi_fx_32, 0 ) ) + IF( check_azi_fx_32 < 0 ) { check_azi_fx_res = negate( divide3232( L_negate( check_azi_fx_32 ), 1686629760 ) ); } @@ -4997,314 +5121,376 @@ dvy[band] = PR[1] * dvnorm; check_azi_fx_res = divide3232( check_azi_fx_32, 1686629760 ); } Word32 azi_intermediate = Mpy_32_16_1( DEGREE_180_Q_22, check_azi_fx_res ); - azi_intermediate = azi_intermediate + ONE_IN_Q21; - // Word16 azi_res = L_shr_r(azi_intermediate, 22); - Word16 azi_res = extract_l( azi_intermediate / ( 1 << 22 ) ); + azi_intermediate = L_add( azi_intermediate, ONE_IN_Q21 ); + + Word16 azi_res; + IF( azi_intermediate < 0 ) + { + azi_res = negate( extract_l( L_shr( L_negate( azi_intermediate ), 22 ) ) ); + } + ELSE + { + azi_res = extract_l( L_shr( azi_intermediate, 22 ) ); + } - Word16 check_ele_fx = BASOP_util_atan2( dvz_fx[band], radius_fx, ( 9 + q_dvnorm ) - q_temp ); + Word16 check_ele_fx = BASOP_util_atan2( dvz_fx[band], radius_fx, sub( add( 9, q_dvnorm ), q_temp ) ); Word32 check_ele_fx_32 = L_shl( check_ele_fx, 16 ); Word16 check_ele_fx_res; - IF( LT_32( check_azi_fx_32, 0 ) ) + IF( check_azi_fx_32 < 0 ) { check_ele_fx_res = negate( divide3232( L_negate( check_ele_fx_32 ), 1686629760 ) ); } - else + ELSE { check_ele_fx_res = divide3232( check_ele_fx_32, 1686629760 ); } Word32 ele_intermediate = Mpy_32_16_1( DEGREE_180_Q_22, check_ele_fx_res ); - ele_intermediate = ele_intermediate + ONE_IN_Q21; - // Word16 ele_res = L_shr_r(ele_intermediate, 22); - Word16 ele_res = extract_l( ele_intermediate / ( 1 << 22 ) ); + ele_intermediate = L_add( ele_intermediate, ONE_IN_Q21 ); + Word16 ele_res; + IF( ele_intermediate < 0 ) + { + ele_res = negate( extract_l( L_shr( L_negate( ele_intermediate ), 22 ) ) ); + } + ELSE + { + ele_res = extract_l( L_shr( ele_intermediate, 22 ) ); + } - // azi[band] = (int16_t)(max(-180.0f, min(180.0f, atan2f(dvy[band], dvx[band]) / EVS_PI * 180.0f)) + 0.5f); azi[band] = max( -180, min( 180, azi_res ) ); - // ele[band] = (int16_t)(max(-90.0f, min(180.0f, atan2f(dvz[band], radius) / EVS_PI * 180.0f)) + 0.5f); + move16(); ele[band] = max( -90, min( 180, ele_res ) ); + move16(); } - if ( st_ivas->nchan_transport == 1 ) + IF( EQ_16( st_ivas->nchan_transport, 1 ) ) { // float w_en_norm, f_scale; Word32 w_en_norm_fx, f_scale_fx; Word16 q_w_en_norm_fx; - if ( active_w ) + IF( active_w ) { - if ( dtx_vad == 0 ) + IF( dtx_vad == 0 ) { - // f_scale = IVAS_ACTIVEW_DM_F_SCALE_DTX; f_scale_fx = IVAS_ACTIVEW_DM_F_SCALE_DTX_FX; + move32(); } - else + ELSE { - // f_scale = ( active_w_vlbr ) ? IVAS_ACTIVEW_DM_F_SCALE_VLBR : IVAS_ACTIVEW_DM_F_SCALE; - f_scale_fx = ( active_w_vlbr ) ? IVAS_ACTIVEW_DM_F_SCALE_VLBR_FX : IVAS_ACTIVEW_DM_F_SCALE_FX; + IF( active_w_vlbr ) + { + f_scale_fx = IVAS_ACTIVEW_DM_F_SCALE_VLBR_FX; + move32(); + } + ELSE + { + f_scale_fx = IVAS_ACTIVEW_DM_F_SCALE_FX; + move32(); + } } } - else + ELSE { - // f_scale = 0.0f; f_scale_fx = 0; + move32(); } - // w_en_norm = ( 1.0f - ( f_scale * g_pred * g_pred ) ); Word32 temp_result = Mpy_32_32( Mpy_32_32( f_scale_fx, g_pred_fx ), g_pred_fx ); temp_result = L_sub( L_shr( ONE_IN_Q31, q_g_pred ), temp_result ); - // w_en_norm *= w_en_norm; + w_en_norm_fx = Mpy_32_32( temp_result, temp_result ); - q_w_en_norm_fx = q_g_pred + q_g_pred; - // Pd[0] = hMdDec->spar_md.band_coeffs[band].P_re[1]; - // Pd[1] = hMdDec->spar_md.band_coeffs[band].P_re[0]; - // Pd[2] = hMdDec->spar_md.band_coeffs[band].P_re[2]; + q_w_en_norm_fx = add( q_g_pred, q_g_pred ); + Pd_fx[0] = hMdDec->spar_md.band_coeffs[band].P_re_fx[1]; + move32(); Pd_fx[1] = hMdDec->spar_md.band_coeffs[band].P_re_fx[0]; + move32(); Pd_fx[2] = hMdDec->spar_md.band_coeffs[band].P_re_fx[2]; - // en_ratio = PR[0] * PR[0] + PR[1] * PR[1] + PR[2] * PR[2]; - en_ratio_fx = Mpy_32_32( PR_fx[0], PR_fx[0] ) + Mpy_32_32( PR_fx[1], PR_fx[1] ) + Mpy_32_32( PR_fx[2], PR_fx[2] ); // 22+22-31 = 13 - Word32 Pd_temp_res = Mpy_32_32( Pd_fx[0], Pd_fx[0] ) + Mpy_32_32( Pd_fx[1], Pd_fx[1] ) + Mpy_32_32( Pd_fx[2], Pd_fx[2] ); // q = 22+22-31 = 13 - // res_pow = w_en_norm + en_ratio + ( Pd[0] * Pd[0] + Pd[1] * Pd[1] + Pd[2] * Pd[2] ); - res_pow_fx = L_shr( w_en_norm_fx, ( 31 - q_w_en_norm_fx ) - 13 ) + en_ratio_fx + Pd_temp_res; - // res_pow_q = 13; + move32(); + + en_ratio_fx = L_add( L_add( Mpy_32_32( PR_fx[0], PR_fx[0] ), Mpy_32_32( PR_fx[1], PR_fx[1] ) ), Mpy_32_32( PR_fx[2], PR_fx[2] ) ); // 22+22-31 = 13 + Word32 Pd_temp_res = L_add( L_add( Mpy_32_32( Pd_fx[0], Pd_fx[0] ), Mpy_32_32( Pd_fx[1], Pd_fx[1] ) ), Mpy_32_32( Pd_fx[2], Pd_fx[2] ) ); // q = 22+22-31 = 13 + + res_pow_fx = L_add( L_shr( w_en_norm_fx, sub( sub( 31, q_w_en_norm_fx ), 13 ) ), L_add( en_ratio_fx, Pd_temp_res ) ); + res_pow_fx = L_shr( res_pow_fx, 1 ); - // res_pow *= 0.5f; - // hMdDec->spar_md.en_ratio_slow[band] = 0.75f * hMdDec->spar_md.en_ratio_slow[band] + 0.25f * en_ratio; - hMdDec->spar_md.en_ratio_slow_fx[band] = Mpy_32_32( 1610612736, hMdDec->spar_md.en_ratio_slow_fx[band] ) + Mpy_32_32( 536870912, en_ratio_fx ); - // hMdDec->spar_md.ref_pow_slow[band] = 0.75f * hMdDec->spar_md.ref_pow_slow[band] + 0.25f * res_pow; - hMdDec->spar_md.ref_pow_slow_fx[band] = Mpy_32_32( 1610612736, hMdDec->spar_md.ref_pow_slow_fx[band] ) + Mpy_32_32( 536870912, res_pow_fx ); - // en_ratio = sqrtf( hMdDec->spar_md.en_ratio_slow[band] ) / ( hMdDec->spar_md.ref_pow_slow[band] + EPSILON ); + + hMdDec->spar_md.en_ratio_slow_fx[band] = L_add( Mpy_32_32( 1610612736, hMdDec->spar_md.en_ratio_slow_fx[band] ), Mpy_32_32( 536870912, en_ratio_fx ) ); + move32(); + + hMdDec->spar_md.ref_pow_slow_fx[band] = L_add( Mpy_32_32( 1610612736, hMdDec->spar_md.ref_pow_slow_fx[band] ), Mpy_32_32( 536870912, res_pow_fx ) ); + move32(); + en_ratio_q = 31 - 13; + move16(); en_ratio_fx = Sqrt32( hMdDec->spar_md.en_ratio_slow_fx[band], &en_ratio_q ); - if ( en_ratio_q < 0 ) + IF( en_ratio_q < 0 ) { - en_ratio_fx = L_shr( en_ratio_fx, -1 * ( en_ratio_q ) ); + en_ratio_fx = L_shr( en_ratio_fx, negate( en_ratio_q ) ); en_ratio_q = 0; + move16(); } - Word32 en_ratio_fx_scaled = L_shr( en_ratio_fx, ( 31 - en_ratio_q - 13 ) ); - if ( en_ratio_fx_scaled > hMdDec->spar_md.ref_pow_slow_fx[band] ) + Word32 en_ratio_fx_scaled = L_shr( en_ratio_fx, ( sub( sub( 31, en_ratio_q ), 13 ) ) ); + IF( GT_32( en_ratio_fx_scaled, hMdDec->spar_md.ref_pow_slow_fx[band] ) ) { diffuseness_fx[band] = 0; + move32(); } - else if ( en_ratio_fx_scaled == 0 ) + ELSE IF( en_ratio_fx_scaled == 0 ) { diffuseness_fx[band] = ONE_IN_Q30; + move32(); } - else if ( en_ratio_fx_scaled == hMdDec->spar_md.ref_pow_slow_fx[band] ) + ELSE IF( EQ_32( en_ratio_fx_scaled, hMdDec->spar_md.ref_pow_slow_fx[band] ) ) { diffuseness_fx[band] = ONE_IN_Q30; + move32(); } - else + ELSE { - en_ratio_fx = divide3232( en_ratio_fx_scaled, ( hMdDec->spar_md.ref_pow_slow_fx[band] + EPSILON_FX ) ); + en_ratio_fx = divide3232( en_ratio_fx_scaled, L_add( hMdDec->spar_md.ref_pow_slow_fx[band], EPSILON_FX ) ); en_ratio_fx = L_shl( en_ratio_fx, 15 ); - diffuseness_fx[band] = ONE_IN_Q30 - en_ratio_fx; + diffuseness_fx[band] = L_sub( ONE_IN_Q30, en_ratio_fx ); + move32(); } } - else + ELSE { - // en_ratio = PR[0] * PR[0] + PR[1] * PR[1] + PR[2] * PR[2]; - en_ratio_fx = Mpy_32_32( PR_fx[0], PR_fx[0] ) + Mpy_32_32( PR_fx[1], PR_fx[1] ) + Mpy_32_32( PR_fx[2], PR_fx[2] ); - // hMdDec->spar_md.en_ratio_slow[band] = 0.75f * hMdDec->spar_md.en_ratio_slow[band] + 0.25f * en_ratio; - hMdDec->spar_md.en_ratio_slow_fx[band] = Mpy_32_32( 1610612736, hMdDec->spar_md.en_ratio_slow_fx[band] ) + Mpy_32_32( 536870912, en_ratio_fx ); - // en_ratio = sqrtf( hMdDec->spar_md.en_ratio_slow[band] ); + en_ratio_fx = L_add( L_add( Mpy_32_32( PR_fx[0], PR_fx[0] ), Mpy_32_32( PR_fx[1], PR_fx[1] ) ), Mpy_32_32( PR_fx[2], PR_fx[2] ) ); + + hMdDec->spar_md.en_ratio_slow_fx[band] = L_add( Mpy_32_32( 1610612736, hMdDec->spar_md.en_ratio_slow_fx[band] ), Mpy_32_32( 536870912, en_ratio_fx ) ); + move32(); + en_ratio_q = 31 - 13; + move16(); en_ratio_fx = Sqrt32( hMdDec->spar_md.en_ratio_slow_fx[band], &en_ratio_q ); - if ( en_ratio_q < 0 ) + IF( en_ratio_q < 0 ) { - en_ratio_fx = L_shr( en_ratio_fx, -1 * ( en_ratio_q ) ); + en_ratio_fx = L_shr( en_ratio_fx, ( -en_ratio_q ) ); en_ratio_q = 0; + move16(); } Word32 en_ratio_fx_scaled = L_shr( en_ratio_fx, 1 ); - if ( en_ratio_fx_scaled > ONE_IN_Q30 ) + IF( GT_32( en_ratio_fx_scaled, ONE_IN_Q30 ) ) { diffuseness_fx[band] = 0; + move32(); } - else + ELSE { - diffuseness_fx[band] = ONE_IN_Q30 - en_ratio_fx_scaled; + diffuseness_fx[band] = L_sub( ONE_IN_Q30, en_ratio_fx_scaled ); + move32(); } } - // diffuseness[band] = 1.0f - en_ratio; /*compute diffuseness*/ - // diffuseness[band] = ( ( diffuseness[band] < 1.0f ) ? ( ( diffuseness[band] < 0.0f ) ? 0.f : diffuseness[band] ) : 1.0f ); - // diffuseness[band] = (float)diffuseness_fx[band] / (1 << 30); } - for ( band = start_band; band < end_band; band++ ) + FOR( band = start_band; band < end_band; band++ ) { - int16_t azi_dith, ele_dith; + Word16 azi_dith, ele_dith; tmp_write_idx_param_band = hDirAC->spar_to_dirac_write_idx; + move16(); + + en_ratio_fx = L_sub( ONE_IN_Q30, diffuseness_fx[band] ); + + masa_sq_fx( L_sub( ONE_IN_Q30, en_ratio_fx ), diffuseness_thresholds_fx, DIRAC_DIFFUSE_LEVELS ); - // en_ratio = 1.0f - diffuseness[band]; - en_ratio_fx = ONE_IN_Q30 - diffuseness_fx[band]; - // en_ratio = (float)en_ratio_fx / (1 << 30); - // cam delete the below function call - // masa_sq( 1.0f - en_ratio, diffuseness_thresholds, DIRAC_DIFFUSE_LEVELS ); - //#ifdef IVAS_FLOAT_FIXED - masa_sq_fx( ONE_IN_Q30 - en_ratio_fx, diffuseness_thresholds_fx, DIRAC_DIFFUSE_LEVELS ); - //#endif qmf_band_start = band_grouping[band]; + move16(); qmf_band_end = band_grouping[band + 1]; + move16(); - for ( block = 0; block < MAX_PARAM_SPATIAL_SUBFRAMES; block++ ) + FOR( block = 0; block < MAX_PARAM_SPATIAL_SUBFRAMES; block++ ) { - int16_t ts_start, ts_end, ts; + Word16 ts_start, ts_end, ts; ts_start = DirAC_block_grouping[block]; + move16(); ts_end = DirAC_block_grouping[block + 1]; - for ( b = qmf_band_start; b < qmf_band_end; b++ ) + move16(); + FOR( b = qmf_band_start; b < qmf_band_end; b++ ) { azi_dith = azi[band]; + move16(); ele_dith = ele[band]; + move16(); hSpatParamRendCom->energy_ratio1_fx[block][b] = en_ratio_fx; + move32(); tmp_write_idx_band = tmp_write_idx_param_band; + move16(); - if ( hDirAC->hConfig->dec_param_estim == FALSE ) + IF( hDirAC->hConfig->dec_param_estim == FALSE ) { hSpatParamRendCom->elevation[tmp_write_idx_band][b] = ele_dith; + move16(); hSpatParamRendCom->azimuth[tmp_write_idx_band][b] = azi_dith; + move16(); hSpatParamRendCom->diffuseness_vector_fx[tmp_write_idx_band][b] = diffuseness_fx[band]; + move32(); } - else + ELSE { - for ( ts = ts_start; ts < ts_end; ts++ ) + FOR( ts = ts_start; ts < ts_end; ts++ ) { hSpatParamRendCom->elevation[tmp_write_idx_band][b] = ele_dith; + move16(); hSpatParamRendCom->azimuth[tmp_write_idx_band][b] = azi_dith; + move16(); hSpatParamRendCom->diffuseness_vector_fx[tmp_write_idx_band][b] = diffuseness_fx[band]; - tmp_write_idx_band = ( tmp_write_idx_band + 1 ) % hSpatParamRendCom->dirac_md_buffer_length; + move32(); + tmp_write_idx_band = add( tmp_write_idx_band, 1 ) % hSpatParamRendCom->dirac_md_buffer_length; + move16(); } } } - tmp_write_idx_param_band = ( tmp_write_idx_param_band + num_slots_in_subfr ) % hSpatParamRendCom->dirac_md_buffer_length; + tmp_write_idx_param_band = add( tmp_write_idx_param_band, num_slots_in_subfr ) % hSpatParamRendCom->dirac_md_buffer_length; + move16(); } } /* update buffer write index */ - if ( hDirAC->hConfig->dec_param_estim == FALSE ) + IF( hDirAC->hConfig->dec_param_estim == FALSE ) { - hDirAC->spar_to_dirac_write_idx = ( hDirAC->spar_to_dirac_write_idx + MAX_PARAM_SPATIAL_SUBFRAMES ) % hSpatParamRendCom->dirac_md_buffer_length; + hDirAC->spar_to_dirac_write_idx = add( hDirAC->spar_to_dirac_write_idx, MAX_PARAM_SPATIAL_SUBFRAMES ) % hSpatParamRendCom->dirac_md_buffer_length; + move16(); } - else + ELSE { - hDirAC->spar_to_dirac_write_idx = ( hDirAC->spar_to_dirac_write_idx + CLDFB_NO_COL_MAX ) % hSpatParamRendCom->dirac_md_buffer_length; + hDirAC->spar_to_dirac_write_idx = add( hDirAC->spar_to_dirac_write_idx, CLDFB_NO_COL_MAX ) % hSpatParamRendCom->dirac_md_buffer_length; + move16(); } } - else + ELSE { band = end_band; + move16(); } /*read DirAC metadata, convert DirAC to SPAR*/ - for ( ; band < num_bands_out / bw; band++ ) + FOR( ; band < idiv1616( num_bands_out, bw ); band++ ) { - int16_t dirac_band_idx; + Word16 dirac_band_idx; - dirac_band_idx = dirac_to_spar_md_bands[band] - enc_param_start_band; + dirac_band_idx = sub( dirac_to_spar_md_bands[band], enc_param_start_band ); num_subframes = MAX_PARAM_SPATIAL_SUBFRAMES; + move16(); if ( st_ivas->hQMetaData->useLowerRes ) { num_subframes = 1; + move16(); } - for ( block = 0; block < num_subframes; block++ ) + FOR( block = 0; block < num_subframes; block++ ) { - // if ( st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth[block] < 0.f ) - //{ - // st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth[block] += 360.f; - // } - if ( st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth_fx[block] < 0 ) + + IF( st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth_fx[block] < 0 ) { st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth_fx[block] = L_add( L_shl( 360, 22 ), st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth_fx[block] ); + move32(); } - // azi_dirac[band][block] = st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth[block]; - // ele_dirac[band][block] = st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].elevation[block]; - //#ifdef IVAS_FLOAT_FIXED + azi_dirac_fx[band][block] = st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].azimuth_fx[block]; + move32(); ele_dirac_fx[band][block] = st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].elevation_fx[block]; - //#endif + move32(); } - // diffuseness[band] = 1.0f - st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].energy_ratio[0]; - //#ifdef IVAS_FLOAT_FIXED - diffuseness_fx[band] = ONE_IN_Q30 - st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].energy_ratio_fx[0]; - //#endif + diffuseness_fx[band] = L_sub( ONE_IN_Q30, st_ivas->hQMetaData->q_direction->band_data[dirac_band_idx].energy_ratio_fx[0] ); + move32(); } /* DirAC MD averaged over 4 subframes and converted to SPAR format similar to encoder processing */ - if ( hMdDec->spar_md_cfg.nchan_transport > 1 ) + IF( GT_16( hMdDec->spar_md_cfg.nchan_transport, 1 ) ) { -#ifdef IVAS_FLOAT_FIXED - ivas_get_spar_md_from_dirac_fx( azi_dirac_fx, ele_dirac_fx, diffuseness_fx, 1, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, num_bands_out, ( hMdDec->spar_hoa_md_flag ) ? 1 : sba_order_internal, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); -#else - ivas_get_spar_md_from_dirac( azi_dirac, ele_dirac, diffuseness, 1, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, num_bands_out, ( hMdDec->spar_hoa_md_flag ) ? 1 : sba_order_internal, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); -#endif + Word16 order; + IF( hMdDec->spar_hoa_md_flag ) + { + order = 1; + move16(); + } + ELSE + { + order = sba_order_internal; + move16(); + } + ivas_get_spar_md_from_dirac_fx( azi_dirac_fx, ele_dirac_fx, diffuseness_fx, 1, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, num_bands_out, order, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); /* temporarily copy frame-wise prediction coefficients in DirAC bands*/ - for ( pred_idx = 0; pred_idx < FOA_CHANNELS - 1; pred_idx++ ) + FOR( pred_idx = 0; pred_idx < FOA_CHANNELS - 1; pred_idx++ ) { - for ( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) + FOR( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) { - // pred_re_20ms[band][pred_idx] = hMdDec->spar_md.band_coeffs[band].pred_re[pred_idx]; pred_re_20ms_fx[band][pred_idx] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[pred_idx]; + move32(); } } } - int16_t num_md_sub_frames; + Word16 num_md_sub_frames; num_md_sub_frames = ivas_get_spar_dec_md_num_subframes( sba_order_internal, st_ivas->hDecoderConfig->ivas_total_brate, st_ivas->last_active_ivas_total_brate ); - //#ifdef IVAS_FLOAT_FIXED - ivas_get_spar_md_from_dirac_fx( azi_dirac_fx, ele_dirac_fx, diffuseness_fx, num_md_sub_frames, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, num_bands_out / bw, ( hMdDec->spar_hoa_md_flag ) ? 1 : sba_order_internal, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); - //#else - // ivas_get_spar_md_from_dirac( azi_dirac, ele_dirac, diffuseness, num_md_sub_frames, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, num_bands_out / bw, ( hMdDec->spar_hoa_md_flag ) ? 1 : sba_order_internal, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); - //#endif - if ( st_ivas->hQMetaData->useLowerRes && dtx_vad ) + + Word16 order; + IF( hMdDec->spar_hoa_md_flag ) { - for ( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) + order = 1; + move16(); + } + ELSE + { + order = sba_order_internal; + move16(); + } + ivas_get_spar_md_from_dirac_fx( azi_dirac_fx, ele_dirac_fx, diffuseness_fx, num_md_sub_frames, NULL, &hMdDec->spar_md, &hMdDec->spar_md_cfg, end_band, idiv1616( num_bands_out, bw ), order, dtx_vad, NULL, st_ivas->hQMetaData->useLowerRes, active_w_vlbr, dyn_active_w_flag ); + + test(); + IF( st_ivas->hQMetaData->useLowerRes && dtx_vad ) + { + FOR( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) { - for ( block = 1; block < num_md_sub_frames; block++ ) + FOR( block = 1; block < num_md_sub_frames; block++ ) { - for ( i = 0; i < FOA_CHANNELS - 1; i++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ + FOR( i = 0; i < FOA_CHANNELS - 1; i++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ { - // hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].pred_re[i] = hMdDec->spar_md.band_coeffs[band].pred_re[i]; - hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].pred_re_fx[i] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[i]; + hMdDec->spar_md.band_coeffs[add( band, i_mult( block, IVAS_MAX_NUM_BANDS ) )].pred_re_fx[i] = hMdDec->spar_md.band_coeffs[band].pred_re_fx[i]; + move32(); } - for ( i = 0; i < FOA_CHANNELS - 1; i++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ + FOR( i = 0; i < FOA_CHANNELS - 1; i++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ { - // hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].P_re[i] = hMdDec->spar_md.band_coeffs[band].P_re[i]; - hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].P_re_fx[i] = hMdDec->spar_md.band_coeffs[band].P_re_fx[i]; + hMdDec->spar_md.band_coeffs[add( band, i_mult( block, IVAS_MAX_NUM_BANDS ) )].P_re_fx[i] = hMdDec->spar_md.band_coeffs[band].P_re_fx[i]; + move32(); } } } } /* expand DirAC TC 20ms MD for residual channels to all subframes*/ - for ( block = 0; block < num_md_sub_frames; block++ ) + FOR( block = 0; block < num_md_sub_frames; block++ ) { - for ( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) + FOR( band = SPAR_DIRAC_SPLIT_START_BAND; band < IVAS_MAX_NUM_BANDS; band++ ) { - for ( pred_idx = 0; pred_idx < FOA_CHANNELS - 1; pred_idx++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ + FOR( pred_idx = 0; pred_idx < FOA_CHANNELS - 1; pred_idx++ ) /* pred coefficient index (index 0, 1, 2 predicts Y, Z, X respectively) */ { - if ( ivas_is_res_channel( pred_idx + 1, hMdDec->spar_md_cfg.nchan_transport ) ) + IF( ivas_is_res_channel( add( pred_idx, 1 ), hMdDec->spar_md_cfg.nchan_transport ) ) { /* use 20ms coefficients only for residual channels */ - // hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].pred_re[pred_idx] = pred_re_20ms[band][pred_idx]; - hMdDec->spar_md.band_coeffs[band + block * IVAS_MAX_NUM_BANDS].pred_re_fx[pred_idx] = pred_re_20ms_fx[band][pred_idx]; + hMdDec->spar_md.band_coeffs[add( band, i_mult( block, IVAS_MAX_NUM_BANDS ) )].pred_re_fx[pred_idx] = pred_re_20ms_fx[band][pred_idx]; + move32(); } } } } - for ( b = end_band * bw; b < num_bands_out; b++ ) + FOR( b = i_mult( end_band, bw ); b < num_bands_out; b++ ) { hMdDec->valid_bands[b] = 1; + move16(); } return; diff --git a/lib_dec/ivas_stereo_adapt_GR_dec.c b/lib_dec/ivas_stereo_adapt_GR_dec.c index 79b9d272f..6eaef9c9e 100644 --- a/lib_dec/ivas_stereo_adapt_GR_dec.c +++ b/lib_dec/ivas_stereo_adapt_GR_dec.c @@ -61,28 +61,35 @@ static Word16 read_GR2( p = bit_stream; nb = 0; + move16(); FOR( i = 0; i < len; i++ ) { ready = 0; + move16(); temp = 0; + move16(); DO { b = *p++; - IF( EQ_16( (Word16) b, 0 ) ) + move16(); + IF( b == 0 ) { ready = 1; + move16(); } ELSE { - temp = add( (Word16) temp, 1 ); + temp = (UWord16) ( L_add( temp, 1 ) ); } } - WHILE( EQ_16( (Word16) ready, 0 ) ); + WHILE( ready == 0 ); b = *p++; - b = (UWord16) add( shl( (Word16) b, 1 ), (Word16) *p++ ); - ind[i] = add( shl( (Word16) temp, 2 ), (Word16) b ); - nb = (UWord16) add( (Word16) nb, add( (Word16) temp, 3 ) ); + move16(); + b = (UWord16) L_add( L_shl( b, 1 ), *p++ ); + ind[i] = extract_l( L_add( L_shl( temp, 2 ), b ) ); + move16(); + nb = (UWord16) L_add( nb, L_add( temp, 3 ) ); } return nb; @@ -108,28 +115,35 @@ static Word16 read_GR1( p = bit_stream; nb = 0; + move16(); FOR( i = 0; i < len; i++ ) { ready = 0; + move16(); temp = 0; + move16(); DO { b = *p++; - IF( EQ_16( (Word16) b, 0 ) ) + move16(); + IF( b == 0 ) { ready = 1; + move16(); } ELSE { - temp = add( (Word16) temp, 1 ); + temp = (UWord16) ( L_add( temp, 1 ) ); } } - WHILE( EQ_16( (Word16) ready, 0 ) ); + WHILE( ready == 0 ); b = *p++; - ind[i] = add( shl( (Word16) temp, 1 ), (Word16) b ); - nb = (UWord16) add( (Word16) nb, add( (Word16) temp, 2 ) ); + move16(); + ind[i] = extract_l( L_add( L_shl( temp, 1 ), b ) ); + move16(); + nb = (UWord16) L_add( nb, L_add( temp, 2 ) ); } return nb; @@ -155,26 +169,32 @@ Word16 read_GR0( p = bit_stream; nb = 0; + move16(); FOR( i = 0; i < len; i++ ) { ready = 0; + move16(); temp = 0; + move16(); DO { b = *p++; - IF( EQ_16( (Word16) b, 0 ) ) + move16(); + IF( b == 0 ) { ready = 1; + move16(); } ELSE { - temp = add( (Word16) temp, 1 ); + temp = (UWord16) L_add( temp, 1 ); } } - WHILE( EQ_16( (Word16) ready, 0 ) ); + WHILE( ready == 0 ); ind[i] = temp; - nb = (UWord16) add( (Word16) nb, add( (Word16) temp, 1 ) ); + move16(); + nb = (UWord16) L_add( nb, L_add( temp, 1 ) ); } return nb; @@ -196,10 +216,13 @@ static ivas_error find_map( ) { *map_idx = 0; + move16(); WHILE( ( NE_16( map[*map_idx], val ) ) && ( NE_16( *map_idx, len ) ) ) { - ( *map_idx )++; + test(); + *map_idx = add( *map_idx, 1 ); + move16(); } @@ -226,7 +249,7 @@ static void decode_adapt_GR_indices1( IF( EQ_16( no_symb, NO_SYMB_GR_SIDE_G ) ) { - map = &map0[15 * no_symb]; + map = &map0[i_mult( 15, no_symb )]; } ELSE { @@ -236,8 +259,9 @@ static void decode_adapt_GR_indices1( FOR( i = 0; i < len; i++ ) { map_symb = ind[i]; + move16(); find_map( out + i, map, map_symb, no_symb ); - map = &( map0[out[i] * no_symb] ); + map = &( map0[i_mult( out[i], no_symb )] ); } return; @@ -258,11 +282,13 @@ Word16 get_value( { Word16 i; UWord16 mask = 0, val = 0; + move16(); + move16(); - FOR( i = nbits - 1; i >= 0; i-- ) + FOR( i = sub( nbits, 1 ); i >= 0; i-- ) { - val = (UWord16) add( (Word16) val, shl( bit_stream[i], mask ) ); - mask = add( mask, 1 ); + val = (UWord16) L_add( val, L_shl( bit_stream[i], mask ) ); + mask = (UWord16) L_add( mask, 1 ); } return val; @@ -287,27 +313,31 @@ Word16 read_BS_GR( Word16 b, ind1_tmp[STEREO_DFT_BAND_MAX], tmp, i; *GR_ord = bit_stream[nb]; + move16(); b = 1; + move16(); - IF( EQ_16( *GR_ord, 0 ) ) + IF( *GR_ord == 0 ) { - b = add( b, read_GR0( &bit_stream[nb + b], ind1_tmp, len ) ); + b = add( b, read_GR0( &bit_stream[add( nb, b )], ind1_tmp, len ) ); } ELSE { - b = add( b, read_GR1( &bit_stream[nb + b], ind1_tmp, len ) ); + b = add( b, read_GR1( &bit_stream[add( nb, b )], ind1_tmp, len ) ); } FOR( i = 0; i < len; i++ ) { tmp = add( ind1_tmp[i], 1 ); - IF( tmp & 1 ) /* if odd number */ + IF( s_and( tmp, 1 ) ) /* if odd number */ { ind1[i] = negate( shr( ind1_tmp[i], 1 ) ); + move16(); } ELSE { ind1[i] = shr( tmp, 1 ); + move16(); } } @@ -335,30 +365,36 @@ Word16 read_BS_adapt_GR_sg( /* read first component */ b = 0; + move16(); b = add( b, read_GR1( &bit_stream[nb], ind1_tmp, 1 ) ); /* read GR ord */ - ord = bit_stream[nb + b]; + ord = bit_stream[add( nb, b )]; + move16(); b = add( b, 1 ); - IF( EQ_16( ord, 0 ) ) + IF( ord == 0 ) { *GR_ord = 1; - b = add( b, read_GR1( &bit_stream[nb + b], &ind1_tmp[1], len - 1 ) ); + move16(); + b = add( b, read_GR1( &bit_stream[add( nb, b )], &ind1_tmp[1], sub( len, 1 ) ) ); } ELSE { - ord = bit_stream[nb + b]; + ord = bit_stream[add( nb, b )]; + move16(); b = add( b, 1 ); - IF( EQ_16( ord, 0 ) ) + IF( ord == 0 ) { *GR_ord = 0; + move16(); - b = add( b, read_GR0( &bit_stream[nb + b], &ind1_tmp[1], len - 1 ) ); + b = add( b, read_GR0( &bit_stream[add( nb, b )], &ind1_tmp[1], sub( len, 1 ) ) ); } ELSE { *GR_ord = 2; - b = add( b, read_GR2( &bit_stream[nb + b], &ind1_tmp[1], len - 1 ) ); + move16(); + b = add( b, read_GR2( &bit_stream[add( nb, b )], &ind1_tmp[1], sub( len, 1 ) ) ); } } @@ -381,6 +417,7 @@ Word16 read_itd( ) { Word16 huff_flag, sign_flag, I, i, nb = 0, ready; + move16(); huff_flag = (Word16) get_next_indice( st, 1 ); sign_flag = (Word16) get_next_indice( st, 1 ); @@ -389,24 +426,30 @@ Word16 read_itd( IF( EQ_16( huff_flag, 1 ) ) { ready = 0; + move16(); I = 0; + move16(); - WHILE( ( EQ_16( ready, 0 ) ) && ( LT_16( nb, 10 ) ) ) + WHILE( ( ready == 0 ) && ( LT_16( nb, 10 ) ) ) { + test(); I = add( shl( I, 1 ), (Word16) get_next_indice( st, 1 ) ); nb = add( nb, 1 ); FOR( i = 0; i < 20; i++ ) { + test(); IF( ( EQ_16( I, dft_code_itd[i] ) ) && ( EQ_16( dft_len_itd[i], ( sub( nb, 2 ) ) ) ) ) { I = i; + move16(); ready = 1; + move16(); BREAK; } } } - IF( EQ_16( ready, 0 ) ) + IF( ready == 0 ) { printf( "Error reading Huffman code for ITD: \n" ); } @@ -419,6 +462,7 @@ Word16 read_itd( I = add( I, shl( sign_flag, 8 ) ); *pI = I; + move16(); return nb; } @@ -451,22 +495,24 @@ Word16 read_BS_adapt_GR_rpg( FOR( i = start; i < total_no; i++ ) { ind1_pred[i] = 0; + move16(); } } ELSE { - *GR_ord = bit_stream[nb + b]; /* GR order */ + *GR_ord = bit_stream[add( nb, b )]; /* GR order */ + move16(); b = add( b, 1 ); - IF( EQ_16( *GR_ord, 0 ) ) + IF( *GR_ord == 0 ) { - b = add( b, read_GR0( &bit_stream[nb + b], &ind1_tmp[1], len - 1 ) ); + b = add( b, read_GR0( &bit_stream[add( nb, b )], &ind1_tmp[1], sub( len, 1 ) ) ); } ELSE { /* GR ord 1 */ - b = add( b, read_GR1( &bit_stream[nb + b], &ind1_tmp[1], len - 1 ) ); + b = add( b, read_GR1( &bit_stream[add( nb, b )], &ind1_tmp[1], sub( len, 1 ) ) ); } - decode_adapt_GR_indices1( ind1_tmp, total_no - start, NO_SYMB_GR_PRED_G, &ind1_pred[start], dft_maps_rpg ); + decode_adapt_GR_indices1( ind1_tmp, sub( total_no, start ), NO_SYMB_GR_PRED_G, &ind1_pred[start], dft_maps_rpg ); } return b; @@ -488,14 +534,17 @@ Word16 read_flag_EC_DFT( Word16 flg; flg = bit_stream[0]; - IF( EQ_16( flg, 0 ) ) + move16(); + IF( flg == 0 ) { *flag = flg; + move16(); return 1; } ELSE { *flag = add( 2, bit_stream[1] ); + move16(); return 2; } } diff --git a/lib_dec/ivas_stereo_cng_dec.c b/lib_dec/ivas_stereo_cng_dec.c index 48394bd86..5aef907a7 100644 --- a/lib_dec/ivas_stereo_cng_dec.c +++ b/lib_dec/ivas_stereo_cng_dec.c @@ -727,6 +727,7 @@ static void stereo_dft_generate_comfort_noise_fx( q_shift = s_min( getScaleFactor32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN ), sub( hFdCngCom->cngNoiseLevelExp, 27 ) ); scale_sig32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN, q_shift ); hFdCngCom->cngNoiseLevelExp = sub( hFdCngCom->cngNoiseLevelExp, q_shift ); + move16(); enr = 0; /* Eliminates compiler warning. They are always set before they are used */ move32(); @@ -741,25 +742,29 @@ static void stereo_dft_generate_comfort_noise_fx( q_shb_shape = 0; move16(); - IF( EQ_16( chan, 0 ) ) + IF( chan == 0 ) { - pSideGain = hStereoDft->side_gain_fx + i_mult( STEREO_DFT_NBDIV, STEREO_DFT_BAND_MAX ); + pSideGain = hStereoDft->side_gain_fx + STEREO_DFT_NBDIV * STEREO_DFT_BAND_MAX; FOR( b = 0; b < hStereoDft->nbands; b++ ) { - IF( EQ_16( hStereoCng->xfade_frame_counter, 0 ) ) + IF( hStereoCng->xfade_frame_counter == 0 ) { hStereoDft->g_state_fx[b] = extract_h( *pSideGain++ ); + move16(); } ELSE { hStereoDft->g_state_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, extract_h( *pSideGain++ ) ), mult( A_GFILT_FX, hStereoDft->g_state_fx[b] ) ); + move16(); } + test(); IF( hStereoCng->first_SID ) { IF( hStereoCng->first_SID_after_TD ) { hStereoCng->cm_fx[b] = extract_h( Mpy_32_32( hStereoCng->c_LR_LT_fx, hStereoCng->c_LR_LT_fx ) ); + move16(); } ELSE { @@ -772,15 +777,18 @@ static void stereo_dft_generate_comfort_noise_fx( IF( GT_16( hStereoCng->nr_corr_frames, CORR_INIT ) ) { hStereoCng->cm_fx[b] = extract_h( Mpy_32_32( hStereoCng->c_LR_LT_fx, hStereoCng->c_LR_LT_fx ) ); + move16(); } ELSE { hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); + move16(); } } ELSE { hStereoCng->cm_fx[b] = add( mult( ONE_MINUS_A_GFILT_FX, hStereoCng->coh_fx[b] ), mult( A_GFILT_FX, hStereoCng->cm_fx[b] ) ); + move16(); } } @@ -794,8 +802,8 @@ static void stereo_dft_generate_comfort_noise_fx( { gamma = hStereoCng->cm_fx[b]; /* Q15 */ move16(); - gamma = BASOP_Util_Divide1616_Scale( gamma, 32767 - gamma, &q_div ); /* e(q_div) */ - q_sqrt2 = q_div + 16; + gamma = BASOP_Util_Divide1616_Scale( gamma, sub( 32767, gamma ), &q_div ); /* e(q_div) */ + q_sqrt2 = add( q_div, 16 ); tmp32_1 = BASOP_Util_Add_Mant32Exp( gamma, add( q_div, 16 ), sub( 32767, mult_r( hStereoDft->g_state_fx[b], hStereoDft->g_state_fx[b] ) ), 16, &q_sqrt1 ); /* e(q_sqrt1) */ tmp32_1 = Sqrt32( tmp32_1, &q_sqrt1 ); /* e(q_sqrt1) */ tmp32_2 = Sqrt32( gamma, &q_sqrt2 ); /* e(q_sqrt2) */ @@ -838,10 +846,11 @@ static void stereo_dft_generate_comfort_noise_fx( } scaleAvg = BASOP_Util_Divide1616_Scale( scaleAvg, hStereoDft->nbands, &q_div ); hStereoDft->scale_fx = shl_sat( scaleAvg, q_div ); + move16(); } } - IF( EQ_16( st->cng_type, LP_CNG ) ) + IF( st->cng_type == LP_CNG ) { set32_fx( cngNoiseLevel_upd, 0, st->L_frame ); @@ -850,26 +859,31 @@ static void stereo_dft_generate_comfort_noise_fx( move16(); dmpf[1] = negate( st->preemph_fac ); move16(); - mvs2s( st->Aq_cng, Atmp, M + 1 ); + Copy( st->Aq_cng, Atmp, M + 1 ); conv_fx_32( Atmp, dmpf, cngNoiseLevel_upd, M + 2 ); IF( EQ_16( st->L_frame, L_FRAME ) ) { pTrigo = hStereoDft->dft_trigo_12k8_fx; trigo_step = STEREO_DFT_TRIGO_SRATE_12k8_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); } ELSE { pTrigo = hStereoDft->dft_trigo_16k_fx; trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); } - FOR( i = 0; i < st->L_frame / 4; i++ ) + FOR( i = 0; i < shr( st->L_frame, 2 ); i++ ) { trigo_dec[i] = pTrigo[i_mult( i, trigo_step )]; - trigo_dec[sub( shr( st->L_frame, 1 ), i )] = pTrigo[i_mult( i, trigo_step )]; + move16(); + trigo_dec[sub( st->L_frame / 2, i )] = pTrigo[i_mult( i, trigo_step )]; + move16(); } - trigo_dec[shr( st->L_frame, 2 )] = pTrigo[i_mult( shr( st->L_frame, 2 ), trigo_step )]; + trigo_dec[st->L_frame / 4] = pTrigo[i_mult( st->L_frame / 4, trigo_step )]; + move16(); Word16 rshift_cng = sub( find_guarded_bits_fx( st->L_frame ), getScaleFactor32( cngNoiseLevel_upd, st->L_frame ) ); v_shr_32( cngNoiseLevel_upd, cngNoiseLevel_upd, st->L_frame, rshift_cng ); @@ -886,7 +900,7 @@ static void stereo_dft_generate_comfort_noise_fx( ptr0 = cngNoiseLevel_upd; ptr1 = ptr0 + 2; ptr2 = ptr1 + 1; - assert( GT_32( st->lp_ener_fx, 0 ) ); + assert( st->lp_ener_fx > 0 ); // lp_ener_fx will be in Q6 at this point. // So applying appropriate left shift on the denominator. @@ -898,30 +912,34 @@ static void stereo_dft_generate_comfort_noise_fx( FOR( i = 0; i < sub( shr( st->L_frame, 1 ), 1 ); i++ ) { ftmp = Madd_32_32( Mpy_32_32( *ptr1, *ptr1 ), *ptr2, *ptr2 ); - assert( GT_32( ftmp, 0 ) ); + assert( ftmp > 0 ); q_sqrt = sub( 62, shl( q_cngNoiseLevel_upd, 1 ) ); - IF( EQ_32( ftmp, 0 ) ) + IF( ftmp == 0 ) { ftmp = EPSILLON_FX; + move32(); q_sqrt = s_max( 0, q_sqrt ); } tmp = ISqrt32( ftmp, &q_sqrt ); e_norm_temp[i] = sub( 31, add( q_sqrt, q_div ) ); + move16(); *ptr0++ = Mpy_32_32( factor, tmp ); + move32(); ptr1 += 2; ptr2 += 2; } FOR( i = 0; i < sub( shr( st->L_frame, 1 ), 1 ); i++ ) { - q_cngNoiseLevel_upd = s_min( q_cngNoiseLevel_upd, e_norm_temp[i] + norm_l( cngNoiseLevel_upd[i] ) ); + q_cngNoiseLevel_upd = s_min( q_cngNoiseLevel_upd, add( e_norm_temp[i], norm_l( cngNoiseLevel_upd[i] ) ) ); } FOR( i = 0; i < st->L_frame; i++ ) { cngNoiseLevel_upd[i] = L_shl( cngNoiseLevel_upd[i], sub( q_cngNoiseLevel_upd, e_norm_temp[i] ) ); + move32(); } - IF( GT_16( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ), 0 ) ) + IF( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ) > 0 ) { /* Transform shb LP spectrum */ set32_fx( shb_shape, 0, L_FRAME16k ); @@ -930,14 +948,18 @@ static void stereo_dft_generate_comfort_noise_fx( IF( NE_16( st->L_frame, L_FRAME16k ) ) { pTrigo = hStereoDft->dft_trigo_16k_fx; - trigo_step = i_mult( STEREO_DFT_TRIGO_SRATE_16k_STEP, STEREO_DFT_TRIGO_DEC_STEP ); + trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); FOR( i = 0; i < L_FRAME16k / 4; i++ ) { trigo_dec[i] = pTrigo[i_mult( i, trigo_step )]; - trigo_dec[sub( shr( L_FRAME16k, 1 ), i )] = pTrigo[i_mult( i, trigo_step )]; + move16(); + trigo_dec[sub( L_FRAME16k / 2, i )] = pTrigo[i_mult( i, trigo_step )]; + move16(); } - trigo_dec[shr( L_FRAME16k, 2 )] = pTrigo[i_mult( shr( L_FRAME16k, 2 ), trigo_step )]; + trigo_dec[L_FRAME16k / 4] = pTrigo[i_mult( L_FRAME16k / 4, trigo_step )]; + move16(); } Word16 rshift_shb = sub( find_guarded_bits_fx( L_FRAME16k ), getScaleFactor32( shb_shape, L_FRAME16k ) ); @@ -962,8 +984,9 @@ static void stereo_dft_generate_comfort_noise_fx( { ftmp = Madd_32_32( Mpy_32_32( *ptr1, *ptr1 ), *ptr2, *ptr2 ); q_tmp = sub( shl( q_shb_shape, 1 ), 31 ); - assert( GT_32( ftmp, 0 ) ); + assert( ftmp > 0 ); q_div = 0; + move16(); ftmp = BASOP_Util_Divide3232_Scale_cadence( L_shl( 1, q_tmp ), ftmp, &q_div ); /* in float: both a = "div"=(1/(x^2+y^2) and sqrt(a) is used and summed up in the same loop. @@ -974,11 +997,14 @@ static void stereo_dft_generate_comfort_noise_fx( enr = BASOP_Util_Add_Mant32Exp( enr, q_enr, ftmp, q_div, &q_enr ); tmp32_1 = Sqrt32( ftmp, &q_div ); *ptr0++ = tmp32_1; + move32(); e_norm_temp[i] = sub( 31, q_div ); + move16(); ptr1 += 2; ptr2 += 2; } q_shb_shape = MAX_16; + move16(); FOR( i = 0; i < L_FRAME16k; i++ ) { @@ -987,20 +1013,25 @@ static void stereo_dft_generate_comfort_noise_fx( FOR( i = 0; i < L_FRAME16k; i++ ) { shb_shape[i] = L_shl( shb_shape[i], sub( q_shb_shape, e_norm_temp[i] ) ); + move32(); } } /* Update CNG noise level from MS noise estimation */ - mvl2l( st->hFdCngDec->bandNoiseShape, cngNoiseLevel_hist, sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ) ); + Copy32( st->hFdCngDec->bandNoiseShape, cngNoiseLevel_hist, sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ) ); ptr_cng = cngNoiseLevel_hist; - FOR( i = 0; i < sub( st->last_L_frame, hFdCngCom->startBand ) / 2; i++ ) + FOR( i = 0; i < shr( sub( st->last_L_frame, hFdCngCom->startBand ), 1 ); i++ ) { tmp = *( cngNoiseLevel_hist + shl( i, 1 ) ); tmp = L_add( tmp, *( cngNoiseLevel_hist + add( shl( i, 1 ), 1 ) ) ); *ptr_cng++ = L_shr( tmp, 1 ); + move32(); } - IF( EQ_16( last_element_mode, IVAS_CPE_TD ) && EQ_16( chan, 0 ) && EQ_16( hStereoCng->xfade_frame_counter, 0 ) && GE_16( hFdCngCom->msFrCnt_init_counter, hFdCngCom->msFrCnt_init_thresh ) ) + test(); + test(); + test(); + IF( EQ_16( last_element_mode, IVAS_CPE_TD ) && ( chan == 0 ) && ( hStereoCng->xfade_frame_counter == 0 ) && GE_16( hFdCngCom->msFrCnt_init_counter, hFdCngCom->msFrCnt_init_thresh ) ) { ptr_cng = cngNoiseLevel_hist + hFdCngCom->startBand; ptr_tmp = cngNoiseLevel_upd + hFdCngCom->startBand; @@ -1017,6 +1048,7 @@ static void stereo_dft_generate_comfort_noise_fx( { tmp_16 = shl( tmp_16, q_div ); hStereoCng->xfade_length = sub( MAX_XFADE_FX, extract_l( L_shr( imult3216( MAX_XFADE_FX, tmp_16 ), Q15 ) ) ); + move16(); } ELSE { @@ -1024,6 +1056,7 @@ static void stereo_dft_generate_comfort_noise_fx( tmp_16 = Invert16( tmp_16, &q_inv ); tmp_16 = shl( tmp_16, add( q_inv, q_div ) ); hStereoCng->xfade_length = sub( MAX_XFADE_FX, extract_l( L_shr( imult3216( MAX_XFADE_FX, tmp_16 ), Q15 ) ) ); + move16(); } } @@ -1031,7 +1064,7 @@ static void stereo_dft_generate_comfort_noise_fx( { ptr_cng = cngNoiseLevel_hist + hFdCngCom->startBand; ptr_tmp = cngNoiseLevel_upd + hFdCngCom->startBand; - FOR( i = 0; i < ( st->last_L_frame - hFdCngCom->startBand ) / 2; i++ ) + FOR( i = 0; i < shr( sub( st->last_L_frame, hFdCngCom->startBand ), 1 ); i++ ) { tmp_16 = BASOP_Util_Divide1616_Scale( hStereoCng->xfade_frame_counter, hStereoCng->xfade_length, &q_div ); tmp_16 = shl( tmp_16, q_div ); @@ -1051,13 +1084,13 @@ static void stereo_dft_generate_comfort_noise_fx( ptr_i = ptr_r + 1; scale = shr( output_frame, 1 ); - FOR( i = 0; i < ( hFdCngCom->stopFFTbin - hFdCngCom->startBand ) / 2; i++ ) + FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ ) { /* Real part in FFT bins */ rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft ); tmp32_1 = Mpy_32_16_1( *ptr_level, scale ); // Q(q_cngNoiseLevel_upd - 15) q_shift = norm_l( tmp32_1 ); - IF( GT_16( q_shift, sub( 46, q_cngNoiseLevel_upd ) ) ) + if ( GT_16( q_shift, sub( 46, q_cngNoiseLevel_upd ) ) ) { q_shift = sub( 46, q_cngNoiseLevel_upd ); } @@ -1065,21 +1098,23 @@ static void stereo_dft_generate_comfort_noise_fx( q_shift = sub( sub( 46, q_cngNoiseLevel_upd ), q_shift ); ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift ); + move32(); ptr_r += 2; /* Imaginary part in FFT bins */ rand_gauss_fx( ptr_i, &st->hTdCngDec->cng_seed, q_dft ); ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift ); + move32(); ptr_i += 2; ptr_level++; } - IF( GT_16( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ), 0 ) ) + IF( sub( s_min( output_frame, L_FRAME32k ), hFdCngCom->stopFFTbin ) > 0 ) { Word32 scale_32; Word16 q_res; /* high band generation, flipped spectrum */ - assert( NE_32( enr, 0 ) ); + assert( enr != 0 ); // 10 ^ (0.1 * st->hTdCngDec->shb_cng_gain) // Above expression equivalent to 2 ^ (3.321928094 * 0.1 * st->hTdCngDec->shb_cng_gain) // 3.321928094 * 0.1 = 0.3321928094 @@ -1091,19 +1126,23 @@ static void stereo_dft_generate_comfort_noise_fx( move16(); scale = BASOP_Util_Divide3232_Scale( scale_32, enr, &q_div ); // q_res = add(q_div, sub( q_enr, sub(Q31, q_res))); move16(); - q_res = q_res - q_enr + q_div; + q_res = add( sub( q_res, q_enr ), q_div ); // q_div = sub(Q15, q_res); move16(); q_div = q_res; + move16(); inv_scale = ISqrt16( scale, &q_res ); scale = Sqrt16( scale, &q_div ); - ptr_shb = shb_shape + sub( shr( L_FRAME16k, 1 ), 1 ); + ptr_shb = shb_shape + ( L_FRAME16k / 2 - 1 ); /* Averaging for Nyquist frequency */ Word16 q1, q2; tmp32_1 = Mpy_32_16_1( cngNoiseLevel_upd[sub( shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ), 1 )], inv_scale ); tmp32_2 = *( ptr_shb - 1 ); + move32(); q1 = sub( add( q_cngNoiseLevel_upd, 1 ), q_res ); q2 = q_shb_shape; + move16(); *ptr_shb = BASOP_Util_Add_Mant32Exp( tmp32_1, sub( Q31, q1 ), tmp32_2, sub( Q31, q2 ), &q_tmp ); + move32(); q_tmp = sub( q_tmp, 1 ); IF( LT_16( sub( Q31, q_tmp ), q_shb_shape ) ) { @@ -1112,19 +1151,21 @@ static void stereo_dft_generate_comfort_noise_fx( } *ptr_shb = L_shl( *ptr_shb, sub( q_shb_shape, sub( Q31, q_tmp ) ) ); + move32(); // *ptr_shb = 0.5f * ( cngNoiseLevel_upd[( hFdCngCom->stopFFTbin - hFdCngCom->startBand ) / 2 - 1] / scale + *( ptr_shb - 1 ) ); - ptr_r = DFT[chan] + hFdCngCom->stopFFTbin + i_mult( k, STEREO_DFT32MS_N_MAX ); + ptr_r = DFT[chan] + add( hFdCngCom->stopFFTbin, i_mult( k, STEREO_DFT32MS_N_MAX ) ); ptr_i = ptr_r + 1; - FOR( i = 0; i < shr( sub( s_min( output_frame, i_mult( hFdCngCom->regularStopBand, 16 ) ), hFdCngCom->stopFFTbin ), 1 ); i++ ) + FOR( i = 0; i < shr( sub( s_min( output_frame, shl( hFdCngCom->regularStopBand, 4 ) ), hFdCngCom->stopFFTbin ), 1 ); i++ ) { /* Real part in FFT bins */ rand_gauss_fx( ptr_r, &st->hTdCngDec->cng_seed, q_dft ); tmp32_1 = *ptr_shb; + move32(); q_shift = norm_l( tmp32_1 ); - IF( GT_16( q_shift, sub( 31, q_shb_shape ) ) ) + if ( GT_16( q_shift, sub( 31, q_shb_shape ) ) ) { q_shift = sub( 31, q_shb_shape ); } @@ -1132,10 +1173,12 @@ static void stereo_dft_generate_comfort_noise_fx( q_shift = sub( sub( 31, q_shb_shape ), q_shift ); ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift ); + move32(); ptr_r += 2; /* Imaginary part in FFT bins */ rand_gauss_fx( ptr_i, &st->hTdCngDec->cng_seed, q_dft ); ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift ); + move32(); ptr_i += 2; ptr_shb--; } @@ -1143,7 +1186,7 @@ static void stereo_dft_generate_comfort_noise_fx( /* rescale */ tmp32_1 = L_mult0( scale, shr( output_frame, 1 ) ); q_shift = norm_l( tmp32_1 ); - IF( GT_16( q_shift, add( Q16, q_div ) ) ) + if ( GT_16( q_shift, add( Q16, q_div ) ) ) { q_shift = add( Q16, q_div ); } @@ -1152,10 +1195,12 @@ static void stereo_dft_generate_comfort_noise_fx( ptr_r = DFT[chan] + add( hFdCngCom->stopFFTbin, i_mult( k, STEREO_DFT32MS_N_MAX ) ); ptr_i = ptr_r + 1; - FOR( i = 0; i < ( min( output_frame, hFdCngCom->regularStopBand * 16 ) - hFdCngCom->stopFFTbin ) / 2; i++ ) + FOR( i = 0; i < shr( sub( min( output_frame, shl( hFdCngCom->regularStopBand, 4 ) ), hFdCngCom->stopFFTbin ), 1 ); i++ ) { ( *ptr_r ) = L_shl( Mpy_32_32( ( *ptr_r ), tmp32_1 ), q_shift ); + move32(); ( *ptr_i ) = L_shl( Mpy_32_32( ( *ptr_i ), tmp32_1 ), q_shift ); + move32(); ptr_r += 2; ptr_i += 2; } @@ -1171,10 +1216,12 @@ static void stereo_dft_generate_comfort_noise_fx( q_tmp = s_max( hFdCngCom->cngNoiseLevelExp, shl( sub( Q31, q_cngNoiseLevel_upd ), 1 ) ); scale_sig32( hFdCngCom->cngNoiseLevel, FFTCLDFBLEN, sub( hFdCngCom->cngNoiseLevelExp, q_tmp ) ); hFdCngCom->cngNoiseLevelExp = q_tmp; + move16(); FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ ) { *ptr_level-- = L_shl( Mpy_32_32( *ptr_tmp, *ptr_tmp ), sub( shl( sub( Q31, q_cngNoiseLevel_upd ), 1 ), hFdCngCom->cngNoiseLevelExp ) ); + move32(); ptr_tmp--; *ptr_level = *( ptr_level + 1 ); move32(); @@ -1184,16 +1231,18 @@ static void stereo_dft_generate_comfort_noise_fx( ELSE { /* FD-CNG */ - IF( !( hFdCngCom->msFrCnt_init_counter < hFdCngCom->msFrCnt_init_thresh ) ) + IF( !LT_16( hFdCngCom->msFrCnt_init_counter, hFdCngCom->msFrCnt_init_thresh ) ) { - IF( hStereoCng->xfade_frame_counter <= MAX_K && hStereoCng->last_act_element_mode == IVAS_CPE_TD && chan == 0 ) + test(); + test(); + IF( LE_16( hStereoCng->xfade_frame_counter, (Word16) MAX_K ) && EQ_16( hStereoCng->last_act_element_mode, IVAS_CPE_TD ) && ( chan == 0 ) ) { /* Fade MS -> SID/MS */ j = 0; move16(); - FOR( k = 0; k < ( hFdCngCom->nFFTpart - 2 ); k++ ) + FOR( k = 0; k < sub( hFdCngCom->nFFTpart, 2 ); k++ ) { - factor = BASOP_Util_Divide3232_Scale_cadence( ( hFdCngCom->sidNoiseEstLp[k] + DELTA_FX ), ( st->hFdCngDec->partNoiseShape[k] + DELTA_FX ), &q_div ); + factor = BASOP_Util_Divide3232_Scale_cadence( L_add( hFdCngCom->sidNoiseEstLp[k], DELTA_FX ), L_add( st->hFdCngDec->partNoiseShape[k], DELTA_FX ), &q_div ); factor = s_min( add( hStereoDft->scale_fx, extract_l( Mpy_32_16_1( L_mult( sub( extract_h( factor ), hStereoDft->scale_fx ), hStereoCng->xfade_frame_counter ), ONE_BY_MAX_K ) ) ), extract_h( factor ) ); FOR( ; j <= hFdCngCom->part[k]; j++ ) { @@ -1208,7 +1257,7 @@ static void stereo_dft_generate_comfort_noise_fx( FOR( k = 0; k < STEREO_DFT_NBDIV; k++ ) { ptr_level = hFdCngCom->cngNoiseLevel; - ptr_r = DFT[chan] + hFdCngCom->startBand + k * STEREO_DFT32MS_N_MAX; + ptr_r = DFT[chan] + add( hFdCngCom->startBand, i_mult( k, STEREO_DFT32MS_N_MAX ) ); ptr_i = ptr_r + 1; q_cngNoiseLevel = sub( Q31, hFdCngCom->cngNoiseLevelExp ); FOR( i = 0; i < shr( sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), 1 ); i++ ) @@ -1222,6 +1271,7 @@ static void stereo_dft_generate_comfort_noise_fx( q_sqrt = sub( Q31, q_cngNoiseLevel ); tmp = Sqrt32( tmp, &q_sqrt ); *( ptr_r ) = imult3216( Mpy_32_32( *( ptr_r ), tmp ), scale ); + move32(); ( *ptr_r ) = L_shl( ( *ptr_r ), q_sqrt ); move32(); ptr_r += 2; @@ -1231,8 +1281,8 @@ static void stereo_dft_generate_comfort_noise_fx( move32(); ptr_i += 2; } - ptr_level = hFdCngCom->cngNoiseLevel + hFdCngCom->stopFFTbin - hFdCngCom->startBand; - ptr_r = DFT[chan] + hFdCngCom->stopFFTbin + k * STEREO_DFT32MS_N_MAX; + ptr_level = hFdCngCom->cngNoiseLevel + sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ); + ptr_r = DFT[chan] + add( hFdCngCom->stopFFTbin, i_mult( k, STEREO_DFT32MS_N_MAX ) ); ptr_i = ptr_r + 1; FOR( j = hFdCngCom->numCoreBands; j < hFdCngCom->regularStopBand; j++ ) { @@ -1264,9 +1314,10 @@ static void stereo_dft_generate_comfort_noise_fx( } } - IF( hStereoCng->last_act_element_mode == IVAS_CPE_TD && chan > 0 ) + test(); + IF( EQ_16( hStereoCng->last_act_element_mode, IVAS_CPE_TD ) && ( chan > 0 ) ) { - Word32 log_lp_noise = L_add( L_shl( hFdCngCom->cngNoiseLevelExp, Q25 ), BASOP_Util_Log2( lp_noise + DELTA_FX ) ); + Word32 log_lp_noise = L_add( L_shl( hFdCngCom->cngNoiseLevelExp, Q25 ), BASOP_Util_Log2( L_add( lp_noise, DELTA_FX ) ) ); // log10(x) is computed as log2(x) * log10(2) // log_lp_noise at this stage is in Q25. where as the structure value is in Q23 // Hence the 16-bit constant log10(2) will be stored in Q13 @@ -1274,11 +1325,11 @@ static void stereo_dft_generate_comfort_noise_fx( st1->lp_noise = L_add( Mpy_32_16_1( st1->lp_noise, 29491 /* 0.9f in Q15 */ ), log_lp_noise ); move32(); } - ELSE IF( EQ_16( chan, 0 ) ) + ELSE IF( ( chan == 0 ) ) { Word16 q_lp_noise = 0; // st->hFdCngDec->q_lp_noise; // to be populated appropriately. - Word32 log_lp_noise = L_add( L_shl( hFdCngCom->cngNoiseLevelExp, Q25 ), BASOP_Util_Log2( lp_noise + DELTA_FX ) ); move16(); + Word32 log_lp_noise = L_add( L_shl( hFdCngCom->cngNoiseLevelExp, Q25 ), BASOP_Util_Log2( L_add( lp_noise, DELTA_FX ) ) ); // log10(x) is computed as log2(x) * log10(2) // log_lp_noise at this stage is in Q25. where as the structure value is in Q23 // Hence the 16-bit constant log10(2) will be stored in Q13 @@ -1287,14 +1338,15 @@ static void stereo_dft_generate_comfort_noise_fx( move32(); st->lp_noise = st->hFdCngDec->lp_noise; move32(); - st->hFdCngDec->hFdCngCom->flag_noisy_speech = (Word16) LT_32( L_shr( L_sub( st->hFdCngDec->lp_speech, st->hFdCngDec->lp_noise ), q_lp_noise ), 28 ); + st->hFdCngDec->hFdCngCom->flag_noisy_speech = extract_l( LT_32( L_shr( L_sub( st->hFdCngDec->lp_speech, st->hFdCngDec->lp_noise ), q_lp_noise ), 28 ) ); move16(); st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx = L_add( Mpy_32_32( Q31_0_99, st->hFdCngDec->hFdCngCom->likelihood_noisy_speech_32fx ), imult3216( Q31_0_01, st->hFdCngDec->hFdCngCom->flag_noisy_speech ) ); move32(); } - IF( EQ_16( chan, 0 ) && LE_32( st->core_brate, SID_2k40 ) ) + test(); + IF( ( chan == 0 ) && LE_32( st->core_brate, SID_2k40 ) ) { Word32 max_smoothed_psd = 0; move32(); @@ -1303,9 +1355,14 @@ static void stereo_dft_generate_comfort_noise_fx( FOR( i = hFdCngCom->startBand; i < hFdCngCom->stopFFTbin; i++ ) { Word16 l_shift_val = sub( st->hFdCngDec->q_smoothed_psd, sub( Q31, hFdCngCom->cngNoiseLevelExp ) ); - move16(); - l_shift_val = max_smoothed_psd == 0 ? 0 : l_shift_val; - ftmp = hFdCngCom->cngNoiseLevel[i - hFdCngCom->startBand]; + + if ( max_smoothed_psd == 0 ) + { + l_shift_val = 0; + move16(); + } + + ftmp = hFdCngCom->cngNoiseLevel[sub( i, hFdCngCom->startBand )]; move32(); IF( !st->hFdCngDec->first_cna_noise_updated ) { @@ -1317,7 +1374,7 @@ static void stereo_dft_generate_comfort_noise_fx( { alpha = (Word16) ( 0x799A ); move16(); - IF( GT_32( st->hFdCngDec->smoothed_psd_fx[i], 0 ) && GT_32( Mpy_32_16_1( ftmp, (Word16) 0x3333 ), L_shr_sat( st->hFdCngDec->smoothed_psd_fx[i], l_shift_val ) ) ) + IF( ( st->hFdCngDec->smoothed_psd_fx[i] > 0 ) && GT_32( Mpy_32_16_1( ftmp, (Word16) 0x3333 ), L_shr_sat( st->hFdCngDec->smoothed_psd_fx[i], l_shift_val ) ) ) { /* prevent abrupt upward update steps */ ftmp = L_add( L_shl( st->hFdCngDec->smoothed_psd_fx[i], 2 ), L_shr( st->hFdCngDec->smoothed_psd_fx[i], 1 ) ); @@ -1338,9 +1395,10 @@ static void stereo_dft_generate_comfort_noise_fx( } /* update msNoiseEst in SID and NO_DATA frames */ - bandcombinepow( &st->hFdCngDec->smoothed_psd_fx[hFdCngCom->startBand], ( Q31 - st->hFdCngDec->q_smoothed_psd ), hFdCngCom->stopFFTbin - hFdCngCom->startBand, st->hFdCngDec->part_shaping, st->hFdCngDec->nFFTpart_shaping, st->hFdCngDec->psize_inv_shaping, st->hFdCngDec->msNoiseEst, &st->hFdCngDec->msNoiseEst_exp ); + bandcombinepow( &st->hFdCngDec->smoothed_psd_fx[hFdCngCom->startBand], sub( Q31, st->hFdCngDec->q_smoothed_psd ), sub( hFdCngCom->stopFFTbin, hFdCngCom->startBand ), st->hFdCngDec->part_shaping, st->hFdCngDec->nFFTpart_shaping, st->hFdCngDec->psize_inv_shaping, st->hFdCngDec->msNoiseEst, &st->hFdCngDec->msNoiseEst_exp ); Scale_sig32( st->hFdCngDec->msNoiseEst, NPART_SHAPING, sub( st->hFdCngDec->msNoiseEst_exp, 27 ) ); st->hFdCngDec->msNoiseEst_exp = 27; + move16(); st->hFdCngDec->first_cna_noise_updated = 1; move16(); Copy32( st->hFdCngDec->msNoiseEst, st->hFdCngDec->msPeriodog_ST_fx, st->hFdCngDec->nFFTpart_shaping ); @@ -1412,7 +1470,8 @@ void stereo_dtf_cng_fx( { IF( LT_16( hCPE->hStereoCng->nr_dft_frames, CM_INIT ) ) { - hCPE->hStereoCng->nr_dft_frames++; + hCPE->hStereoCng->nr_dft_frames = add( hCPE->hStereoCng->nr_dft_frames, 1 ); + move16(); } IF( LE_32( ivas_total_brate, IVAS_SID_5k2 ) ) @@ -1420,7 +1479,8 @@ void stereo_dtf_cng_fx( test(); IF( LT_16( hCPE->hStereoCng->nr_sid_frames, SID_INIT ) && EQ_32( ivas_total_brate, IVAS_SID_5k2 ) ) { - hCPE->hStereoCng->nr_sid_frames++; + hCPE->hStereoCng->nr_sid_frames = add( hCPE->hStereoCng->nr_sid_frames, 1 ); + move16(); } FOR( n = 0; n < CPE_CHANNELS; n++ ) @@ -1459,7 +1519,8 @@ void stereo_cng_dec_update( } IF( LT_16( hCPE->hStereoCng->active_frame_counter, MAX_FRAME_COUNTER ) ) { - hCPE->hStereoCng->active_frame_counter++; + hCPE->hStereoCng->active_frame_counter = add( hCPE->hStereoCng->active_frame_counter, 1 ); + move16(); } } ELSE @@ -1468,14 +1529,15 @@ void stereo_cng_dec_update( move16(); IF( LT_16( hCPE->hStereoCng->xfade_frame_counter, MAX_FRAME_COUNTER ) ) { - hCPE->hStereoCng->xfade_frame_counter++; + hCPE->hStereoCng->xfade_frame_counter = add( hCPE->hStereoCng->xfade_frame_counter, 1 ); + move16(); } } IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) { test(); - IF( EQ_32( ivas_total_brate, IVAS_SID_5k2 ) || EQ_32( ivas_total_brate, FRAME_NO_DATA ) ) + IF( EQ_32( ivas_total_brate, IVAS_SID_5k2 ) || ( ivas_total_brate == FRAME_NO_DATA ) ) { hCPE->hStereoCng->prev_sid_nodata = 1; move16(); @@ -1512,36 +1574,50 @@ void stereo_cng_compute_PScorr_fx( { Word16 i; Word32 enrP_fx = 0, enrS_fx = 0, dotPS_fx = 0, c_PS_fx; + move32(); + move32(); + move32(); Word32 outputResamp_fx[L_FRAME16k]; Word32 *outputPptr_fx, *outputSptr_fx; Word16 outputPptr_Q = 0, outputSptr_Q = 0; + move16(); + move16(); Word32 tmp1 = 0, tmp2 = 0, tmp3 = 0; + move32(); + move32(); + move32(); IF( GT_16( L_frame_0, L_frame_1 ) ) { outputPptr_fx = output_fx0; L_lerp_fx( output_fx1, outputResamp_fx, L_frame_0, L_frame_1, output_Q ); outputPptr_Q = *output_Q; + move16(); outputSptr_fx = outputResamp_fx; outputSptr_Q = 1; + move16(); } ELSE IF( GT_16( L_frame_1, L_frame_0 ) ) { outputSptr_fx = output_fx1; L_lerp_fx( output_fx0, outputResamp_fx, L_frame_1, L_frame_0, output_Q ); outputSptr_Q = *output_Q; + move16(); outputPptr_fx = outputResamp_fx; outputPptr_Q = 1; + move16(); } ELSE { outputPptr_fx = output_fx0; outputSptr_fx = output_fx1; outputPptr_Q = *output_Q; + move16(); outputSptr_Q = *output_Q; + move16(); } - Word16 guard_bits = (Word16) find_guarded_bits_fx( L_frame_0 ); + Word16 guard_bits = find_guarded_bits_fx( L_frame_0 ); FOR( i = 0; i < L_frame_0; i++ ) { @@ -1553,16 +1629,17 @@ void stereo_cng_compute_PScorr_fx( enrS_fx = L_add( enrS_fx, tmp2 ); tmp3 = Mpy_32_32( *outputPptr_fx++, *outputSptr_fx++ ); // outputSptr_Q + outputPptr_Q - 31 - tmp3 = L_shr( tmp3, guard_bits + 1 ); // outputSptr_Q + outputPptr_Q - 31 - guard_bits + tmp3 = L_shr( tmp3, add( guard_bits, 1 ) ); // outputSptr_Q + outputPptr_Q - 31 - guard_bits dotPS_fx = L_add( dotPS_fx, tmp3 ); } Word32 Var1 = Mpy_32_32( enrP_fx, enrS_fx ); // 2*( outputPptr_Q - 31 - guard_bits + outputSptr_Q - 31 - guard_bits ) - Word16 exp = 31 - ( 2 * ( outputPptr_Q - 31 - guard_bits + outputSptr_Q - 31 - guard_bits ) ); + Word16 exp = sub( 31, shl( add( sub( sub( outputPptr_Q, 31 ), guard_bits ), sub( sub( outputSptr_Q, 31 ), guard_bits ) ), 1 ) ); - IF( Var1 == 0 ) + if ( Var1 == 0 ) { Var1 = 1; + move32(); } Word32 Var2, Var3; @@ -1570,26 +1647,37 @@ void stereo_cng_compute_PScorr_fx( Var3 = L_add( dotPS_fx, 1 ); /*adding one in order to make temp non-zero. q = Q_outputSptr + Q_outputPptr - guard_bits*/ c_PS_fx = Mpy_32_32( Var3, Var2 ); /* outputSptr_Q + outputPptr_Q - guard_bits - exp */ - Word16 c_PS_fx_Q = outputSptr_Q + outputPptr_Q - guard_bits - exp - 31; + Word16 c_PS_fx_Q = add( outputSptr_Q, sub( sub( sub( outputPptr_Q, guard_bits ), exp ), 31 ) ); Word32 a = 0, b = 0; + move32(); + move32(); Word16 a_q = 0, b_q = 0, ab_q = 0; + move16(); + move16(); + move16(); a = Mpy_32_32( c_PS_fx, ONE_MINUS_STEREO_TD_PS_CORR_FILT_Q31 ); // Q(c_PS_fx_Q) a_q = c_PS_fx_Q; + move16(); b = Mpy_32_32( STEREO_TD_PS_CORR_FILT_Q31, *c_PS_LT_fx ); // Q(Q_c_PS_LT_fx) b_q = Q_c_PS_LT_fx; + move16(); IF( GT_16( a_q, b_q ) ) { - ab_q = a_q - b_q; - *c_PS_LT_fx = L_add( L_shr( a, (Word16) ab_q ), b ); + ab_q = sub( a_q, b_q ); + *c_PS_LT_fx = L_add( L_shr( a, ab_q ), b ); + move32(); Q_c_PS_LT_fx = b_q; + move16(); } ELSE IF( LT_16( b_q, a_q ) ) { - ab_q = b_q - a_q; - *c_PS_LT_fx = L_add( a, L_shr( b, (Word16) ab_q ) ); + ab_q = sub( b_q, a_q ); + *c_PS_LT_fx = L_add( a, L_shr( b, ab_q ) ); + move32(); Q_c_PS_LT_fx = a_q; + move16(); } return; @@ -1700,8 +1788,8 @@ static void stereo_cng_compute_LRcorr( static void stereo_cng_compute_LRcorr_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE handle */ Word32 *output_fx[CPE_CHANNELS], /* i : Output signal */ - const int16_t output_frame, /* i : Output frame length */ - const int16_t tdm_ratio_idx /* i : TDM ratio index */ + const Word16 output_frame, /* i : Output frame length */ + const Word16 tdm_ratio_idx /* i : TDM ratio index */ ) { Word16 i; @@ -1723,139 +1811,169 @@ static void stereo_cng_compute_LRcorr_fx( beta_fx = tdm_ratio_tabl_fx[tdm_ratio_idx]; enrL_fx = 0; + move64(); enrR_fx = 0; + move64(); dotLR_fx = 0; + move64(); FOR( i = 0; i < output_frame; i++ ) { - - enrL_fx += ( (Word64) output_fx[0][i] ) * ( output_fx[0][i] ); - enrR_fx += ( (Word64) output_fx[1][i] ) * ( output_fx[1][i] ); - dotLR_fx += ( ( (Word64) output_fx[0][i] ) * ( output_fx[1][i] ) ); + enrL_fx = W_add( enrL_fx, W_mult0_32_32( output_fx[0][i], output_fx[0][i] ) ); + enrR_fx = W_add( enrR_fx, W_mult0_32_32( output_fx[1][i], output_fx[1][i] ) ); + dotLR_fx = W_add( dotLR_fx, W_mult0_32_32( output_fx[0][i], output_fx[1][i] ) ); } - dotLR_fx = dotLR_fx + EPSILON_FX_SMALL; - IF( dotLR_fx < 0 ) + dotLR_fx = W_add( dotLR_fx, EPSILON_FX_SMALL ); + if ( dotLR_fx < 0 ) { dotLR_fx = W_neg( dotLR_fx ); } - enrL_fx = enrL_fx + EPSILON_FX_SMALL; - enrR_fx = enrR_fx + EPSILON_FX_SMALL; + enrL_fx = W_add( enrL_fx, EPSILON_FX_SMALL ); + enrR_fx = W_add( enrR_fx, EPSILON_FX_SMALL ); headroom_left_enrL = W_norm( enrL_fx ); headroom_left_enrR = W_norm( enrR_fx ); { - IF( headroom_left_enrL < 32 ) + IF( LT_16( headroom_left_enrL, 32 ) ) { - enrL_fx = W_shr( enrL_fx, ( 32 - headroom_left_enrL ) ); - enrL_q = ( 31 - ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_enrL ) ) ); + enrL_fx = W_shr( enrL_fx, sub( 32, headroom_left_enrL ) ); + enrL_q = sub( 31, sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_enrL ) ) ); } ELSE { - enrL_q = 31 - ( 2 * OUTPUT_Q ); + enrL_q = sub( 31, ( 2 * OUTPUT_Q ) ); } - IF( headroom_left_enrR < 32 ) + IF( LT_16( headroom_left_enrR, 32 ) ) { - enrR_fx = W_shr( enrR_fx, ( 32 - headroom_left_enrR ) ); - enrR_q = ( 31 - ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_enrR ) ) ); + enrR_fx = W_shr( enrR_fx, sub( 32, headroom_left_enrR ) ); + enrR_q = sub( 31, sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_enrR ) ) ); } ELSE { - enrR_q = 31 - ( 2 * OUTPUT_Q ); + enrR_q = sub( 31, ( 2 * OUTPUT_Q ) ); } enrL_inv_q = enrL_q; + move16(); enrR_inv_q = enrR_q; + move16(); - temp_x_inv = ISqrt32( (Word32) enrL_fx, &enrL_inv_q ); - temp_y_inv = ISqrt32( (Word32) enrR_fx, &enrR_inv_q ); + temp_x_inv = ISqrt32( W_extract_l( enrL_fx ), &enrL_inv_q ); + temp_y_inv = ISqrt32( W_extract_l( enrR_fx ), &enrR_inv_q ); energy_xy_fx = Mpy_32_32( temp_x_inv, temp_y_inv ); - res_q = enrL_inv_q + enrR_inv_q; + res_q = add( enrL_inv_q, enrR_inv_q ); headroom_left_dotLR = W_norm( dotLR_fx ); dotLR_q = OUTPUT_Q * 2; - IF( headroom_left_dotLR < 32 ) + move16(); + IF( LT_16( headroom_left_dotLR, 32 ) ) { - dotLR_fx = W_shr( dotLR_fx, 32 - headroom_left_dotLR ); - dotLR_q = dotLR_q - ( 32 - headroom_left_dotLR ); + dotLR_fx = W_shr( dotLR_fx, sub( 32, headroom_left_dotLR ) ); + dotLR_q = sub( dotLR_q, sub( 32, headroom_left_dotLR ) ); } - dotLR_fx = Mpy_32_32( (Word32) dotLR_fx, energy_xy_fx ); - dotLR_q = dotLR_q + ( 31 - res_q ) - 31; - dotLR_fx = W_shl( dotLR_fx, 31 - dotLR_q ); + dotLR_fx = Mpy_32_32( W_extract_l( dotLR_fx ), energy_xy_fx ); + dotLR_q = add( dotLR_q, sub( sub( 31, res_q ), 31 ) ); + dotLR_fx = W_shl( dotLR_fx, sub( 31, dotLR_q ) ); } - c_LR_fx = extract_h( (Word32) dotLR_fx ); + c_LR_fx = extract_h( W_extract_l( dotLR_fx ) ); Word16 div_q = 0; - c_fx = BASOP_Util_Divide3232_Scale( (Word32) enrL_fx, (Word32) enrR_fx, &div_q ); - c_q = 15 - div_q + ( enrR_q - enrL_q ); + move16(); + c_fx = BASOP_Util_Divide3232_Scale( W_extract_l( enrL_fx ), W_extract_l( enrR_fx ), &div_q ); + c_q = add( sub( 15, div_q ), sub( enrR_q, enrL_q ) ); hCPE->hStereoTD->c_LR_LT_fx = L_add( Mpy_32_32( STEREO_TD_PS_CORR_FILT_FX, hCPE->hStereoTD->c_LR_LT_fx ), - Mpy_32_32( L_sub( ONE_IN_Q31, STEREO_TD_PS_CORR_FILT_FX ), L_deposit_h( (Word16) c_LR_fx ) ) ); + Mpy_32_32( L_sub( ONE_IN_Q31, STEREO_TD_PS_CORR_FILT_FX ), L_deposit_h( extract_l( c_LR_fx ) ) ) ); + move32(); hCPE->hStereoCng->c_LR_LT_fx = hCPE->hStereoTD->c_LR_LT_fx; + move32(); - Word16 sqrt_q = 31 - c_q; + Word16 sqrt_q = sub( 31, c_q ); Word32 sqrt_res = Sqrt32( c_fx, &sqrt_q ); tmp_fx = Mpy_32_32( beta_fx, L_sub( ONE_IN_Q31, beta_fx ) ); tmp_fx = Mpy_32_32( tmp_fx, hCPE->hStereoTD->c_LR_LT_fx ); tmp_fx = Mpy_32_32( tmp_fx, sqrt_res ); - Word16 tmp_q = ( 31 - sqrt_q ) - 1; // *2 handled with q reduction + Word16 tmp_q = sub( sub( 31, sqrt_q ), 1 ); // *2 handled with q reduction beta_sqr = Mpy_32_32( beta_fx, beta_fx ); one_minus_beta_sqr = Mpy_32_32( L_sub( ONE_IN_Q31, beta_fx ), L_sub( ONE_IN_Q31, beta_fx ) ); - numer_1 = Mpy_32_16_1( one_minus_beta_sqr, (Word16) c_fx ); // c_q+31-15 - numer_1_q = c_q + 31 - 15; + numer_1 = Mpy_32_16_1( one_minus_beta_sqr, extract_l( c_fx ) ); // c_q+31-15 + numer_1_q = add( c_q, 31 - 15 ); numer_2 = beta_sqr; // 31 + move32(); numer_2_q = 31; + move16(); numer_3 = tmp_fx; // tmp_q + move32(); numer_3_q = tmp_q; + move16(); num_q = 31; - num_q = num_q < numer_1_q ? num_q : numer_1_q; - num_q = num_q < numer_2_q ? num_q : numer_2_q; - num_q = num_q < numer_3_q ? num_q : numer_3_q; + move16(); + + num_q = s_min( num_q, numer_1_q ); + num_q = s_min( num_q, numer_2_q ); - denom_1 = Mpy_32_16_1( beta_sqr, (Word16) c_fx ); // c_q+31-15; - denom_1_q = c_q + 31 - 15; + num_q = s_min( num_q, numer_3_q ); + + denom_1 = Mpy_32_16_1( beta_sqr, extract_l( c_fx ) ); // c_q+31-15; + denom_1_q = add( c_q, 31 - 15 ); denom_2 = one_minus_beta_sqr; // 31 + move32(); denom_2_q = 31; + move16(); denom_3 = tmp_fx; // tmp_q + move32(); denom_3_q = tmp_q; + move16(); denom_q = 31; - denom_q = denom_q < denom_1_q ? denom_q : denom_1_q; - denom_q = denom_q < denom_2_q ? denom_q : denom_2_q; - denom_q = denom_q < denom_3_q ? denom_q : denom_3_q; + move16(); + + denom_q = s_min( denom_q, denom_1_q ); + + denom_q = s_min( denom_q, denom_2_q ); - num = L_add( L_shr( numer_1, numer_1_q - num_q ), L_sub( L_shr( numer_2, numer_2_q - num_q ), L_shr( numer_3, numer_3_q - num_q ) ) ) + DELTA_FX; - denom = L_add( L_shr( denom_1, denom_1_q - denom_q ), L_add( L_shr( denom_2, denom_2_q - denom_q ), L_shr( denom_3, denom_3_q - denom_q ) ) ) + DELTA_FX; + denom_q = s_min( denom_q, denom_3_q ); + + num = L_add( L_add( L_shr( numer_1, sub( numer_1_q, num_q ) ), L_sub( L_shr( numer_2, sub( numer_2_q, num_q ) ), L_shr( numer_3, sub( numer_3_q, num_q ) ) ) ), DELTA_FX ); + denom = L_add( L_add( L_shr( denom_1, sub( denom_1_q, denom_q ) ), L_add( L_shr( denom_2, sub( denom_2_q, denom_q ) ), L_shr( denom_3, sub( denom_3_q, denom_q ) ) ) ), DELTA_FX ); Word16 q = 0; + move16(); sc_fx = BASOP_Util_Divide3232_Scale( num, denom, &q ); - sc_q = 15 - ( num_q - denom_q ) - q; + sc_q = sub( sub( 15, sub( num_q, denom_q ) ), q ); Word32 sc_thr = L_shr( sc_fx, sc_q ); - IF( sc_thr > 4 ) + IF( GT_32( sc_thr, 4 ) ) { sc_fx = MAX_WORD16; + move32(); } ELSE { - sc_fx = L_shr( sc_fx, sc_q - 13 ); + sc_fx = L_shr( sc_fx, sub( sc_q, 13 ) ); } sqrt_q = 2; + move16(); sqrt_res = Sqrt32( L_shl_sat( sc_fx, 16 ), &sqrt_q ); IF( sqrt_q < 0 ) { - sqrt_res = L_shr( sqrt_res, -1 * sqrt_q ); + sqrt_res = L_shr( sqrt_res, ( -sqrt_q ) ); sqrt_q = 0; + move16(); } hCPE->hStereoTD->SP_ratio_LT_fx = L_add_sat( Mpy_32_32( 1932735283, hCPE->hStereoTD->SP_ratio_LT_fx ), L_shl_sat( Mpy_32_32( 214748364, sqrt_res ), sqrt_q ) ); + move32(); - IF( hCPE->hStereoCng->nr_corr_frames < CM_INIT ) + IF( LT_16( hCPE->hStereoCng->nr_corr_frames, CM_INIT ) ) { - hCPE->hStereoCng->nr_corr_frames++; + hCPE->hStereoCng->nr_corr_frames = add( hCPE->hStereoCng->nr_corr_frames, 1 ); + move16(); } hCPE->hStereoCng->nr_dft_frames = 0; + move16(); return; } @@ -1916,51 +2034,58 @@ static void FindEmEs_fx( Word16 headroom_left_ener_fx, headroom_left_ener_side_fx; Word16 ener_q, ener_side_q; ener_fx = 1; + move64(); ener_side_fx = 1; + move64(); Word32 square_res, log_res, division_res; Word16 temp_q = 0; - for ( i = 0; i < len; i++ ) + move16(); + FOR( i = 0; i < len; i++ ) { mono_i_fx = L_add( L_shr( ch1_fx[i], 1 ), L_shr( ch2_fx[i], 1 ) ); - ener_fx = W_add( ener_fx, ( (Word64) mono_i_fx * mono_i_fx ) ); + ener_fx = W_add( ener_fx, W_mult0_32_32( mono_i_fx, mono_i_fx ) ); side_i_fx = L_sub( L_shr( ch1_fx[i], 1 ), L_shr( ch2_fx[i], 1 ) ); - ener_side_fx = W_add( ener_side_fx, ( (Word64) side_i_fx * side_i_fx ) ); + ener_side_fx = W_add( ener_side_fx, W_mult0_32_32( side_i_fx, side_i_fx ) ); } headroom_left_ener_fx = W_norm( ener_fx ); headroom_left_ener_side_fx = W_norm( ener_side_fx ); - IF( headroom_left_ener_fx < 32 ) + IF( LT_16( headroom_left_ener_fx, 32 ) ) { - ener_fx = W_shr( ener_fx, ( 32 - headroom_left_ener_fx ) ); - ener_q = ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_ener_fx ) ); + ener_fx = W_shr( ener_fx, sub( 32, headroom_left_ener_fx ) ); + ener_q = sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_ener_fx ) ); } ELSE { ener_q = ( 2 * OUTPUT_Q ); + move16(); } - IF( headroom_left_ener_side_fx < 32 ) + IF( LT_16( headroom_left_ener_side_fx, 32 ) ) { - ener_side_fx = W_shr( ener_side_fx, ( 32 - headroom_left_ener_side_fx ) ); - ener_side_q = ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_ener_side_fx ) ); + ener_side_fx = W_shr( ener_side_fx, sub( 32, headroom_left_ener_side_fx ) ); + ener_side_q = sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_ener_side_fx ) ); } ELSE { ener_side_q = ( 2 * OUTPUT_Q ); + move16(); } /**es_em = 10 * ( log10( sqrt( ener_side / len ) ) - log10( sqrt( ener / len ) ) ); */ - division_res = BASOP_Util_Divide3232_Scale( (Word32) ener_side_fx, (Word32) ener_fx, &temp_q ); - temp_q = temp_q - ( ener_side_q - ener_q ); - square_res = Sqrt32( L_deposit_h( (Word16) division_res ), &temp_q ); + division_res = BASOP_Util_Divide3232_Scale( W_extract_l( ener_side_fx ), W_extract_l( ener_fx ), &temp_q ); + temp_q = sub( temp_q, sub( ener_side_q, ener_q ) ); + square_res = Sqrt32( L_deposit_h( extract_l( division_res ) ), &temp_q ); IF( temp_q < 0 ) { - square_res = L_shr( square_res, -1 * temp_q ); + square_res = L_shr( square_res, ( -temp_q ) ); temp_q = 0; + move16(); } log_res = BASOP_Util_Log2( square_res ); es_em_fx = Mpy_32_32( log_res, 1616107501 ); // 25+30 /* long-term estimate */ *lt_es_em_fx = L_add( Mpy_32_32( 858993459, *lt_es_em_fx ), Mpy_32_32( 1288490188, es_em_fx ) ); + move32(); return; } #endif @@ -2086,11 +2211,11 @@ void stereo_cna_update_params( void stereo_cna_update_params_fx( CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */ Word32 *output_fx[CPE_CHANNELS], /* i : Output signal */ - const int16_t output_frame, /* i : Output frame length */ - const int16_t tdm_ratio_idx /* i : TDM ratio index */ + const Word16 output_frame, /* i : Output frame length */ + const Word16 tdm_ratio_idx /* i : TDM ratio index */ ) { - int16_t i; + Word16 i; Word64 enrL_fx, enrR_fx, dotLR_fx; Word32 c_fx, c_LR_fx, c_ILD_fx; Decoder_State **sts; @@ -2113,110 +2238,124 @@ void stereo_cna_update_params_fx( ///// - if ( hCPE->element_mode == IVAS_CPE_DFT ) + IF( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) { - if ( hCPE->nchan_out > 1 ) + IF( GT_16( hCPE->nchan_out, 1 ) ) { FindEmEs_fx( output_fx[0], output_fx[1], output_frame, &hCPE->lt_es_em_fx ); } - else + ELSE { hCPE->lt_es_em_fx = 0; + move32(); } } - else if ( hCPE->element_mode == IVAS_CPE_TD ) + ELSE IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) ) { FindEmEs_fx( output_fx[0], output_fx[1], output_frame, &hCPE->lt_es_em_fx ); hCPE->hStereoCng->first_SID_after_TD = 1; + move16(); stereo_cng_compute_LRcorr_fx( hCPE, output_fx, output_frame, tdm_ratio_idx ); } - else + ELSE { return; } enrL_fx = 0; + move64(); enrR_fx = 0; + move64(); dotLR_fx = 0; + move64(); - - if ( hCPE->element_mode == IVAS_CPE_TD || ( hCPE->element_mode == IVAS_CPE_DFT && sts[0]->core_brate > SID_2k40 && sts[0]->VAD == 0 ) ) + test(); + test(); + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) || ( EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) && GT_32( sts[0]->core_brate, SID_2k40 ) && ( sts[0]->VAD == 0 ) ) ) { - if ( hCPE->nchan_out == 1 ) + IF( EQ_16( hCPE->nchan_out, 1 ) ) { c_LR_fx = MAX_WORD16; + move32(); c_ILD_fx = 0; + move32(); } - else + ELSE { FOR( i = 0; i < output_frame; i++ ) { - - enrL_fx += ( (Word64) output_fx[0][i] ) * ( output_fx[0][i] ); - enrR_fx += ( (Word64) output_fx[1][i] ) * ( output_fx[1][i] ); - dotLR_fx += ( ( (Word64) output_fx[0][i] ) * ( output_fx[1][i] ) ); + enrL_fx = W_add( enrL_fx, W_mult0_32_32( output_fx[0][i], output_fx[0][i] ) ); + enrR_fx = W_add( enrR_fx, W_mult0_32_32( output_fx[1][i], output_fx[1][i] ) ); + dotLR_fx = W_add( dotLR_fx, W_mult0_32_32( output_fx[0][i], output_fx[1][i] ) ); } - dotLR_fx = dotLR_fx + EPSILON_FX_SMALL; + dotLR_fx = W_add( dotLR_fx, EPSILON_FX_SMALL ); if ( dotLR_fx < 0 ) { dotLR_fx = W_neg( dotLR_fx ); } - enrL_fx = enrL_fx + EPSILON_FX_SMALL; - enrR_fx = enrR_fx + EPSILON_FX_SMALL; + enrL_fx = W_add( enrL_fx, EPSILON_FX_SMALL ); + enrR_fx = W_add( enrR_fx, EPSILON_FX_SMALL ); headroom_left_x = W_norm( enrL_fx ); headroom_left_y = W_norm( enrR_fx ); - IF( headroom_left_x < 32 ) + IF( LT_16( headroom_left_x, 32 ) ) { - enrL_fx = W_shr( enrL_fx, ( 32 - headroom_left_x ) ); - x_q = ( 31 - ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_x ) ) ); + enrL_fx = W_shr( enrL_fx, sub( 32, headroom_left_x ) ); + x_q = sub( 31, sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_x ) ) ); } ELSE { - x_q = 31 - ( 2 * OUTPUT_Q ); + x_q = sub( 31, ( 2 * OUTPUT_Q ) ); } - IF( headroom_left_y < 32 ) + IF( LT_16( headroom_left_y, 32 ) ) { - enrR_fx = W_shr( enrR_fx, ( 32 - headroom_left_y ) ); - y_q = ( 31 - ( ( 2 * OUTPUT_Q ) - ( 32 - headroom_left_y ) ) ); + enrR_fx = W_shr( enrR_fx, sub( 32, headroom_left_y ) ); + y_q = sub( 31, sub( ( 2 * OUTPUT_Q ), sub( 32, headroom_left_y ) ) ); } ELSE { - y_q = 31 - ( 2 * OUTPUT_Q ); + y_q = sub( 31, ( 2 * OUTPUT_Q ) ); } x_inv_q = x_q; + move16(); y_inv_q = y_q; + move16(); - temp_x_inv = ISqrt32( (Word32) enrL_fx, &x_inv_q ); - temp_y_inv = ISqrt32( (Word32) enrR_fx, &y_inv_q ); + temp_x_inv = ISqrt32( W_extract_l( enrL_fx ), &x_inv_q ); + temp_y_inv = ISqrt32( W_extract_l( enrR_fx ), &y_inv_q ); energy_xy_fx = Mpy_32_32( temp_x_inv, temp_y_inv ); - temp_res_q = x_inv_q + y_inv_q; + temp_res_q = add( x_inv_q, y_inv_q ); headroom_left_dotLR_fx = W_norm( dotLR_fx ); dotLR_fx_q = OUTPUT_Q * 2; - IF( headroom_left_dotLR_fx < 32 ) + move16(); + IF( LT_16( headroom_left_dotLR_fx, 32 ) ) { - dotLR_fx = W_shr( dotLR_fx, 32 - headroom_left_dotLR_fx ); - dotLR_fx_q = dotLR_fx_q - ( 32 - headroom_left_dotLR_fx ); + dotLR_fx = W_shr( dotLR_fx, sub( 32, headroom_left_dotLR_fx ) ); + dotLR_fx_q = sub( dotLR_fx_q, sub( 32, headroom_left_dotLR_fx ) ); } - dotLR_fx = Mpy_32_32( (Word32) dotLR_fx, energy_xy_fx ); - dotLR_fx_q = dotLR_fx_q + ( 31 - temp_res_q ) - 31; - dotLR_fx = (Word64) L_shl_sat( (Word32) dotLR_fx, 31 - dotLR_fx_q ); + dotLR_fx = Mpy_32_32( W_extract_l( dotLR_fx ), energy_xy_fx ); + dotLR_fx_q = add( dotLR_fx_q, sub( sub( 31, temp_res_q ), 31 ) ); + dotLR_fx = W_deposit32_l( L_shl_sat( W_extract_l( dotLR_fx ), sub( 31, dotLR_fx_q ) ) ); /* estimate L/R correlation factor and ILD in time domain */ - c_LR_fx = extract_h( (Word32) dotLR_fx ); + c_LR_fx = extract_h( W_extract_l( dotLR_fx ) ); temp_res_q = 0; - c_fx = BASOP_Util_Divide3232_Scale( (Word32) enrL_fx, (Word32) enrR_fx, &temp_res_q ); - c_q = 15 - temp_res_q + ( y_q - x_q ); + move16(); + c_fx = BASOP_Util_Divide3232_Scale( W_extract_l( enrL_fx ), W_extract_l( enrR_fx ), &temp_res_q ); + c_q = add( sub( 15, temp_res_q ), sub( y_q, x_q ) ); Word32 one_in_c_q; IF( GT_16( c_q, 31 ) ) { c_q = 31; - c_fx = L_shr_sat( c_fx, c_q - 31 ); + move16(); + c_fx = L_shr_sat( c_fx, sub( c_q, 31 ) ); one_in_c_q = ONE_IN_Q31; + move32(); } ELSE { @@ -2227,28 +2366,34 @@ void stereo_cna_update_params_fx( Word32 denom_c_fx = L_add_sat( c_fx, one_in_c_q ); Word16 num_c_headroom = norm_l( L_sub_sat( c_fx, one_in_c_q ) ); Word16 denom_c_headroom = norm_l( L_add_sat( c_fx, one_in_c_q ) ); - Word16 min_headroom_left = num_c_headroom < denom_c_headroom ? num_c_headroom : denom_c_headroom; + Word16 min_headroom_left = s_min( num_c_headroom, denom_c_headroom ); + move16(); temp_res_q = 0; + move16(); c_ILD_fx = BASOP_Util_Divide3232_Scale( L_shl_sat( num_c_fx, min_headroom_left ), L_shl_sat( denom_c_fx, min_headroom_left ), &temp_res_q ); - if ( temp_res_q < 0 ) + IF( temp_res_q < 0 ) { - c_ILD_fx = L_shr( c_ILD_fx, -1 * temp_res_q ); + c_ILD_fx = L_shr( c_ILD_fx, negate( temp_res_q ) ); } } /* update of long-term ILD and LR correlation factors for stereo CNA */ - if ( !hFdCngDec->first_cna_noise_updated ) + IF( !hFdCngDec->first_cna_noise_updated ) { - hFdCngDec->cna_LR_LT_fx = (Word16) c_LR_fx; - hFdCngDec->cna_ILD_LT_fx = (Word16) c_ILD_fx; + hFdCngDec->cna_LR_LT_fx = extract_l( c_LR_fx ); + move16(); + hFdCngDec->cna_ILD_LT_fx = extract_l( c_ILD_fx ); + move16(); } - else + ELSE { hFdCngDec->cna_LR_LT_fx = extract_h( L_add_sat( Mpy_32_16_1( STEREO_CNA_LR_CORR_LT_FILT_FX, hFdCngDec->cna_LR_LT_fx ), - Mpy_32_16_1( L_sub( ONE_IN_Q31, STEREO_CNA_LR_CORR_LT_FILT_FX ), (Word16) c_LR_fx ) ) ); + Mpy_32_16_1( L_sub( ONE_IN_Q31, STEREO_CNA_LR_CORR_LT_FILT_FX ), extract_l( c_LR_fx ) ) ) ); + move16(); hFdCngDec->cna_ILD_LT_fx = extract_h( L_add_sat( Mpy_32_16_1( STEREO_CNA_ILD_LT_FILT_FX, hFdCngDec->cna_ILD_LT_fx ), - Mpy_32_16_1( L_sub( ONE_IN_Q31, STEREO_CNA_ILD_LT_FILT_FX ), (Word16) c_ILD_fx ) ) ); + Mpy_32_16_1( L_sub( ONE_IN_Q31, STEREO_CNA_ILD_LT_FILT_FX ), extract_l( c_ILD_fx ) ) ) ); + move16(); } set16_fx( hFdCngDec->cna_g_state_fx, hFdCngDec->cna_ILD_LT_fx, hFdCngDec->cna_nbands ); @@ -2257,33 +2402,37 @@ void stereo_cna_update_params_fx( /* Soft VAD for stereo CNA */ - if ( hCPE->element_mode == IVAS_CPE_TD || hCPE->element_mode == IVAS_CPE_DFT ) + test(); + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) || EQ_16( hCPE->element_mode, IVAS_CPE_DFT ) ) { - if ( !hFdCngDec->first_cna_noise_updated ) + IF( !hFdCngDec->first_cna_noise_updated ) { hFdCngDec->cna_act_fact_fx = 0; + move16(); } - else + ELSE { - if ( hCPE->element_mode == IVAS_CPE_TD ) + IF( EQ_16( hCPE->element_mode, IVAS_CPE_TD ) ) { /* quickly decrease in TD stereo mode */ hFdCngDec->cna_act_fact_fx = extract_h( Mpy_32_16_1( 1503238553, hFdCngDec->cna_act_fact_fx ) ); + move16(); } - else if ( ( sts[0]->VAD * MAX_WORD16 ) > hFdCngDec->cna_act_fact_fx ) // VAD is one bit + ELSE IF( GT_32( L_mult0( sts[0]->VAD, MAX_WORD16 ), hFdCngDec->cna_act_fact_fx ) ) // VAD is one bit { /* quickly increase during active frames in DFT stereo mode */ - hFdCngDec->cna_act_fact_fx = extract_h( Mpy_32_16_1( 1503238553, hFdCngDec->cna_act_fact_fx ) + ( 644245094 * sts[0]->VAD ) ); + hFdCngDec->cna_act_fact_fx = extract_h( L_add( Mpy_32_16_1( 1503238553, hFdCngDec->cna_act_fact_fx ), ( 644245094 * sts[0]->VAD ) ) ); + move16(); } - else + ELSE { /* slowly decrease during inactive frames in DFT stereo mode */ - hFdCngDec->cna_act_fact_fx = extract_h( Mpy_32_16_1( 2040109440, hFdCngDec->cna_act_fact_fx ) + ( 107374184 * sts[0]->VAD ) ); + hFdCngDec->cna_act_fact_fx = extract_h( L_add( Mpy_32_16_1( 2040109440, hFdCngDec->cna_act_fact_fx ), ( 107374184 * sts[0]->VAD ) ) ); + move16(); } } } - return; } #endif @@ -2355,8 +2504,8 @@ void stereo_cng_init_dec_fx( hStereoCng->enableSecCNA = 0; move16(); hStereoCng->c_PS_LT_fx = 16384; /* 0.5 in Q15 */ - hStereoCng->frameSize = frameSize; move16(); + hStereoCng->frameSize = frameSize; hStereoCng->last_act_element_mode = IVAS_CPE_DFT; move16(); diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index daa92a594..c9bf60ddf 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -695,20 +695,21 @@ void stereo_dft_dec_analyze_fx( * Initialization *-----------------------------------------------------------------*/ - IF( input_frame == output_frame ) + IF( EQ_16( input_frame, output_frame ) ) { trigo_fx = hStereoDft->dft_trigo_fx; - trigo_step = hStereoDft->dft_trigo_step * STEREO_DFT_TRIGO_DEC_STEP; + trigo_step = i_mult( hStereoDft->dft_trigo_step, STEREO_DFT_TRIGO_DEC_STEP ); win_right_fx = hStereoDft->win32ms_fx; win_left_fx = hStereoDft->win32ms_fx; win2_fx = hStereoDft->win232ms_fx; - IF( ana_type == DFT_STEREO_DEC_ANA_BPF ) + test(); + IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_BPF ) ) { assert( ( chan == 0 ) && "DFT stereo: BPF memory only FOR M channel" ); mem_fx = hCPE->input_mem_BPF_fx[chan]; } - ELSE IF( ana_type == DFT_STEREO_DEC_ANA_LB || ana_type == DFT_STEREO_DEC_ANA_LB_ADD ) + ELSE IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB ) || EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB_ADD ) ) { mem_fx = hCPE->input_mem_LB_fx[chan]; } @@ -717,20 +718,22 @@ void stereo_dft_dec_analyze_fx( mem_fx = hCPE->input_mem_fx[chan]; } } - ELSE IF( input_frame == L_FRAME ) + ELSE IF( EQ_16( input_frame, L_FRAME ) ) { trigo_fx = hStereoDft->dft_trigo_12k8_fx; trigo_step = STEREO_DFT_TRIGO_SRATE_12k8_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); win_right_fx = hStereoDft->win32ms_12k8_fx; win_left_fx = hStereoDft->win32ms_12k8_fx; win2_fx = hStereoDft->win232ms_12k8_fx; - IF( ana_type == DFT_STEREO_DEC_ANA_BPF ) + test(); + IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_BPF ) ) { assert( ( chan == 0 ) && "DFT stereo: BPF memory only FOR M channel" ); mem_fx = hCPE->input_mem_BPF_fx[chan]; } - ELSE IF( ana_type == DFT_STEREO_DEC_ANA_LB || ana_type == DFT_STEREO_DEC_ANA_LB_ADD ) + ELSE IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB ) || EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB_ADD ) ) { mem_fx = hCPE->input_mem_LB_fx[chan]; } @@ -740,34 +743,37 @@ void stereo_dft_dec_analyze_fx( mem_fx = hCPE->input_mem_fx[chan]; } } - ELSE IF( input_frame == L_FRAME16k ) + ELSE IF( EQ_16( input_frame, L_FRAME16k ) ) { trigo_fx = hStereoDft->dft_trigo_16k_fx; trigo_step = STEREO_DFT_TRIGO_SRATE_16k_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); win_right_fx = hStereoDft->win32ms_16k_fx; win_left_fx = hStereoDft->win32ms_16k_fx; win2_fx = hStereoDft->win232ms_16k_fx; - IF( ana_type == DFT_STEREO_DEC_ANA_BPF ) + test(); + IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_BPF ) ) { assert( ( chan == 0 ) && "DFT stereo: BPF memory only FOR M channel" ); mem_fx = hCPE->input_mem_BPF_fx[chan]; } - ELSE IF( ana_type == DFT_STEREO_DEC_ANA_LB || ana_type == DFT_STEREO_DEC_ANA_LB_ADD ) + ELSE IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB ) || EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB_ADD ) ) { mem_fx = hCPE->input_mem_LB_fx[chan]; } ELSE { - assert( ( chan == 1 ) && hCPE->hStereoDft->hConfig->res_pred_mode == STEREO_DFT_RESPRED_ESF && "16kHz sampling rate only FOR second channel with allpass signal" ); + assert( ( chan == 1 ) && ( hCPE->hStereoDft->hConfig->res_pred_mode == STEREO_DFT_RESPRED_ESF ) && "16kHz sampling rate only FOR second channel with allpass signal" ); mem_fx = hCPE->input_mem_fx[chan]; } } - ELSE IF( input_frame == L_FRAME8k ) + ELSE IF( EQ_16( input_frame, L_FRAME8k ) ) { assert( ( chan == 1 ) && "DFT stereo: 8kHz analysis only FOR residual coding" ); trigo_fx = hStereoDft->dft_trigo_8k_fx; trigo_step = STEREO_DFT_TRIGO_SRATE_8k_STEP * STEREO_DFT_TRIGO_DEC_STEP; + move16(); win_right_fx = hStereoDft->win32ms_8k_fx; win_left_fx = hStereoDft->win32ms_8k_fx; win2_fx = hStereoDft->win232ms_8k_fx; @@ -776,27 +782,32 @@ void stereo_dft_dec_analyze_fx( ELSE { IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "Error in DFT stereo: sampling rate not supported" ); - mem_fx = NULL; /* to avoid compilation warning */ - trigo_fx = NULL; /* to avoid compilation warning */ - trigo_step = -1; /* to avoid compilation warning */ + mem_fx = NULL; /* to avoid compilation warning */ + trigo_fx = NULL; /* to avoid compilation warning */ + trigo_step = -1; /* to avoid compilation warning */ + move16(); win_right_fx = NULL; /* to avoid compilation warning */ win_left_fx = NULL; /* to avoid compilation warning */ win2_fx = NULL; /* to avoid compilation warning */ } - inputFs = input_frame * FRAMES_PER_SEC; + inputFs = L_mult0( input_frame, FRAMES_PER_SEC ); delay_dec = NS2SA( inputFs, STEREO_DFT32MS_OVL_NS ); + move16(); zp = NS2SA( inputFs, STEREO_DFT32MS_ZP_NS ); + move16(); ovl = NS2SA( inputFs, STEREO_DFT32MS_OVL_NS ); + move16(); NFFT = NS2SA( inputFs, STEREO_DFT32MS_N_NS ); Word16 qfac_fx; fac_fx = BASOP_Util_Divide3232_Scale_cadence( hStereoDft->NFFT, NFFT, &qfac_fx ); qfac_fx = sub( 31, qfac_fx ); ovl2 = NS2SA( inputFs, STEREO_DFT32MS_OVL2_NS ); + move16(); /* Offset FOR the time buffers */ - assert( ( delay >= -NS2SA( input_frame * FRAMES_PER_SEC, STEREO_DFT_DELAY_DEC_BWE_NS + STEREO_DFT_OVL_NS / 2 ) ) && ( delay <= NS2SA( input_frame * FRAMES_PER_SEC, STEREO_DFT_OVL_NS ) ) ); - mem_size = delay_dec + delay; + assert( ( delay >= -NS2SA( ( input_frame * FRAMES_PER_SEC ), STEREO_DFT_DELAY_DEC_BWE_NS + STEREO_DFT_OVL_NS / 2 ) ) && ( delay <= NS2SA( ( input_frame * FRAMES_PER_SEC ), STEREO_DFT_OVL_NS ) ) ); + mem_size = add( delay_dec, delay ); /* Update buffers */ Copy32( mem_fx, input_buff_fx, mem_size ); @@ -804,7 +815,8 @@ void stereo_dft_dec_analyze_fx( Copy32( input_buff_fx + input_frame, mem_fx, mem_size ); pInput_buff_fx = input_buff_fx; - IF( hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) + test(); + IF( EQ_16( hCPE->nchan_out, 1 ) && ( hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) { pop_wmops(); return; @@ -815,145 +827,176 @@ void stereo_dft_dec_analyze_fx( * DFT Analysis: loop over frame *-----------------------------------------------------------------*/ - assert( k_offset <= STEREO_DFT_NBDIV ); + assert( ( k_offset <= STEREO_DFT_NBDIV ) ); - FOR( i = 0; i < NFFT / 4; i++ ) + FOR( i = 0; i < shr( NFFT, 2 ); i++ ) { - trigo_dec_fx[i] = trigo_fx[i * trigo_step]; - trigo_dec_fx[NFFT / 2 - i] = trigo_fx[i * trigo_step]; + trigo_dec_fx[i] = trigo_fx[i_mult( i, trigo_step )]; + move16(); + trigo_dec_fx[sub( NFFT / 2, i )] = trigo_fx[i_mult( i, trigo_step )]; + move16(); } - trigo_dec_fx[NFFT / 4] = trigo_fx[NFFT / 4 * trigo_step]; + trigo_dec_fx[NFFT / 4] = trigo_fx[i_mult( NFFT / 4, trigo_step )]; + move16(); - FOR( k = 0; k < STEREO_DFT_NBDIV - k_offset; k++ ) + FOR( k = 0; k < sub( STEREO_DFT_NBDIV, k_offset ); k++ ) { set32_fx( DFT_fx, 0, STEREO_DFT32MS_N_MAX ); IF( k == 0 ) { offset = 0; + move16(); } ELSE { /* If OVL2 = OVL offset = 10ms */ offset = NS2SA( inputFs, STEREO_DFT32MS_WIN_CENTER_NS - STEREO_DFT32MS_OVL2_NS / 2 ); + move16(); } pInput_fx = pInput_buff_fx + offset; - pDFT_out_fx = out_DFT_fx[chan] + k * STEREO_DFT32MS_N_MAX; + pDFT_out_fx = out_DFT_fx[chan] + i_mult( k, STEREO_DFT32MS_N_MAX ); /*Forwards FFT: L and R*/ /* Zero Padding & Flat Portion */ - Copy32( pInput_fx, DFT_fx + zp, NFFT - 2 * zp ); + Copy32( pInput_fx, DFT_fx + zp, sub( NFFT, i_mult( 2, zp ) ) ); /* Overlapping portions */ IF( k == 0 ) { FOR( i = 0; i < ovl; i++ ) { - DFT_fx[i + zp] = Mpy_32_16_1( DFT_fx[i + zp], win_left_fx[STEREO_DFT32MS_STEP * i] ); + DFT_fx[add( i, zp )] = Mpy_32_16_1( DFT_fx[add( i, zp )], win_left_fx[i_mult( STEREO_DFT32MS_STEP, i )] ); + move32(); } FOR( i = 0; i < ovl2; i++ ) { - DFT_fx[NFFT - zp - 1 - i] = Mpy_32_16_1( DFT_fx[NFFT - zp - 1 - i], win2_fx[i] ); + DFT_fx[sub( sub( sub( NFFT, zp ), 1 ), i )] = Mpy_32_16_1( DFT_fx[sub( sub( sub( NFFT, zp ), 1 ), i )], win2_fx[i] ); + move32(); } } ELSE { FOR( i = 0; i < ovl2; i++ ) { - DFT_fx[i + zp] = Mpy_32_16_1( DFT_fx[i + zp], win2_fx[i] ); + DFT_fx[add( i, zp )] = Mpy_32_16_1( DFT_fx[add( i, zp )], win2_fx[i] ); + move32(); } FOR( i = 0; i < ovl; i++ ) { - DFT_fx[NFFT - zp - i - 1] = Mpy_32_16_1( DFT_fx[NFFT - zp - i - 1], win_right_fx[STEREO_DFT32MS_STEP * i] ); + DFT_fx[sub( sub( sub( NFFT, zp ), i ), 1 )] = Mpy_32_16_1( DFT_fx[sub( sub( sub( NFFT, zp ), i ), 1 )], win_right_fx[i_mult( STEREO_DFT32MS_STEP, i )] ); + move32(); } } Word16 q_DFT, q_shift, guarded_bits; q_DFT = *q; + move16(); guarded_bits = find_guarded_bits_fx( NFFT ); - q_shift = L_norm_arr( DFT_fx, NFFT ) - guarded_bits; + q_shift = sub( L_norm_arr( DFT_fx, NFFT ), guarded_bits ); FOR( Word16 j = 0; j < NFFT; j++ ) { DFT_fx[j] = L_shl( DFT_fx[j], q_shift ); + move32(); } - q_DFT += q_shift; + q_DFT = add( q_DFT, q_shift ); rfft_fx( DFT_fx, trigo_dec_fx, NFFT, -1 ); - q_shift = L_norm_arr( DFT_fx, NFFT ) - ( 31 - qfac_fx ); + q_shift = sub( L_norm_arr( DFT_fx, NFFT ), sub( 31, qfac_fx ) ); FOR( Word16 j = 0; j < NFFT; j++ ) { DFT_fx[j] = L_shl( DFT_fx[j], q_shift ); + move32(); } - q_DFT += q_shift; - IF( q_out_DFT[chan] - q_DFT > 0 ) + q_DFT = add( q_DFT, q_shift ); + IF( sub( q_out_DFT[chan], q_DFT ) > 0 ) { FOR( int j = 0; j < NFFT; j++ ) { - out_DFT_fx[chan][j] = L_shr( out_DFT_fx[chan][j], q_out_DFT[chan] - q_DFT ); + out_DFT_fx[chan][j] = L_shr( out_DFT_fx[chan][j], sub( q_out_DFT[chan], q_DFT ) ); + move32(); } q_out_DFT[chan] = q_DFT; + move16(); } ELSE { FOR( int j = 0; j < NFFT; j++ ) { - DFT_fx[j] = L_shr( DFT_fx[j], q_DFT - q_out_DFT[chan] ); + DFT_fx[j] = L_shr( DFT_fx[j], sub( q_DFT, q_out_DFT[chan] ) ); + move32(); } q_DFT = q_out_DFT[chan]; + move16(); } /*Resampling: filtering+scaling*/ - IF( ana_type == DFT_STEREO_DEC_ANA_FB || ana_type == DFT_STEREO_DEC_ANA_LB || ana_type == DFT_STEREO_DEC_ANA_NOCORE ) + test(); + test(); + IF( ( ana_type == DFT_STEREO_DEC_ANA_FB ) || EQ_32( ana_type, DFT_STEREO_DEC_ANA_LB ) || EQ_32( ana_type, DFT_STEREO_DEC_ANA_NOCORE ) ) { - pDFT_out_fx[0] = L_shl( Mpy_32_32( DFT_fx[0], fac_fx ), 31 - qfac_fx ); /*DC*/ - IF( NFFT == hStereoDft->NFFT ) /*Nyquist*/ + pDFT_out_fx[0] = L_shl( Mpy_32_32( DFT_fx[0], fac_fx ), sub( 31, qfac_fx ) ); /*DC*/ + move32(); + IF( EQ_16( NFFT, hStereoDft->NFFT ) ) /*Nyquist*/ { - pDFT_out_fx[1] = L_shl( Mpy_32_32( DFT_fx[1], fac_fx ), 31 - qfac_fx ); + pDFT_out_fx[1] = L_shl( Mpy_32_32( DFT_fx[1], fac_fx ), sub( 31, qfac_fx ) ); + move32(); } ELSE { pDFT_out_fx[1] = 0; + move32(); } FOR( i = 2; i < NFFT; i++ ) { - pDFT_out_fx[i] = L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), 31 - qfac_fx ); + pDFT_out_fx[i] = L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), sub( 31, qfac_fx ) ); + move32(); } FOR( i = NFFT; i < hStereoDft->NFFT; i++ ) { pDFT_out_fx[i] = 0; + move32(); } } - ELSE IF( ana_type == DFT_STEREO_DEC_ANA_BPF ) + ELSE IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_BPF ) ) { - pDFT_out_fx[0] = L_sub( pDFT_out_fx[0], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[0], fac_fx ), dft_bpf_weights_fx[0] ), 32 - qfac_fx ) ); + pDFT_out_fx[0] = L_sub( pDFT_out_fx[0], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[0], fac_fx ), dft_bpf_weights_fx[0] ), sub( 32, qfac_fx ) ) ); + move32(); FOR( i = 1; i < STEREO_DFT_BPF_SIZE; i++ ) { - pDFT_out_fx[2 * i] = L_sub( pDFT_out_fx[2 * i], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[2 * i], fac_fx ), dft_bpf_weights_fx[i] ), 32 - qfac_fx ) ); - pDFT_out_fx[2 * i + 1] = L_sub( pDFT_out_fx[2 * i + 1], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[2 * i + 1], fac_fx ), dft_bpf_weights_fx[i] ), 32 - qfac_fx ) ); + pDFT_out_fx[2 * i] = L_sub( pDFT_out_fx[2 * i], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[2 * i], fac_fx ), dft_bpf_weights_fx[i] ), sub( 32, qfac_fx ) ) ); + move32(); + pDFT_out_fx[add( 2 * i, 1 )] = L_sub( pDFT_out_fx[add( 2 * i, 1 )], L_shl( Mpy_32_32( Mpy_32_32( DFT_fx[add( 2 * i, 1 )], fac_fx ), dft_bpf_weights_fx[i] ), sub( 32, qfac_fx ) ) ); + move32(); } } - ELSE IF( ana_type == DFT_STEREO_DEC_ANA_HB_ADD ) + ELSE IF( EQ_32( ana_type, DFT_STEREO_DEC_ANA_HB_ADD ) ) { - NFFT_core = NS2SA( hCPE->hCoreCoder[0]->L_frame * FRAMES_PER_SEC, STEREO_DFT32MS_N_NS ); + NFFT_core = NS2SA( L_mult0( hCPE->hCoreCoder[0]->L_frame, FRAMES_PER_SEC ), STEREO_DFT32MS_N_NS ); + move16(); FOR( i = NFFT_core; i < NFFT; i++ ) { - pDFT_out_fx[i] = L_add( L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), 31 - qfac_fx ), pDFT_out_fx[i] ); + pDFT_out_fx[i] = L_add( L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), sub( 31, qfac_fx ) ), pDFT_out_fx[i] ); + move32(); } } ELSE { - pDFT_out_fx[0] = L_add( pDFT_out_fx[0], L_shl( Mpy_32_32( DFT_fx[0], fac_fx ), 31 - qfac_fx ) ); /*DC*/ - IF( NFFT == hStereoDft->NFFT ) /*Nyquist*/ + pDFT_out_fx[0] = L_add( pDFT_out_fx[0], L_shl( Mpy_32_32( DFT_fx[0], fac_fx ), sub( 31, qfac_fx ) ) ); /*DC*/ + move32(); + IF( EQ_16( NFFT, hStereoDft->NFFT ) ) /*Nyquist*/ { - pDFT_out_fx[1] = L_add( L_shl( Mpy_32_32( DFT_fx[1], fac_fx ), 31 - qfac_fx ), pDFT_out_fx[1] ); + pDFT_out_fx[1] = L_add( L_shl( Mpy_32_32( DFT_fx[1], fac_fx ), sub( 31, qfac_fx ) ), pDFT_out_fx[1] ); + move32(); } FOR( i = 2; i < NFFT; i++ ) { - pDFT_out_fx[i] = L_add( L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), 31 - qfac_fx ), pDFT_out_fx[i] ); + pDFT_out_fx[i] = L_add( L_shl( Mpy_32_32( DFT_fx[i], fac_fx ), sub( 31, qfac_fx ) ), pDFT_out_fx[i] ); + move32(); } } } @@ -2505,10 +2548,6 @@ void stereo_dft_dec_read_BS( hStereoDft->res_global_gain = ECSQ_dequantize_gain( I ); -#ifdef DUMPS_ENABLED - dbgwrite_txt( &hStereoDft->res_global_gain, 1, "float_res_global_gain.txt", NULL ); -#endif // DUMPS_ENABLED - ecsq_inst.config_index = 2 * hStereoDft->res_cod_mode[k_offset] - 1; ECSQ_decode( &ecsq_inst, hStereoDft->res_cod_line_max, dec ); @@ -2517,10 +2556,6 @@ void stereo_dft_dec_read_BS( set_zero( res_buf, STEREO_DFT_N_8k ); ECSQ_dequantize_vector( dec, hStereoDft->res_global_gain, hStereoDft->res_cod_line_max, res_buf ); - -#ifdef DUMPS_ENABLED - dbgwrite_txt( res_buf, hStereoDft->res_cod_line_max, "float_res_buf.txt", NULL ); -#endif // DUMPS_ENABLED } else { diff --git a/lib_enc/ivas_mcmasa_enc.c b/lib_enc/ivas_mcmasa_enc.c index 34f1c2249..a4bcf14f6 100644 --- a/lib_enc/ivas_mcmasa_enc.c +++ b/lib_enc/ivas_mcmasa_enc.c @@ -1018,7 +1018,34 @@ void ivas_mcmasa_param_est_enc( { mvr2r( intensity_real[2], &( hMcMasa->buffer_intensity_real_vert[index - 1][0] ), num_freq_bands ); computeVerticalDiffuseness( hMcMasa->buffer_intensity_real_vert, hMcMasa->buffer_energy, hMcMasa->no_col_avg_diff, num_freq_bands, vertical_diffuseness_vector ); +#ifdef IVAS_FLOAT_FIXED + //////////////////////// to be removed /////////////////////////////// + Word32 x1_fx[MASA_FREQUENCY_BANDS]; + Word32 x2_fx[MASA_FREQUENCY_BANDS]; + Word32 y_fx[MASA_FREQUENCY_BANDS]; + Word16 x1_q_fx[MASA_FREQUENCY_BANDS]; + Word16 x2_q_fx[MASA_FREQUENCY_BANDS]; + Word16 y_q_fx[MASA_FREQUENCY_BANDS]; + FOR(i = 0; i < num_freq_bands; i++) + { + x1_q_fx[i] = Q_factor_L(diffuseness_vector[i]); + x1_fx[i] = (Word32)(diffuseness_vector[i] * (W_shl(1, x1_q_fx[i]))); + x2_q_fx[i] = Q_factor_L(vertical_diffuseness_vector[i]); + x2_fx[i] = (Word32)(vertical_diffuseness_vector[i] * (W_shl(1, x2_q_fx[i]))); + } + ///////////////////////////////////////////////////////////////////////// + + v_min_fx((const Word32 *)x1_fx, x1_q_fx, (const Word32 *)x2_fx, x2_q_fx, y_fx, y_q_fx, num_freq_bands ); + + //////////////////////// to be removed //////////////////////////////// + FOR(i = 0; i < num_freq_bands; i++) + { + diffuseness_vector[i] = (Float32)y_fx[i] / (W_shl(1, y_q_fx[i])); + } + /////////////////////////////////////////////////////////////////////// +#else v_min( diffuseness_vector, vertical_diffuseness_vector, diffuseness_vector, num_freq_bands ); +#endif } for ( band_m_idx = 0; band_m_idx < hMcMasa->nbands; band_m_idx++ ) diff --git a/lib_enc/speech_music_classif.c b/lib_enc/speech_music_classif.c index 3fe6fe53b..d529e73a6 100644 --- a/lib_enc/speech_music_classif.c +++ b/lib_enc/speech_music_classif.c @@ -1350,7 +1350,57 @@ int16_t ivas_smc_gmm( pt_mel_fb += len; } +#ifdef IVAS_FLOAT_FIXED + //////////////////// to be removed ////////////////////// + Word32 y_fx[NB_MEL_BANDS]; + Word32 x_fx[NB_MEL_BANDS]; + Word32 A_fx[NB_MEL_BANDS * NB_MEL_COEF]; + Word16 y_q_fx[NB_MEL_BANDS]; + Word16 x_q_fx[NB_MEL_BANDS]; + Word16 A_q_fx[NB_MEL_BANDS * NB_MEL_COEF]; + Word32 *pt_x_fx, *pt_A_fx; + const Float32 *pt_x, *pt_A; + Word16 *pt_x_q_fx, *pt_A_q_fx; + + pt_A_fx = A_fx; + pt_A_q_fx = A_q_fx; + pt_A = dct_mtx; + + FOR( i = 0; i < NB_MEL_COEF; i++ ) + { + pt_x = melS; + pt_x_fx = x_fx; + pt_x_q_fx = x_q_fx; + FOR( j = 0; j < NB_MEL_BANDS; j++ ) + { + IF( EQ_16( i, 0 ) ) + { + *pt_x_q_fx = sub( Q_factor_L( *pt_x ), 3 ); + *pt_x_fx++ = (Word32) ( *pt_x++ * ( W_shl( 1, *pt_x_q_fx++ ) ) ); + } + *pt_A_q_fx = sub( Q_factor_L( *pt_A ), 3 ); + *pt_A_fx++ = (Word32) ( *pt_A++ * ( W_shl( 1, *pt_A_q_fx++ ) ) ); + } + } + + v_mult_mat_fx( y_fx, y_q_fx, (const Word32 *) x_fx, x_q_fx, (const Word32 *) A_fx, A_q_fx, NB_MEL_BANDS, NB_MEL_COEF ); + + ////////////////////////////// to be removed //////////////////////// + FOR( i = 0; i < NB_MEL_COEF; i++ ) + { + IF( LT_16( y_q_fx[i], 0 ) ) + { + mfcc[i] = (Float32) y_fx[i] * W_shl( 1, -y_q_fx[i] ); + } + ELSE + { + mfcc[i] = (Float32) y_fx[i] / W_shl( 1, y_q_fx[i] ); + } + } + //////////////////////////////////////////////////////////////////// +#else v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF ); +#endif *pFV++ = mfcc[2]; *pFV++ = mfcc[6]; -- GitLab From ffbb3cc30c1e7131e33acaf4bc560e2bde2c6a87 Mon Sep 17 00:00:00 2001 From: Sandesh Venkatesh Date: Mon, 8 Jul 2024 21:17:19 +0530 Subject: [PATCH 2/2] clang formatting changes --- lib_com/ivas_tools.c | 67 ++++++++++++++++++++++++++++++++------- lib_enc/ivas_mcmasa_enc.c | 16 +++++----- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index 0e24db967..e42efb441 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -2555,21 +2555,64 @@ void lls_interp_n_fx( { Word16 i; const Word16 n_i_fx[11] = { 0, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 18432, 20480 }; // Q11 - move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); const Word16 one_by_n_fx[11] = { 0, 32767, 16384, 10911, 8192, 6553, 5459, 4681, 4096, 3640, 3276 }; - move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); const Word16 sum_i_fx[12] = { 0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 }; - move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); // 1.0f/ ( N * sum_ii[N] - sum_i[N] * sum_i[N] ) const Word32 res_table[12] = { 0, 0, 0, 357913952, 107374184, 42949672, 20452226, 10956549, 6391320, 3976821, 2603010, 385 }; - move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); + move16(); Word32 sum_x_fx, sum_ix_fx, slope_fx, offset_fx; Word16 dot_exp = 0, sum_ix_q = 0; - move16(); move16(); + move16(); + move16(); Word32 num; assert( N > 0 && LE_16( N, 10 ) ); @@ -3002,8 +3045,8 @@ Word16 is_SIDrate( test(); test(); if ( EQ_32( ivas_total_brate, SID_1k75 ) || - EQ_32( ivas_total_brate, SID_2k40 ) || - EQ_32( ivas_total_brate, IVAS_SID_5k2 ) ) + EQ_32( ivas_total_brate, SID_2k40 ) || + EQ_32( ivas_total_brate, IVAS_SID_5k2 ) ) { sid_rate_flag = 1; move16(); @@ -3373,9 +3416,9 @@ Word16 ceil_log_2( *-------------------------------------------------------------------*/ Word64 var_32_fx( - const Word32 *x, /* i : input vector */ - const Word16 len, /* i : length of inputvector */ - Word16 q /* q : q-factor for the array */ + const Word32 *x, /* i : input vector */ + const Word16 len, /* i : length of inputvector */ + Word16 q /* q : q-factor for the array */ ) { @@ -3391,7 +3434,7 @@ Word64 var_32_fx( mean = W_add( mean, x[i] ); } - mean = mean / len; /* NOTE: No BASOP for 64 bit division */ + mean = mean / len; /* NOTE: No BASOP for 64 bit division */ FOR( int i = 0; i < len; i++ ) { @@ -3400,7 +3443,7 @@ Word64 var_32_fx( var = W_shl( var, sub( 31, q ) ); - var = var / len; /* NOTE: No BASOP for 64 bit division */ + var = var / len; /* NOTE: No BASOP for 64 bit division */ return var; } diff --git a/lib_enc/ivas_mcmasa_enc.c b/lib_enc/ivas_mcmasa_enc.c index a4bcf14f6..3748ef6a4 100644 --- a/lib_enc/ivas_mcmasa_enc.c +++ b/lib_enc/ivas_mcmasa_enc.c @@ -1026,21 +1026,21 @@ void ivas_mcmasa_param_est_enc( Word16 x1_q_fx[MASA_FREQUENCY_BANDS]; Word16 x2_q_fx[MASA_FREQUENCY_BANDS]; Word16 y_q_fx[MASA_FREQUENCY_BANDS]; - FOR(i = 0; i < num_freq_bands; i++) + FOR( i = 0; i < num_freq_bands; i++ ) { - x1_q_fx[i] = Q_factor_L(diffuseness_vector[i]); - x1_fx[i] = (Word32)(diffuseness_vector[i] * (W_shl(1, x1_q_fx[i]))); - x2_q_fx[i] = Q_factor_L(vertical_diffuseness_vector[i]); - x2_fx[i] = (Word32)(vertical_diffuseness_vector[i] * (W_shl(1, x2_q_fx[i]))); + x1_q_fx[i] = Q_factor_L( diffuseness_vector[i] ); + x1_fx[i] = (Word32) ( diffuseness_vector[i] * ( W_shl( 1, x1_q_fx[i] ) ) ); + x2_q_fx[i] = Q_factor_L( vertical_diffuseness_vector[i] ); + x2_fx[i] = (Word32) ( vertical_diffuseness_vector[i] * ( W_shl( 1, x2_q_fx[i] ) ) ); } ///////////////////////////////////////////////////////////////////////// - v_min_fx((const Word32 *)x1_fx, x1_q_fx, (const Word32 *)x2_fx, x2_q_fx, y_fx, y_q_fx, num_freq_bands ); + v_min_fx( (const Word32 *) x1_fx, x1_q_fx, (const Word32 *) x2_fx, x2_q_fx, y_fx, y_q_fx, num_freq_bands ); //////////////////////// to be removed //////////////////////////////// - FOR(i = 0; i < num_freq_bands; i++) + FOR( i = 0; i < num_freq_bands; i++ ) { - diffuseness_vector[i] = (Float32)y_fx[i] / (W_shl(1, y_q_fx[i])); + diffuseness_vector[i] = (Float32) y_fx[i] / ( W_shl( 1, y_q_fx[i] ) ); } /////////////////////////////////////////////////////////////////////// #else -- GitLab