/****************************************************************************************************** (C) 2022 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB, Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other contributors to this repository. All Rights Reserved. This software is protected by copyright law and by international treaties. The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB, Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD., Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange, Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other contributors to this repository retain full ownership rights in their respective contributions in the software. This notice grants no license of any kind, including but not limited to patent license, nor is any license granted by implication, estoppel or otherwise. Contributors are required to enter into the IVAS codec Public Collaboration agreement before making contributions. This software is provided "AS IS", without any express or implied warranties. The software is in the development stage. It is intended exclusively for experts who have experience with such software and solely for the purpose of inspection. All implied warranties of non-infringement, merchantability and fitness for a particular purpose are hereby disclaimed and excluded. Any dispute, controversy or claim arising under or in relation to providing this software shall be submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and the United Nations Convention on Contracts on the International Sales of Goods. *******************************************************************************************************/ /*==================================================================================== EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0 ====================================================================================*/ #include #include "options.h" #ifdef DEBUGGING #include "debug.h" #endif #include #include "rom_dec.h" #include "rom_com.h" #include "cnst.h" #include "prot.h" #include "wmops.h" /*---------------------------------------------------------------------* * Local constants *---------------------------------------------------------------------*/ #define FEC_MAX 512 #define FEC_NB_PULSE_MAX 20 #define FEC_FFT_MAX_SIZE 512 #define FEC_DCIM_FILT_SIZE_MAX 60 #define PHASE_DITH ( PI2 ) #define DELTA_CORR 6 /* Range for phase correction around peak */ #define THRESH_TR_dB 10.0f #define THRESH_TR_LIN (float) pow( 10.0f, THRESH_TR_dB / 10.0f ) #define THRESH_TR_LIN_INV (float) pow( 10.0f, -THRESH_TR_dB / 10.0f ) #define MAX_INCREASE_GRPOW 0.0f /* maximum amplification in case of transients */ #define MAX_INCREASE_GRPOW_LIN (float) pow( 10.0f, MAX_INCREASE_GRPOW / 10.0f ) #define PHASE_DITH_SCALE (float) pow( 2.0, -16.0 ) /* for scaling random short values to +/- pi */ #define BURST_PHDITH_THRESH ( 4 - 1 ) /* speech start phase dither with losses in a row */ #define BURST_PHDITH_RAMPUP_LEN 2 /* speech ramp up degree of phase dither over a length of frames */ #define BURST_ATT_THRESH ( 3 - 1 ) /* speech start attenuate with losses in a row */ #define ATT_PER_FRAME 4 /* speech attenuation in dB */ #define BETA_MUTE_THR 10 /* time threshold to start beta-noise attenuation */ #define BETA_MUTE_FAC 0.5f /* attenuation factor per additional bad frame */ #define LGW32k 7 #define LGW16k 6 #define LGW48k LGW32k + 1 /* Use the same frequency groups as for SWB + 1 */ #define L_TRANA_LOG32k 8 #define L_TRANA_LOG16k 7 #define DELTA_CORR_F0_INT 2 /* Constant controls the bin range where Jacobsen is used */ #define ST_PFIND_SENS 0.93f /* peakfinder sensitivity */ #define L_PROT_NS 32000000L /* Prototype frame length in nanoseconds (32 ms) */ #define PH_ECU_CORR_LIMIT 0.85f /* Correlation limit for IVAS Phase ECU activation */ #define PH_ECU_N_LIMIT 56 /* fec_alg analysis frame limit for IVAS Phase ECU activation */ #define PFIND_SENS 0.97f /* peakfinder sensitivity */ /*---------------------------------------------------------------------* * Local functions *---------------------------------------------------------------------*/ static int16_t rand_phase( const int16_t seed, float *sin_F, float *cos_F ); static float imax2_jacobsen_mag( const float *y_re, const float *y_im ); /*-------------------------------------------------------------------* * mult_rev2() * * Multiplication of two vectors second vector is multiplied in reverse order *-------------------------------------------------------------------*/ static void mult_rev2( const float x1[], /* i : Input vector 1 */ const float x2[], /* i : Input vector 2 */ float y[], /* o : Output vector that contains vector 1 .* vector 2 */ const int16_t N /* i : Vector length */ ) { int16_t i, j; for ( i = 0, j = N - 1; i < N; i++, j-- ) { y[i] = x1[i] * x2[j]; } return; } /*-------------------------------------------------------------------* * fft_spec2() * * Square magnitude of fft spectrum *-------------------------------------------------------------------*/ static void fft_spec2( float x[], /* i/o: Input vector: complex spectrum -> square magnitude spectrum */ const int16_t N /* i : Vector length */ ) { int16_t i, j; for ( i = 1, j = N - 1; i < N / 2; i++, j-- ) { x[i] = x[i] * x[i] + x[j] * x[j]; } x[0] *= x[0]; x[N / 2] *= x[N / 2]; return; } /*------------------------------------------------------------------* * rand_phase() * * randomized phase in form of sin and cos components *------------------------------------------------------------------*/ /*! r: Updated seed from RNG */ static int16_t rand_phase( const int16_t seed, /* i : RNG seed */ float *sin_F, /* o : random phase sin value */ float *cos_F /* o : random phase cos value */ ) { const float *sincos = sincos_t_ext + 128; int16_t seed2 = seed; own_random( &seed2 ); if ( seed2 & 0x40 ) { *sin_F = sincos[seed2 >> 8]; } else { *sin_F = -sincos[seed2 >> 8]; } if ( seed2 & 0x80 ) { *cos_F = sincos[-( seed2 >> 8 )]; } else { *cos_F = -sincos[-( seed2 >> 8 )]; } return seed2; } /*----------------------------------------------------------------------------- * imax2_jacobsen_mag() * * refine peak interpolation using jacobsen and periodic speca ana windows *----------------------------------------------------------------------------*/ /*! r: The location, relative to the middle of the 3 given data point, of the maximum. (Q15)*/ float imax2_jacobsen_mag( const float *y_re, /* i : The 3 given data points. real part order -1 0 1 */ const float *y_im /* i : The 3 given data points. imag part order 1 0 -1 (from FFT) */ ) { float posi; const float *pY; float y_m1_re, y_0_re, y_p1_re; float y_m1_im, y_0_im, y_p1_im; float N_re, N_im; float D_re, D_im; float numer, denom; /* Jacobsen estimates peak offset relative y_0 using * X_m1 - X_p1 * d = REAL ( ------------------- ) * c_jacob * 2*X_0 - X_m1 -Xp1 * * Where c_jacob is a window dependent constant */ #define C_JACOB 1.1453f /* % assume 0.1875 hammrect window 'symmetric' */ /* Get the bin parameters into variables */ pY = y_re; y_m1_re = *pY++; y_0_re = *pY++; y_p1_re = *pY++; /* Same for imaginary parts - note reverse order from FFT */ pY = y_im; y_p1_im = *pY++; y_0_im = *pY++; y_m1_im = *pY++; /* prepare numerator real and imaginary parts*/ N_re = y_m1_re - y_p1_re; N_im = y_m1_im - y_p1_im; /* prepare denominator real and imaginary parts */ D_re = 2 * y_0_re - y_m1_re - y_p1_re; D_im = 2 * y_0_im - y_m1_im - y_p1_im; /* REAL part of complex division */ numer = N_re * D_re + N_im * D_im; denom = D_re * D_re + D_im * D_im; test(); if ( numer != 0 && denom != 0 ) { posi = numer / denom * C_JACOB; } else { posi = 0; /* flat top, division is not possible choose center freq */ } return posi; } /*------------------------------------------------------------------* * trans_ana() * * Transient analysis *------------------------------------------------------------------*/ static void trans_ana( const float *xfp, /* i : Input signal */ float *mag_chg, /* i/o: Magnitude modification */ float *ph_dith, /* i/o: Phase dither */ float *mag_chg_1st, /* i/o: per band magnitude modifier for transients */ const int16_t output_frame, /* i : Frame length */ const int16_t time_offs, /* i : Time offset */ const float est_mus_content, /* i : 0.0=speech_like ... 1.0=Music (==st->env_stab ) */ const int16_t last_fec, /* i : signal that previous frame was concealed with fec_alg*/ float *alpha, /* o : Magnitude modification factors for fade to average */ float *beta, /* o : Magnitude modification factors for fade to average */ float *beta_mute, /* o : Factor for long-term mute */ float Xavg[LGW_MAX] /* o : Frequency group average gain to fade to */ ) { const float *w_hamm; float grp_pow_chg, att_val, att_degree; float xfp_left[L_TRANA48k], xfp_right[L_TRANA48k]; float gr_pow_left[LGW_MAX], gr_pow_right[LGW_MAX]; const float *xfp_; int16_t Ltrana, Ltrana_2, Lprot, LtranaLog = 0, Lgw, k, burst_len; int16_t att_always[LGW_MAX]; /* fixed attenuation per frequency group if set to 1*/ int16_t burst_phdith_thresh; int16_t burst_att_thresh; float att_per_frame; int16_t tr_dec[LGW_MAX]; /* check burst error */ burst_len = time_offs / output_frame + 1; set_s( att_always, 0, LGW_MAX ); *ph_dith = 0.0f; /* softly shift attenuation just a bit later for estimated "stable" music_content */ burst_phdith_thresh = BURST_PHDITH_THRESH + (int16_t) ( est_mus_content * 1.0f + 0.5f ); burst_att_thresh = BURST_ATT_THRESH + (int16_t) ( est_mus_content * 1.0f + 0.5f ); att_per_frame = (float) ( ATT_PER_FRAME - (int16_t) ( est_mus_content * 1.0f + 0.5f ) ); /* only slighty less att for music */ att_per_frame *= 0.1f; if ( burst_len > burst_phdith_thresh ) { /* increase degree of dither */ *ph_dith = PHASE_DITH * min( 1.0f, ( (float) burst_len - (float) burst_phdith_thresh ) / (float) BURST_PHDITH_RAMPUP_LEN ); } att_degree = 0; if ( burst_len > burst_att_thresh ) { set_s( att_always, 1, LGW_MAX ); /* increase degree of attenuation */ if ( burst_len - burst_att_thresh <= PH_ECU_MUTE_START ) { att_degree = (float) ( burst_len - burst_att_thresh ) * att_per_frame; } else { att_degree = (float) PH_ECU_MUTE_START * att_per_frame + ( burst_len - burst_att_thresh - PH_ECU_MUTE_START ) * 6.0206f; } } Lprot = ( 2 * output_frame * 4 ) / 5; /* 4/5==1024/1280, keep mult within short */ Ltrana = Lprot / QUOT_LPR_LTR; Ltrana_2 = Ltrana / 2; if ( output_frame == L_FRAME48k ) { w_hamm = w_hamm48k_2; Lgw = LGW48k; } else if ( output_frame == L_FRAME32k ) { w_hamm = w_hamm32k_2; LtranaLog = L_TRANA_LOG32k; Lgw = LGW32k; } else { w_hamm = w_hamm16k_2; LtranaLog = L_TRANA_LOG16k; Lgw = LGW16k; } if ( burst_len <= 1 || ( burst_len == 2 && last_fec ) ) { set_f( alpha, 1.0f, LGW_MAX ); set_f( beta, 0.0f, LGW_MAX ); *beta_mute = BETA_MUTE_FAC_INI; /* apply hamming window */ v_mult( xfp, w_hamm, xfp_left, Ltrana_2 ); mult_rev2( xfp + Ltrana_2, w_hamm, xfp_left + Ltrana_2, Ltrana_2 ); xfp_ = xfp + Lprot - Ltrana; v_mult( xfp_, w_hamm, xfp_right, Ltrana_2 ); mult_rev2( xfp_ + Ltrana_2, w_hamm, xfp_right + Ltrana_2, Ltrana_2 ); /* spectrum */ if ( output_frame == L_FRAME48k ) { fft3( xfp_left, xfp_left, Ltrana ); fft3( xfp_right, xfp_right, Ltrana ); } else { fft_rel( xfp_left, Ltrana, LtranaLog ); fft_rel( xfp_right, Ltrana, LtranaLog ); } /* square representation */ fft_spec2( xfp_left, Ltrana ); fft_spec2( xfp_right, Ltrana ); /* band powers in frequency groups exclude bin at 0 and at EVS_PI from calculation */ xfp_left[Ltrana_2] = 0.0f; xfp_right[Ltrana_2] = 0.0f; } for ( k = 0; k < Lgw; k++ ) { if ( burst_len <= 1 || ( burst_len == 2 && last_fec ) ) { gr_pow_left[k] = sum_f( xfp_left + gw[k], gw[k + 1] - gw[k] ); gr_pow_right[k] = sum_f( xfp_right + gw[k], gw[k + 1] - gw[k] ); /* check if transient in any of the bands */ gr_pow_left[k] += FLT_MIN; /* otherwise div by zero may occur */ gr_pow_right[k] += FLT_MIN; Xavg[k] = (float) ( sqrt( 0.5f * ( gr_pow_left[k] + gr_pow_right[k] ) / (float) ( gw[k + 1] - gw[k] ) ) ); grp_pow_chg = gr_pow_right[k] / gr_pow_left[k]; /* dither phase in case of transient */ /* separate transition detection and application of forced burst dithering */ tr_dec[k] = ( grp_pow_chg > THRESH_TR_LIN ) || ( grp_pow_chg < THRESH_TR_LIN_INV ); /* magnitude modification */ if ( tr_dec[k] || att_always[k] ) { att_val = min( MAX_INCREASE_GRPOW_LIN, grp_pow_chg ); att_val = (float) sqrt( att_val ); mag_chg_1st[k] = att_val; mag_chg[k] = att_val; } else { mag_chg_1st[k] = 1.0f; mag_chg[k] = 1.0f; } } else { if ( burst_len < OFF_FRAMES_LIMIT ) { mag_chg[k] = mag_chg_1st[k] * (float) pow( 10.0, -att_degree / 20.0 ); } else { mag_chg[k] = 0; } if ( burst_len > BETA_MUTE_THR ) { *beta_mute *= BETA_MUTE_FAC; } alpha[k] = mag_chg[k]; beta[k] = (float) ( sqrt( 1.0f - SQR( alpha[k] ) ) * *beta_mute ); if ( k >= LGW32k - 1 ) { beta[k] *= 0.1f; } else if ( k >= LGW16k - 1 ) { beta[k] *= 0.5f; } } } return; } /*------------------------------------------------------------------* * peakfinder() * * Peak-picking algorithm *------------------------------------------------------------------*/ void peakfinder( const float *x0, /* i : vector from which the maxima will be found */ const int16_t len0, /* i : length of input vector */ int16_t *plocs, /* o : the indicies of the identified peaks in x0 */ int16_t *cInd, /* o : number of identified peaks */ const float sel, /* i : The amount above surrounding data for a peak to be identified */ const int16_t endpoints /* i : Flag to include endpoints in peak search */ ) { float minMag, tempMag, leftMin; float dx0[L_PROT48k_2], x[L_PROT48k_2 + 1], peakMag[MAX_PLOCS]; int16_t k, i, len, tempLoc = 0, foundPeak, ii, xInd; int16_t *ind, indarr[L_PROT48k_2 + 1], peakLoc[MAX_PLOCS]; ind = indarr; /* Find derivative */ v_sub( x0 + 1, x0, dx0, len0 - 1 ); /* This is so we find the first of repeated values */ for ( i = 0; i < len0 - 1; i++ ) { if ( dx0[i] == 0.0f ) { dx0[i] = -1.0e-12f; } } /* Find where the derivative changes sign Include endpoints in potential peaks and valleys */ k = 0; if ( endpoints ) { x[k] = x0[0]; ind[k++] = 0; } for ( i = 1; i < len0 - 1; i++ ) { if ( dx0[i - 1] * dx0[i] < 0 ) { ind[k] = i; x[k++] = x0[i]; } } if ( endpoints ) { ind[k] = len0 - 1; x[k++] = x0[len0 - 1]; } /* x only has the peaks, valleys, and endpoints */ len = k; minimum( x, len, &minMag ); if ( ( len > 2 ) || ( !endpoints && ( len > 0 ) ) ) { /* Set initial parameters for loop */ tempMag = minMag; foundPeak = 0; leftMin = minMag; if ( endpoints ) { /* Deal with first point a little differently since tacked it on Calculate the sign of the derivative since we taked the first point on it does not necessarily alternate like the rest. */ /* The first point is larger or equal to the second */ if ( x[0] >= x[1] ) { ii = -1; if ( x[1] >= x[2] ) /* x[1] is not extremum -> overwrite with x[0] */ { x[1] = x[0]; ind[1] = ind[0]; ind++; len--; } } else /* First point is smaller than the second */ { ii = 0; if ( x[1] < x[2] ) /* x[1] is not extremum -> overwrite with x[0] */ { x[1] = x[0]; ind[1] = ind[0]; ind++; len--; } } } else { ii = -1; /* First point is a peak */ if ( len >= 2 ) { if ( x[1] >= x[0] ) { ii = 0; /* First point is a valley, skip it */ } } } *cInd = 0; /* Loop through extrema which should be peaks and then valleys */ while ( ii < len - 1 ) { ii++; /* This is a peak */ /*Reset peak finding if we had a peak and the next peak is bigger than the last or the left min was small enough to reset.*/ if ( foundPeak ) { tempMag = minMag; foundPeak = 0; } /* Make sure we don't iterate past the length of our vector */ if ( ii == len - 1 ) { break; /* We assign the last point differently out of the loop */ } /* Found new peak that was larger than temp mag and selectivity larger than the minimum to its left. */ if ( ( x[ii] > tempMag ) && ( x[ii] > leftMin + sel ) ) { tempLoc = ii; tempMag = x[ii]; } ii++; /* Move onto the valley */ /* Come down at least sel from peak */ if ( !foundPeak && ( tempMag > sel + x[ii] ) ) { foundPeak = 1; /* We have found a peak */ leftMin = x[ii]; peakLoc[*cInd] = tempLoc; /* Add peak to index */ peakMag[*cInd] = tempMag; ( *cInd )++; } else if ( x[ii] < leftMin ) /* New left minimum */ { leftMin = x[ii]; } } /* Check end point */ if ( x[len - 1] > tempMag && x[len - 1] > leftMin + sel ) { peakLoc[*cInd] = len - 1; peakMag[*cInd] = x[len - 1]; ( *cInd )++; } else if ( !foundPeak && tempMag > minMag ) /* Check if we still need to add the last point */ { peakLoc[*cInd] = tempLoc; peakMag[*cInd] = tempMag; ( *cInd )++; } /* Create output */ for ( i = 0; i < *cInd; i++ ) { plocs[i] = ind[peakLoc[i]]; } } else { if ( endpoints ) { /* This is a monotone function where an endpoint is the only peak */ xInd = ( x[0] > x[1] ) ? 0 : 1; peakMag[0] = x[xInd]; if ( peakMag[0] > minMag + sel ) { plocs[0] = ind[xInd]; *cInd = 1; } else { *cInd = 0; } } else { /* Input constant or all zeros -- no peaks found */ *cInd = 0; } } return; } /*-------------------------------------------------------------------* * imax_pos() * * Get interpolated maximum position *-------------------------------------------------------------------*/ /*! r: interpolated maximum position */ float imax_pos( const float *y /* i : Input vector for peak interpolation */ ) { float posi, y1, y2, y3, y3_y1, y2i; float ftmp_den1, ftmp_den2; /* Seek the extrema of the parabola P(x) defined by 3 consecutive points so that P([-1 0 1]) = [y1 y2 y3] */ y1 = y[0]; y2 = y[1]; y3 = y[2]; y3_y1 = y3 - y1; ftmp_den1 = ( y1 + y3 - 2 * y2 ); ftmp_den2 = ( 4 * y2 - 2 * y1 - 2 * y3 ); if ( ftmp_den2 == 0.0f || ftmp_den1 == 0.0f ) { return ( 0.0f ); /* early exit with left-most value */ } y2i = -0.125f * SQR( y3_y1 ) / ( ftmp_den1 ) + y2; /* their corresponding normalized locations */ posi = y3_y1 / ( ftmp_den2 ); /* Interpolated maxima if locations are not within [-1,1], calculated extrema are ignored */ if ( posi >= 1.0f || posi <= -1.0f ) { posi = y3 > y1 ? 1.0f : -1.0f; } else { if ( y1 >= y2i ) { posi = ( y1 > y3 ) ? -1.0f : 1.0f; } else if ( y3 >= y2i ) { posi = 1.0f; } } return posi + 1.0f; } /*-------------------------------------------------------------------* * spec_ana() * * Spectral analysis *-------------------------------------------------------------------*/ static void spec_ana( const float *prevsynth, /* i : Input signal */ int16_t *plocs, /* o : The indicies of the identified peaks */ float *plocsi, /* o : Interpolated positions of the identified peaks */ int16_t *num_plocs, /* o : Number of identified peaks */ float *X_sav, /* o : Stored fft spectrum */ const int16_t output_frame, /* i : Frame length */ const int16_t bwidth, /* i : Encoded bandwidth */ const int16_t element_mode, /* i : IVAS element mode */ float *noise_fac, /* o : for few peaks zeroing valleys decision making */ const float pcorr ) { int16_t i, Lprot, LprotLog2 = 0, hamm_len2 = 0, Lprot2_1, m; float *pPlocsi; int16_t *pPlocs; int16_t currPlocs, endPlocs, Lprot2p1, nJacob; int16_t n, k; int16_t st_point; int16_t end_point; float sig, noise, nsr; float window_corr_step, window_corr; const float *w_hamm = NULL; float xfp[L_PROT48k]; float Xmax, Xmin, sel; int16_t stop_band_start; int16_t stop_band_length; Lprot = 2 * output_frame * L_PROT32k / 1280; Lprot2_1 = Lprot / 2 + 1; if ( output_frame == L_FRAME48k ) { w_hamm = w_hamm_sana48k_2; hamm_len2 = L_PROT_HAMM_LEN2_48k; } else if ( output_frame == L_FRAME32k ) { w_hamm = w_hamm_sana32k_2; hamm_len2 = L_PROT_HAMM_LEN2_32k; LprotLog2 = 10; } else { w_hamm = w_hamm_sana16k_2; hamm_len2 = L_PROT_HAMM_LEN2_16k; LprotLog2 = 9; } /* Apply hamming-rect window */ mvr2r( prevsynth + hamm_len2, xfp + hamm_len2, Lprot - 2 * hamm_len2 ); if ( element_mode == EVS_MONO ) { v_mult( prevsynth, w_hamm, xfp, hamm_len2 ); mult_rev2( prevsynth + Lprot - hamm_len2, w_hamm, xfp + Lprot - hamm_len2, hamm_len2 ); } else { window_corr = w_hamm[0]; window_corr_step = w_hamm[0] / hamm_len2; for ( i = 0; i < hamm_len2; i++ ) { xfp[i] = prevsynth[i] * ( w_hamm[i] - window_corr ); xfp[Lprot - i - 1] = prevsynth[Lprot - i - 1] * ( w_hamm[i] - window_corr ); window_corr -= window_corr_step; } } /* Spectrum */ if ( output_frame == L_FRAME48k ) { fft3( xfp, xfp, Lprot ); } else { fft_rel( xfp, Lprot, LprotLog2 ); } /* Apply zeroing of non-coded FFT spectrum */ if ( output_frame > inner_frame_tbl[bwidth] ) { stop_band_start = 128 << bwidth; stop_band_length = Lprot - ( stop_band_start << 1 ); stop_band_start = stop_band_start + 1; set_f( xfp + stop_band_start, 0, stop_band_length ); } mvr2r( xfp, X_sav, Lprot ); /* Magnitude representation */ fft_spec2( xfp, Lprot ); for ( i = 0; i < Lprot2_1; i++ ) { xfp[i] = (float) sqrt( (double) xfp[i] ); } /* Find maxima */ maximum( xfp, Lprot2_1, &Xmax ); minimum( xfp, Lprot2_1, &Xmin ); if ( element_mode == EVS_MONO ) { sel = ( Xmax - Xmin ) * ( 1.0f - PFIND_SENS ); } else { sel = ( Xmax - Xmin ) * ( 1.0f - ST_PFIND_SENS ); } peakfinder( xfp, Lprot2_1, plocs, num_plocs, sel, TRUE ); /* NB peak at xfp[0] and xfp Lprot2_1-1 may occur */ /* Currently not the pitch correlation but some LF correlation */ if ( element_mode != EVS_MONO && *num_plocs > 50 && pcorr < 0.6f ) { *num_plocs = 0; } if ( element_mode == EVS_MONO ) { /* Refine peaks */ for ( m = 0; m < *num_plocs; m++ ) { if ( plocs[m] == 0 ) { plocsi[m] = plocs[m] + imax_pos( &xfp[plocs[m]] ); } else if ( plocs[m] == Lprot / 2 ) { plocsi[m] = plocs[m] - 2 + imax_pos( &xfp[plocs[m] - 2] ); } else { plocsi[m] = plocs[m] - 1 + imax_pos( &xfp[plocs[m] - 1] ); } } } else { Lprot2p1 = Lprot / 2 + 1; /* Refine peaks */ pPlocsi = plocsi; pPlocs = plocs; n = *num_plocs; /* number of peaks to process */ /* Special case-- The very 1st peak if it is at 0 index position (DC) */ /* With DELTA_CORR_F0_INT == 2 one needs to handle both *pPlocs==0 and *pPlocs==1 */ if ( n > 0 && *pPlocs == 0 ) /* Very 1st peak position possible to have a peak at 0/DC index position. */ { *pPlocsi++ = *pPlocs + imax_pos( &xfp[*pPlocs] ); pPlocs++; n = n - 1; } if ( n > 0 && *pPlocs == 1 ) /* Also 2nd peak position uses DC which makes jacobsen unsuitable. */ { *pPlocsi++ = *pPlocs - 1 + imax_pos( &xfp[*pPlocs - 1] ); currPlocs = *pPlocs++; n = n - 1; } /* All remaining peaks except the very last two possible integer positions */ currPlocs = *pPlocs++; endPlocs = Lprot2p1 - DELTA_CORR_F0_INT; /* last *pPlocs position for Jacobsen */ /* precompute number of turns based on endpoint integer location and make into a proper for loop */ if ( n > 0 ) { nJacob = n; if ( sub( endPlocs, plocs[sub( *num_plocs, 1 )] ) <= 0 ) { nJacob = sub( nJacob, 1 ); } for ( k = 0; k < nJacob; k++ ) { *pPlocsi++ = currPlocs + imax2_jacobsen_mag( &( X_sav[currPlocs - 1] ), &( X_sav[Lprot - 1 - currPlocs] ) ); currPlocs = *pPlocs++; } n = n - nJacob; } /* At this point there should at most two plocs left to process */ /* the position before fs/2 and fs/2 both use the same magnitude points */ if ( n > 0 ) { /* [ . . . . . . . ] Lprot/2+1 positions */ /* | | | */ /* 0 (Lprot/2-2) (Lprot/2) */ if ( currPlocs == ( Lprot2p1 - DELTA_CORR_F0_INT ) ) /* Also 2nd last peak position uses fs/2 which makes jacobsen less suitable. */ { *pPlocsi++ = currPlocs - 1 + imax_pos( &xfp[currPlocs - 1] ); currPlocs = *pPlocs++; n = n - 1; } /* Here the only remaining point would be a fs/2 plocs */ /* pXfp = xfp + sub(Lprot2,1); already set just a reminder where it * whould point */ if ( n > 0 ) /* fs/2 which makes special case . */ { *pPlocsi++ = currPlocs - 2 + imax_pos( &xfp[currPlocs - 2] ); currPlocs = *pPlocs++; n = n - 1; } } /* For few peaks decide noise floor attenuation */ if ( *num_plocs < 3 && *num_plocs > 0 ) { sig = sum_f( xfp, Lprot2_1 ) + EPSILON; /*excluding peaks and neighboring bins*/ for ( i = 0; i < *num_plocs; i++ ) { st_point = max( 0, plocs[i] - DELTA_CORR ); end_point = min( Lprot2_1 - 1, plocs[i] + DELTA_CORR ); set_f( &xfp[st_point], 0.0f, end_point - st_point + 1 ); } noise = sum_f( xfp, Lprot2_1 ) + EPSILON; nsr = noise / sig; if ( nsr < 0.03f ) { *noise_fac = 0.5f; } else { *noise_fac = 1.0f; } } } return; } /*-------------------------------------------------------------------* * subst_spec() * * Substitution spectrum calculation *-------------------------------------------------------------------*/ static void subst_spec( const int16_t *plocs, /* i : The indicies of the identified peaks */ const float *plocsi, /* i : Interpolated positions of the identified peaks */ int16_t *num_plocs, /* i/o: Number of identified peaks */ const int16_t time_offs, /* i : Time offset */ float *X, /* i/o: FFT spectrum */ const float *mag_chg, /* i : Magnitude modification */ const float ph_dith, /* i : Phase dither */ const int16_t *is_trans, /* i : Transient flags */ const int16_t output_frame, /* i : Frame length */ int16_t *seed, /* i/o: Random seed */ const float *alpha, /* i : Magnitude modification factors for fade to average */ const float *beta, /* i : Magnitude modification factors for fade to average */ float beta_mute, /* i : Factor for long-term mute */ const float Xavg[LGW_MAX], /* i : Frequency group averages to fade to */ const int16_t element_mode, /* i : IVAS element mode */ const int16_t ph_ecu_lookahead, /* i : Phase ECU lookahead */ const float noise_fac /* i : noise factor */ ) { const float *sincos; int16_t Xph_short; float corr_phase[MAX_PLOCS], Xph; float Lprot_1, cos_F, sin_F, tmp; int16_t Lprot, Lecu, m, i, e, im_ind, delta_corr_up, delta_corr_dn, delta_tmp; float mag_chg_local; /* for peak attenuation in burst */ int16_t k; float one_peak_flag_mask; float alpha_local; float beta_local; sincos = sincos_t_ext + 128; Lprot = (int16_t) ( L_PROT32k * output_frame / 640 ); Lprot_1 = 1.0f / Lprot; Lecu = output_frame * 2; /* Correction phase of the identified peaks */ if ( is_trans[0] || is_trans[1] ) { *num_plocs = 0; } else { tmp = PI2 * ( Lecu - ( Lecu - Lprot ) / 2 + NS2SA( output_frame * FRAMES_PER_SEC, PH_ECU_ALDO_OLP2_NS ) - ph_ecu_lookahead - output_frame / 2 + time_offs ) * Lprot_1; for ( m = 0; m < *num_plocs; m++ ) { corr_phase[m] = plocsi[m] * tmp; } } one_peak_flag_mask = 1; /* all ones mask -> keep */ if ( element_mode != EVS_MONO ) { if ( ( *num_plocs > 0 ) && sub( *num_plocs, 3 ) < 0 ) { one_peak_flag_mask = noise_fac; /* all zeroes mask -> zero */ } if ( *num_plocs == 0 ) { X[0] = 0; /* reset DC if there are no peaks */ X[shr( Lprot, 1 )] = 0; /* also reset fs/2 if there are no peaks */ } } i = 1; k = 0; im_ind = Lprot - 1; for ( m = 0; m < *num_plocs; m++ ) { delta_corr_dn = DELTA_CORR; delta_corr_up = DELTA_CORR; if ( m > 0 ) { delta_tmp = ( plocs[m] - plocs[m - 1] - 1 ) / 2; if ( delta_tmp < DELTA_CORR ) { delta_corr_dn = delta_tmp; } } if ( m < *num_plocs - 1 ) { delta_tmp = ( plocs[m + 1] - plocs[m] - 1 ) / 2; if ( delta_tmp < DELTA_CORR ) { delta_corr_up = delta_tmp; } } /* Input Xph */ while ( i < plocs[m] - delta_corr_dn ) { *seed = own_random( seed ); if ( *seed & 0x40 ) { sin_F = sincos[*seed >> 8]; } else { sin_F = -sincos[*seed >> 8]; } if ( *seed & 0x80 ) { cos_F = sincos[-( *seed >> 8 )]; } else { cos_F = -sincos[-( *seed >> 8 )]; } if ( element_mode == EVS_MONO ) { tmp = ( X[i] * cos_F - X[im_ind] * sin_F ); X[im_ind] = ( X[i] * sin_F + X[im_ind] * cos_F ); } else { tmp = one_peak_flag_mask * ( X[i] * cos_F - X[im_ind] * sin_F ); X[im_ind] = one_peak_flag_mask * ( X[i] * sin_F + X[im_ind] * cos_F ); } if ( alpha[k] < 1.0f ) { *seed = rand_phase( *seed, &sin_F, &cos_F ); X[i] = alpha[k] * tmp + beta[k] * Xavg[k] * cos_F; X[im_ind] = alpha[k] * X[im_ind] + beta[k] * Xavg[k] * sin_F; } else { X[i] = mag_chg[k] * tmp; X[im_ind] *= mag_chg[k]; } i++; im_ind--; if ( i >= gwlpr[k + 1] ) { k++; } } e = plocs[m] + delta_corr_up; if ( e > Lprot / 2 - 1 ) { e = Lprot / 2 - 1; } Xph = corr_phase[m]; Xph_short = (int16_t) ( ( (int32_t) ( Xph * 512 / EVS_PI ) ) % 32768 ) & 0x03ff; if ( Xph_short >= 512 ) { sin_F = -sincos_t_ext[Xph_short - 512]; if ( Xph_short < 768 ) { cos_F = -sincos_t_ext[Xph_short - 512 + 256]; } else { cos_F = sincos_t_ext[-Xph_short + 1024 + 256]; } } else { sin_F = sincos_t_ext[Xph_short]; if ( Xph_short < 256 ) { cos_F = sincos_t_ext[Xph_short + 256]; } else { cos_F = -sincos_t_ext[-Xph_short + 256 + 512]; } } while ( i <= e ) { mag_chg_local = mag_chg[k]; if ( ph_dith != 0.0f ) { /* Call phase randomization only when needed */ Xph = corr_phase[m]; *seed = own_random( seed ); Xph += *seed * ph_dith * PHASE_DITH_SCALE; /* where ph_dith is 0..2PI, or -2PI (in transient), bin phase scaling factor from trans_ana */ if ( ph_dith > 0.0f ) { /* up to 6 dB additional att of peaks in non_transient longer bursts, (when peak phase is randomized) */ /* 0.5~= sqrt((float)pow(10.0,-6/10.0)); ph_dith= 0..2pi,--> scale=1.0 ...5 */ mag_chg_local *= 0.5f + ( 1.0f - ( 1.0f / PHASE_DITH ) * ph_dith ) * 0.5f; } Xph_short = (int16_t) ( Xph * 512 / EVS_PI ) & 0x03ff; if ( Xph_short >= 512 ) { sin_F = -sincos_t_ext[Xph_short - 512]; if ( Xph_short < 768 ) { cos_F = -sincos_t_ext[Xph_short - 512 + 256]; } else { cos_F = sincos_t_ext[-Xph_short + 1024 + 256]; } } else { sin_F = sincos_t_ext[Xph_short]; if ( Xph_short < 256 ) { cos_F = sincos_t_ext[Xph_short + 256]; } else { cos_F = -sincos_t_ext[-Xph_short + 256 + 512]; } } } tmp = ( X[i] * cos_F - X[im_ind] * sin_F ); X[im_ind] = ( X[i] * sin_F + X[im_ind] * cos_F ); if ( alpha[k] < 1.0f ) { alpha_local = mag_chg_local; beta_local = (float) ( beta_mute * sqrt( 1.0f - SQR( alpha_local ) ) ); if ( k >= LGW32k - 1 ) { beta_local *= 0.1f; } else if ( k >= LGW16k - 1 ) { beta_local *= 0.5f; } *seed = rand_phase( *seed, &sin_F, &cos_F ); X[i] = alpha_local * tmp + beta_local * Xavg[k] * cos_F; X[im_ind] = alpha_local * X[im_ind] + beta_local * Xavg[k] * sin_F; } else { X[i] = mag_chg_local * tmp; X[im_ind] *= mag_chg_local; } i++; im_ind--; if ( i >= gwlpr[k + 1] ) { k++; } } } while ( i < Lprot / 2 ) { *seed = own_random( seed ); if ( *seed & 0x40 ) { sin_F = sincos[*seed >> 8]; } else { sin_F = -sincos[*seed >> 8]; } if ( *seed & 0x80 ) { cos_F = sincos[-( *seed >> 8 )]; } else { cos_F = -sincos[-( *seed >> 8 )]; } if ( element_mode == EVS_MONO ) { tmp = ( X[i] * cos_F - X[im_ind] * sin_F ); X[im_ind] = ( X[i] * sin_F + X[im_ind] * cos_F ); } else { tmp = one_peak_flag_mask * ( X[i] * cos_F - X[im_ind] * sin_F ); X[im_ind] = one_peak_flag_mask * ( X[i] * sin_F + X[im_ind] * cos_F ); } if ( alpha[k] < 1.0f ) { *seed = rand_phase( *seed, &sin_F, &cos_F ); X[i] = alpha[k] * tmp + beta[k] * Xavg[k] * cos_F; X[im_ind] = alpha[k] * X[im_ind] + beta[k] * Xavg[k] * sin_F; im_ind--; } else { X[i] = mag_chg[k] * tmp; X[im_ind--] *= mag_chg[k]; } i++; if ( i >= gwlpr[k + 1] ) { k++; } } return; } /*-------------------------------------------------------------------------- * rec_wtda() * * Windowing and TDA of reconstructed frame *--------------------------------------------------------------------------*/ static void rec_wtda( float *X, /* i/o: ECU frame / unwindowed ECU frame */ float *ecu_rec, /* o : Reconstructed frame in tda domain */ const int16_t output_frame, /* i : Frame length */ const int16_t Lprot, /* i : Prototype frame length */ const float old_dec[270], /* i : end of last decoded for OLA before tda and itda */ const int16_t element_mode, /* i : IVAS element mode */ const int16_t *num_p, /* i : Number of peaks */ const int16_t *plocs /* i : Peak locations */ ) { int16_t timesh; int16_t xf_len; int16_t i; float *p_ecu; float g; float tbl_delta; float xsubst_[2 * L_FRAME48k]; const float *w_hamm; float *pX_start, *pX_end; float tmp; int16_t hamm_len2; float *pNew; const float *pOldW, *pNewW; float xfwin[NS2SA( L_FRAME48k * FRAMES_PER_SEC, N_ZERO_MDCT_NS - ( 2 * FRAME_SIZE_NS - L_PROT_NS ) / 2 )]; const float *pOld; int16_t copy_len; int16_t ola_len; copy_len = NS2SA( output_frame * FRAMES_PER_SEC, ( 2 * FRAME_SIZE_NS - L_PROT_NS ) / 2 ); /* prototype fill on each side of xsubst to fill MDCT Frame */ ola_len = NS2SA( output_frame * FRAMES_PER_SEC, N_ZERO_MDCT_NS - ( 2 * FRAME_SIZE_NS - L_PROT_NS ) / 2 ); /* remaining lengt of LA_ZEROS to overlap add decoded with xsubst */ if ( output_frame == L_FRAME48k ) { w_hamm = w_hamm_sana48k_2; hamm_len2 = L_PROT_HAMM_LEN2_48k; } else if ( output_frame == L_FRAME32k ) { w_hamm = w_hamm_sana32k_2; hamm_len2 = L_PROT_HAMM_LEN2_32k; } else { w_hamm = w_hamm_sana16k_2; hamm_len2 = L_PROT_HAMM_LEN2_16k; } if ( element_mode != EVS_MONO && *num_p > 0 && plocs[0] > 3 ) { /* Perform inverse windowing of hammrect */ pX_start = X; pX_end = X + Lprot - 1; for ( i = 0; i < hamm_len2; i++ ) { tmp = 1.0f / *w_hamm; *pX_start *= tmp; *pX_end *= tmp; pX_start++; pX_end--; w_hamm++; } } /* extract reconstructed frame with aldo window */ timesh = NS2SA( output_frame * FRAMES_PER_SEC, N_ZERO_MDCT_NS ) - ( 2 * output_frame - Lprot ) / 2; set_f( xsubst_, 0.0f, 2 * output_frame - Lprot + timesh ); mvr2r( X, xsubst_ + 2 * output_frame - Lprot + timesh, Lprot - timesh ); /* Copy and OLA look ahead zero part of MDCT window from decoded signal */ if ( element_mode != EVS_MONO ) { mvr2r( old_dec, xsubst_ + NS2SA( output_frame * FRAMES_PER_SEC, N_ZERO_MDCT_NS ), copy_len ); /* also need to scale to Q0 ?? */ pOld = old_dec + copy_len; pNew = xsubst_ + copy_len + NS2SA( output_frame * FRAMES_PER_SEC, N_ZERO_MDCT_NS ); sinq( EVS_PI / ( ola_len * 2 ), 0.0f, ola_len, xfwin ); v_mult( xfwin, xfwin, xfwin, ola_len ); /* xfwin = sin^2 of 0..pi/4 */ pOldW = xfwin + ola_len - 1; pNewW = xfwin; for ( i = 0; i < ola_len; i++ ) { *pNew = *pOld * *pOldW + *pNew * *pNewW; pOld += 1; pNew += 1; pOldW -= 1; pNewW += 1; } } else { /* Smoothen onset of ECU frame */ xf_len = (int16_t) ( (float) output_frame * N_ZERO_MDCT_NS / FRAME_SIZE_NS ) - ( output_frame - Lprot / 2 ); p_ecu = xsubst_ + 2 * output_frame - Lprot + timesh; tbl_delta = 64.f / xf_len; /* 64 samples = 1/4 cycle in sincos_t */ for ( i = 0; i < xf_len; i++, p_ecu++ ) { g = sincos_t[( (int16_t) ( i * tbl_delta ) )]; g *= g; *p_ecu = g * ( *p_ecu ); } } /* Apply TDA and windowing to ECU frame */ wtda( xsubst_ + output_frame, ecu_rec, NULL, ALDO_WINDOW, ALDO_WINDOW, output_frame ); return; } /*-------------------------------------------------------------------------- * rec_frame() * * Frame reconstruction *--------------------------------------------------------------------------*/ static void rec_frame( float *X, /* i/o: FFT spectrum / IFFT of spectrum */ float *ecu_rec, /* o : Reconstructed frame in tda domain */ const int16_t output_frame, /* i : Frame length */ const float *old_dec, /* i : end of last decoded for OLA before tda and itda */ const int16_t element_mode, /* i : IVAS element mode */ const int16_t *num_p, /* i : Number of peaks */ const int16_t *plocs /* i : Peak locations */ ) { int16_t Lprot, LprotLog2 = 0; Lprot = 2 * output_frame * L_PROT32k / 1280; if ( output_frame == L_FRAME48k ) { LprotLog2 = 9; } else if ( output_frame == L_FRAME32k ) { LprotLog2 = 10; } else { LprotLog2 = 9; } /* extend spectrum and IDFT */ if ( output_frame == L_FRAME48k ) { ifft3( X, X, Lprot ); } else { ifft_rel( X, Lprot, LprotLog2 ); } rec_wtda( X, ecu_rec, output_frame, Lprot, old_dec, element_mode, num_p, plocs ); return; } /*-------------------------------------------------------------------------- * fir_dwn() * * FIR downsampling filter *--------------------------------------------------------------------------*/ static void fir_dwn( const float x[], /* i : input vector */ const float h[], /* i : impulse response of the FIR filter */ float y[], /* o : output vector (result of filtering) */ const int16_t L, /* i : input vector size */ const int16_t K, /* i : order of the FIR filter (K+1 coefs.) */ const int16_t decimation /* i : decimation */ ) { float s; int16_t i, j, k, mmax; k = 0; /* do the filtering */ for ( i = K / 2; i < L; i += decimation ) { s = x[i] * h[0]; if ( i < K ) { mmax = i; } else { mmax = K; } for ( j = 1; j <= mmax; j++ ) { s += h[j] * x[i - j]; } y[k] = s; k++; } for ( ; i < L + K / 2; i += decimation ) { s = 0; for ( j = i - L + 1; j <= K; j++ ) { s += h[j] * x[i - j]; } y[k] = s; k++; } return; } /*-------------------------------------------------------------------------- * fec_ecu_pitch() * * Pitch/correlation analysis and adaptive analysis frame length calculation *--------------------------------------------------------------------------*/ static void fec_ecu_pitch( const float *prevsynth, /* i : previous synthesis */ float *prevsynth_LP, /* o : down-sampled synthesis */ const int16_t L, /* i : Output frame length */ int16_t *N, /* o : Analysis frame length in 8kHz */ float *min_corr, /* o : correlation */ int16_t *decimatefator, /* o : Decimation factor */ const int16_t HqVoicing /* i : Hq voicing flag */ ) { int16_t i, filt_size; float accA, accB, accC, Ryy; int16_t delay_ind, k, cb_start, cb_end, tmp_short, Lon20; float Asr_LP[FEC_DCIM_FILT_SIZE_MAX + 1]; switch ( L ) { case L_FRAME48k: *decimatefator = 6; filt_size = 60; mvr2r( Asr_LP48, Asr_LP, filt_size + 1 ); break; case L_FRAME32k: *decimatefator = 4; filt_size = 40; mvr2r( Asr_LP32, Asr_LP, filt_size + 1 ); break; case L_FRAME16k: *decimatefator = 2; filt_size = 20; mvr2r( Asr_LP16, Asr_LP, filt_size + 1 ); break; default: *decimatefator = 2; filt_size = 40; mvr2r( Asr_LP16, Asr_LP, filt_size + 1 ); break; } /* We need to inverse the ALDO window */ /* Resampling to work at 8Khz */ fir_dwn( prevsynth, Asr_LP, prevsynth_LP, 2 * L, filt_size, *decimatefator ); /* resampling without delay */ Lon20 = (int16_t) ( ( L / 20 ) / *decimatefator ); /* Correlation analysis */ *min_corr = 0; accC = 0; for ( k = 0; k < 6 * Lon20; k++ ) { accC += prevsynth_LP[34 * Lon20 + k] * prevsynth_LP[34 * Lon20 + k]; } if ( HqVoicing == 1 ) { cb_start = 0; cb_end = 33 * Lon20; } else { cb_start = 0; cb_end = 28 * Lon20; } tmp_short = 34 * Lon20; accB = 0; delay_ind = cb_start; for ( i = cb_start; i < cb_end; i++ ) /* cb_end = 35 let 6 ms min of loop size */ { accA = 0; if ( i == cb_start ) { accB = 0; for ( k = 0; k < 6 * Lon20; k++ ) { accA += prevsynth_LP[i + k] * prevsynth_LP[tmp_short + k]; accB += prevsynth_LP[i + k] * prevsynth_LP[i + k]; } } else { accB = accB - prevsynth_LP[i - 1] * prevsynth_LP[i - 1] + prevsynth_LP[i + 6 * Lon20 - 1] * prevsynth_LP[i + 6 * Lon20 - 1]; for ( k = 0; k < 6 * Lon20; k++ ) { accA += prevsynth_LP[i + k] * prevsynth_LP[tmp_short + k]; } } /* tests to avoid division by zero */ if ( accB <= 0 ) { accB = 1.0f; } if ( accC <= 0 ) { accC = 1.0f; } if ( accB * accC <= 0 ) { Ryy = accA; } else { Ryy = accA / (float) sqrt( ( accB * accC ) ); } if ( Ryy > *min_corr ) { *min_corr = Ryy; delay_ind = i; } if ( HqVoicing == 0 && *min_corr > 0.95f ) { break; } } *N = 40 * Lon20 - delay_ind - 6 * Lon20; return; } /*-------------------------------------------------------------------------- * fec_ecu_dft() * * DFT analysis on adaptive frame length. Analysis frame stretched to * next power of 2 using linear interpolation. *--------------------------------------------------------------------------*/ static void fec_ecu_dft( const float *prevsynth_LP, /* i : Downsampled past synthesis (2*160 samples) */ const int16_t N, /* i : Analysis frame length in 8 kHz (corr. max) */ float *Tfr, /* o : DFT coefficients, real part */ float *Tfi, /* o : DFT coefficients, imag part */ float *sum_Tf_abs, /* o : Sum of magnitude spectrum */ float *Tf_abs, /* o : Magnitude spectrum */ int16_t *Nfft, /* o : DFT analysis length 2^(nextpow2(N)) */ const int16_t element_mode /* i : IVAS element mode */ ) { float target[2 * L_FRAME8k], tmp; int16_t i, Lon20, tmp_short, N_LP, k; int16_t alignment_point; Lon20 = (int16_t) 160 / 20; if ( element_mode == EVS_MONO ) { alignment_point = 2 * 160 - 3 * Lon20; } else { alignment_point = 2 * 160; } for ( i = 0; i < N; i++ ) { target[i] = prevsynth_LP[alignment_point - N + i]; } /* DFT */ *sum_Tf_abs = 0; *Nfft = (int16_t) pow( 2, (int16_t) ceil( log( N ) / log( 2 ) ) ); tmp = ( (float) N - 1.0f ) / ( (float) *Nfft - 1.0f ); set_f( Tfr, 0.0f, *Nfft ); set_f( Tfi, 0.0f, *Nfft ); Tfr[0] = target[0]; Tfr[*Nfft - 1] = target[N - 1]; for ( i = 1; i < *Nfft - 1; i++ ) /* interpolation for FFT */ { tmp_short = (int16_t) floor( i * tmp ); Tfr[i] = target[tmp_short] + ( (float) i * tmp - ( (float) tmp_short ) ) * ( target[tmp_short + 1] - target[tmp_short] ); } DoRTFTn( Tfr, Tfi, *Nfft ); N_LP = (int16_t) ceil( *Nfft / 2 ); for ( k = 0; k < N_LP; k++ ) { Tf_abs[k] = (float) sqrt( Tfr[k] * Tfr[k] + Tfi[k] * Tfi[k] ); *sum_Tf_abs += Tf_abs[k]; } return; } /*--------------------------------------------------------------------------* * singenerator() * * fast cosinus generator Amp*cos(2*pi*freq+phi) *--------------------------------------------------------------------------*/ static void singenerator( const int16_t L, /* i : size of output */ const float cosfreq, /* i : cosine of 1-sample dephasing at the given frequency */ const float sinfreq, /* i : sine of 1-sample dephasing at the given frequency */ const float a_re, /* i : real part of complex spectral coefficient at the given frequency */ const float a_im, /* i : imag part of complex spectral coefficient at the given frequency */ float xx[] /* o : output vector */ ) { float *ptr, L_C0, L_S0, L_C1, L_S1; float C0, S0, C1, S1; int16_t i; L_C0 = a_re; S0 = a_im; ptr = xx; *ptr = *ptr + L_C0; ptr++; for ( i = 0; i < L / 2 - 1; i++ ) { C0 = L_C0; L_C1 = C0 * cosfreq; L_C1 = L_C1 - S0 * sinfreq; L_S1 = C0 * sinfreq; S1 = L_S1 + S0 * cosfreq; *ptr = *ptr + L_C1; ptr++; C1 = L_C1; L_C0 = C1 * cosfreq; L_C0 = L_C0 - S1 * sinfreq; L_S0 = C1 * sinfreq; S0 = L_S0 + S1 * cosfreq; *ptr = *ptr + L_C0; ptr++; } C0 = L_C0; L_C1 = C0 * cosfreq; L_C1 = L_C1 - S0 * sinfreq; *ptr = *ptr + L_C1; ptr++; return; } /*-------------------------------------------------------------------------- * sinusoidal_synthesis() * * ECU frame sinusoid generation *--------------------------------------------------------------------------*/ static void sinusoidal_synthesis( const float *Tfr, /* i : DFT coefficients, real part */ const float *Tfi, /* i : DFT coefficients, imag part */ float *Tf_abs, /* i : Magnitude spectrum */ const int16_t N, /* i : Analysis frame length in 8 kHz (corr. max) */ const int16_t L, /* i : Output frame length */ const int16_t decimate_factor, /* i : Downsampling factor */ const int16_t Nfft, /* i : DFT analysis length 2^(nextpow2(N)) */ const float sum_Tf_abs, /* i : Sum of magnitude spectrum */ float *synthesis, /* o : ECU sinusoidal synthesis */ const int16_t HqVoicing /* i : HQ voicing flag */ ) { int16_t i, k, nb_pulses, indmax = 0, nb_pulses_final; int16_t pulses[FEC_MAX / 2]; float a_re[FEC_NB_PULSE_MAX], a_im[FEC_NB_PULSE_MAX], cosfreq, sinfreq; float freq[FEC_NB_PULSE_MAX], tmp; float mmax, cumsum; int16_t Lon20 = 8; /* peak selection */ int16_t PL, cpt; float old, new_s; int16_t *p_pulses; int16_t glued; p_pulses = pulses; nb_pulses = 0; new_s = Tf_abs[1]; glued = 1; cpt = 0; old = 0; PL = 0; if ( N > Lon20 * 10 || HqVoicing ) { PL = 1; } while ( cpt <= N / 2 - 1 - 2 ) { if ( Tf_abs[cpt] > old && Tf_abs[cpt] > new_s ) { glued = cpt; for ( i = glued; i < cpt + PL + 1; i++ ) { *p_pulses++ = i; nb_pulses++; } old = Tf_abs[cpt + PL]; new_s = Tf_abs[cpt + 2 + PL]; cpt = cpt + PL + 1; glued = 1; } else { old = Tf_abs[cpt]; new_s = Tf_abs[cpt + 2]; cpt++; glued = 0; } } nb_pulses_final = 0; /* peak selection : keep the more energetics (max 20) */ tmp = 1.0f / (float) ( Nfft / 2 ); cumsum = 0; for ( i = 0; i < min( FEC_NB_PULSE_MAX, nb_pulses ); i++ ) { mmax = 0; for ( k = 0; k < nb_pulses; k++ ) { if ( Tf_abs[pulses[k]] > mmax ) { mmax = Tf_abs[pulses[k]]; indmax = pulses[k]; } } cumsum += Tf_abs[indmax]; if ( HqVoicing || cumsum < sum_Tf_abs * 0.7f ) { a_re[i] = Tfr[indmax] * tmp; a_im[i] = Tfi[indmax] * tmp; freq[i] = (float) indmax * 2 / ( (float) N ); nb_pulses_final++; Tf_abs[indmax] = -1; } else { a_re[i] = Tfr[indmax] * tmp; a_im[i] = Tfi[indmax] * tmp; freq[i] = (float) indmax * 2 / ( (float) N ); nb_pulses_final++; break; } } nb_pulses = nb_pulses_final; /* sinusoidal synthesis */ set_f( synthesis, 0.0f, 40 * L / 20 ); if ( HqVoicing ) { k = 40 * L / 20; } else { k = 40 * L / 20; } for ( i = 0; i < nb_pulses; i++ ) { cosfreq = (float) cos( EVS_PI * freq[i] / (float) decimate_factor ); sinfreq = (float) sin( EVS_PI * freq[i] / (float) decimate_factor ); singenerator( k, cosfreq, sinfreq, a_re[i], a_im[i], synthesis ); } return; } /*-------------------------------------------------------------------------- * fec_noise_filling() * * Adds noise component to ECU frame by * - subtracting the sinusoidal synthesis from the analysis frame * - copying noise segments at random indices and adding them together * with an overlap-add operation * Copying the beginning of the frame from the past synthesis and aligning * it to be inserted into wtda *--------------------------------------------------------------------------*/ static void fec_noise_filling( const float *prevsynth, /* i : Past synthesis buffer (length 2*L) */ float *synthesis, /* i/o: Sinusoidal ECU / Sinusoidal ECU + noise */ const int16_t L, /* i : Output frame length */ const int16_t N, /* i : Analysis frame length in 8 kHz (corr. max) */ const int16_t HqVoicing, /* i : HQ voicing flag */ float *gapsynth, /* o : Buffer for crossfade to good frame */ int16_t *ni_seed_forfec, /* i/o: Random seed for picking noise segments */ const int16_t element_mode, /* i : IVAS element mode */ const float *old_out ) { float SS[L_FRAME48k / 2], tmp; int16_t Rnd_N_noise; int16_t k, kk, i; int16_t N_noise; float noisevect[34 * L_FRAME48k / 20]; const float *p_mdct_ola; int16_t alignment_point; if ( element_mode == EVS_MONO ) { alignment_point = 2 * L - 3 * L / 20; } else { alignment_point = 2 * L; } mvr2r( prevsynth + alignment_point - N, noisevect, N ); /* Noise addition on full band */ /* residual */ if ( N < L ) { N_noise = ( N / 2 ); for ( k = 0; k < N; k++ ) { noisevect[k] = ( noisevect[k] - synthesis[k] ); } } else { N_noise = L / 2; for ( k = 0; k < L; k++ ) { noisevect[k] = ( noisevect[N - L + k] - synthesis[N - L + k] ); } } if ( HqVoicing ) { for ( i = 0; i < N; i++ ) { noisevect[i] *= 0.25f; } } kk = 0; k = 0; Rnd_N_noise = N_noise; while ( k < 2 * L ) { if ( kk == 0 ) { tmp = ( ( own_random( ni_seed_forfec ) / (PCM16_TO_FLT_FAC) *0.2f ) + 0.5f ); kk = 1; } else { tmp = ( ( own_random( ni_seed_forfec ) / (PCM16_TO_FLT_FAC) *0.3f ) + 0.7f ); kk = 0; } Rnd_N_noise = (int16_t) ( (float) N_noise * tmp ); sinq( (const float) EVS_PI / ( 2.0f * (float) Rnd_N_noise ), (const float) EVS_PI / ( 4.0f * (float) Rnd_N_noise ), (const int16_t) Rnd_N_noise, SS ); for ( i = 0; i < Rnd_N_noise; i++ ) { if ( k < 2 * L ) { synthesis[k] += ( noisevect[N_noise - Rnd_N_noise + i] * SS[i] + noisevect[N_noise + i] * SS[Rnd_N_noise - i - 1] ); /* *noisefact; */ } k++; } } if ( element_mode == EVS_MONO ) { kk = 7 * L / 20; p_mdct_ola = prevsynth + 37 * L / 20; } else { kk = NS2SA( L * FRAMES_PER_SEC, N_ZERO_MDCT_NS ); p_mdct_ola = old_out + kk; } /* overlappadd with the ms of valid mdct of the last frame */ sinq( EVS_PI / ( 6.0f * L / 20.0f ), EVS_PI / ( 12.0f * L / 20.0f ), 3 * L / 20, SS ); for ( k = 0; k < 3 * L / 20; k++ ) { tmp = SS[k] * SS[k]; synthesis[k] = p_mdct_ola[k] * ( 1 - tmp ) + synthesis[k] * tmp; } mvr2r( synthesis, synthesis + kk, 2 * L - kk ); mvr2r( synthesis + L, gapsynth, L ); mvr2r( prevsynth + alignment_point - kk, synthesis, kk ); return; } /*-------------------------------------------------------------------------- * fec_alg() * * Pitch based error-concealment algorithm with adaptive analysis frame * length *--------------------------------------------------------------------------*/ static void fec_alg( const float *prevsynth, /* i : previous synthesis */ const float *prevsynth_LP, /* i : down-sampled synthesis */ float *ecu_rec, /* o : ECU frame with Windowing/TDA */ const int16_t output_frame, /* i : Output frame length */ const int16_t N, /* i : Analysis frame length in 8 kHz (corr. max) */ const int16_t decimatefactor, /* i : Downsampling factor */ const int16_t HqVoicing, /* i : HQ voicing flag */ float *gapsynth, /* o : Buffer for crossfade to good frame */ int16_t *ni_seed_forfec, /* i/o: Random seed for picking noise segments */ const int16_t element_mode, /* i : IVAS element mode */ const float *old_out ) { int16_t n, Nfft; float sum_Tf_abs; float Tfr[FEC_FFT_MAX_SIZE]; float Tfi[FEC_FFT_MAX_SIZE]; float Tf_abs[FEC_FFT_MAX_SIZE / 2]; float synthesis[2 * L_FRAME48k]; fec_ecu_dft( prevsynth_LP, N, Tfr, Tfi, &sum_Tf_abs, Tf_abs, &Nfft, element_mode ); sinusoidal_synthesis( Tfr, Tfi, Tf_abs, N, output_frame, decimatefactor, Nfft, sum_Tf_abs, synthesis, HqVoicing ); fec_noise_filling( prevsynth, synthesis, output_frame, N * decimatefactor, HqVoicing, gapsynth, ni_seed_forfec, element_mode, old_out ); n = (int16_t) ( (float) output_frame * N_ZERO_MDCT_NS / FRAME_SIZE_NS ); wtda( synthesis + ( output_frame - n ), ecu_rec, NULL, ALDO_WINDOW, ALDO_WINDOW, output_frame ); return; } /*-------------------------------------------------------------------------- * hq_phase_ecu() * * Main routine for HQ phase ECU *--------------------------------------------------------------------------*/ static void hq_phase_ecu( const float *prevsynth, /* i : buffer of previously synthesized signal */ float *ecu_rec, /* o : reconstructed frame in tda domain */ int16_t *time_offs, /* i/o: Sample offset for consecutive frame losses*/ float *X_sav, /* i/o: Stored spectrum of prototype frame */ int16_t *num_p, /* i/o: Number of identified peaks */ int16_t *plocs, /* i/o: Peak locations */ float *plocsi, /* i/o: Interpolated peak locations */ const float env_stab, /* i : Envelope stability parameter */ int16_t *last_fec, /* i/o: Flag for usage of pitch dependent ECU */ const int16_t prev_bfi, /* i : indicating burst frame error */ const int16_t old_is_transient[2], /* i : flags indicating previous transient frames*/ float *mag_chg_1st, /* i/o: per band magnitude modifier for transients*/ float Xavg[LGW_MAX], /* i/o: Frequency group average gain to fade to */ float *beta_mute, /* o : Factor for long-term mute */ const int16_t bwidth, /* i : Encoded bandwidth */ const int16_t output_frame, /* i : frame length */ const float pcorr, /* i : pitch correlation */ const int16_t element_mode /* i : IVAS element mode */ ) { int16_t Lprot; float mag_chg[LGW_MAX], ph_dith, X[L_PROT48k]; int16_t seed; float alpha[LGW_MAX], beta[LGW_MAX]; const float *old_dec; float noise_fac; int16_t ph_ecu_lookahead; noise_fac = 1.0f; if ( element_mode == EVS_MONO ) { ph_ecu_lookahead = NS2SA( output_frame * FRAMES_PER_SEC, PH_ECU_LOOKAHEAD_NS ); } else { ph_ecu_lookahead = 0; } Lprot = ( 2 * output_frame * 4 ) / 5; if ( !prev_bfi || ( prev_bfi && *last_fec && ( *time_offs == output_frame ) ) ) { if ( prev_bfi && *last_fec && element_mode == EVS_MONO ) { *time_offs += 0; } else { *time_offs = 0; } trans_ana( prevsynth + 2 * output_frame - Lprot - *time_offs + ph_ecu_lookahead, mag_chg, &ph_dith, mag_chg_1st, output_frame, *time_offs, env_stab, *last_fec, alpha, beta, beta_mute, Xavg ); spec_ana( prevsynth + 2 * output_frame - Lprot - *time_offs + ph_ecu_lookahead, plocs, plocsi, num_p, X_sav, output_frame, bwidth, element_mode, &noise_fac, pcorr ); if ( prev_bfi && *last_fec ) { *time_offs += output_frame; } } else { *time_offs += output_frame; if ( *time_offs <= 0 ) { /* detect wrap around of st->time_offs */ *time_offs = MAX16B; /* continued muting will ensure that the now fixed seeds are not creating tones */ } trans_ana( prevsynth + 2 * output_frame - Lprot, mag_chg, &ph_dith, mag_chg_1st, output_frame, *time_offs, env_stab, 0, alpha, beta, beta_mute, Xavg ); /* 1.0 stable-music, 0.0 speech-like */ } mvr2r( X_sav, X, Lprot ); /* seed for own_rand2 */ seed = *time_offs; if ( *num_p > 0 ) { seed += plocs[*num_p - 1]; } subst_spec( plocs, plocsi, num_p, *time_offs, X, mag_chg, ph_dith, old_is_transient, output_frame, &seed, alpha, beta, *beta_mute, Xavg, element_mode, ph_ecu_lookahead, noise_fac ); /* reconstructed frame in tda domain */ old_dec = prevsynth + 2 * output_frame - NS2SA( output_frame * FRAMES_PER_SEC, N_ZERO_MDCT_NS ); rec_frame( X, ecu_rec, output_frame, old_dec, element_mode, num_p, plocs ); return; } /*-------------------------------------------------------------------------- * hq_ecu() * * Main routine for HQ ECU *--------------------------------------------------------------------------*/ void hq_ecu( const float *prevsynth, /* i : buffer of previously synthesized signal */ float *ecu_rec, /* o : reconstructed frame in tda domain */ int16_t *time_offs, /* i/o: Sample offset for consecutive frame losses*/ float *X_sav, /* i/o: Stored spectrum of prototype frame */ int16_t *num_p, /* i/o: Number of identified peaks */ int16_t *plocs, /* i/o: Peak locations */ float *plocsi, /* i/o: Interpolated peak locations */ const float env_stab, /* i : Envelope stability parameter */ int16_t *last_fec, /* i/o: Flag for usage of pitch dependent ECU */ const int16_t ph_ecu_HqVoicing, /* i : HQ Voicing flag */ int16_t *ph_ecu_active, /* i : Phase ECU active flag */ float *gapsynth, /* o : Gap synthesis for crossfade to good frame */ const int16_t prev_bfi, /* i : indicating burst frame error */ const int16_t old_is_transient[2], /* i : flags indicating previous transient frames*/ float *mag_chg_1st, /* i/o: per band magnitude modifier for transients*/ float Xavg[LGW_MAX], /* i/o: Frequency group average gain to fade to */ float *beta_mute, /* o : Factor for long-term mute */ const int16_t output_frame, /* i : frame length */ Decoder_State *st /* i/o: decoder state structure */ ) { int16_t N; float corr = 0.0f; int16_t decimatefactor; float prevsynth_LP[2 * L_FRAME8k]; HQ_DEC_HANDLE hHQ_core; const float *fec_alg_input; int16_t evs_mode_selection; int16_t ivas_mode_selection; hHQ_core = st->hHQ_core; if ( st->element_mode == EVS_MONO ) { fec_alg_input = prevsynth + NS2SA( output_frame * FRAMES_PER_SEC, ACELP_LOOK_NS / 2 - PH_ECU_LOOKAHEAD_NS ); } else { fec_alg_input = prevsynth - NS2SA( output_frame * FRAMES_PER_SEC, PH_ECU_LOOKAHEAD_NS ); } /* find pitch and R value */ if ( !( output_frame < L_FRAME16k ) ) { fec_ecu_pitch( fec_alg_input, prevsynth_LP, output_frame, &N, &corr, &decimatefactor, ph_ecu_HqVoicing ); } else { corr = 0.0f; decimatefactor = 4; N = output_frame / 4; } evs_mode_selection = ( st->total_brate >= 48000 && ( output_frame >= L_FRAME16k && !prev_bfi && ( !old_is_transient[0] || old_is_transient[1] ) && ( ph_ecu_HqVoicing || ( ( ( hHQ_core->env_stab_plc > 0.5 ) && ( corr < 0.6 ) ) || ( hHQ_core->env_stab_plc < 0.5 && ( corr > 0.85 ) ) ) ) ) ) || ( st->total_brate < 48000 && ( ( ph_ecu_HqVoicing || corr > 0.85 ) && !prev_bfi && ( !old_is_transient[0] || old_is_transient[1] ) ) ); ivas_mode_selection = ( N < PH_ECU_N_LIMIT ) || ( corr < PH_ECU_CORR_LIMIT ); if ( ( ( st->element_mode == EVS_MONO ) && evs_mode_selection ) || ( ( st->element_mode != EVS_MONO ) && evs_mode_selection && ivas_mode_selection ) ) { fec_alg( fec_alg_input, prevsynth_LP, ecu_rec, output_frame, N, decimatefactor, ph_ecu_HqVoicing, gapsynth, &hHQ_core->ni_seed_forfec, st->element_mode, st->hHQ_core->old_out ); *last_fec = 1; *ph_ecu_active = 0; *time_offs = output_frame; } else { hq_phase_ecu( prevsynth - NS2SA( output_frame * FRAMES_PER_SEC, PH_ECU_LOOKAHEAD_NS ), ecu_rec, time_offs, X_sav, num_p, plocs, plocsi, env_stab, last_fec, prev_bfi, old_is_transient, mag_chg_1st, Xavg, beta_mute, st->bwidth, output_frame, corr, st->element_mode ); *last_fec = 0; *ph_ecu_active = 1; } return; }