diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 363237d9cfdb44a5ac608fc098ab1231b672e160..99dff9b37084259d10a4c85953977ff5a8537fb1 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -3459,6 +3459,13 @@ float sumAbs( const int16_t lvec /* i : length of input vector */ ); +#ifdef IVAS_FLOAT_FIXED +Word32 sumAbs_fx( + const Word32 *vec, /* i : input vector */ + const Word16 lvec /* i : length of input vector */ +); +#endif + void mvc2c( const uint8_t x[], /* i : input vector */ uint8_t y[], /* o : output vector */ @@ -3472,6 +3479,14 @@ float dot_product_cholesky( const int16_t N /* i : vector & matrix size */ ); +#ifdef IVAS_FLOAT_FIXED +Word32 dot_product_cholesky_fx( + const Word32 *x, /* i : vector x */ + const Word32 *A, /* i : Cholesky matrix A */ + const Word16 N /* i : vector & matrix size */ +); +#endif + #ifdef IVAS_FLOAT_FIXED void v_mult_mat_fx( Word32 *y_fx, /* o : the product x*A */ diff --git a/lib_com/ivas_tools.c b/lib_com/ivas_tools.c index dfc98c90deb164f1a52d12a6476fd6b8eada42e4..39de3240eb6a2060a81bb410b755e573ccdcc5f5 100644 --- a/lib_com/ivas_tools.c +++ b/lib_com/ivas_tools.c @@ -76,6 +76,26 @@ float sumAbs( return tmp; } +#ifdef IVAS_FLOAT_FIXED +Word32 sumAbs_fx( + const Word32 *vec, /* i : input vector */ + const Word16 lvec /* i : length of input vector */ +) +{ + Word16 i; + Word32 tmp; + + tmp = 0; + move32(); + FOR( i = 0; i < lvec; i++ ) + { + tmp = L_add( tmp, L_abs( vec[i] ) ); + } + + return tmp; +} +#endif + /*---------------------------------------------------------------------* * mvc2c() * @@ -893,6 +913,39 @@ float dot_product_cholesky( return suma; } +#ifdef IVAS_FLOAT_FIXED +Word32 dot_product_cholesky_fx( + const Word32 *x, /* i : vector x */ + const Word32 *A, /* i : Cholesky matrix A */ + const Word16 N /* i : vector & matrix size */ +) +{ + Word16 i, j; + Word32 suma, tmp_sum; + const Word32 *pt_x, *pt_A; + + pt_A = A; + suma = 0; + move32(); + + FOR( i = 0; i < N; i++ ) + { + tmp_sum = 0; + 
move32(); + pt_x = x; + FOR( j = 0; j <= i; j++ ) + { + tmp_sum = L_add( tmp_sum, Mpy_32_32( *pt_x++, *pt_A++ ) ); + } + + suma = L_add( suma, Mpy_32_32( tmp_sum, tmp_sum ) ); + } + + return suma; +} + +#endif + #ifdef IVAS_FLOAT_FIXED /*---------------------------------------------------------------------* * v_mult_mat_fx() diff --git a/lib_enc/ivas_stereo_ica_enc.c b/lib_enc/ivas_stereo_ica_enc.c index 42848988216e49603783038d9e2ed73d04218b48..dc9fdc3019d0549ddf5e6b248d4eb91c66c7171b 100644 --- a/lib_enc/ivas_stereo_ica_enc.c +++ b/lib_enc/ivas_stereo_ica_enc.c @@ -42,6 +42,7 @@ #include "rom_com.h" #include "ivas_rom_com.h" #include "prot_fx1.h" +#include "prot_fx2.h" /*--------------------------------------------------------------- * Local function prototypes @@ -371,6 +372,7 @@ static void utilCrossCorr( * Non-causal shift estimation to encode future samples. * ---------------------------------------------------------------*/ +#ifdef IVAS_FLOAT_FIXED static void corrStatsEst( STEREO_TCA_ENC_HANDLE hStereoTCA, /* i/o: Stereo TCA Encoder handle */ const float *buf1, /* i : channel 1 */ @@ -431,8 +433,31 @@ static void corrStatsEst( } mvr2r( corrEst, hStereoTCA->corrEstPrev[2], tempLen ); +#ifdef IVAS_FLOAT_FIXED + Word32 buf1_fx[160]; + Word32 buf2_fx[160]; + Word16 buf1_q, buf2_q, guard_bits; + Word32 temp_A_fx, temp_B_fx; + f2me_buf( buf1, buf1_fx, &buf1_q, 160 ); + f2me_buf( buf2, buf2_fx, &buf2_q, 160 ); + buf1_q = sub( 31, buf1_q ); + buf2_q = sub( 31, buf2_q ); + Word16 buf_q = s_min( buf1_q, buf2_q ); + guard_bits = find_guarded_bits_fx( 160 ); + scale_sig32( buf1_fx, 160, sub( sub( buf_q, guard_bits ), buf1_q ) ); + scale_sig32( buf2_fx, 160, sub( sub( buf_q, guard_bits ), buf2_q ) ); + buf_q = sub( buf_q, guard_bits ); +#endif + +#ifdef IVAS_FLOAT_FIXED + temp_A_fx = sumAbs_fx( buf1_fx, L_FRAME_DS - L_XCORRMEM_DS ) + sumAbs_fx( buf2_fx, L_FRAME_DS - L_XCORRMEM_DS ); + temp_B_fx = sumAbs_fx( buf1_fx + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ) + 
sumAbs_fx( buf2_fx + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ); + temp_A = fixedToFloat( temp_A_fx, buf_q ); + temp_B = fixedToFloat( temp_B_fx, buf_q ); +#else temp_A = sumAbs( buf1, L_FRAME_DS - L_XCORRMEM_DS ) + sumAbs( buf2, L_FRAME_DS - L_XCORRMEM_DS ); temp_B = sumAbs( buf1 + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ) + sumAbs( buf2 + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ); +#endif tempF = temp_A + temp_B + hStereoTCA->mem_tempF; hStereoTCA->mem_tempF = temp_B; @@ -637,7 +662,274 @@ static void corrStatsEst( return; } +#else +static void corrStatsEst( + STEREO_TCA_ENC_HANDLE hStereoTCA, /* i/o: Stereo TCA Encoder handle */ + const float *buf1, /* i : channel 1 */ + const float *buf2, /* i : channel 2 */ + const int16_t bufLenDS, /* i : buffer length */ + const int16_t dsFactor, /* i : buffer length */ + const int16_t vad_flag1, /* i : VAD flag channel 1 */ + const int16_t vad_flag2, /* i : VAD flag channel 2 */ + STEREO_CLASSIF_HANDLE hStereoClassif /* i/o: stereo classifier handle */ +) +{ + int16_t lagSearchRange[2]; + float corrEst[2 * L_NCSHIFT_DS + 1]; + int16_t corrLagStats[3]; + float *tempRK; + const float *winInterp; + float rInterp[MAX_INTERPOLATE]; + int16_t interpMin, interpMax, interpLen; + int16_t i, j, k, m; + float tempF, alpha; + float win_bias; + int16_t tempLen, win_width; + float loc_weight_win[4 * L_NCSHIFT_DS + 1]; + float X_hat, Y_hat, XY_hat, X_SQR_hat; + float alpha_reg, beta_reg, reg_prv_corr, dist_reg_prv_corr, bias_par, width_par; + float k1, k2, temp_A, temp_B; + int16_t stmp; + float corrEst_ncorr; + + /* init of regression parameters*/ + X_hat = 0; + X_SQR_hat = 0; + XY_hat = 0; + /* Initializations */ + alpha = 0.7f; + lagSearchRange[0] = -L_NCSHIFT_DS; + lagSearchRange[1] = L_NCSHIFT_DS; + tempLen = ( 2 * L_NCSHIFT_DS + 1 ); + + set_s( corrLagStats, 0, 3 ); + + /* First iteration of xcorr estimation */ + utilCrossCorr_mod( hStereoTCA, buf1, buf2, corrEst, lagSearchRange, bufLenDS - L_XCORRMEM_DS ); + + /* 
calculate features for the UNCLR classifier */ + unclr_calc_corr_features( hStereoClassif, hStereoTCA, buf1, buf2, bufLenDS - L_XCORRMEM_DS, corrEst, lagSearchRange, &corrEst_ncorr ); + + for ( i = 1; i < 3; i++ ) + { + v_add( hStereoTCA->corrEstPrev[i], hStereoTCA->corrEstPrev[0], hStereoTCA->corrEstPrev[0], tempLen ); + } + + /* back up the corrEst */ + for ( i = 0; i < 2; i++ ) + { + mvr2r( hStereoTCA->corrEstPrev[i + 1], hStereoTCA->corrEstPrev[i], tempLen ); + } + mvr2r( corrEst, hStereoTCA->corrEstPrev[2], tempLen ); + + temp_A = sumAbs( buf1, L_FRAME_DS - L_XCORRMEM_DS ) + sumAbs( buf2, L_FRAME_DS - L_XCORRMEM_DS ); + temp_B = sumAbs( buf1 + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ) + sumAbs( buf2 + ( L_FRAME_DS - L_XCORRMEM_DS ), L_XCORRMEM_DS ); + tempF = temp_A + temp_B + hStereoTCA->mem_tempF; + hStereoTCA->mem_tempF = temp_B; + + alpha = 0.93f; + if ( tempF > 4.0f * hStereoTCA->ica_envVarLT ) + { + alpha = 0.83f; + } + else if ( tempF > 2.0f * hStereoTCA->ica_envVarLT ) + { + alpha = 0.85f; + } + else if ( tempF > hStereoTCA->ica_envVarLT ) + { + alpha = 0.90f; + } + + hStereoTCA->corrStatsSmoothFac = alpha; + + /* long term corr Stats estimation */ + v_multc( hStereoTCA->corrEstLT, alpha, hStereoTCA->corrEstLT, 2 * L_NCSHIFT_DS + 1 ); + v_multc( corrEst, 1.0f - alpha, corrEst, 2 * L_NCSHIFT_DS + 1 ); + v_add( hStereoTCA->corrEstLT, corrEst, hStereoTCA->corrEstLT, 2 * L_NCSHIFT_DS + 1 ); + + hStereoTCA->ica_envVarLT = SMOOTH_ENV_FACTOR * hStereoTCA->ica_envVarLT + ( 1 - SMOOTH_ENV_FACTOR ) * tempF; + + mvr2r( hStereoTCA->corrEstLT, corrEst, 2 * L_NCSHIFT_DS + 1 ); + Y_hat = hStereoTCA->delay_0_mem[0]; + /* Note: keep X_hat and X_SQR_hat calculations inside the loop to allow future tuning of MAX_DELAYREGLEN */ + for ( i = 1; i < MAX_DELAYREGLEN; i++ ) + { + X_hat += (float) i; + Y_hat += hStereoTCA->delay_0_mem[i]; + XY_hat += i * hStereoTCA->delay_0_mem[i]; + X_SQR_hat += (float) ( i * i ); + } + X_hat *= INV_MAX_DELAYREGLEN; + Y_hat *= 
INV_MAX_DELAYREGLEN; + XY_hat *= INV_MAX_DELAYREGLEN; + X_SQR_hat *= INV_MAX_DELAYREGLEN; + + beta_reg = 0; + tempF = X_SQR_hat - ( X_hat * X_hat ); + if ( tempF != 0 ) + { + beta_reg = ( XY_hat - X_hat * Y_hat ) / tempF; + } + alpha_reg = ( Y_hat - beta_reg * X_hat ); + reg_prv_corr = beta_reg * MAX_DELAYREGLEN + alpha_reg; + + if ( TRUNC( reg_prv_corr ) <= -L_NCSHIFT_DS ) + { + reg_prv_corr = -L_NCSHIFT_DS + 1; + } + + if ( TRUNC( reg_prv_corr ) >= L_NCSHIFT_DS ) + { + reg_prv_corr = L_NCSHIFT_DS - 1; + } + + bias_par = A_BIAS * hStereoTCA->smooth_dist_reg_prv_corr + B_BIAS; + bias_par = min( bias_par, XH_BIAS ); + bias_par = max( bias_par, XL_BIAS ); + + width_par = A_WIDTH * hStereoTCA->smooth_dist_reg_prv_corr + B_WIDTH; + width_par = min( width_par, XH_WIDTH ); + width_par = max( width_par, XL_WIDTH ); + + win_width = (int16_t) ( width_par * ( 4 * L_NCSHIFT_DS + 1 ) ); + win_bias = bias_par; + k1 = 0.5f * ( 1.0f + win_bias ); + k2 = 0.5f * ( 1.0f - win_bias ); + + for ( i = 0; i < ( 2 * L_NCSHIFT_DS - 2 * win_width ); i++ ) + { + loc_weight_win[i] = win_bias; + } + + for ( i = ( 2 * L_NCSHIFT_DS - 2 * win_width ); i <= ( 2 * L_NCSHIFT_DS + 2 * win_width ); i++ ) + { + loc_weight_win[i] = k1 + k2 * cosf( EVS_PI * ( ( i - 2 * L_NCSHIFT_DS ) / ( 2.0f * win_width ) ) ); + } + + for ( i = ( 2 * L_NCSHIFT_DS + 2 * win_width ); i < ( 4 * L_NCSHIFT_DS + 1 ); i++ ) + { + loc_weight_win[i] = win_bias; + } + + for ( i = 0, j = L_NCSHIFT_DS - TRUNC( reg_prv_corr ); i < 2 * L_NCSHIFT_DS + 1; i++, j++ ) + { + corrEst[i] *= loc_weight_win[j]; + } + + if ( hStereoTCA->prevTargetGain < 0.8f && vad_flag1 ) + { + /* ch 2 is prev reference channel */ + v_multc( corrEst, 1.2f, corrEst, L_NCSHIFT_DS + 1 ); + v_multc( corrEst + L_NCSHIFT_DS + 1, 0.833f, corrEst + L_NCSHIFT_DS + 1, L_NCSHIFT_DS ); + } + else if ( hStereoTCA->prevTargetGain > 1.2f && vad_flag1 ) + { + /* ch 1 is prev reference channel */ + v_multc( corrEst, 0.833f, corrEst, L_NCSHIFT_DS ); + v_multc( corrEst + 
L_NCSHIFT_DS, 1.2f, corrEst + L_NCSHIFT_DS, L_NCSHIFT_DS + 1 ); + } + + if ( corrEst_ncorr > 0.8f && vad_flag1 ) + { + i = max( 0, hStereoTCA->prevCorrLagStats[0] - 1 + L_NCSHIFT_DS ); + j = min( 2 * L_NCSHIFT_DS, hStereoTCA->prevCorrLagStats[0] + 1 + L_NCSHIFT_DS ); + k = j - i + 1; + v_multc( corrEst + i, 1.2f, corrEst + i, k ); + } + + /* Initial corr lag estimate */ + corrLagStats[0] = maximum( corrEst, ( lagSearchRange[1] - lagSearchRange[0] + 1 ), &tempF ); + corrLagStats[0] += lagSearchRange[0]; + + stmp = corrLagStats[0] * dsFactor; + hStereoClassif->unclr_fv[E_corrLagStats0] = (float) stmp; + hStereoClassif->xtalk_fv[E_corrLagStats0] = (float) stmp; + hStereoClassif->xtalk_fv[E_ica_corr_value0] = tempF; + + if ( vad_flag1 == 0 && alpha > 0.7f ) + { + corrLagStats[0] = 0; + } + + dist_reg_prv_corr = fabsf( reg_prv_corr - corrLagStats[0] ); + + if ( vad_flag1 == 1 && vad_flag2 == 1 ) + { + hStereoTCA->smooth_dist_reg_prv_corr = SMOOTH_DIST_FACTOR * hStereoTCA->smooth_dist_reg_prv_corr + ( 1.0f - SMOOTH_DIST_FACTOR ) * dist_reg_prv_corr; + + mvr2r( &( hStereoTCA->delay_0_mem[1] ), &( hStereoTCA->delay_0_mem[0] ), MAX_DELAYREGLEN - 1 ); + + hStereoTCA->delay_0_mem[MAX_DELAYREGLEN - 1] = 0.2f * hStereoTCA->delay_0_mem[MAX_DELAYREGLEN - 1] + 0.8f * corrLagStats[0]; + + if ( fabsf( reg_prv_corr - hStereoTCA->delay_0_mem[0] ) > 25 ) + { + set_f( &( hStereoTCA->delay_0_mem[0] ), hStereoTCA->delay_0_mem[MAX_DELAYREGLEN - 1], MAX_DELAYREGLEN - 1 ); + } + } + else + { + hStereoTCA->smooth_dist_reg_prv_corr = 0.; + } + + if ( vad_flag1 == 0 || vad_flag2 == 0 ) + { + corrLagStats[0] = TRUNC( hStereoTCA->delay_0_mem[MAX_DELAYREGLEN - 1] ); + } + + /* second iteration of xcorr update @ inputFs with interp*/ + tempRK = hStereoTCA->corrEstLT - lagSearchRange[0] + corrLagStats[0]; + set_f( rInterp, 0, MAX_INTERPOLATE ); + + /* select the Rk interp sinc window */ + winInterp = ica_sincInterp4 + SINC_ORDER1; + if ( dsFactor == 2 ) + { + winInterp = ica_sincInterp2 + 
SINC_ORDER1; + } + else if ( dsFactor == 6 ) + { + winInterp = ica_sincInterp6 + SINC_ORDER1; + } + + corrLagStats[1] = corrLagStats[0] * dsFactor; + + interpMin = max( -( dsFactor - 1 ), -corrLagStats[1] - L_NCSHIFT_DS * dsFactor ); + interpMax = min( ( dsFactor - 1 ), L_NCSHIFT_DS * dsFactor - corrLagStats[1] ); + interpLen = interpMax - interpMin + 1; + + for ( i = interpMin, k = 0; i <= interpMax; i++, k++ ) + { + rInterp[k] = 0.0f; + for ( j = -SINC_ORDER1 / dsFactor; j <= SINC_ORDER1 / dsFactor; j++ ) + { + m = j * dsFactor; + if ( ( m - i >= -SINC_ORDER1 ) && ( m - i <= SINC_ORDER1 ) ) + { + if ( j > lagSearchRange[1] - corrLagStats[0] ) + { + rInterp[k] += winInterp[m - i] * tempRK[lagSearchRange[1] - corrLagStats[0]]; + } + else if ( j < lagSearchRange[0] - corrLagStats[0] ) + { + rInterp[k] += winInterp[m - i] * tempRK[lagSearchRange[0] - corrLagStats[0]]; + } + else + { + rInterp[k] += winInterp[m - i] * tempRK[j]; + } + } + } + } + corrLagStats[1] += ( maximum( rInterp, interpLen, &tempF ) + interpMin ); + + /* save corr lag stats for the current frame */ + mvs2s( corrLagStats, hStereoTCA->corrLagStats, 3 ); + + return; +} +#endif /*--------------------------------------------------------------- * Function estDownmixGain() diff --git a/lib_enc/rom_enc.c b/lib_enc/rom_enc.c index dea73a1af38e3a89723ecb54c3190bb6a481887a..9b4d998d65e41f874714cdd66559ca2bddf4bf7b 100644 --- a/lib_enc/rom_enc.c +++ b/lib_enc/rom_enc.c @@ -486,6 +486,10 @@ const float prec_chol_speech[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + N_PCA_COEF) 0.621627f, 0.011114f, 0.981192f, -0.253924f, -0.028998f, 0.716900f, -0.000541f, -0.058620f, -0.133470f, 1.285166f, 0.023501f, -0.107565f, 0.003875f, 0.521398f, 0.938485f, -0.274409f, -0.864538f, 0.475578f, 1.350173f, 0.704398f, 1.977759f, -0.009563f, -0.180193f, -0.152311f, 0.308965f, 0.476963f, 0.235046f, 1.431437f, 0.203638f, -1.193943f, -0.776895f, 2.853604f, 1.384001f, 2.607643f, 0.210453f, 4.753994f, -0.318441f, -0.641232f, 0.681624f, 
0.064183f, 0.062136f, 0.491965f, 0.078715f, -1.190437f, 2.299521f, 0.279093f, -0.531432f, 0.056751f, 0.908371f, 0.596757f, 1.108720f, 0.554711f, 1.171101f, -0.006235f, 2.159307f, 0.175362f, -0.505673f, 0.166484f, 0.362916f, 0.227361f, 0.587717f, 0.301477f, 0.363320f, 0.233596f, 0.708472f, 2.246567f, -0.273318f, 0.112575f, -0.014105f, -0.074279f, -0.077574f, -0.191145f, -0.280995f, 0.338633f, 0.068418f, -1.464972f, -0.873218f, 2.629376f, }; +#ifdef IVAS_FLOAT_FIXED +const Word32 prec_chol_speech_fx[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + N_PCA_COEF) / 2] = { 232432352, -59879628, 304533312, -125429152, 99142344, 326068544, -56826444, 46350480, 37527544, 329625600, 31308164, -19632564, 32069984, 57804624, 403167520, 3554890, 64722740, -62716724, 56281252, -4959882, 478142080, -101929504, -14876156, 39897560, -40777760, -47372952, 187926288, 341264960, 21311090, -155172336, 108759040, -143880592, -20300968, -5616206, 73920144, 570679040, 218755296, -162775504, -64450816, -7369627, -36893500, -37446476, -80293072, 8201777, 597213568, -152507040, -256513440, -45927696, -77678512, -140313632, -170642272, -94367944, 112342656, 201967072, 756224576, -208080960, 225792336, 58484840, 66006132, -48434348, 7039720, -102562208, 39706704, -339747744, -90058216, 796577920, 202634944, 37851548, 10156524, 82895824, 73911016, -121567440, -148495536, -112251120, 27724282, 54130280, 84421344, 906663552, 209343680, 32186754, 306301216, -116259936, -36924908, 224391904, -24221736, 35642860, 46462152, 312121440, -39075344, -7622225, 60599840, 68880272, 244525904, -58326728, -36760892, 23687014, 173225968, 23444078, 473461088, -15582947, -34961840, 37741220, 31105764, -74304544, 12497281, 425261888, 33453500, 9509863, -120815016, 63221652, 7134209, 101693816, 61658284, 770215168, -17228724, -276690656, 128551864, -10146055, -15149155, 107287480, -8414646, -387697024, 649792832, 50705580, -73682312, -57487332, 8967623, -3074928, 20067160, 135318848, 273094432, -35929816, 819158720, 
-19654308, -23457232, 20863072, 11649830, -4230006, 119185344, 150983408, -51511960, -81179712, 365100416, 677546368, -1104880, 67182952, -148998320, -17748684, -31462514, 36981012, -243459152, -30465546, 39841460, -70792064, -7307618, 793560192, 196828960, -32199102, 232271296, -76255536, 20992994, 207772000, 19910932, 13635984, 32428882, 280196960, -25198842, -1165278, 42992624, -13211856, 237527792, -62810408, -99852624, 47340472, 231473232, -57426664, 489151424, 19632028, 2961111, -8761465, 94918512, -12379975, 80280456, 362603424, 142636400, 32065152, -350124672, 127381216, 47764332, 62010736, -35439924, 1157346560, -346969728, -366012832, 596499264, 170357456, 55534196, 444685088, 84101632, -1036655616, 1080018176, 147994640, -5793911, -7695776, 55731228, 16318191, 137087568, 107977896, 172195712, -159871568, 713860352, 105155024, -52278340, -47440328, 26797374, 11704323, 154628224, 63859720, 177926800, -3174517, 294978624, 586304384, -55530976, 100304664, -184816464, -121156736, -22429392, -187445792, -66290940, 399174784, -145260096, -435817856, -227558112, 632079616, 235678272, -8552354, 240827680, -37353868, -9822859, 337976896, -48387368, -7377143, 35740572, 353392864, 21862994, -32516660, 28525294, 31415538, 414974112, -20909780, 53856472, -18235358, 45784888, -63774628, 489165088, -36009004, -13625515, 11037798, -62288300, 22793928, 41337988, 507775744, 36684656, -117420112, 22649242, -64967556, -21585164, -59302760, 61845652, 551227904, 28334704, -96908688, -10986795, -3158680, -1148098, -1090116, -21733072, -98534064, 548509440, -12526541, -124452312, -39815688, 28119150, -82867096, -48626816, 41780368, 15056276, 114722336, 752520384, -72335840, 138609600, 457950, 58517588, -34335848, 7930120, 68274944, 71795744, -218303520, 30022896, 684244416, 29311810, 24879134, -5031822, -3604014, -107700064, 34280548, -161518960, -29972698, 45524776, -28767690, 70451960, 997068864, 181172464, -10389794, 282143392, -18262200, -9411616, 194651136, -34642400, 
14696036, 32774090, 266709152, -12321724, -7934147, 42899744, 17961822, 326197120, -61064236, 2757905, -9434969, 114016616, -45294724, 423241632, 44844560, 7194607, -20771536, 17406428, -12688407, 27344446, 398144544, -20988432, 7578201, -36547220, 31998848, 1553972, 12336757, 23499376, 524998272, 38193264, -220705488, -3866007, -51896088, -8053332, 4608500, 2672543, 105980736, 617651200, 138849584, -138894416, -16658031, 14451491, -13349295, 42108932, 90309472, 45946488, 90752120, 714260864, -36247644, 218070784, 60232888, 133086544, 36377836, 146851648, 21597244, -80293880, -242705376, -25434260, 721932224, -73526080, 12684917, -26789322, -47208132, -29985852, -50325744, 4686614, 48175036, 81419696, -245451200, -121876408, 664714368, 166866720, 2983391, 263386720, -68162208, -7784091, 192441376, -145223, -15735686, -35828080, 344984128, 6308501, -28874260, 1040187, 139961712, 251922656, -73661104, -232072656, 127662000, 362434304, 189085392, 530900640, -2567048, -48370192, -40885672, 82937160, 128033784, 63094680, 384248448, 54663660, -320496640, -208546160, 766008512, 371514944, 699983808, 56493048, 1276140544, -85480856, -172129408, 182972048, 17228992, 16679505, 132060848, 21129896, -319555488, 617272960, 74918456, -142655184, 15233981, 243838976, 160190736, 297619744, 148904096, 314365024, -1673695, 579634560, 47073380, -135740560, 44690208, 97419520, 61031752, 157764080, 80927112, 97527968, 62705448, 190179008, 603058240, -73368240, 30219122, -3786282, -19939118, -20823612, -51310096, -75429024, 90901104, 18365818, -393250432, -234402672, 705817728 }; +#endif + const float weights_speech[N_SMC_MIXTURES] = { 0.110243f, 0.158178f, 0.164349f, 0.167653f, 0.235143f, 0.164434f, @@ -516,6 +520,13 @@ const float prec_chol_music[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + N_PCA_COEF) 1.209142f, 0.088647f, 1.211345f, -0.361558f, -0.636888f, 1.613629f, -0.073522f, 0.094440f, 0.348373f, 1.388855f, 0.080521f, -0.037507f, 0.070824f, 0.052632f, 1.876701f, -0.032255f, 
0.124895f, -0.048464f, -0.228421f, -0.227230f, 1.726811f, -0.291388f, -0.077976f, 0.299319f, -0.171845f, 0.012052f, 0.228501f, 2.014814f, -0.039524f, -0.252147f, 0.412488f, -0.546076f, -0.276125f, -0.498767f, 0.283854f, 1.733851f, 0.044785f, -0.173411f, 0.095814f, -0.035061f, 0.007025f, 0.236956f, -0.269935f, -0.192304f, 1.968567f, -0.667309f, -0.209738f, -0.260295f, 0.087080f, -0.241920f, -0.281580f, 0.361767f, -0.183381f, 0.175271f, 3.096483f, -0.472230f, -0.015951f, -0.043282f, -0.052428f, -0.352133f, -0.334534f, 0.390436f, 0.113373f, -0.300916f, 1.454841f, 2.885831f, 0.478838f, 0.050578f, -0.039767f, 0.114396f, -0.067675f, 0.013117f, -1.117973f, -0.208723f, 0.445495f, -0.031657f, 0.011242f, 4.388051f, }; +#ifdef IVAS_FLOAT_FIXED +const Word32 prec_chol_music_fx[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + N_PCA_COEF) / 2] = +{ +232322304, 25836912, 235062480, -21970368, -88834688, 289087264, 38061464, 12829336, 25661356, 216226640, 3946806, 35926596, -49888460, 17696070, 235923888, -6313870, -29279598, 78824728, -1208764, 27664958, 273027328, -37742024, -71072048, 103689640, -10025259, -37697196, -61880548, 396421184, -26288422, 15300284, -459293, -23770764, 3474360, -25473720, 47310944, 332768160, 39007700, 9191767, -7766106, -35545148, 21523424, 6770747, -124619552, 1410091, 508736736, -25959320, -7817109, 19565456, 1404185, -51538532, -45457932, 165445904, -28407450, -81223736, 586521024, -46704816, -52083996, 9754944, -31150324, -39091720, -40270152, 66884720, 30225296, 100608000, 24531512, 657688896, 68076576, 48213692, -48020152, 49097380, -4268929, 59026004, -192775040, -50962204, -31122406, -117876448, -182476256, 625576192, 144678656, -9655086, 218327952, 6044361, -48620372, 254562448, -42861896, -13298292, 33123862, 287620000, -12122277, -30804848, 33071786, 16293764, 388198208, -19213268, 24338774, 14173929, -33506382, -87690080, 361291840, 34886676, 52138220, -48904644, -17044846, 2490812, 73471056, 354929120, 40659380, 38784092, 24256364, -112064552, 
-5929471, -94763888, -36720092, 322304000, -56521232, -65294776, 11020081, -15673409, 12677670, -51147960, -59997204, 68434128, 456261088, 44138304, 4000225, -77793136, 61852092, -3655554, -41457440, 71121976, -86157848, -69391640, 621925248, 56908, 50305072, -2964869, 102686760, -14214731, -68874360, 41617696, -34848560, -20007568, 56042344, 554498560, -36107792, -20590342, -25757992, -64163052, 53273432, 35053644, -77861048, -7609071, 98731368, 8330089, 81691888, 807686592, 206408080, 29242554, 246745872, -30320054, -141839152, 416885632, -31085094, 1968705, -7943542, 294907488, 972004, 26633630, -66413080, -21165868, 367168960, 11512928, -20904948, 56274004, -44399224, -4818953, 360435008, -49596404, 13496398, 7960453, 47073916, -4338185, -43219720, 462487456, 13815031, 2689991, 103510592, -71014872, -75477072, -97242624, -9769708, 371118464, -17768548, -22740510, -33236068, -8622415, 24936848, 18291192, -70021120, 2836020, 476147872, 63471028, 25856776, -21647440, 32818918, 34664144, 29404152, 114646368, -122478512, -68419632, 815255936, 35136860, -35646080, -34052916, 1725771, 2581812, -62708940, 215970288, -82662016, -34325112, 447461504, 753578304, -99274408, -3290213, 4633733, -15484968, -36483064, 40284916, -407272960, 58910844, 134671648, -601330048, -345841248, 981086208, 206335056, 1025423, 222901824, -58332100, -39948832, 262789184, -18609020, 1410628, -1774089, 212590416, 18125298, 28513752, -59463552, 4719095, 246369264, -81872, -23613194, 15571135, -16710912, 46739712, 268572096, 29908542, 37123820, 22983444, 55254756, 2779112, -57131120, 341562368, 5126580, 21001584, -6036039, -24000546, -46808700, -51209432, -9852655, 337137760, -8485513, -113351704, -16093779, -60946660, 8815152, -10634339, -80527416, 50575120, 478848064, 149115360, 27080038, 22408992, -24449906, 61073900, 40576168, 139484976, -77672608, -93439968, 705148288, 18536542, 21612006, -26196616, -17896592, 48906792, 10430328, 52505440, -55231936, -34737696, 181267760, 633243008, 
-77855408, -1544846, -16549583, -11117523, 31134218, 10136123, -124216360, 27077622, 80264352, -345837472, -203551920, 593004544, 99558952, 18271060, 165076528, -18376286, -30611842, 213814752, -21311628, -7249636, 15457856, 170620528, -4867540, -16669573, -49741896, 22724940, 143143472, -39989100, -10398384, 72010760, 44482440, 6079526, 200783008, -1092263, 15936208, 13659607, 18654116, -27689922, -23768886, 188797104, 12175964, -32829120, -61108256, 16816140, 3177470, 18617342, -5304821, 286432992, 28388660, -118288232, 51768048, -15788031, 7528809, 37260184, -33815620, -179149264, 306826016, -17569370, 22469390, -41274636, -16302085, -108179, -24384408, 16991428, -10070088, 94377344, 317392448, -28549990, -4354291, -2879238, 26857504, 15448729, 28430268, -357556, -21060910, -72179608, -65214248, 414794528, 6270384, -21697370, 62073552, -9898557, 21225192, 16768895, 1158030, 20269024, 9883257, 18578686, -97534416, 397891136, 324576576, 23795998, 325167936, -97054984, -170963328, 433155232, -19735912, 25351044, 93515664, 372817920, 21614692, -10068209, 19011672, 14128295, 503773088, -8658386, 33526246, -13009456, -61316296, -60996588, 463537312, -78218872, -20931524, 80347832, -46129292, 3235184, 61337772, 540847488, -10609643, -67685192, 110726408, -146586160, -74121744, -133886744, 76196480, 465427072, 12021882, -46549660, 25719874, -9411616, 1885759, 63607392, -72460128, -51621212, 528433184, -179129392, -56301116, -69872408, 23375360, -64939904, -75586056, 97111088, -49225964, 47048952, 831205824, -126763272, -4281814, -11618423, -14073534, -94524984, -89800784, 104806864, 30433332, -80776520, 390530912, 774659392, 128537096, 13576928, -10674873, 30707942, -18166370, 3521068, -300103584, -56028652, 119586656, -8497861, 3017751, 1177908480, +}; +#endif + const float weights_music[N_SMC_MIXTURES] = { 0.156143f, 0.136090f, 0.300104f, 0.193919f, 0.051812f, 0.161931f, @@ -546,6 +557,12 @@ const float prec_chol_noise[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + 
N_PCA_COEF) 1.388928f, -0.529069f, 1.914570f, -0.246147f, -0.376892f, 1.708861f, -0.205445f, 0.314175f, 0.650069f, 1.841539f, -0.085711f, -0.013295f, -0.070955f, -0.466548f, 3.166998f, -0.260572f, 0.720834f, -0.792817f, -1.467658f, -0.372909f, 2.710914f, 0.606852f, 0.637359f, -0.541404f, -0.330263f, 0.170983f, 0.064970f, 2.980650f, 0.184834f, -0.340049f, 0.288191f, -1.044021f, -0.466651f, -0.374637f, -0.231777f, 2.131845f, 0.132432f, -0.963210f, -0.294608f, 0.042302f, -0.114246f, -0.932316f, -0.166230f, 0.203508f, 2.563777f, 0.548839f, -0.440379f, -0.934541f, -0.344106f, 0.030490f, -0.377361f, -0.085154f, -0.262774f, 0.477136f, 4.772394f, 0.084517f, 0.357867f, -0.429870f, 0.688023f, 0.353825f, -0.439173f, 0.076948f, -0.769601f, -1.177351f, 1.958859f, 4.300092f, -0.207814f, 0.243404f, -0.706169f, -0.588024f, 0.962382f, 0.338356f, -0.298229f, -0.351656f, 0.768647f, 0.689511f, 0.692148f, 6.121235f, }; +#ifdef IVAS_FLOAT_FIXED +const Word32 prec_chol_noise_fx[N_SMC_MIXTURES*(N_PCA_COEF*N_PCA_COEF + N_PCA_COEF) / 2] = +{ 438274304, -101757704, 527959936, -92401656, -40066944, 569343040, -172637280, 92605400, 90126936, 502116576, -70281768, 15855140, 42644192, -75216424, 659778944, 21871852, 200184128, -66884720, -206631952, -271753600, 579995584, -41937404, 180430768, -70175472, 31213406, -61285692, -15286594, 914624320, 95589064, -197632384, 189309536, -378043296, -192572912, -278483808, 89525912, 667027200, 86715120, -398169504, -134308192, -24187914, -117845048, -77955000, 87847112, -65235184, 674295616, 118658400, 141179872, -385477600, 90381952, 47387716, -172783040, 213091584, -92059136, 180518816, 1113415808, 89164056, 159457376, -287768704, 140198736, 53548040, 12615661, 211052816, -195082784, -315002016, 199766976, 1060323264, -162984080, -169212048, 275297472, -209311472, -148597008, 90449592, -308289792, 131478344, 206830864, -316232544, -116264224, 1640643200, 575459840, -110880488, 540680576, -20299626, -120988960, 592097728, 75230912, 156925216, 72533680, 
553807552, -272710816, -155779808, 38634840, -156862400, 752763072, -182010784, 94162056, -90105192, -416297760, -234046992, 690654080, 181351232, 128870224, -96438120, 75957304, 10280273, -216431728, 1041874240, -2115271, -161272000, 112353664, -387182976, 14206141, -50861004, -182521616, 664376128, -13428752, -358865984, -61852360, -3840774, -153737552, -335891136, 127812056, 72605352, 719982528, -11629161, -73992352, -370089824, -83219288, -95394448, -78352816, -28330678, -1691143, 79366160, 1178542208, -85602184, 94195616, -206783344, 134659312, -21508390, -89069568, 127365104, -119468808, -308679296, 307254432, 1144636160, 19030194, -8984535, -70737840, -155128048, 221865936, 50559280, 68931808, -94236416, 266750224, 260988512, 297829664, 1781693056, 421042624, -59415772, 527790560, -98705864, -94187288, 738609024, -33151778, 77225928, -97992632, 568460672, 43811888, 22125524, -44354932, -38434856, 696296640, 8982924, 76764488, 78076064, -148321856, -92465280, 669700032, 31240518, 1078842, -80158048, 17065784, 42776532, -97703528, 836559488, 101995008, -23285702, -44140184, -65183912, -32696244, -173090400, 116281136, 675435968, 24943022, -289375584, -28602066, -60206048, -67754184, -46702400, 88339696, -173576816, 792158656, 13933948, -103396240, -80619760, 4375229, 22152636, 20907632, 211938912, -232494096, 278967264, 1247803392, -70594232, 74254616, -54600844, 26415928, -10635144, 33370554, 298018912, -152319936, -244328336, 782944064, 1259614592, 20826564, 8513699, 101483096, -35796672, -149573040, -20171850, -532795264, 142151072, 163440688, -597162880, -376404768, 1708897920, 209270944, -23997324, 331448000, 6588480, -58776092, 382514624, -18828332, 39394512, -32496796, 381425568, 21953188, -17026056, -17383880, 29140548, 410380096, -22143510, 714575, 45383844, 30147180, -75072000, 468424448, 6863357, -2480612, -3718636, -18806856, 45715632, -12727867, 520436480, 52698712, 758061, -3390071, -57345060, -19137300, -94494648, 48669764, 553184256, 15456245, 
-55625196, 37220456, -21190832, 14073266, 37691024, -82113600, -125746712, 566309184, 24663312, -79263352, -81187768, 18228110, -23309324, -35227592, 45638592, -100310304, 111057384, 747604032, -38703024, -49378972, -1829387, -24915374, -44106092, 3890166, 95093528, 4453076, -105516344, 61340456, 779151360, 14067092, 24841018, 46075604, 14026826, -73798008, -27891786, -172234896, 22042846, 8342168, -136356624, -15677436, 917865664, 323919712, -183311616, 550143680, -136109392, -92123024, 508834976, -36581580, 25082610, 186993744, 612834112, 3323231, -41768288, 10869757, -32662958, 900676672, -132146208, 173920944, -285617472, -265424416, -17592454, 746075264, 174698064, 87668608, -12579422, 50304000, -52162648, -8793140, 1121056000, 147971552, -143892672, 40492148, -332957664, -99433056, -229635520, 90426776, 754189248, 91340536, -315738880, 3497714, 95215400, -131254200, -125219504, 210190864, -40012452, 774223424, 323906560, 82644032, -378786592, 18817058, 56163140, 78116600, -101264320, -171897472, 108985600, 1384031744, 217351648, 179599424, -252806064, 287773824, -31378494, -40052984, 121498448, -198681968, -319067488, 616561856, 1259752576, -10863851, -35322348, 214155392, -580386432, 360088192, 294499200, -423245408, 122124976, 92175096, -14300899, 19809194, 1852783360, 372837536, -142020880, 513938464, -66074584, -101171176, 458718880, -55148724, 84335712, 174501568, 494334368, -23007872, -3568849, -19046838, -125238024, 850134528, -69946760, 193497408, -212820192, -393971456, -100102000, 727705408, 162900592, 171089760, -145332032, -88654296, 45897900, 17440252, 800112128, 49616000, -91281208, 77360680, -280252256, -125265672, -100565856, -62217164, 572262784, 35549444, -258559712, -79083232, 11355357, -30667678, -250266672, -44622024, 54628764, 688208640, 147327840, -118213336, -250863936, -92370248, 8184597, -101297072, -22858352, -70537856, 128080216, 1281079808, 22687360, 96064192, -115392352, 184689760, 94979176, -117889608, 20655572, -206588192, 
-316042752, 525827200, 1154297216, -55784644, 65338264, -189560800, -157846496, 258337456, 90826744, -80055240, -94396936, 206332112, 185089200, 185797056, 1643156480, +}; +#endif + const float weights_noise[N_SMC_MIXTURES] = { 0.186702f, 0.110792f, 0.358051f, 0.085247f, 0.132809f, 0.126399f, diff --git a/lib_enc/rom_enc.h b/lib_enc/rom_enc.h index cc009fc7a295510cf69ce593be45a98b43d41053..812db4821e8047a3c9fe7c84ae18a6f65df3ffb8 100644 --- a/lib_enc/rom_enc.h +++ b/lib_enc/rom_enc.h @@ -137,6 +137,12 @@ extern const Word32 invV_speech_fx[]; extern const float lvm_speech[]; extern const Word32 lvm_speech_fx[]; +#ifdef IVAS_FLOAT_FIXED +extern const Word32 prec_chol_speech_fx[]; +extern const Word32 prec_chol_music_fx[]; +extern const Word32 prec_chol_noise_fx[]; +#endif + extern const float m_music[]; extern const Word16 m_music_fx[]; extern const float invV_music[]; diff --git a/lib_enc/speech_music_classif.c b/lib_enc/speech_music_classif.c index fe0ea59681dc022be6f402533d0da90b58076c9f..3fe6fe53b64c399dfae134de51f73eb3d4449387 100644 --- a/lib_enc/speech_music_classif.c +++ b/lib_enc/speech_music_classif.c @@ -44,6 +44,9 @@ #include "rom_enc.h" #include "rom_com.h" /* Common static table prototypes */ #include "wmc_auto.h" +#ifdef IVAS_FLOAT_FIXED +#include "prot_fx2.h" +#endif /*---------------------------------------------------------------------* @@ -1128,6 +1131,706 @@ static int16_t attack_det( *---------------------------------------------------------------------*/ /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */ + +#ifdef IVAS_FLOAT_FIXED +int16_t ivas_smc_gmm( + Encoder_State *st, /* i/o: state structure */ + STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ + const int16_t localVAD_HE_SAD, /* i : HE-SAD flag without hangover */ + const float Etot, /* i : total frame energy */ + const float lsp_new[M], /* i : LSPs in current frame */ + const float cor_map_sum, /* i : correlation map sum (from multi-harmonic anal.)
*/ + const float epsP[M + 1], /* i : LP prediction error */ + const float PS[], /* i : energy spectrum */ + const float non_sta, /* i : unbound non-stationarity */ + const float relE, /* i : relative frame energy */ + int16_t *high_lpn_flag, /* i/o: sp/mus LPN flag */ + const int16_t flag_spitch /* i : flag to indicate very short stable pitch */ +) +{ + int16_t i, m, dec; + int16_t flag_odv; + float lps, lpm, lpn; + float ps[N_SMC_MIXTURES], pm[N_SMC_MIXTURES], pn[N_SMC_MIXTURES]; + float fvm[N_PCA_COEF], lprob; + float dlp, ftmp, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght; + float wrise; + float dlp_mean2var; + float FV[N_SMC_FEATURES], *pFV, PS_norm[128], dPS[128]; + const float *pODV; + float *pFV_st, smc_st_mean_fact; + int16_t relE_attack_flag; + int16_t j, len; + const float *pt_mel_fb; + float melS[NB_MEL_BANDS], mfcc[NB_MEL_BANDS]; + int16_t odv_cnt; + int16_t i_out[N_SMC_FEATURES], *p_out; + + /*------------------------------------------------------------------* + * Initialization + *------------------------------------------------------------------*/ + + SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; + + /*------------------------------------------------------------------* + * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE ) + *------------------------------------------------------------------*/ + + if ( localVAD_HE_SAD ) + { + if ( relE < -20 ) + { + if ( hSpMusClas->sp_mus_state > 0 ) + { + if ( hSpMusClas->sp_mus_state < HANG_LEN ) + { + /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */ + hSpMusClas->inact_cnt = 0; + } + + /* energy is too low -> we are going to unstable state */ + hSpMusClas->sp_mus_state = 0; + } + else if ( hSpMusClas->sp_mus_state > -HANG_LEN ) + { + /* energy is still too low -> we are still in unstable state */ + hSpMusClas->sp_mus_state--; + } + } + else if ( hSpMusClas->sp_mus_state <= 0 ) + { + if ( hSpMusClas->inact_cnt == 0 ) + {
+ /* inactive counter is zero -> start a new entry period */ + hSpMusClas->sp_mus_state = 1; + } + else + { + /* inactive counter is still running -> jump directly to sp_mus_state == HANG_LEN */ + hSpMusClas->sp_mus_state = HANG_LEN; + } + + hSpMusClas->inact_cnt = 12; + } + else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + /* we are inside an entry period -> increment the counter of entry frames */ + hSpMusClas->sp_mus_state++; + } + + if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 ) + { + hSpMusClas->inact_cnt--; + } + } + else + { + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + hSpMusClas->inact_cnt = 0; + } + else if ( hSpMusClas->inact_cnt > 0 ) + { + hSpMusClas->inact_cnt--; + } + + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + hSpMusClas->sp_mus_state = -HANG_LEN; + } + else if ( hSpMusClas->sp_mus_state > 0 ) + { + hSpMusClas->sp_mus_state = -1; + } + else if ( hSpMusClas->sp_mus_state > -HANG_LEN ) + { + /* we are in inactive state */ + hSpMusClas->sp_mus_state--; + } + } + + /* detect attacks based on relE */ + if ( relE > hSpMusClas->prev_relE ) + { + hSpMusClas->relE_attack_sum += relE - hSpMusClas->prev_relE; + } + else + { + hSpMusClas->relE_attack_sum = 0; + } + hSpMusClas->prev_relE = relE; + + /* update counter from last VAD 0->1 change */ + if ( hSpMusClas->prev_vad == 0 && localVAD_HE_SAD == 1 ) + { + hSpMusClas->vad_0_1_cnt = 1; + } + else if ( localVAD_HE_SAD == 1 && hSpMusClas->vad_0_1_cnt > 0 && hSpMusClas->vad_0_1_cnt < 50 ) + { + hSpMusClas->vad_0_1_cnt++; + } + else + { + hSpMusClas->vad_0_1_cnt = 0; + } + hSpMusClas->prev_vad = localVAD_HE_SAD; + + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && hSpMusClas->relE_attack_sum > 5.0f ) + { + hSpMusClas->relE_attack_cnt++; + + /* set flag only in the first X frames in a series */ + if ( hSpMusClas->relE_attack_cnt > 0 && hSpMusClas->relE_attack_cnt < 3 ) + { + relE_attack_flag = 1; + } + else + { + relE_attack_flag = 0; + } + } + else + { + hSpMusClas->relE_attack_cnt = 0; +
relE_attack_flag = 0; + } + + hSpMusClas->prev_Etot = Etot; + + /*------------------------------------------------------------------* + * Preparation of the feature vector + *------------------------------------------------------------------*/ + + pFV = FV; + + /* [0] OL pitch */ + if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 ) + { + *pFV++ = (float) st->pitch[2]; + } + else + { + *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f; + } + + /* [1] voicing */ + if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 ) + { + *pFV++ = st->voicing[2]; + } + else + { + *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f; + } + + /* [2,3,4,5,6] LSFs */ + *pFV++ = acosf( lsp_new[2] ); + *pFV++ = acosf( lsp_new[3] ); + *pFV++ = acosf( lsp_new[4] ); + *pFV++ = acosf( lsp_new[5] ); + *pFV++ = acosf( lsp_new[6] ); + + /* [7] cor_map_sum */ + *pFV++ = cor_map_sum; + + /* [8] non_sta */ + *pFV++ = non_sta; + + /* [9] epsP */ + *pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f ); + + /* [10,11,12] MFCCs */ + set_zero( melS, NB_MEL_BANDS ); + pt_mel_fb = mel_fb; + for ( i = 0; i < NB_MEL_BANDS; i++ ) + { + j = mel_fb_start[i]; + len = mel_fb_len[i]; + melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f ); + pt_mel_fb += len; + } + + v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF ); + + *pFV++ = mfcc[2]; + *pFV++ = mfcc[6]; + *pFV++ = mfcc[12]; + + /* calculation of differential normalized power spectrum */ + sum_PS = 1e-5f; + for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) + { + sum_PS += PS[i]; + } + + for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) + { + PS_norm[i] = PS[i] / sum_PS; + dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] ); + } + + /* [13] ps_diff (spectral difference) */ + ps_diff = 0; + for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ ) + { + ps_diff += dPS[i]; + } + + *pFV++ = ps_diff; + + /* [14] ps_sta (spectral stationarity) */ + ps_sta = 0; + for ( i = LOWEST_FBIN; i <
HIGHEST_FBIN; i++ ) + { + if ( PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] ) + { + ps_sta += PS_norm[i] / ( dPS[i] + 1e-5f ); + } + else + { + ps_sta += hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f ); + } + } + + *pFV++ = logf( ps_sta + 1e-5f ); + mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN ); + + /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */ + if ( hStereoClassif != NULL ) + { + if ( st->idchan == 0 ) + { + hStereoClassif->ps_diff_ch1 = ps_diff; + hStereoClassif->ps_sta_ch1 = logf( ps_sta + 1e-5f ); + } + else + { + hStereoClassif->ps_diff_ch2 = ps_diff; + hStereoClassif->ps_sta_ch2 = logf( ps_sta + 1e-5f ); + } + } + + /*------------------------------------------------------------------* + * Outlier detection based on feature histograms + *------------------------------------------------------------------*/ + + flag_odv = 0; + if ( localVAD_HE_SAD ) + { + pFV = FV; + pODV = hout_intervals; + p_out = i_out; + odv_cnt = 0; + for ( i = 0; i < N_SMC_FEATURES; i++ ) + { + if ( *pFV < pODV[0] || *pFV > pODV[1] ) + { + *p_out++ = i; + odv_cnt++; + } + + pFV++; + pODV += 2; + } + + /* set outlier flag */ + if ( odv_cnt >= 2 ) + { + flag_odv = 1; + + /* replace outlying features with values from the previous frame */ + for ( i = 0; i < odv_cnt; i++ ) + { + FV[i_out[i]] = hSpMusClas->prev_FV[i_out[i]]; + } + } + } + + /*------------------------------------------------------------------* + * Adaptive short-term mean filter on feature vector + *------------------------------------------------------------------*/ + + pFV = FV; + pFV_st = hSpMusClas->FV_st; + smc_st_mean_fact = SMC_ST_MEAN_FACT; + for ( i = 0; i < N_SMC_FEATURES; i++ ) + { + *pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV ); + + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) + { + /* strong attack or outlier frame during entry state -> features
cannot be trusted but there is also no useful past info -> */ + /* -> do whatever you want because dlp will be reset to 0 anyway */ + pFV++; + pFV_st++; + } + else if ( hSpMusClas->sp_mus_state == HANG_LEN && ( st->tc_cnt == 1 || st->tc_cnt == 2 ) ) + { + /* energy attack in stable state -> use current features instead of the long-term average */ + pFV++; + pFV_st++; + } + else + { + *pFV++ = *pFV_st++; + } + } + + /* update */ + mvr2r( FV, hSpMusClas->prev_FV, N_SMC_FEATURES ); + + /*------------------------------------------------------------------* + * Non-linear power transformation (boxcox) on certain features + *------------------------------------------------------------------*/ + + pFV = FV; + for ( i = 0; i < N_SMC_FEATURES; i++ ) + { + if ( bcox_lmbd[i] != 0 ) + { + *pFV -= bcox_add_cnst[i]; + if ( *pFV < 1 ) + { + *pFV = 1; + } + *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i]; + } + + pFV++; + } + + /*------------------------------------------------------------------* + * Scaling of the feature vector + * PCA + *------------------------------------------------------------------*/ + + pFV = FV; + for ( i = 0; i < N_SMC_FEATURES; i++ ) + { + /* Standard scaler - mean and variance normalization */ + *pFV = ( *pFV - sm_means[i] ) / sm_scale[i]; + pFV++; + + /* MinMax scaler - mean and variance normalization */ + /**pFV = *pFV * sm_scale[i] + sm_min[i];*/ + /*pFV++;*/ + } + + /* PCA */ + v_sub( FV, pca_mean_, FV, N_SMC_FEATURES ); + v_mult_mat( FV, FV, pca_components_, N_SMC_FEATURES, N_PCA_COEF ); + + /*------------------------------------------------------------------* + * Calculation of posterior probability + * Log-probability + *------------------------------------------------------------------*/ + +#ifdef IVAS_FLOAT_FIXED + Word32 fvm_fx[N_PCA_COEF], lprob_fx; /* NOTE(review): this function is already compiled only under IVAS_FLOAT_FIXED, so this nested #ifdef is always true and the #else branches in the mixture loop are never compiled */ + Word16 fvm_q, guard_bits; + guard_bits = find_guarded_bits_fx( N_PCA_COEF ); // 12!
479001600 +#endif + /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */ + lps = lpm = lpn = 0; + for ( m = 0; m < N_SMC_MIXTURES; m++ ) + { + v_sub( FV, &means_speech[m * N_PCA_COEF], fvm, N_PCA_COEF ); + +#ifdef IVAS_FLOAT_FIXED + f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); + fvm_q = sub( 31, fvm_q ); + scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); + fvm_q = sub( fvm_q, guard_bits ); /* NOTE(review): the shift passed to scale_sig32 reduces to -guard_bits and fvm_q is lowered by the same amount - presumably headroom for the accumulation in dot_product_cholesky_fx, confirm */ + + lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_speech_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); + lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); /* NOTE(review): the constant 37 presumably depends on the Q-format of the prec_chol_*_fx tables - confirm */ +#else + lprob = dot_product_cholesky( fvm, &prec_chol_speech[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); +#endif + ps[m] = logf( weights_speech[m] ) + log_det_chol_speech[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; + + v_sub( FV, &means_music[m * N_PCA_COEF], fvm, N_PCA_COEF ); +#ifdef IVAS_FLOAT_FIXED + f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); + fvm_q = sub( 31, fvm_q ); + scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); + fvm_q = sub( fvm_q, guard_bits ); + lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_music_fx[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); + lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); +#else + lprob = dot_product_cholesky( fvm, &prec_chol_music[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); +#endif + pm[m] = logf( weights_music[m] ) + log_det_chol_music[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; + + v_sub( FV, &means_noise[m * N_PCA_COEF], fvm, N_PCA_COEF ); +#ifdef IVAS_FLOAT_FIXED + f2me_buf( fvm, fvm_fx, &fvm_q, N_PCA_COEF ); + fvm_q = sub( 31, fvm_q ); + scale_sig32( fvm_fx, N_PCA_COEF, sub( sub( fvm_q, guard_bits ), fvm_q ) ); + fvm_q = sub( fvm_q, guard_bits ); + lprob_fx = dot_product_cholesky_fx( fvm_fx, &prec_chol_noise_fx[m * ( N_PCA_COEF * N_PCA_COEF +
N_PCA_COEF ) / 2], N_PCA_COEF ); + lprob = fixedToFloat( lprob_fx, sub( 2 * fvm_q, 37 ) ); +#else + lprob = dot_product_cholesky( fvm, &prec_chol_noise[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF ); +#endif + pn[m] = logf( weights_noise[m] ) + log_det_chol_noise[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob; + } + + lps = logsumexp( ps, N_SMC_MIXTURES ); + lpm = logsumexp( pm, N_SMC_MIXTURES ); + lpn = logsumexp( pn, N_SMC_MIXTURES ); + + *high_lpn_flag = 0; + if ( lpn > lps && lpn > lpm ) + { + *high_lpn_flag = 1; + } + + hSpMusClas->lpm = lpm; + hSpMusClas->lps = lps; + hSpMusClas->lpn = lpn; + + /* determine HQ Generic speech class */ + if ( st->hHQ_core != NULL ) + { + if ( lps > lpm + 0.5f ) + { + st->hHQ_core->hq_generic_speech_class = 1; + } + else + { + st->hHQ_core->hq_generic_speech_class = 0; + } + } + + /*------------------------------------------------------------------* + * Decision without hangover + * Weighted decision + *------------------------------------------------------------------*/ + + /* decision without hangover (0 - speech/noise, 1 - music) */ + if ( !localVAD_HE_SAD || Etot < 10 || ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) ) + { + dlp = 0; + } + else + { + dlp = lpm - lps + DLP_BIAS; + + if ( dlp > 30.0f ) + { + dlp = 30.0f; + } + else if ( dlp < -30.0f ) + { + dlp = -30.0f; + } + } + + dec = dlp > 0; + + /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */ + wrelE = lin_interp( relE, 15.0f, 0.9f, -15.0f, 0.99f, 1 ); + + /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */ + hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp; + hSpMusClas->lt_dec_thres = hSpMusClas->dlp_mean_ST; + + if ( dlp < 0 && dlp < hSpMusClas->dlp_mean_ST ) + { + if ( hSpMusClas->dlp_mean_ST > 0 ) + { + hSpMusClas->wdrop = -dlp; + } + else if ( hSpMusClas->wdrop
> 0 ) + { + hSpMusClas->wdrop += hSpMusClas->dlp_mean_ST - dlp; + } + } + else + { + hSpMusClas->wdrop = 0; + } + + wdrop = lin_interp( hSpMusClas->wdrop, 15.0f, 0.7f, 0.0f, 1.0f, 1 ); + + /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */ + if ( hSpMusClas->sp_mus_state == HANG_LEN && hSpMusClas->dlp_mean_ST > 0 && hSpMusClas->dlp_mean_ST > hSpMusClas->past_dlp_mean_ST[0] ) + { + if ( hSpMusClas->past_dlp_mean_ST[0] < 0 ) + { + hSpMusClas->wrise = hSpMusClas->dlp_mean_ST; + } + else if ( hSpMusClas->wrise > 0 ) + { + hSpMusClas->wrise += hSpMusClas->dlp_mean_ST - hSpMusClas->past_dlp_mean_ST[0]; + } + } + else + { + hSpMusClas->wrise = 0; + } + + wrise = lin_interp( hSpMusClas->wrise, 5.0f, 0.95f, 0.0f, 1.0f, 1 ); + + /* combine weights into one */ + wght = wrelE * wdrop * wrise; + + /* ratio of delta means vs. delta variances */ + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + hSpMusClas->dlp_mean_LT = dlp; + hSpMusClas->dlp_var_LT = 0; + } + + hSpMusClas->dlp_mean_LT = 0.9f * hSpMusClas->dlp_mean_LT + 0.1f * dlp; + ftmp = dlp - hSpMusClas->dlp_mean_LT; + hSpMusClas->dlp_var_LT = 0.9f * hSpMusClas->dlp_var_LT + 0.1f * ( ftmp * ftmp ); + + if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + dlp_mean2var = 0; + } + else + { + dlp_mean2var = fabsf( hSpMusClas->dlp_mean_LT ) / ( sqrtf( fabsf( hSpMusClas->dlp_var_LT ) ) + 1.0f ); + } + + if ( dlp_mean2var > 15.0f ) + { + /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */ + wght *= 0.9f; + } + + if ( wght > 1.0f ) + { + wght = 1.0f; + } + else if ( wght < 0.01f ) + { + wght = 0.01f; + } + + if ( Etot < 10 ) + { + /* silence */ + wght = 0.92f; + } + + /* calculate weighted decision */ + hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp; + + /* xtalk classifier: apply long hysteresis to prevent LRTD on music */ +
hSpMusClas->wdlp_xtalk = 0.995f * hSpMusClas->wdlp_xtalk + 0.005f * dlp; + + /*------------------------------------------------------------------* + * Final speech/music decision + *------------------------------------------------------------------*/ + + if ( flag_spitch ) + { + hSpMusClas->flag_spitch_cnt = 5; + } + else if ( hSpMusClas->flag_spitch_cnt > 0 ) + { + hSpMusClas->flag_spitch_cnt--; + } + + if ( Etot < 10 ) + { + /* silence */ + dec = 0; + } + else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN ) + { + /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */ + ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp; + ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 ); + if ( ftmp > 2.0f ) + { + if ( dlp > 2.0f ) + { + dec = 2; + } + else + { + dec = 1; + } + } + else + { + dec = 0; + } + } + else + { + /* stable active state */ + if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 && + ( ( hSpMusClas->flag_spitch_cnt > 0 && hSpMusClas->wdlp_0_95_sp > 3.4f ) || ( hSpMusClas->flag_spitch_cnt == 0 && hSpMusClas->wdlp_0_95_sp > 2.1f ) ) ) + { + /* switching from speech to unclear */ + dec = 1; + } + else if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->vad_0_1_cnt < 50 && hSpMusClas->relE_attack_sum == 0.0f && hSpMusClas->wdlp_0_95_sp > 1.0f ) + { + /* switch from speech to unclear also during slowly rising weak music onsets */ + dec = 1; + } + else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp > 2.5f ) + { + /* switching from unclear to music */ + dec = 2; + } + else if ( hSpMusClas->past_dec[0] == 2 && hSpMusClas->past_dec[1] == 2 && hSpMusClas->past_dec[2] == 2 && hSpMusClas->wdlp_0_95_sp < -1.0f ) + { + /* switching from music to unclear */ + dec = 1; + } + else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < -2.5f ) + { + /* switching from unclear to speech */
+ dec = 0; + } + else + { + dec = hSpMusClas->past_dec[0]; + } + } + + /*------------------------------------------------------------------* + * raw S/M decision based on smoothed GMM score + *------------------------------------------------------------------*/ + + if ( dec == 0 || st->hSpMusClas->wdlp_0_95_sp <= 0 ) + { + st->sp_aud_decision0 = 0; + st->sp_aud_decision1 = 0; + } + else + { + st->sp_aud_decision0 = 1; + st->sp_aud_decision1 = 1; + } + + /*------------------------------------------------------------------* + * Updates + *------------------------------------------------------------------*/ + + /* update buffer of past non-binary decisions */ + mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 ); + hSpMusClas->past_dlp[0] = dlp; + + mvr2r( &hSpMusClas->past_dlp_mean_ST[0], &hSpMusClas->past_dlp_mean_ST[1], HANG_LEN - 2 ); + hSpMusClas->past_dlp_mean_ST[0] = hSpMusClas->dlp_mean_ST; + + /* update buffer of past binary decisions */ + mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 ); + hSpMusClas->past_dec[0] = dec; + + + return dec; +} +#else int16_t ivas_smc_gmm( Encoder_State *st, /* i/o: state structure */ STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */ @@ -1791,6 +2494,7 @@ int16_t ivas_smc_gmm( return dec; } +#endif /*---------------------------------------------------------------------* * ivas_smc_mode_selection()