diff --git a/lib_com/options.h b/lib_com/options.h old mode 100755 new mode 100644 index b3d3c5a736b0144462dae95dbccf7406982bca02..ec3e206c4f96369945787ba892295bc82f8944ec --- a/lib_com/options.h +++ b/lib_com/options.h @@ -154,6 +154,11 @@ #define FIX_I87 /* fix for issue 86: incorrect Ambisonics order set for head rotation in SBA */ #define SBA_ORDER_BITSTREAM /* issue 76: Use input sba order for bitstream coding */ + +/* NTT switches */ +#define NTT_UPDATE_ITD_SW /* contribution 4: Update of ITD switch in stereo downmix for EVS */ +#define NTT_REMOVE_EPS_ROM /* contribution 4: Reduction of ROM size in stereo downmix for EVS */ + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif diff --git a/lib_enc/ivas_rom_enc.c b/lib_enc/ivas_rom_enc.c index 783085fb86e1909107cdd3a6719fe8407b813db9..e8048b8f1a9b9f1cce4f04272e36bb9aef8c411a 100644 --- a/lib_enc/ivas_rom_enc.c +++ b/lib_enc/ivas_rom_enc.c @@ -534,6 +534,7 @@ const float ari_bit_estimate_s17_LC[RANGE_N_CONTEXT][RANGE_N_SYMBOLS] = * Stereo downmix to EVS ROM tables *----------------------------------------------------------------------------------*/ +#ifndef NTT_REMOVE_EPS_ROM const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4] = { 0.00000000f, 0.000385506690f, 0.000770864717f, 0.00115592557f, 0.00154054083f, 0.00192456215f, 0.00230784155f, 0.00269023119f, 0.00307158381f, 0.00345175178f, 0.00383058959f, 0.00420795102f, 0.00458368938f, 0.00495766103f, 0.00532972161f, 0.00569972629f, 0.00606753491f, 0.00643300405f, 0.00679599261f, 0.00715636183f, @@ -686,6 +687,7 @@ const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4] = { -0.00648899190f, -0.00649444433f, -0.00649961829f, -0.00650451379f, -0.00650913082f, -0.00651346892f, -0.00651752809f, -0.00652130833f, -0.00652480870f, -0.00652803015f, -0.00653097173f, -0.00653363345f, -0.00653601484f, -0.00653811684f, -0.00653993897f, -0.00654148031f, -0.00654274225f, -0.00654372340f, -0.00654442422f, 
-0.00654484471f }; +#endif const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4] = { 0.00154133327f, 0.0138150426f, 0.0380602330f, 0.0736799166f, 0.119797014f, 0.175276011f, 0.238750681f, 0.308658302f, 0.383277327f, 0.460770488f, diff --git a/lib_enc/ivas_rom_enc.h b/lib_enc/ivas_rom_enc.h index 71942642253d85741b0dc9949c7b640114837aef..2a4f71b8b95603d94147dd42c0c02ede10b27fbb 100644 --- a/lib_enc/ivas_rom_enc.h +++ b/lib_enc/ivas_rom_enc.h @@ -120,9 +120,11 @@ extern const uint16_t ECSQ_tab_vals[ECSQ_PARAM_COUNT - 1][1 + ECSQ_TAB_VALS_SIZE * Stereo downmix to EVS ROM tables *----------------------------------------------------------------------------------*/ +#ifndef NTT_REMOVE_EPS_ROM extern const float Stereo_dmx_s_wnd_coef_eps_16k[L_FRAME16k * 3 / 4]; extern const float Stereo_dmx_s_wnd_coef_eps_32k[L_FRAME32k * 3 / 4]; extern const float Stereo_dmx_s_wnd_coef_eps_48k[L_FRAME48k * 3 / 4]; +#endif extern const float Stereo_dmx_s_wnd_coef_16k[L_FRAME16k >> 4]; extern const float Stereo_dmx_s_wnd_coef_32k[L_FRAME32k >> 4]; extern const float Stereo_dmx_s_wnd_coef_48k[L_FRAME48k >> 4]; diff --git a/lib_enc/ivas_stereo_dmx_evs.c b/lib_enc/ivas_stereo_dmx_evs.c index af3430766a0085ccd4fb4117811db5a8658b14a0..3daa28aa96d40312a93ceb7b06f9054ad0127bfb 100644 --- a/lib_enc/ivas_stereo_dmx_evs.c +++ b/lib_enc/ivas_stereo_dmx_evs.c @@ -63,6 +63,8 @@ #define STEREO_DMX_EVS_DMX_EGY_FORGETTING 0.25f #define STEREO_DMX_EVS_CORR_FORGETTING 0.78f +#define Q_BAND 0.25f + /*-----------------------------------------------------------------------* * Local function prototypes *-----------------------------------------------------------------------*/ @@ -150,26 +152,34 @@ static void calc_poc( { int16_t i, n1, n2; int16_t n0, *itdLR; - const float *c, *s; +#ifndef NTT_REMOVE_EPS_ROM + const float *c; +#endif + const float *s; float *P; float tmp1, tmp2, Lr, Li, Rr, Ri, gamma, igamma, iN; float specPOr[L_FRAME48k], specPOi[L_FRAME48k]; float tmpPOC1[L_FRAME48k], tmpPOC2[L_FRAME48k]; 
float rfft_buf[L_FRAME48k]; int16_t step, bias; +#ifdef NTT_REMOVE_EPS_ROM + int16_t i_for; + int16_t cos_step, cos_max; + float eps_cos, eps_sin, EPS; +#endif /* Initialization */ iN = 1.0f / (float) input_frame; +#ifndef NTT_REMOVE_EPS_ROM c = hPOC->sin + ( input_frame >> 2 ); +#endif s = hPOC->sin; P = hPOC->P; n0 = input_frame / 2; itdLR = hPOC->itdLR; igamma = STEREO_DMX_EVS_POC_GAMMA * iN; gamma = 1.0f - igamma; - set_zero( tmpPOC1, L_FRAME48k ); - set_zero( tmpPOC2, L_FRAME48k ); if ( input_frame == L_FRAME16k ) { @@ -185,6 +195,65 @@ static void calc_poc( specPOr[0] = sign( specLr[0] ) * sign( specRr[0] ) * wnd[bias]; specPOi[0] = 0.0f; +#ifdef NTT_REMOVE_EPS_ROM + EPS = hPOC->eps; + + if ( input_frame == L_FRAME16k ) + { + cos_step = 4; + cos_max = input_frame; + } + else /* for 32 kHz & 48 kHz */ + { + cos_step = 2; + cos_max = n0; + } + + for ( i = 1; i < n0 / 2; i++ ) + { + Lr = specLr[i]; + Li = specLi[i]; + Rr = specRr[i]; + Ri = specRi[i]; + i_for = i * cos_step; + eps_cos = s[cos_max - i_for] * EPS; + eps_sin = s[i_for] * EPS; + Lr += ( specRr[i] * eps_cos + specRi[i] * eps_sin ); + Li += ( -specRr[i] * eps_sin + specRi[i] * eps_cos ); + Rr += ( specLr[i] * eps_cos + specLi[i] * eps_sin ); + Ri += ( -specLr[i] * eps_sin + specLi[i] * eps_cos ); + tmp1 = wnd[i * step + bias] * gamma / ( sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS ); + + specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1; + specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1; + + gamma -= igamma; + } + + for ( i = n0 >> 1; i < n0; i++ ) + { + Lr = specLr[i]; + Li = specLi[i]; + Rr = specRr[i]; + Ri = specRi[i]; + + i_for = ( n0 - i ) * cos_step; + eps_cos = s[cos_max - i_for] * EPS; + eps_sin = s[i_for] * EPS; + + Lr += ( -specRr[i] * eps_cos + specRi[i] * eps_sin ); + Li += ( -specRr[i] * eps_sin - specRi[i] * eps_cos ); + Rr += ( -specLr[i] * eps_cos + specLi[i] * eps_sin ); + Ri += ( -specLr[i] * eps_sin - specLi[i] * eps_cos ); + + tmp1 = wnd[i * step + bias] * gamma / ( 
sqrtf( ( ( Lr * Lr + Li * Li ) ) * ( ( Rr * Rr + Ri * Ri ) ) ) + EPS ); + + specPOr[i] = ( Lr * Rr + Li * Ri ) * tmp1; + specPOi[i] = ( Lr * Ri - Li * Rr ) * tmp1; + gamma -= igamma; + } + //end NTT_REMOVE_EPS_ROM +#else for ( i = 1; i < n0; i++ ) { Lr = specLr[i]; @@ -204,6 +273,8 @@ static void calc_poc( gamma -= igamma; } +#endif //end !NTT_REMOVE_EPS_ROM + specPOr[n0] = sign( specLr[i] ) * sign( specRr[i] ) * wnd[i * step + bias] * gamma; rfft_buf[0] = specPOr[0]; @@ -305,7 +376,7 @@ static float find_poc_peak( itd_cand[0] = itd_cand[1] = 0; P = hPOC->P; - for ( i = 1; i < hPOC->shift_limit; i++ ) + for ( i = 1; i < hPOC->shift_limit; i++ ) /*find peaks of POC P[] with positive and negative ITD */ { if ( P[Lh - i] > Q[0] ) { @@ -327,7 +398,7 @@ static float find_poc_peak( cnt[n] = 0; cQ[n] = P[Lh - itd_cand[n]]; - peak_range = (int16_t) ( abs( itd_cand[n] ) + hPOC->shift_limit / STEREO_DMX_EVS_FIND_POC_PEAK_TAU ) / STEREO_DMX_EVS_FIND_POC_PEAK_TAU2; + peak_range = ( int16_t )( abs( itd_cand[n] ) + hPOC->shift_limit / STEREO_DMX_EVS_FIND_POC_PEAK_TAU ) / STEREO_DMX_EVS_FIND_POC_PEAK_TAU2; for ( i = 1; i <= peak_range; i++ ) { @@ -340,7 +411,7 @@ static float find_poc_peak( Q[n] = ( 1.0f - ( cQ[n] / ( peak_range * 2 + 1 ) + eps2 ) / ( Q[n] + eps2 ) ); Q[n] = max( Q[n], 0.0f ); - if ( on[n] ) + if ( on[n] ) /*if channel n was active (likely to be preceding) in the previous frame*/ { tmpf = ( 0.3f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit ) * peakQ[n]; if ( Q[n] < tmpf ) @@ -357,7 +428,7 @@ static float find_poc_peak( peakQ[n] = max( peakQ[n], Q[n] ); } - else + else /*if channel n was not active (not likely to be preceding) in the previous frame*/ { tmpf = ( 0.75f - 0.2f * (float) abs( itd_cand[n] ) / (float) hPOC->shift_limit ); @@ -374,6 +445,7 @@ static float find_poc_peak( } } +#ifndef NTT_UPDATE_ITD_SW if ( on[0] && prev_off[0] ) { *itd = (float) itdLR[0]; @@ -386,8 +458,38 @@ static float find_poc_peak( { *itd = ( *itd > 0 ) ? 
(float) itdLR[0] : (float) itdLR[1]; } +#else + if ( ( on[0] && prev_off[0] ) && ( on[1] && prev_off[1] ) ) /* both channels have been newly detected as active (possibly preceding): select the channel with the larger POC peakedness Q[] */ + { + *itd = ( Q[0] > Q[1] ) ? (float) itdLR[0] : (float) itdLR[1]; + } + else if ( ( on[0] && prev_off[0] ) && ( Q[0] > ( Q[1] - 0.1 ) ) ) /* channel 0 has newly become active and Q[0] is above Q[1] or less than 0.1 below it: select channel 0 */ + { + *itd = (float) itdLR[0]; + } + else if ( ( on[1] && prev_off[1] ) && ( Q[1] > ( Q[0] - 0.1 ) ) ) /* channel 1 has newly become active and Q[1] is above Q[0] or less than 0.1 below it: select channel 1 */ + { + *itd = (float) itdLR[1]; + } + else if ( Q[0] > ( Q[1] + Q_BAND ) ) /* no qualifying status change: select channel 0 only if Q[0] exceeds Q[1] by more than Q_BAND */ + { + *itd = (float) itdLR[0]; + } + else if ( Q[1] > ( Q[0] + Q_BAND ) ) /* no qualifying status change: select channel 1 only if Q[1] exceeds Q[0] by more than Q_BAND */ + { + *itd = (float) itdLR[1]; + } + else if ( *itd == 0.0 ) /* no channel is likely to be preceding and the incoming ITD is zero: keep the ITD at zero (status of the previous frame) */ + { + *itd = 0; + } + else /* otherwise follow the status of the previous frame: keep the side given by the sign of the incoming ITD */ + { + *itd = ( *itd > 0 ) ? 
(float) itdLR[0] : (float) itdLR[1]; + } +#endif - cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) ); + cconfidence = sqrtf( fabsf( Q[0] - Q[1] ) ); /*higher value indicates higher confidence for one preceding channel*/ return hPOC->confidence = hPOC->confidence * STEREO_DMX_EVS_CORR_FORGETTING + cconfidence * ( 1.0f - STEREO_DMX_EVS_CORR_FORGETTING ); } @@ -649,7 +751,7 @@ void stereo_dmx_evs_enc( float dmx_data[L_FRAME48k]; int16_t input_frame; - input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC ); + input_frame = ( int16_t )( input_Fs / FRAMES_PER_SEC ); for ( n = 0; n < input_frame; n++ ) { @@ -696,7 +798,7 @@ ivas_error stereo_dmx_evs_init_encoder( STEREO_DMX_EVS_ENC_HANDLE hStereoDmxEVS; int16_t n, input_frame; - input_frame = (int16_t) ( input_Fs / FRAMES_PER_SEC ); + input_frame = ( int16_t )( input_Fs / FRAMES_PER_SEC ); hStereoDmxEVS = NULL; if ( ( hStereoDmxEVS = (STEREO_DMX_EVS_ENC_HANDLE) count_malloc( sizeof( STEREO_DMX_EVS_ENC_DATA ) ) ) == NULL ) @@ -738,7 +840,7 @@ ivas_error stereo_dmx_evs_init_encoder( return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for STEREO_DMX_EVS_POC_DATA\n" ) ); } - hStereoDmxEVS->hPOC->shift_limit = (int16_t) ( STEREO_DMX_EVS_SHIFT_LIMIT * input_Fs / 1000 ); + hStereoDmxEVS->hPOC->shift_limit = ( int16_t )( STEREO_DMX_EVS_SHIFT_LIMIT * input_Fs / 1000 ); for ( n = 0; n < CPE_CHANNELS; n++ ) { hStereoDmxEVS->hPOC->peakQ[n] = 0.0f; @@ -766,6 +868,24 @@ ivas_error stereo_dmx_evs_init_encoder( } hStereoDmxEVS->hPOC->eps = 2.0f * EVS_PI / ( (float) input_frame ); +#ifdef NTT_REMOVE_EPS_ROM + if ( input_frame == L_FRAME16k ) + { + hStereoDmxEVS->hPOC->sin = dft_trigo_32k; + } + else if ( input_frame == L_FRAME32k ) + { + hStereoDmxEVS->hPOC->sin = dft_trigo_32k; + } + else if ( input_frame == L_FRAME48k ) + { + hStereoDmxEVS->hPOC->sin = dft_trigo_48k; + } + else + { + return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" ); + } +#else if ( input_frame == L_FRAME16k ) { hStereoDmxEVS->hPOC->sin = 
Stereo_dmx_s_wnd_coef_eps_16k; @@ -782,6 +902,8 @@ ivas_error stereo_dmx_evs_init_encoder( { return IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "invalid frame length\n" ); } +#endif + hStereoDmxEVS->hPOC->confidence = 0.0f; *hStereoDmxEVS_out = hStereoDmxEVS;