Merge branch 'basop-2404-harmonization-of-signal-classification' into 'main' (1e268e41) · Commits · SA4 / Audio / IVAS BASOP

lib_com/options.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -90,6 +90,7 @@
		#define FIX_2280_REDUCTION_UNNECESSARY_SCALING /* VA: reduction of unnecessary scaling */
		#define FIX_2280_REDUCTION_UNNECESSARY_SCALING_NONBE /* VA: reduction of unnecessary scaling, non-BE part */
		#define FIX_2403_COMBINE_PITCH_OL /* VA : basop 2403, reusing common code between EVS and IVAS in pitch_ol */
		#define FIX_2404_HARM_SIGNAL_CLAS /* VA: basop-2404, harmonize signal_clas and signal_clas_ivas */
		#define HARMONIZE_ACELP_ENC /* VA: basop issue 2400: Remove duplicated main ACELP encoder function */

		/* #################### End BE switches ################################## */

lib_enc/init_enc_fx.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -557,6 +557,7 @@ ivas_error init_encoder_fx(
		#else
		speech_music_clas_init_fx( st->element_mode, st->hSpMusClas );
		#endif

		st->sp_aud_decision0 = 0;
		move16();
		st->sp_aud_decision1 = 0;

lib_enc/ivas_core_pre_proc_front_fx.c

+4 −0

Original line number	Diff line number	Diff line
		@@ -1254,7 +1254,11 @@ void pre_proc_front_ivas_fx(
		* TC frame selection
		*-----------------------------------------------------------------*/

		#ifndef FIX_2404_HARM_SIGNAL_CLAS
		st->clas = signal_clas_ivas_fx( st, inp_12k8_fx, ee_fx, relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */
		#else
		st->clas = signal_clas_fx( st, inp_12k8_fx, ee_fx, relE_fx, L_look, tdm_SM_last_clas ); /* Q0 */
		#endif
		move16();

		select_TC_fx( MODE1, st->tc_cnt, &st->coder_type, st->localVAD );

lib_enc/prot_fx_enc.h

+3 −2

Original line number	Diff line number	Diff line
		@@ -511,7 +511,7 @@ Word16 signal_clas_fx(
		const Word16 L_look, /* i : look-ahead */
		Word16 *uc_clas /* o : temporary classification used in music/speech class*/
		);

		#ifndef FIX_2404_HARM_SIGNAL_CLAS
		/* o : classification for current frames */
		Word16 signal_clas_ivas_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		@@ -521,6 +521,7 @@ Word16 signal_clas_ivas_fx(
		const Word16 L_look, /* i : look-ahead */
		Word16 *uc_clas /* o : temporary classification used in music/speech class*/
		);
		#endif

		void speech_music_classif_fx(
		Encoder_State *st, /* i/o: state structure */
		@@ -1127,7 +1128,7 @@ void coder_type_modif_fx(

		void speech_music_clas_init_fx(
		#ifdef FIX_2405_HARM_SMC_INIT
		const Word16 element_mode, /* element mode to differentiate IVAS only init */
		const Word16 element_mode, /* i : element mode */
		#endif
		SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle */
		);

lib_enc/sig_clas_fx.c

+74 −39

Original line number	Diff line number	Diff line
		@@ -31,9 +31,9 @@
		#define K_SNR_FX 3541 /* Q15 .1111 */
		#define C_SNR_FX -10921 /* Q15 -0.3333f */


		#define THRES_EEN 514206 /* 251.077 => (10^(1/(K_EE*10))) Q11*/


		/*-------------------------------------------------------------------*
		* signal_clas_fx()
		*
		@@ -41,7 +41,8 @@
		* TC frames selection
		*-------------------------------------------------------------------*/

		Word16 signal_clas_fx( /* o : classification for current frames */
		/* o : classification for current frames */
		Word16 signal_clas_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */
		const Word32 *ee, /* i : lf/hf E ratio for 2 half-frames in Q6 */
		@@ -55,6 +56,9 @@ Word16 signal_clas_fx( /* o : classification for current
		Word16 i, clas, pc, zc, lo, lo2, hi, hi2, exp_ee, frac_ee;
		Word16 tmp16, tmpS;
		const Word16 *pt1;
		#ifdef FIX_2404_HARM_SIGNAL_CLAS
		Word64 tmp64;
		#endif
		Flag Overflow = 0;
		move32();

		@@ -69,6 +73,31 @@ Word16 signal_clas_fx( /* o : classification for current
		Ltmp = L_mult( st->voicing_fx[1], 16384 ); /* Q15*Q14->Q30 */
		mean_voi2 = mac_r( Ltmp, st->voicing_fx[2], 16384 );

		#ifdef FIX_2404_HARM_SIGNAL_CLAS
		IF( st->element_mode != EVS_MONO )
		{
		tmp64 = W_mult0_32_32( ee[0], ee[1] );
		exp_ee = W_norm( tmp64 );
		Ltmp = W_extract_h( W_shl( tmp64, exp_ee ) ); // Q = Q6+Q6 + exp_ee - 32
		exp_ee = sub( 31, sub( add( Q12, exp_ee ), 32 ) );
		IF( EQ_16( BASOP_Util_Cmp_Mant32Exp( Ltmp, exp_ee, ONE_IN_Q31, 0 ), -1 ) )
		{
		een = 0;
		move16();
		}
		ELSE
		{
		/* mean_ee2 = 0.5f * 20.0f * (float)log10( tmp ); */
		/* een = K_EE_ENC * mean_ee2 + C_EE_ENC; */
		Ltmp = BASOP_Util_Log10( Ltmp, exp_ee ); // Q25
		Ltmp = Mpy_32_32( Ltmp, 671088640 /*20.f in Q25*/ ); // Q25 + Q25 -Q31 = Q19 * 0.5 = Q20
		een = extract_l( L_shl( Mpy_32_16_1( Ltmp, K_EE_FX ), Q9 - Q20 ) ); // Q9
		een = s_min( s_max( een, 0 ), 512 );
		}
		}
		ELSE
		#endif
		{
		/* average spectral tilt in dB */
		lo = L_Extract_lc( ee[0], &hi );
		lo2 = L_Extract_lc( ee[1], &hi2 );
		@@ -97,6 +126,8 @@ Word16 signal_clas_fx( /* o : classification for current
		een = round_fx( L_shl( Ltmp, 16 - 5 ) ); /* Q14 -> Q9 */
		een = mac_r( C_EE_FX, een, K_EE_FX );
		}
		}

		/* compute zero crossing rate */
		pt1 = speech + sub( L_look, 1 );
		tmpS = shr( *pt1, 15 ); /* sets 'tmpS to -1 if *pt1 < 0 */
		@@ -114,6 +145,7 @@ Word16 signal_clas_fx( /* o : classification for current
		pc = add( abs_s( sub( st->pitch[1], st->pitch[0] ) ), abs_s( sub( st->pitch[2], st->pitch[1] ) ) );
		st->tdm_pc = pc;
		move16();

		/*-----------------------------------------------------------------*
		* Transform parameters to the range <0:1>
		* Compute the merit function
		@@ -241,8 +273,8 @@ Word16 signal_clas_fx( /* o : classification for current
		BREAK;
		}
		}
		/* Onset classification */

		/* Onset classification */
		/* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */
		/* tc_cnt == 0: UC frame */
		/* tc_cnt == 1: onset/transition frame, coded by GC coder type */
		@@ -266,9 +298,11 @@ Word16 signal_clas_fx( /* o : classification for current
		st->tc_cnt = -1;
		move16();
		}

		return clas;
		}

		#ifndef FIX_2404_HARM_SIGNAL_CLAS
		Word16 signal_clas_ivas_fx( /* o : classification for current frames */
		Encoder_State *st, /* i/o: encoder state structure */
		const Word16 *speech, /* i : pointer to speech signal for E computation in Qx */
		@@ -498,6 +532,7 @@ Word16 signal_clas_ivas_fx( /* o : classification for cur
		}
		return clas;
		}
		#endif

		/*-------------------------------------------------------------------*
		* select_TC_fx()
		@@ -519,6 +554,7 @@ void select_TC_fx(
		* Select TC coder type for appropriate frames which is in general VOICED_TRANSITION,
		* VOICED_CLAS or ONSET frames following UNVOICED_CLAS frames
		*---------------------------------------------------------------------*/

		test();
		IF( localVAD != 0 && GE_16( tc_cnt, 1 ) )
		{
		@@ -540,21 +576,21 @@ void select_TC_fx(
		return;
		}


		/*-------------------------------------------------------------------*
		* coder_type_modif_fx()
		*
		* Coder type modification
		*-------------------------------------------------------------------*/

		void coder_type_modif_fx(
		Encoder_State *st, /* i/o: encoder state structure */
		const Word16 relE /* i : frame relative E to the long term average */
		)
		{
		Word16 unmod_coder_type, vbr_generic_ho;

		SC_VBR_ENC_HANDLE hSC_VBR = st->hSC_VBR;


		IF( st->Opt_SC_VBR )
		{
		vbr_generic_ho = hSC_VBR->vbr_generic_ho;
		@@ -582,8 +618,8 @@ void coder_type_modif_fx(
		test();
		test();
		test();
		if ( ( st->element_mode == 0 && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) ||
		( st->element_mode > 0 && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) )
		if ( ( st->element_mode == EVS_MONO && GT_32( st->total_brate, ACELP_9k60 ) && EQ_16( st->coder_type, UNVOICED ) ) ||
		( st->element_mode > EVS_MONO && GT_32( st->total_brate, MAX_UNVOICED_BRATE ) && EQ_16( st->coder_type, UNVOICED ) ) )
		{
		st->coder_type = GENERIC;
		move16();
		@@ -609,8 +645,7 @@ void coder_type_modif_fx(
		test();
		test();
		test();
		if ( st->localVAD == 0 && ( (
		EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) ||
		if ( st->localVAD == 0 && ( ( EQ_16( st->coder_type, UNVOICED ) && ( ( st->Opt_SC_VBR == 0 ) || ( ( EQ_16( st->Opt_SC_VBR, 1 ) ) && vbr_generic_ho == 0 && GT_16( st->last_coder_type, UNVOICED ) ) ) ) ||
		EQ_16( st->coder_type, TRANSITION ) \|\| EQ_16( st->coder_type, VOICED ) )

		)
		@@ -657,7 +692,7 @@ void coder_type_modif_fx(
		}
		}

		IF( st->element_mode == 0 )
		IF( st->element_mode == EVS_MONO )
		{
		/* At higher rates and with 16kHz core, allow only GC and TC coder type */
		test();