Merge branch 'main' into 1326-complexity-issue-ism-4-32-kbps-fb-to-binaural-basop (d2f445be) · Commits · SA4 / Audio / IVAS BASOP

lib_com/ivas_prot_fx.h

+1 −2

Original line number	Diff line number	Diff line
		@@ -5857,8 +5857,7 @@ ivas_error ivas_compute_core_buffers_fx(
		Word16 lsp_mid_fx[M], /* i/o: LSPs in the middle of the frame */
		Word16 Q_old_inp_16k,
		Word16 Q_r[2],
		Word16 *Q_new,
		Word16 downscale_buf_speech_enc_pe );
		Word16 *Q_new );

		ivas_error ivas_enc_fx(
		Encoder_Struct *st_ivas, /* i/o: IVAS encoder structure */

lib_com/prot.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -761,6 +761,7 @@ void preemph_ivas_fx(
		const Word16 L, /* i : vector size Q0*/
		Word32 *mem /* i/o: memory (x[-1]) Qx*/
		);

		void cb_shape(
		const int16_t preemphFlag, /* i : flag for pre-emphasis */
		const int16_t pitchFlag, /* i : flag for pitch sharpening */

lib_enc/core_enc_init_fx.c

+30 −17

Original line number	Diff line number	Diff line
		@@ -1375,6 +1375,7 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol
		}

		// Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) );
		Scale_sig( st->buf_wspeech_enc, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320, sub( 0, sub( st->exp_old_wsp, st->exp_buf_wspeech_enc ) ) );
		Copy( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM );
		st->exp_buf_wspeech_enc = st->exp_old_wsp;
		move16();
		@@ -1384,26 +1385,37 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol
		test();
		IF( EQ_16( st->L_frame, L_FRAME ) && !st->tcxonly )
		{
		// Copy_Scale_sig( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) );
		if ( st->exp_buf_speech_enc_pe >= st->exp_old_inp_12k8 )
		{
		Copy_Scale_sig( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ); // Scaling to common Q
		// st->buf_speech_enc_pe is in st->exp_buf_speech_enc_pe
		}
		else
		{
		Scale_sig( st->buf_speech_enc_pe, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, sub( st->exp_buf_speech_enc_pe, st->exp_old_inp_12k8 ) ); // Scaling to common Q
		Copy( st->old_inp_12k8_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM );
		/* SCaling to common exponent*/
		Scale_sig( st->buf_speech_enc_pe + sub( st->L_frame, L_INP_MEM ), L_INP_MEM, sub( st->exp_old_inp_12k8, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ))
		Scale_sig( st->buf_speech_enc_pe, sub( st->L_frame, L_INP_MEM ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ))
		Scale_sig( st->buf_speech_enc_pe + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ) ) ); // Q(15-max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe ))
		st->exp_buf_speech_enc_pe = s_max( st->exp_old_inp_12k8, st->exp_buf_speech_enc_pe );
		st->exp_buf_speech_enc_pe = st->exp_old_inp_12k8;
		// st->buf_speech_enc_pe is in st->exp_buf_speech_enc_pe
		move16();
		}
		}
		ELSE IF( EQ_16( st->L_frame, L_FRAME16k ) && !st->tcxonly )
		{
		lerp( st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, st->buf_wspeech_enc + st->L_frame + L_SUBFR - 310, 310, L_WSP_MEM );
		if ( st->exp_buf_speech_enc_pe >= st->exp_old_inp_16k )
		{
		Copy_Scale_sig( st->old_inp_16k_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM, sub( st->exp_old_inp_16k, st->exp_buf_speech_enc_pe ) ); // Scaling to common Q
		// st->buf_speech_enc_pe is in st->exp_buf_speech_enc_pe
		}
		else
		{
		Scale_sig( st->buf_speech_enc_pe, L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, sub( st->exp_buf_speech_enc_pe, st->exp_old_inp_16k ) ); // Scaling to common Q
		Copy( st->old_inp_16k_fx, st->buf_speech_enc_pe + st->L_frame - L_INP_MEM, L_INP_MEM );
		/* SCaling to common exponent*/
		Scale_sig( st->buf_speech_enc_pe + sub( st->L_frame, L_INP_MEM ), L_INP_MEM, sub( st->exp_old_inp_16k, s_max( st->exp_old_inp_16k, st->exp_buf_speech_enc_pe ) ) );
		Scale_sig( st->buf_speech_enc_pe, sub( st->L_frame, L_INP_MEM ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_16k, st->exp_buf_speech_enc_pe ) ) );
		Scale_sig( st->buf_speech_enc_pe + st->L_frame, sub( L_PAST_MAX_32k + L_FRAME32k + L_NEXT_MAX_32k, st->L_frame ), sub( st->exp_buf_speech_enc_pe, s_max( st->exp_old_inp_16k, st->exp_buf_speech_enc_pe ) ) );
		st->exp_buf_speech_enc_pe = s_max( st->exp_old_inp_16k, st->exp_buf_speech_enc_pe );
		st->exp_buf_speech_enc_pe = st->exp_old_inp_16k;
		// st->buf_speech_enc_pe is in st->exp_buf_speech_enc_pe
		move16();
		}
		}

		st->mem_preemph_enc = st->buf_speech_enc[st->encoderPastSamples_enc + st->encoderLookahead_enc - 1];
		move16();
		@@ -1416,6 +1428,7 @@ static void init_sig_buffers_ivas_fx( Encoder_State *st, const Word16 L_frame_ol
		ELSE IF( !st->tcxonly && GE_32( last_total_brate, ACELP_32k ) )
		{

		Scale_sig( st->buf_wspeech_enc, L_FRAME16k + L_SUBFR + L_FRAME16k + L_NEXT_MAX_16k + 320, sub( 0, sub( st->exp_old_wsp, st->exp_buf_wspeech_enc ) ) );
		Copy( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM );
		// Copy_Scale_sig( st->old_wsp_fx, st->buf_wspeech_enc + st->L_frame + L_SUBFR - L_WSP_MEM, L_WSP_MEM, sub( st->prev_Q_new, st->prev_Q_old ) );
		st->exp_buf_wspeech_enc = st->exp_old_wsp;

lib_enc/ext_sig_ana_fx.c

+5 −5

Original line number	Diff line number	Diff line
		@@ -710,8 +710,9 @@ void core_signal_analysis_high_bitrate_ivas_fx(
		test();
		IF( st->tcxonly && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
		{
		Copy( new_samples, st->new_speech_enc, L_frame ); /* Q0 */
		Scale_sig( st->new_speech_enc, L_frame, 1 ); // Q1
		Copy_Scale_sig( new_samples, st->new_speech_enc, L_frame, sub( 15, st->exp_buf_speech_enc ) );
		/* st->new_speech_enc copied from new_samples in Q st->exp_buf_speech_enc
		This is considering new_samples is in q 0 in current code*/
		}

		/*--------------------------------------------------------------*
		@@ -747,10 +748,9 @@ void core_signal_analysis_high_bitrate_ivas_fx(
		test();
		IF( st->tcxonly && NE_16( st->element_mode, IVAS_CPE_MDCT ) )
		{
		Copy( st->speech_enc + st->encoderLookahead_enc, st->new_speech_enc_pe, L_frame );
		Copy( st->speech_enc + st->encoderLookahead_enc, st->new_speech_enc_pe, L_frame ); // Assuming both exp_buf_speech_enc_pe and exp_buf_speech_enc are same

		Preemph_scaled( st->new_speech_enc_pe, Q_new, &( st->mem_preemph_enc ),
		st->Q_max_enc, st->preemph_fac, 1, 0, 2, L_frame, st->coder_type_raw, 1 );
		PREEMPH_FX( st->new_speech_enc_pe, st->preemph_fac, L_frame, &( st->mem_preemph_enc ) ); // using this to keep values alligned in Q-1
		}

		/* Rescale Memory */

lib_enc/ivas_core_enc_fx.c

+7 −7

Original line number	Diff line number	Diff line
		@@ -734,8 +734,8 @@ ivas_error ivas_core_enc_fx(
		Scale_sig( st->hBWE_FD->L_old_wtda_swb_fx, L_FRAME48k, shift ); // st->Q_old_wtda
		}

		Word16 q_new_swb_speech_buffer = getScaleFactor16( new_swb_speech_buffer_fx_16, input_frame );
		Scale_sig( new_swb_speech_buffer_fx_16, input_frame, q_new_swb_speech_buffer ); // Q0->q_new_swb_speech_buffer
		Word16 q_new_swb_speech_buffer = getScaleFactor16( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX );
		Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, q_new_swb_speech_buffer ); // Q0->q_new_swb_speech_buffer

		/* SWB TBE encoder */
		test();
		@@ -760,10 +760,10 @@ ivas_error ivas_core_enc_fx(
		ELSE IF( EQ_16( st->extl, SWB_BWE ) || EQ_16( st->extl, FB_BWE ) )
		{
		Copy_Scale_sig_32_16( shb_speech_fx32, shb_speech_fx, L_FRAME16k, -Q16 ); // Q_shb_spch - 16
		Scale_sig( new_swb_speech_fx_16, input_frame, negate( q_new_swb_speech_buffer ) ); // Q0
		Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, negate( q_new_swb_speech_buffer ) ); // q_new_swb_speech_buffer -> Q0
		/* SWB(FB) BWE encoder */
		swb_bwe_enc_ivas_fx( st, last_element_mode, old_inp_12k8_fx[n], old_inp_16k_fx[n], old_syn_12k8_16k_fx[n], new_swb_speech_fx_16, shb_speech_fx, sub( Q_shb_spch, Q16 ), sub( Q_new[n], 1 ) );
		Scale_sig( new_swb_speech_fx_16, input_frame, q_new_swb_speech_buffer ); // q_new_swb_speech_buffer
		Scale_sig( new_swb_speech_buffer_fx_16, L_FRAME48k + STEREO_DFT_OVL_MAX, q_new_swb_speech_buffer ); // Q0 -> q_new_swb_speech_buffer
		}

		Scale_sig( old_syn_12k8_16k_fx[n], L_FRAME16k, sub( Q1, Q_new[n] ) ); // Q0
		@@ -793,7 +793,7 @@ ivas_error ivas_core_enc_fx(
		stereo_icBWE_preproc_fx( hCPE, input_frame, new_swb_speech_buffer_fx_16 /*tmp buffer*/, q_new_swb_speech_buffer );

		q_new_swb_speech_buffer = add( q_new_swb_speech_buffer, 16 );
		Copy_Scale_sig_16_32_no_sat( new_swb_speech_buffer_fx_16, new_swb_speech_buffer_fx, input_frame, Q16 ); // q_new_swb_speech_buffer
		Copy_Scale_sig_16_32_no_sat( new_swb_speech_buffer_fx_16, new_swb_speech_buffer_fx, L_FRAME48k + STEREO_DFT_OVL_MAX, Q16 ); // q_new_swb_speech_buffer - 16 - > q_new_swb_speech_buffer
		Copy_Scale_sig_16_32_no_sat( voice_factors_fx[0], voice_factors_fx32[0], NB_SUBFR16k, Q16 ); // Q31

		stereo_icBWE_enc_ivas_fx( hCPE, shb_speech_fx32, sub( Q31, Q_shb_spch ), new_swb_speech_buffer_fx, sub( Q31, q_new_swb_speech_buffer ), voice_factors_fx32[0] );