diff --git a/lib_com/delay_comp.c b/lib_com/delay_comp.c index 32f2f47303ba50bfac3567e237d6494badab266e..194b4b29194abb7f561608d4b764916684694474 100644 --- a/lib_com/delay_comp.c +++ b/lib_com/delay_comp.c @@ -55,7 +55,7 @@ int32_t get_delay( const int32_t io_fs, /* i : input/output sampling frequency */ const IVAS_FORMAT ivas_format, /* i : IVAS format */ HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ - const int32_t binaural_latency_ns /* i : binauralization delay in ns */ + const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ ) { int32_t delay = 0; diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index e07ca8cbcbad57efabb8cab24956ff03ba236706..30fd4d97b023e70228dc86a87d6c7b932fce1bab 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ -570,7 +570,13 @@ typedef enum #define NO_SYMB_GR_PRED_G 8 #define STEREO_DFT_RES_BW_MAX 66 /*Maximum number of bin for residual signal in each frame (res_cod_band_max == 6 in 48kHz)*/ + +#ifdef DFT_STEREO_SPAR_MIXING +#define SBA_DIRAC_STEREO_NUM_BANDS 12 +#else #define SBA_DIRAC_STEREO_NUM_BANDS 5 +#endif + #define SBA_DIRAC_NRG_SMOOTH_LONG 10 #define SBA_DIRAC_NRG_SMOOTH_SHORT 3 diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index c7005f97e98637702f86d99e7cd2dcf703c50ca7..5f60ea42c6925cbacfa373adc4e3fa65288b0ec7 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1059,6 +1059,10 @@ ivas_error stereo_dft_dec_create( const int32_t element_brate, /* i : element bitrate */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ); void stereo_dft_dec_reset( @@ -1098,10 +1102,17 @@ void stereo_dft_dec_synthesize( void stereo_dft_dec( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ Decoder_State *st0, /* i/o: decoder state structure */ - float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ + float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ float *input_mem, /* i/o: mem of buffer DFT analysis */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs, /* i: Fs for delay calculation */ + int16_t nchan_transport /* i: number of transpor channels */ +#endif ); void stereo_dft_res_ecu( @@ -3201,6 +3212,10 @@ void ivas_sba_dirac_stereo_dec( Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ float output[CPE_CHANNELS][L_FRAME48k], /* o : output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ); void ivas_sba_dirac_stereo_config( @@ -3209,6 +3224,12 @@ void ivas_sba_dirac_stereo_config( void ivas_sba_dirac_stereo_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i/o: encoder DFT stereo handle */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs /* i: Fs for delay calculation */ +#endif ); ivas_error ivas_sba_get_hoa_dec_matrix( diff --git a/lib_com/options.h b/lib_com/options.h index 7bff2f901b919e2aa83759f52dd8f21c0777694d..f757b62fffc67f6489c84d4f6657455579b40203 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -141,6 +141,7 @@ #define DISABLE_ADAP_RES_COD_TMP /* temporary fix for IVAS-403, disables adaptive residual coding */ /*#define ITD_WINNER_GAIN_MODIFY */ /* ITD optimization - WORK IN PROGRESS */ /*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ + #define SBA_BR_SWITCHING /* Issue 114: Changes for SBA bit rate switching with reconfiguration for bitrates with same number of transport channels*/ #ifdef SBA_BR_SWITCHING #define SBA_BR_SWITCHING_RECONFIG /* Issue 114: Changes for SBA bitrate switching with reconfiguration for bitrates with different number of transport channels*/ @@ -159,6 +160,12 @@ #define FIX_310_TD_REND_DELAY /* Adding HRTF delay being read from ROM/Binary file, fix rounding for delay compensation in renderer */ #define FIX_334_DEBUG_BE_STEREO_SWITCHING /* FhG: Fix non-BE issue for stereo switching when DEBUGGING is enabled */ +#define DFT_STEREO_SPAR_MIXING +#ifdef DFT_STEREO_SPAR_MIXING +/*#define DFT_STEREO_SPAR_MIXING_DEBUG*/ /* more debugging output for DFT_STEREO_SPAR_MIXING_DEBUG */ +#define DISABLE_RES_CHANNELS_MCT /* decode only W and residual for Y when outputting to stereo */ +#endif + /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif diff --git a/lib_dec/ivas_core_dec.c b/lib_dec/ivas_core_dec.c index 186a624f4d44ff157384ef3fc995f4da329c254b..3fb50b5ca63096029c8c04be9f44869fe51e57be 100644 --- a/lib_dec/ivas_core_dec.c +++ b/lib_dec/ivas_core_dec.c @@ -441,7 +441,11 @@ ivas_error ivas_core_dec( *---------------------------------------------------------------------*/ /* save synth and output in case of SBA DirAC stereo output as core switching is done outside of core decoder */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( sba_dirac_stereo_flag && st->element_mode != IVAS_CPE_MDCT && !( st->core_brate == SID_2k40 && st->cng_type == FD_CNG ) ) +#else if ( sba_dirac_stereo_flag && !( st->core_brate == SID_2k40 && st->cng_type == FD_CNG ) ) +#endif { mvr2r( synth[n], hSCE->save_synth, output_frame ); } @@ -660,7 +664,11 @@ ivas_error ivas_core_dec( } } +#ifdef DFT_STEREO_SPAR_MIXING + if ( sba_dirac_stereo_flag && st->element_mode != IVAS_CPE_MDCT ) +#else if ( sba_dirac_stereo_flag ) +#endif { /* for SBA DirAC stereo output DFT Stereo core switching and updates are done in ivas_sba_dirac_stereo_dec() as hCPE is not available at this point */ break; @@ -675,13 +683,28 @@ ivas_error ivas_core_dec( if ( st->element_mode != IVAS_CPE_DFT ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( st->element_mode != IVAS_CPE_MDCT || sba_dirac_stereo_flag ) +#else if ( st->element_mode != IVAS_CPE_MDCT ) +#endif { +#ifdef DFT_STEREO_SPAR_MIXING + ivas_post_proc( hSCE, hCPE, n, synth[n], NULL, output_frame, sba_dirac_stereo_flag ); +#else ivas_post_proc( hSCE, hCPE, n, synth[n], NULL, output_frame, 0 ); +#endif } /* update OLA buffers - needed for switching to DFT stereo */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( !sba_dirac_stereo_flag ) + { + stereo_td2dft_update( hCPE, n, output[n], synth[n], hb_synth[n], output_frame ); + } +#else stereo_td2dft_update( hCPE, n, output[n], synth[n], hb_synth[n], output_frame ); +#endif } else /* IVAS_CPE_DFT */ { diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c old mode 100644 new mode 100755 index 972f1304f7515b782cd96a81ccb75f61faf1fa6a..b656f9255bd3bb13244c0d6b972696bee94f4099 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -361,7 +361,11 @@ ivas_error ivas_cpe_dec( if ( hCPE->element_mode != IVAS_CPE_DFT || ( hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( error = ivas_core_dec( st_ivas, NULL, hCPE, st_ivas->hMCT, n_channels, output, outputHB, NULL, st_ivas->sba_dirac_stereo_flag ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_dec( st_ivas, NULL, hCPE, st_ivas->hMCT, n_channels, output, outputHB, NULL, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } @@ -406,7 +410,12 @@ ivas_error ivas_cpe_dec( } else { - stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 ); + stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 +#ifdef DFT_STEREO_SPAR_MIXING + , + 0, 0, 0, 0 +#endif + ); } /* synthesis iFFT */ @@ -454,8 +463,12 @@ ivas_error ivas_cpe_dec( /*----------------------------------------------------------------* * Synthesis synchronization between CPE modes *----------------------------------------------------------------*/ - - synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( !st_ivas->sba_dirac_stereo_flag ) +#endif + { + synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); + } if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->nchan_out == 1 && ( is_DTXrate( ivas_total_brate ) == 0 || ( is_DTXrate( ivas_total_brate ) == 1 && is_DTXrate( st_ivas->hDecoderConfig->last_ivas_total_brate ) == 0 ) ) ) { @@ -670,7 +683,11 @@ ivas_error create_cpe_dec( for ( n = 0; n < CPE_CHANNELS; n++ ) { - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { /* for SBA DirAC stereo output CPE element is only used for upmix, core coder is found in SCE element used for core decoding */ break; @@ -702,9 +719,18 @@ ivas_error create_cpe_dec( * DFT stereo initialization *-----------------------------------------------------------------*/ +#ifdef DFT_STEREO_SPAR_MIXING + if ( hCPE->element_mode == IVAS_CPE_DFT || ( st_ivas->sba_dirac_stereo_flag && hCPE->cpe_id == 0 ) ) +#else if ( hCPE->element_mode == IVAS_CPE_DFT || st_ivas->sba_dirac_stereo_flag ) +#endif { - if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag ) ) != IVAS_ERR_OK ) + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + , + st_ivas->nchan_transport +#endif + ) ) != IVAS_ERR_OK ) { return error; } diff --git a/lib_dec/ivas_dec.c b/lib_dec/ivas_dec.c index e4a08b0540fbb3f8a9fd3af399fd02f415d5d54a..384baa0bb26482918c2e122394c771f8658c6294 100644 --- a/lib_dec/ivas_dec.c +++ b/lib_dec/ivas_dec.c @@ -305,7 +305,20 @@ ivas_error ivas_dec( if ( st_ivas->sba_dirac_stereo_flag ) { nchan_remapped = CPE_CHANNELS; - ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame ); + +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_mode == SBA_MODE_SPAR ) + { + ivas_spar_dec_gen_umx_mat( st_ivas->hSpar->hMdDec, st_ivas->nchan_transport, IVAS_MAX_NUM_BANDS, st_ivas->bfi ); + } +#endif + + ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + st_ivas->ivas_format == MC_FORMAT +#endif + ); } else if ( st_ivas->ivas_format == MASA_FORMAT && ivas_total_brate < MASA_STEREO_MIN_BITRATE && ( ivas_total_brate > IVAS_SID_5k2 || ( ivas_total_brate <= IVAS_SID_5k2 && st_ivas->nCPE > 0 && st_ivas->hCPE[0]->nchan_out == 1 ) ) ) { @@ -360,6 +373,9 @@ ivas_error ivas_dec( } } else /* SBA_MODE_SPAR */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( !st_ivas->sba_dirac_stereo_flag ) +#endif { ivas_sba_upmixer_renderer( st_ivas, output, output_frame ); /* Note: ivas_sba_linear_renderer() or ivas_dirac_dec() are called internally */ } @@ -536,7 +552,12 @@ ivas_error ivas_dec( if ( st_ivas->sba_dirac_stereo_flag ) /* use the flag to trigger the DFT upmix */ { - ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame ); + ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + 1 +#endif + ); } /* HP filtering */ diff --git a/lib_dec/ivas_init_dec.c b/lib_dec/ivas_init_dec.c index f8a58c64661e42ee5984b474069d000a533934e4..3acc858727161c5c1cf93c8adffe2623a121aeff 100644 --- a/lib_dec/ivas_init_dec.c +++ b/lib_dec/ivas_init_dec.c @@ -884,6 +884,9 @@ ivas_error ivas_init_decoder( ivas_dirac_config_bands( band_grouping, IVAS_MAX_NUM_BANDS, (int16_t) ( st_ivas->hDecoderConfig->output_Fs * INV_CLDFB_BANDWIDTH + 0.5f ), st_ivas->hSpar->dirac_to_spar_md_bands, st_ivas->hQMetaData->useLowerBandRes, st_ivas->hSpar->enc_param_start_band, 0 ); } +#ifdef DFT_STEREO_SPAR_MIXING + st_ivas->sba_dirac_stereo_flag = ( output_config == AUDIO_CONFIG_STEREO ); +#endif } else /* SBA_MODE_DIRAC */ { @@ -929,7 +932,11 @@ ivas_error ivas_init_decoder( } /* create CPE element for DFT Stereo like upmix */ - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { if ( ( error = create_cpe_dec( st_ivas, cpe_id, ivas_total_brate / ( st_ivas->nSCE + st_ivas->nCPE ) ) ) != IVAS_ERR_OK ) { @@ -1352,7 +1359,11 @@ ivas_error ivas_init_decoder( } /* CLDFB Interpolation weights */ - if ( st_ivas->ivas_format == SBA_FORMAT && st_ivas->sba_mode == SBA_MODE_SPAR ) + if ( st_ivas->ivas_format == SBA_FORMAT && st_ivas->sba_mode == SBA_MODE_SPAR +#ifdef DFT_STEREO_SPAR_MIXING + && !st_ivas->sba_dirac_stereo_flag +#endif + ) { ivas_spar_get_cldfb_gains( st_ivas->hSpar, st_ivas->cldfbAnaDec[0], st_ivas->cldfbSynDec[0], hDecoderConfig ); } @@ -1641,7 +1652,11 @@ void ivas_destroy_dec( if ( st_ivas->hCPE[i] != NULL ) { /* set pointer to NULL as core coder already deallocated in destroy_sce_dec() */ - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { st_ivas->hCPE[i]->hCoreCoder[0] = NULL; st_ivas->hCPE[i]->hCoreCoder[1] = NULL; @@ -1933,19 +1948,29 @@ void ivas_init_dec_get_num_cldfb_instances( case RENDERER_BINAURAL_FASTCONV_ROOM: if ( st_ivas->sba_mode == SBA_MODE_SPAR ) { - *numCldfbAnalyses = st_ivas->hSpar->hFbMixer->fb_cfg->num_in_chans; - - if ( st_ivas->hOutSetup.is_loudspeaker_setup && st_ivas->renderer_type == RENDERER_DIRAC ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_dirac_stereo_flag ) { - *numCldfbSyntheses = st_ivas->hOutSetup.nchan_out_woLFE; - } - else if ( st_ivas->hDecoderConfig->output_config == AUDIO_CONFIG_FOA ) - { - *numCldfbSyntheses = st_ivas->hSpar->hFbMixer->fb_cfg->num_out_chans; + *numCldfbAnalyses = 0; + *numCldfbSyntheses = 0; } else +#endif { - *numCldfbSyntheses = MAX_OUTPUT_CHANNELS; + *numCldfbAnalyses = st_ivas->hSpar->hFbMixer->fb_cfg->num_in_chans; + + if ( st_ivas->hOutSetup.is_loudspeaker_setup && st_ivas->renderer_type == RENDERER_DIRAC ) + { + *numCldfbSyntheses = st_ivas->hOutSetup.nchan_out_woLFE; + } + else if ( st_ivas->hDecoderConfig->output_config == AUDIO_CONFIG_FOA ) + { + *numCldfbSyntheses = st_ivas->hSpar->hFbMixer->fb_cfg->num_out_chans; + } + else + { + *numCldfbSyntheses = MAX_OUTPUT_CHANNELS; + } } } else if ( st_ivas->mc_mode == MC_MODE_PARAMMC ) diff --git a/lib_dec/ivas_mct_dec.c b/lib_dec/ivas_mct_dec.c index 7b2fa2982c17d4273880e98c431ee1ed843df4d0..f05d5d1ec5845ad87f2186057d9544512589a7c6 100644 --- a/lib_dec/ivas_mct_dec.c +++ b/lib_dec/ivas_mct_dec.c @@ -166,6 +166,20 @@ ivas_error ivas_mct_dec( /* MCT core decoder */ ivas_mct_core_dec( hMCT, st_ivas->hCPE, nCPE, output ); +#ifdef DISABLE_RES_CHANNELS_MCT + /* for sba to stereo output disable any further processing for TCs > 2 as it is not needed*/ + if ( st_ivas->sba_dirac_stereo_flag ) + { + for ( cpe_id = 1; cpe_id < nCPE; cpe_id++ ) + { + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + st_ivas->hCPE[cpe_id]->hCoreCoder[n]->mct_chan_mode = MCT_CHAN_MODE_IGNORE; + } + } + } +#endif + /* MCT reconstruction and CoreCoder updates */ for ( cpe_id = 0; cpe_id < nCPE; cpe_id++ ) { @@ -225,8 +239,19 @@ ivas_error ivas_mct_dec( break; } +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_dirac_stereo_flag ) + { + ivas_post_proc( NULL, hCPE, n, synth[n], NULL, output_frame, 1 ); + } +#endif + /* Postprocessing for ACELP/MDCT core switching and synchronization */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( error = core_switching_post_dec( sts[n], synth[n], output[cpe_id * CPE_CHANNELS + n], hCPE->output_mem[1], 0, output_frame, 0 /*core_switching_flag*/, st_ivas->sba_dirac_stereo_flag, -1, hCPE->last_element_mode ) ) != IVAS_ERR_OK ) +#else if ( ( error = core_switching_post_dec( sts[n], synth[n], output[cpe_id * CPE_CHANNELS + n], hCPE->output_mem[1], 0, output_frame, 0 /*core_switching_flag*/, 0, -1, hCPE->last_element_mode ) ) != IVAS_ERR_OK ) +#endif { return error; } @@ -247,7 +272,14 @@ ivas_error ivas_mct_dec( /* synthesis synchronization between stereo modes */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( !st_ivas->sba_dirac_stereo_flag ) + { + synchro_synthesis( ivas_total_brate, hCPE, output + cpe_id * CPE_CHANNELS, output_frame, 0 ); + } +#else synchro_synthesis( ivas_total_brate, hCPE, output + cpe_id * CPE_CHANNELS, output_frame, 0 ); +#endif #ifdef DEBUG_PLOT for ( n = 0; n < CPE_CHANNELS; n++ ) diff --git a/lib_dec/ivas_post_proc.c b/lib_dec/ivas_post_proc.c index 1104c9a378ce2ed25c9b0364bc3cfe40b201dc9d..849f4b79c079273b7f05e38406e3ab51e46eda3e 100644 --- a/lib_dec/ivas_post_proc.c +++ b/lib_dec/ivas_post_proc.c @@ -81,7 +81,11 @@ void ivas_post_proc( output_Fs = sts[0]->output_Fs; +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( sts[n]->element_mode != IVAS_CPE_DFT && !( sba_dirac_stereo_flag && sts[n]->element_mode != IVAS_CPE_MDCT ) ) || ( sts[n]->element_mode == IVAS_CPE_DFT && hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) +#else if ( ( sts[n]->element_mode != IVAS_CPE_DFT && !sba_dirac_stereo_flag ) || ( sts[n]->element_mode == IVAS_CPE_DFT && hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) +#endif { if ( sts[n]->hTcxLtpDec != NULL ) { @@ -102,6 +106,13 @@ void ivas_post_proc( mvr2r( sts[n]->prev_synth_buffer, sts[n]->hTcxDec->FBTCXdelayBuf, 0 ); mvr2r( sts[n]->delay_buf_out, sts[n]->hTcxDec->FBTCXdelayBuf + 0, delay_comp ); } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( sba_dirac_stereo_flag && sts[n]->element_mode == IVAS_CPE_MDCT ) + { + int16_t numZeros = (int16_t) ( NS2SA( output_Fs, N_ZERO_MDCT_NS ) ); + mvr2r( sts[n]->hHQ_core->old_out + numZeros, sts[n]->hTcxDec->FBTCXdelayBuf, delay_comp ); + } +#endif tcx_ltp_post( sts[n], hTcxLtpDec, sts[n]->core, output_frame, NS2SA( output_Fs, ACELP_LOOK_NS ) + delay_comp, synth, sts[n]->hTcxDec->FBTCXdelayBuf ); } diff --git a/lib_dec/ivas_rom_dec.c b/lib_dec/ivas_rom_dec.c index 04e3461e99a452098489dda3ac4ca6f0cdf76b26..e9e107e863b7a4cc1f65ed13dbd852e3a8021208 100644 --- a/lib_dec/ivas_rom_dec.c +++ b/lib_dec/ivas_rom_dec.c @@ -231,11 +231,31 @@ const int16_t cna_init_bands[MAX_CNA_NBANDS + 1] = 1, 4, 14, 33, 67, 171, 320 }; +#ifdef DFT_STEREO_SPAR_MIXING +const float max_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.98f, 0.97f, 0.95f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f +}; + +const float min_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f +}; + +const float max_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.9f, 0.9f, 0.9f}; + +const float min_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.5f, 0.5f, 0.5, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.3f, 0.3f, 0.3f +}; +#else const float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS] = { 0.98f, 0.97f, 0.95f, 0.9f, 0.9f }; - +#endif /*------------------------------------------------------------------------- * ECLVQ Stereo ROM tables diff --git a/lib_dec/ivas_rom_dec.h b/lib_dec/ivas_rom_dec.h index 00995b1ebce442e646b983b4e7fb745954c9de9d..0f40b07c2afe8c503b8dbb4f2d00f1abc8f6057a 100644 --- a/lib_dec/ivas_rom_dec.h +++ b/lib_dec/ivas_rom_dec.h @@ -70,8 +70,15 @@ extern const float dft_win232ms_48k[450]; extern const float dft_win_8k[70]; extern const int16_t cna_init_bands[MAX_CNA_NBANDS + 1]; -extern const float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS]; +#ifdef DFT_STEREO_SPAR_MIXING +extern const float min_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float max_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float min_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float max_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS]; +#else +extern const float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS]; +#endif /*----------------------------------------------------------------------------------* * ECLVQ Stereo ROM tables diff --git a/lib_dec/ivas_sba_dec.c b/lib_dec/ivas_sba_dec.c index d48664be32d9702a5dcabc0a72ce094c78d42063..ec6ddf039fc8f39dc40c082270d790e9fd2d7910 100644 --- a/lib_dec/ivas_sba_dec.c +++ b/lib_dec/ivas_sba_dec.c @@ -929,6 +929,10 @@ ivas_error ivas_sba_dec_reconfigure( sba_order_internal = min( st_ivas->sba_analysis_order, IVAS_MAX_SBA_ORDER ); ivas_spar_config( hDecoderConfig->ivas_total_brate, sba_order_internal, &st_ivas->nchan_transport, &st_ivas->nSCE, &st_ivas->nCPE, &st_ivas->hSpar->core_nominal_brate, st_ivas->sid_format ); +#ifdef DFT_STEREO_SPAR_MIXING + st_ivas->sba_dirac_stereo_flag = ( hDecoderConfig->output_config == AUDIO_CONFIG_STEREO ); +#endif + if ( ( error = ivas_dirac_sba_config( st_ivas->hQMetaData, &st_ivas->nchan_transport, &st_ivas->nSCE, &st_ivas->nCPE, &st_ivas->element_mode_init, hDecoderConfig->ivas_total_brate, st_ivas->sba_analysis_order, st_ivas->sba_mode, IVAS_MAX_NUM_BANDS - SPAR_DIRAC_SPLIT_START_BAND ) ) != IVAS_ERR_OK ) { return error; diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c old mode 100644 new mode 100755 index fc1c445d1e4f47545c6675e3761510f8fcd6008a..bbd612b7ec7d4a6c1b7ebec80d28f91601f56fad --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -91,6 +91,10 @@ static int16_t ivas_sba_dirac_stereo_band_config( int16_t *band_limits, /* o : DFT band limits */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t NFFT /* i : analysis/synthesis window length */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t spar_flag /* i : SPAR or DirAC band grouping */ +#endif ) { int16_t i; @@ -98,6 +102,18 @@ static int16_t ivas_sba_dirac_stereo_band_config( int16_t nbands, num_cldfb_bands; nbands = SBA_DIRAC_STEREO_NUM_BANDS; + +#ifdef DFT_STEREO_SPAR_MIXING + if ( spar_flag ) + { + nbands = IVAS_MAX_NUM_BANDS; + } + else + { + nbands = 5; + } +#endif + num_cldfb_bands = (int16_t) ( output_Fs * INV_CLDFB_BANDWIDTH + 0.5f ); bins_per_cldfb_band = NFFT / ( 2 * num_cldfb_bands ); @@ -105,7 +121,23 @@ static int16_t ivas_sba_dirac_stereo_band_config( band_limits[0] = 1; for ( i = 1; i < nbands; i++ ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( spar_flag ) + { + band_limits[i] = DirAC_band_grouping_12[i] * bins_per_cldfb_band; + } + else + { + band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; + } + if ( band_limits[i] >= NFFT / 2 ) + { + nbands = i; + break; + } +#else band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; +#endif } band_limits[nbands] = NFFT / 2; @@ -159,32 +191,41 @@ static void map_params_dirac_to_stereo( float DFT[STEREO_DFT_BUF_MAX], /* i/o: DFT buffer */ const uint8_t b_wide_panning, /* i : flag indicating wider panning */ const int16_t L_frame /* i : core signal length */ - +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ) { - int16_t i, b, k, block; + int16_t i, b, k; int16_t k_offset; - int16_t nbands, nBlocks, block_len; + int16_t nbands, nBlocks; + int16_t block; + int16_t block_len; int16_t azimuth[MAX_PARAM_SPATIAL_SUBFRAMES][SBA_DIRAC_STEREO_NUM_BANDS]; int16_t elevation[MAX_PARAM_SPATIAL_SUBFRAMES][SBA_DIRAC_STEREO_NUM_BANDS]; float diffuseness[SBA_DIRAC_STEREO_NUM_BANDS]; + float block_nrg[MAX_PARAM_SPATIAL_SUBFRAMES]; + float nrg_norm1, nrg_norm2; + float *pSynth; float surrCoh[SBA_DIRAC_STEREO_NUM_BANDS]; + float *pDFT; float subframe_band_nrg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; float smooth_long_avg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; float smooth_short_avg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; - float block_nrg[MAX_PARAM_SPATIAL_SUBFRAMES]; - float nrg_norm1, nrg_norm2; + float *side_gain, *res_pred_gain; - float *pSynth, *pDFT; IVAS_QDIRECTION *q_direction; - nBlocks = MAX_PARAM_SPATIAL_SUBFRAMES; +#ifdef DFT_STEREO_SPAR_MIXING + nbands = hStereoDft->nbands; +#else nbands = SBA_DIRAC_STEREO_NUM_BANDS; +#endif k_offset = STEREO_DFT_OFFSET; side_gain = hStereoDft->side_gain + k_offset * STEREO_DFT_BAND_MAX; res_pred_gain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; - q_direction = &( hQMetaData->q_direction[0] ); /* gain smoothing factor */ @@ -233,99 +274,109 @@ static void map_params_dirac_to_stereo( } /* apply upper bounds depending on band */ +#ifdef DFT_STEREO_SPAR_MIXING + hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) ); + hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); +#else hStereoDft->smooth_fac[0][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ); hStereoDft->smooth_fac[1][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ); +#endif } pDFT += STEREO_DFT32MS_N_MAX; } - /* calculate block energies for side gain weighting (combine angles of 2 DirAC blocks to side gain for 1 DFT Stereo subframe; 4 blocks and 2 subframes overall) */ - pSynth = synth; - block_len = L_frame / nBlocks; - for ( block = 0; block < nBlocks; block++ ) - { - block_nrg[block] = 0.f; - for ( i = 0; i < block_len; i++ ) - { - block_nrg[block] += pSynth[i] * pSynth[i]; - } - block_nrg[block] = sqrtf( block_nrg[block] ); - pSynth += block_len; - } - nrg_norm1 = 1 / ( block_nrg[0] + block_nrg[1] + EPSILON ); - nrg_norm2 = 1 / ( block_nrg[2] + block_nrg[3] + EPSILON ); - - /* extract DirAC parameters from metadata */ - for ( b = 0; b < nbands; b++ ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( mcmasa ) +#endif { - diffuseness[b] = 1.0f - q_direction->band_data[b].energy_ratio[0]; - if ( hQMetaData->surcoh_band_data != NULL ) - { - surrCoh[b] = hQMetaData->surcoh_band_data[b].surround_coherence[0] / 255.0f; - } - else - { - surrCoh[b] = 0.0f; - } - + /* calculate block energies for side gain weighting (combine angles of 2 DirAC blocks to side gain for 1 DFT Stereo subframe; 4 blocks and 2 subframes overall) */ + pSynth = synth; + block_len = L_frame / nBlocks; for ( block = 0; block < nBlocks; block++ ) { - int16_t block_metadata; - - if ( hQMetaData->useLowerRes ) - { - block_metadata = 0; - } - else - { - block_metadata = block; - } - if ( q_direction->band_data[b].azimuth[block_metadata] < 0.f ) + block_nrg[block] = 0.f; + for ( i = 0; i < block_len; i++ ) { - q_direction->band_data[b].azimuth[block_metadata] += 360.f; + block_nrg[block] += pSynth[i] * pSynth[i]; } - azimuth[block][b] = (int16_t) q_direction->band_data[b].azimuth[block_metadata]; - elevation[block][b] = (int16_t) q_direction->band_data[b].elevation[block_metadata]; + block_nrg[block] = sqrtf( block_nrg[block] ); + pSynth += block_len; } - } + nrg_norm1 = 1 / ( block_nrg[0] + block_nrg[1] + EPSILON ); + nrg_norm2 = 1 / ( block_nrg[2] + block_nrg[3] + EPSILON ); - /* map angles (azi, ele), surround coherence, and diffuseness to DFT Stereo side and prediction gains */ - for ( b = 0; b < hStereoDft->nbands; b++ ) - { - /* combine angles of first 2 blocks to side gain of first subframe */ - side_gain[b] = 0.f; - for ( block = 0; block < nBlocks / 2; block++ ) + /* extract DirAC parameters from metadata */ + for ( b = 0; b < nbands; b++ ) { - if ( b_wide_panning == 1 ) + diffuseness[b] = 1.0f - q_direction->band_data[b].energy_ratio[0]; + if ( hQMetaData->surcoh_band_data != NULL ) { - /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ - side_gain[b] += nrg_norm1 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + surrCoh[b] = hQMetaData->surcoh_band_data[b].surround_coherence[0] / 255.0f; } else { - side_gain[b] += nrg_norm1 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + surrCoh[b] = 0.0f; + } + + for ( block = 0; block < nBlocks; block++ ) + { + int16_t block_metadata; + + if ( hQMetaData->useLowerRes ) + { + block_metadata = 0; + } + else + { + block_metadata = block; + } + if ( q_direction->band_data[b].azimuth[block_metadata] < 0.f ) + { + q_direction->band_data[b].azimuth[block_metadata] += 360.f; + } + azimuth[block][b] = (int16_t) q_direction->band_data[b].azimuth[block_metadata]; + elevation[block][b] = (int16_t) q_direction->band_data[b].elevation[block_metadata]; } } - /* combine angles of last 2 blocks to side gain of second subframe */ - side_gain[b + STEREO_DFT_BAND_MAX] = 0.f; - for ( block = nBlocks / 2; block < nBlocks; block++ ) + /* map angles (azi, ele), surround coherence, and diffuseness to DFT Stereo side and prediction gains */ + for ( b = 0; b < hStereoDft->nbands; b++ ) { - if ( b_wide_panning == 1 ) + /* combine angles of first 2 blocks to side gain of first subframe */ + side_gain[b] = 0.f; + for ( block = 0; block < nBlocks / 2; block++ ) { - /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ - side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + if ( b_wide_panning == 1 ) + { + /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ + side_gain[b] += nrg_norm1 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + } + else + { + side_gain[b] += nrg_norm1 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + } } - else + + /* combine angles of last 2 blocks to side gain of second subframe */ + side_gain[b + STEREO_DFT_BAND_MAX] = 0.f; + for ( block = nBlocks / 2; block < nBlocks; block++ ) { - side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + if ( b_wide_panning == 1 ) + { + /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ + side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + } + else + { + side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + } } - } - side_gain[b] *= sqrtf( 1.f - diffuseness[b] ); - side_gain[b + STEREO_DFT_BAND_MAX] *= sqrtf( 1.f - diffuseness[b] ); - res_pred_gain[b] = diffuseness[b] * ( 1.0f - surrCoh[b] ); - res_pred_gain[b + STEREO_DFT_BAND_MAX] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + side_gain[b] *= sqrtf( 1.f - diffuseness[b] ); + side_gain[b + STEREO_DFT_BAND_MAX] *= sqrtf( 1.f - diffuseness[b] ); + res_pred_gain[b] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + res_pred_gain[b + STEREO_DFT_BAND_MAX] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + } } hStereoDft->frame_nodata = 0; @@ -424,22 +475,66 @@ static void ivas_sba_dirac_stereo_upmix_hb( float hb_synth[L_FRAME48k], /* i : HB signal */ float hb_gain[NB_DIV], /* i : side gains for HB signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa, + const STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i : Stereo DFT handle for mixing matrix */ +#endif ) { int16_t i; - for ( i = 0; i < output_frame / 2; i++ ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( !mcmasa ) { - hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[0] * hb_synth[i]; - hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[0] * hb_synth[i]; + for ( i = 0; i < output_frame / 2; i++ ) + { + float gp = hStereoDft->mixer_mat_smooth[0][0][8] + hStereoDft->mixer_mat_smooth[1][0][8] + + hStereoDft->mixer_mat_smooth[0][0][9] + hStereoDft->mixer_mat_smooth[1][0][9] + + hStereoDft->mixer_mat_smooth[0][0][10] + hStereoDft->mixer_mat_smooth[1][0][10] + + hStereoDft->mixer_mat_smooth[0][0][11] + hStereoDft->mixer_mat_smooth[1][0][11]; + + float gm = hStereoDft->mixer_mat_smooth[0][0][8] - hStereoDft->mixer_mat_smooth[1][0][8] + + hStereoDft->mixer_mat_smooth[0][0][9] - hStereoDft->mixer_mat_smooth[1][0][9] + + hStereoDft->mixer_mat_smooth[0][0][10] - hStereoDft->mixer_mat_smooth[1][0][10] + + hStereoDft->mixer_mat_smooth[0][0][11] - hStereoDft->mixer_mat_smooth[1][0][11]; + + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] * 0.25f * gp; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] * 0.25f * gm; + } + for ( i = output_frame / 2; i < output_frame; i++ ) + { + float gp = hStereoDft->mixer_mat_smooth[0][0][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][8 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][9 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][10 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][11 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][11 + IVAS_MAX_NUM_BANDS]; + + float gm = hStereoDft->mixer_mat_smooth[0][0][8 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][8 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][9 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][9 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][10 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][10 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][11 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][11 + IVAS_MAX_NUM_BANDS]; + + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] * 0.25f * gp; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] * 0.25f * gm; + } } - - for ( i = output_frame / 2; i < output_frame; i++ ) + else +#endif { - hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[1] * hb_synth[i]; - hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[1] * hb_synth[i]; + for ( i = 0; i < output_frame / 2; i++ ) + { + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[0] * hb_synth[i]; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[0] * hb_synth[i]; + } + + for ( i = output_frame / 2; i < output_frame; i++ ) + { + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[1] * hb_synth[i]; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[1] * hb_synth[i]; + } } + return; } @@ -454,6 +549,10 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ float output[CPE_CHANNELS][L_FRAME48k], /* i/o: output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t spar_flag +#endif ) { int16_t i; @@ -462,7 +561,71 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( float tmp; const float *win_dft; - if ( max( hStereoDft->td_gain[0], hStereoDft->td_gain[1] ) > 0 ) +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + + static FILE *f_stefi = 0; + + if ( f_stefi == 0 ) + { + f_stefi = fopen( "stefi.txt", "w" ); + } + +#endif + +#ifdef DFT_STEREO_SPAR_MIXING + if ( spar_flag ) + { + win_dft = hStereoDft->win32ms; + dftOvlLen = hStereoDft->dft32ms_ovl; + + float g_W_1, g_Y_1; + float g_W_2, g_Y_2; + float g_L, g_R; + float stefi_L, stefi_R; + + g_W_1 = ( hStereoDft->mixer_mat_smooth[0][1][8] + hStereoDft->mixer_mat_smooth[0][2][8] + hStereoDft->mixer_mat_smooth[0][3][8] ) + ( hStereoDft->mixer_mat_smooth[0][1][9] + hStereoDft->mixer_mat_smooth[0][2][9] + hStereoDft->mixer_mat_smooth[0][3][9] ) + ( hStereoDft->mixer_mat_smooth[0][1][10] + hStereoDft->mixer_mat_smooth[0][2][10] + hStereoDft->mixer_mat_smooth[0][3][10] ); + + g_Y_1 = ( hStereoDft->mixer_mat_smooth[1][1][8] + hStereoDft->mixer_mat_smooth[1][2][8] + hStereoDft->mixer_mat_smooth[1][3][8] ) + ( hStereoDft->mixer_mat_smooth[1][1][9] + hStereoDft->mixer_mat_smooth[1][2][9] + hStereoDft->mixer_mat_smooth[1][3][9] ) + ( hStereoDft->mixer_mat_smooth[1][1][10] + hStereoDft->mixer_mat_smooth[1][2][10] + hStereoDft->mixer_mat_smooth[1][3][10] ); + + g_W_2 = ( hStereoDft->mixer_mat_smooth[0][1][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][8 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[0][1][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][9 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[0][1][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][10 + IVAS_MAX_NUM_BANDS] ); + + g_Y_2 = ( hStereoDft->mixer_mat_smooth[1][1][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][8 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[1][1][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][9 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[1][1][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][10 + IVAS_MAX_NUM_BANDS] ); + + g_L = 0.16f * ( g_W_1 + g_W_2 - g_Y_1 - g_Y_2 ); + g_R = 0.16f * ( g_W_1 + g_W_2 + g_Y_1 + g_Y_2 ); + + for ( i = 0; i < dftOvlLen; i++ ) + { + win_in = win_dft[STEREO_DFT32MS_STEP * i] * win_dft[STEREO_DFT32MS_STEP * i]; + win_out = 1 - win_in; + + stefi_L = ( win_out * hStereoDft->g_L_prev + win_in * g_L ) * 0.5f * hStereoDft->hb_stefi_sig[i]; + stefi_R = ( win_out * hStereoDft->g_R_prev + win_in * g_R ) * 0.5f * hStereoDft->hb_stefi_sig[i]; + + output[0][i] += stefi_L; + output[1][i] += stefi_R; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", stefi_L, stefi_R ); +#endif + } + for ( i = dftOvlLen; i < output_frame; i++ ) + { + + stefi_L = g_L * 0.5f * hStereoDft->hb_stefi_sig[i]; + stefi_R = g_R * 0.5f * hStereoDft->hb_stefi_sig[i]; + + output[0][i] += stefi_L; + output[1][i] += stefi_R; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", stefi_L, stefi_R ); +#endif + } + hStereoDft->g_L_prev = g_L; + hStereoDft->g_R_prev = g_R; + } + else +#endif + if ( max( hStereoDft->td_gain[0], hStereoDft->td_gain[1] ) > 0 ) { win_dft = hStereoDft->win32ms; dftOvlLen = hStereoDft->dft32ms_ovl; @@ -475,12 +638,18 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( output[0][i] += tmp; output[1][i] -= tmp; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", +tmp, -tmp ); +#endif } for ( i = dftOvlLen; i < output_frame; i++ ) { tmp = hStereoDft->td_gain[0] * 0.5f * hStereoDft->hb_stefi_sig[i]; output[0][i] += tmp; output[1][i] -= tmp; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", +tmp, -tmp ); +#endif } } @@ -496,35 +665,168 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( void ivas_sba_dirac_stereo_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i/o: decoder DFT stereo handle */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs /* i: Fs for delay calculation */ +#endif ) { - int16_t k_offset, b; + +#ifdef DFT_STEREO_SPAR_MIXING + int16_t i, j, k, i_sf; +#endif + + int16_t b; + int16_t k_offset; float *side_gain, *prev_side_gain; float *res_pred_gain, *prev_res_pred_gain; k_offset = STEREO_DFT_OFFSET; - prev_side_gain = hStereoDft->side_gain; side_gain = hStereoDft->side_gain + k_offset * STEREO_DFT_BAND_MAX; prev_res_pred_gain = hStereoDft->res_pred_gain; res_pred_gain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; - /* Smoothing of side and prediction gains between ftrames */ - for ( b = hStereoDft->res_pred_band_min; b < hStereoDft->nbands; b++ ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( !hMdDec ) +#endif { - if ( hStereoDft->attackPresent ) + /* Smoothing of side and prediction gains between ftrames */ + for ( b = hStereoDft->res_pred_band_min; b < hStereoDft->nbands; b++ ) { - res_pred_gain[b] *= 0.8f; - res_pred_gain[b + STEREO_DFT_BAND_MAX] *= 0.8f; + if ( hStereoDft->attackPresent ) + { + res_pred_gain[b] *= 0.8f; + res_pred_gain[b + STEREO_DFT_BAND_MAX] *= 0.8f; + } + else + { + side_gain[b] = hStereoDft->smooth_fac[0][b] * prev_side_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * side_gain[b]; + side_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * side_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * side_gain[b + STEREO_DFT_BAND_MAX]; + res_pred_gain[b] = hStereoDft->smooth_fac[0][b] * prev_res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * res_pred_gain[b]; + res_pred_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * res_pred_gain[b + STEREO_DFT_BAND_MAX]; + } } - else + } + +#ifdef DFT_STEREO_SPAR_MIXING + if ( hMdDec != 0 ) + { + float xfade_start_ns; + int16_t xfade_delay_subframes; + int16_t i_hist; + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + static FILE *f_smoothing = 0; + if ( f_smoothing == 0 ) { - side_gain[b] = hStereoDft->smooth_fac[0][b] * prev_side_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * side_gain[b]; - side_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * side_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * side_gain[b + STEREO_DFT_BAND_MAX]; - res_pred_gain[b] = hStereoDft->smooth_fac[0][b] * prev_res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * res_pred_gain[b]; - res_pred_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * res_pred_gain[b + STEREO_DFT_BAND_MAX]; + f_smoothing = fopen( "stereo_param_smoothing.txt", "w" ); } - } +#endif + + xfade_start_ns = cross_fade_start_offset / (float) output_Fs * 1000000000.f - IVAS_FB_ENC_DELAY_NS; + xfade_delay_subframes = (int16_t) ( xfade_start_ns / ( FRAME_SIZE_NS / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + + i_hist = 4 - xfade_delay_subframes; + + for ( k = 0; k < 2; k++ ) + { + for ( i_sf = k * 2; i_sf < ( k + 1 ) * 2; i_sf++ ) + { + if ( hStereoDft->first_frame ) + { + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = hMdDec->mixer_mat[i][j][b]; + } + for ( ; b < IVAS_MAX_NUM_BANDS; b++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = 0.f; + } + } + } + } + else + { + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + float beta = hStereoDft->smooth_fac[k][b]; + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = + beta * hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] + ( 1 - beta ) * hMdDec->mixer_mat_prev[i_hist][i][j][b]; + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + if ( i == 0 && j == 0 ) + { + fprintf( f_smoothing, "%d %f\n", b, beta ); + } +#endif + } + } + } + } // first_frame + + mvr2r( hMdDec->mixer_mat_prev[1][0][0], hMdDec->mixer_mat_prev[0][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[2][0][0], hMdDec->mixer_mat_prev[1][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[3][0][0], hMdDec->mixer_mat_prev[2][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[4][0][0], hMdDec->mixer_mat_prev[3][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + hMdDec->mixer_mat_prev[4][i][j][b] = hMdDec->mixer_mat[i][j][b + i_sf * IVAS_MAX_NUM_BANDS]; + } + } + } + } // i_sf + } // k ( DFT block ) + hStereoDft->first_frame = 0; + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_mat = 0; + + if ( f_mat == 0 ) + f_mat = fopen( "mixer_mat_stereo_smooth", "w" ); + + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < 12; b++ ) + { + fprintf( f_mat, "%f\n", hStereoDft->mixer_mat_smooth[i][j][b] ); + } + } + } + + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < 12; b++ ) + { + fprintf( f_mat, "%f\n", hStereoDft->mixer_mat_smooth[i][j][b + IVAS_MAX_NUM_BANDS] ); + } + } + } + } // debug output +#endif + + } // hMdDec != 0 +#endif return; } @@ -540,6 +842,10 @@ void ivas_sba_dirac_stereo_dec( Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ float output[CPE_CHANNELS][L_FRAME48k], /* i/o: output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ) { int16_t dtx_flag, fd_cng_flag; @@ -556,30 +862,86 @@ void ivas_sba_dirac_stereo_dec( hSCE = st_ivas->hSCE[0]; hCPE = st_ivas->hCPE[0]; hStereoDft = hCPE->hStereoDft; - dtx_flag = ( hSCE->hCoreCoder[0]->core_brate <= SID_2k40 ); - fd_cng_flag = ( dtx_flag && hSCE->hCoreCoder[0]->cng_type == FD_CNG ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport > 1 && !mcmasa ) + { + dtx_flag = 0; + fd_cng_flag = 0; + } + else +#endif + { + dtx_flag = ( hSCE->hCoreCoder[0]->core_brate <= SID_2k40 ); + fd_cng_flag = ( dtx_flag && hSCE->hCoreCoder[0]->cng_type == FD_CNG ); + } + memOffset = NS2SA( output_frame * FRAMES_PER_SEC, IVAS_DEC_DELAY_NS - DELAY_BWE_TOTAL_NS ); ivas_sba_dirac_stereo_config( hStereoDft->hConfig ); - hStereoDft->nbands = ivas_sba_dirac_stereo_band_config( hStereoDft->band_limits, st_ivas->hDecoderConfig->output_Fs, hStereoDft->NFFT ); + hStereoDft->nbands = ivas_sba_dirac_stereo_band_config( + hStereoDft->band_limits, + st_ivas->hDecoderConfig->output_Fs, + hStereoDft->NFFT +#ifdef DFT_STEREO_SPAR_MIXING + , + ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) +#endif + ); stereo_dft_dec_update( hStereoDft, output_frame, 1 /*st_ivas->sba_dirac_stereo_flag*/ ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport > 1 ) + { + stereo_dft_dec_analyze( hCPE, output[0], DFT, 0, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + stereo_dft_dec_analyze( hCPE, output[1], DFT, 1, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; + } + else +#endif + { + /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ + ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft, hSCE->save_hb_synth, hSCE->hCoreCoder[0]->core, output_frame, fd_cng_flag ); - /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ - ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft, hSCE->save_hb_synth, hSCE->hCoreCoder[0]->core, output_frame, fd_cng_flag ); - - /* do DFT Stereo core switching (including DFT analysis) here as CPE element was not available in SCE decoder */ - mvr2r( hSCE->save_synth, tmp_synth, hSCE->hCoreCoder[0]->L_frame ); - stereo_dft_dec_core_switching( hCPE, output[0] /*hSCE->save_output*/, hSCE->save_synth, hSCE->save_hb_synth, DFT, output_frame, 0, dtx_flag ); + /* do DFT Stereo core switching (including DFT analysis) here as CPE element was not available in SCE decoder */ + mvr2r( hSCE->save_synth, tmp_synth, hSCE->hCoreCoder[0]->L_frame ); + stereo_dft_dec_core_switching( hCPE, output[0] /*hSCE->save_output*/, hSCE->save_synth, hSCE->save_hb_synth, DFT, output_frame, 0, dtx_flag ); - /* do updates here after skipping this in SCE decoder (needs to be done after core switching) */ - updt_dec_common( hSCE->hCoreCoder[0], NORMAL_HQ_CORE, -1, hSCE->save_synth ); + /* do updates here after skipping this in SCE decoder (needs to be done after core switching) */ + updt_dec_common( hSCE->hCoreCoder[0], NORMAL_HQ_CORE, -1, hSCE->save_synth ); + } /* mapping of DirAC parameters (azimuth, elevation, diffuseness) to DFT Stereo parameters (side gain, prediction gain) */ - map_params_dirac_to_stereo( hStereoDft, st_ivas->hQMetaData, tmp_synth, DFT[0], st_ivas->ivas_format == MC_FORMAT, hSCE->hCoreCoder[0]->L_frame ); + map_params_dirac_to_stereo( + hStereoDft, + st_ivas->hQMetaData, + tmp_synth, + DFT[0], + st_ivas->ivas_format == MC_FORMAT, +#ifdef DFT_STEREO_SPAR_MIXING + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) ? hSCE->hCoreCoder[0]->L_frame : output_frame, + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) +#else + hSCE->hCoreCoder[0]->L_frame +#endif + ); + +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) + { + set_f( hStereoDft->res_pred_gain, 1.f, 3 * STEREO_DFT_BAND_MAX ); + } +#endif /* DFT Stereo upmix */ - stereo_dft_dec( hStereoDft, hCPE->hCoreCoder[0], DFT, NULL, NULL, 1 /*st_ivas->sba_dirac_stereo_flag*/ ); + stereo_dft_dec( hStereoDft, hCPE->hCoreCoder[0], DFT, NULL, NULL, 1 /*st_ivas->sba_dirac_stereo_flag*/ +#ifdef DFT_STEREO_SPAR_MIXING + , + ( st_ivas->hSpar != NULL && !mcmasa ) ? st_ivas->hSpar->hMdDec : 0, + ( st_ivas->hSpar != NULL && !mcmasa ) ? st_ivas->hSpar->hFbMixer->cross_fade_start_offset : 0, + st_ivas->hDecoderConfig->output_Fs, + st_ivas->nchan_transport +#endif + ); /* DFT synthesis */ stereo_dft_dec_synthesize( hCPE, DFT, 0, output[0], output_frame ); @@ -592,23 +954,49 @@ void ivas_sba_dirac_stereo_dec( v_multc( output[1], 0.5f, output[1], output_frame ); /* delay HB synth */ - mvr2r( hSCE->save_hb_synth + output_frame - memOffset, tmp_buf, memOffset ); - mvr2r( hSCE->save_hb_synth, hSCE->save_hb_synth + memOffset, output_frame - memOffset ); - mvr2r( hSCE->prev_hb_synth, hSCE->save_hb_synth, memOffset ); - mvr2r( tmp_buf, hSCE->prev_hb_synth, memOffset ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport == 1 ) +#endif + { + mvr2r( hSCE->save_hb_synth + output_frame - memOffset, tmp_buf, memOffset ); + mvr2r( hSCE->save_hb_synth, hSCE->save_hb_synth + memOffset, output_frame - memOffset ); + mvr2r( hSCE->prev_hb_synth, hSCE->save_hb_synth, memOffset ); + mvr2r( tmp_buf, hSCE->prev_hb_synth, memOffset ); + } if ( ( hCPE->hCoreCoder[0]->core == ACELP_CORE || hCPE->hCoreCoder[0]->last_core == ACELP_CORE ) && !fd_cng_flag ) { /* upmix ACELP BWE */ ivas_sba_dirac_stereo_compute_hb_gain( hStereoDft, hb_gain ); - ivas_sba_dirac_stereo_upmix_hb( hb_synth_stereo, hSCE->save_hb_synth, hb_gain, output_frame ); + +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport == 1 ) +#endif + { + ivas_sba_dirac_stereo_upmix_hb( + hb_synth_stereo, + hSCE->save_hb_synth, + hb_gain, + output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ), + hStereoDft +#endif + ); + } /* add HB to ACELP core */ v_add( output[0], hb_synth_stereo[0], output[0], output_frame ); v_add( output[1], hb_synth_stereo[1], output[1], output_frame ); /* apply TD Stereo Filling as is done in ICBWE */ - ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame ); + ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) +#endif + ); } return; diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index a9df2ff34b8120cb873a6402356a7b5043efcf61..cd14d7e132fe92447b0da9d48347f914422e06e6 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -631,7 +631,7 @@ void ivas_spar_md_dec_process( ivas_spar_dec_parse_md_bs( hMdDec, st0, &nB, &bw, &dtx_vad, st_ivas->hDecoderConfig->ivas_total_brate, ivas_spar_br_table_consts[hMdDec->table_idx].usePlanarCoeff, st_ivas->hQMetaData->sba_inactive_mode ); -#ifdef DEBUG_SBA_MD_DUMP +#if 0 { char f_name[100]; int16_t num_bands = nB; diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h old mode 100644 new mode 100755 index dab905865d922a5663f0a4637d63f538069020ae..907ad1b04956749d535137efdc3888eea9f7012f --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -230,6 +230,14 @@ typedef struct stereo_dft_dec_data_struct float smooth_buf[SBA_DIRAC_STEREO_NUM_BANDS][SBA_DIRAC_NRG_SMOOTH_LONG + 1]; float smooth_fac[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; +#ifdef DFT_STEREO_SPAR_MIXING + int16_t first_frame; + float mixer_mat_smooth[2][4][2 * IVAS_MAX_NUM_BANDS]; + float g_L_prev; + float g_R_prev; + const float *max_smooth_gains, *min_smooth_gains; +#endif + } STEREO_DFT_DEC_DATA, *STEREO_DFT_DEC_DATA_HANDLE; diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c old mode 100644 new mode 100755 index e2eab3f3b294c753957672e4034fa8bc3c66217c..3be3c59428fde723477569fd99f1dde3dfcd737d --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -76,7 +76,12 @@ * Local function prototypes *-------------------------------------------------------------------------*/ -static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const int32_t output_Fs ); +static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const int32_t output_Fs +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif +); static void stereo_dft_compute_td_stefi_params( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const float samp_ratio ); @@ -252,6 +257,10 @@ ivas_error stereo_dft_dec_create( const int32_t element_brate, /* i : element bitrate */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ) { STEREO_DFT_DEC_DATA_HANDLE hStereoDft_loc; @@ -293,7 +302,12 @@ ivas_error stereo_dft_dec_create( stereo_dft_config( hStereoDft_loc->hConfig, element_brate, &tmpS, &tmpS ); } - stereo_dft_dec_open( hStereoDft_loc, output_Fs ); + stereo_dft_dec_open( hStereoDft_loc, output_Fs +#ifdef DFT_STEREO_SPAR_MIXING + , + nchan_transport +#endif + ); *hStereoDft = hStereoDft_loc; @@ -310,6 +324,10 @@ ivas_error stereo_dft_dec_create( static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int32_t output_Fs /* i : output sampling rate */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ) { @@ -365,6 +383,19 @@ static void stereo_dft_dec_open( hStereoDft->hb_stefi_delay = NS2SA( output_Fs, STEREO_DFT_TD_STEFI_DELAY_NS ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( nchan_transport > 2 ) + { + hStereoDft->min_smooth_gains = min_smooth_gains2; + hStereoDft->max_smooth_gains = max_smooth_gains2; + } + else + { + hStereoDft->min_smooth_gains = min_smooth_gains1; + hStereoDft->max_smooth_gains = max_smooth_gains1; + } +#endif + /* reset DFT stereo memories */ stereo_dft_dec_reset( hStereoDft ); @@ -383,6 +414,9 @@ void stereo_dft_dec_reset( ) { int16_t i; +#ifdef DFT_STEREO_SPAR_MIXING + int16_t j, b; +#endif /*Configuration*/ set_s( hStereoDft->prm_res, hStereoDft->hConfig->prm_res, STEREO_DFT_DEC_DFT_NB ); @@ -490,6 +524,23 @@ void stereo_dft_dec_reset( hStereoDft->ipd_xfade_counter = 0; hStereoDft->ipd_xfade_prev = 0.0f; +#ifdef DFT_STEREO_SPAR_MIXING + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + for ( i = 0; i < 2; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b] = 0.0f; + } + } + } + hStereoDft->first_frame = 1; + hStereoDft->g_L_prev = 0.f; + hStereoDft->g_R_prev = 0.f; +#endif + + return; } @@ -1097,6 +1148,13 @@ void stereo_dft_dec( float *input_mem, /* i/o: mem of buffer DFT analysis */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i: SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs, /* i: Fs for delay calculation */ + int16_t nchan_transport /* i: number of transpor channels */ +#endif ) { int16_t i, k, b, N_div, stop; @@ -1104,6 +1162,9 @@ void stereo_dft_dec( float DFT_R[STEREO_DFT32MS_N_MAX]; float DFT_PRED_RES[STEREO_DFT32MS_N_32k]; float *pDFT_DMX; +#ifdef DFT_STEREO_SPAR_MIXING + float *pDFT_DMX1; +#endif float *pDFT_RES; float g, tmp; float *pPredGain; @@ -1127,6 +1188,10 @@ void stereo_dft_dec( HANDLE_FD_CNG_COM hFdCngCom = hFdCngDec->hFdCngCom; int16_t *cna_seed = &( hFdCngCom->seed ); +#ifdef DFT_STEREO_SPAR_MIXING + float DFT_W, DFT_Y; +#endif + output_frame = (int16_t) ( st0->output_Fs / FRAMES_PER_SEC ); /*------------------------------------------------------------------* @@ -1168,7 +1233,14 @@ void stereo_dft_dec( /* Smoothing for the current frame */ if ( sba_dirac_stereo_flag ) { - ivas_sba_dirac_stereo_smooth_parameters( hStereoDft ); + ivas_sba_dirac_stereo_smooth_parameters( hStereoDft +#ifdef DFT_STEREO_SPAR_MIXING + , + hMdDec, + cross_fade_start_offset, + output_Fs +#endif + ); } else { @@ -1193,6 +1265,13 @@ void stereo_dft_dec( { pDFT_DMX = DFT[0] + k * STEREO_DFT32MS_N_MAX; pDFT_RES = DFT[1] + k * STEREO_DFT32MS_N_MAX; +#ifdef DFT_STEREO_SPAR_MIXING + pDFT_DMX1 = 0; + if ( nchan_transport > 1 ) + { + pDFT_DMX1 = DFT[1] + k * STEREO_DFT32MS_N_MAX; + } +#endif /*Apply Stereo*/ if ( hStereoDft->hConfig->dmx_active ) @@ -1236,7 +1315,14 @@ void stereo_dft_dec( hStereoDft->past_DMX_pos = ( hStereoDft->past_DMX_pos + STEREO_DFT_PAST_MAX - 1 ) % STEREO_DFT_PAST_MAX; } +#ifdef DFT_STEREO_SPAR_MIXING + if ( !( sba_dirac_stereo_flag && nchan_transport >= 2 ) ) + { + stereo_dft_generate_res_pred( hStereoDft, samp_ratio, pDFT_DMX, DFT_PRED_RES, pPredGain, k, DFT[1] + k * STEREO_DFT32MS_N_MAX, &stop, st0->bfi ); + } +#else stereo_dft_generate_res_pred( hStereoDft, samp_ratio, pDFT_DMX, DFT_PRED_RES, pPredGain, k, DFT[1] + k * STEREO_DFT32MS_N_MAX, &stop, st0->bfi ); +#endif if ( hStereoDft->res_cod_band_max > 0 ) { @@ -1308,7 +1394,11 @@ void stereo_dft_dec( #endif /* No residual coding in inactive frames, instead pDFT_RES is used for the second channel */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !( sba_dirac_stereo_flag && hMdDec ) ) +#else if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata ) +#endif { /*filter non-coded frequencies. It removes some MDCT frequency aliasing*/ for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) @@ -1358,10 +1448,206 @@ void stereo_dft_dec( } } } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( sba_dirac_stereo_flag && hMdDec ) + { +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_dmx = 0, *f_res = 0, *f_dmx1 = 0; + + if ( f_dmx == 0 ) + { + f_dmx = fopen( "dft_dmx.txt", "w" ); + f_res = fopen( "dft_pred_res.txt", "w" ); + if ( nchan_transport >= 2 ) + { + f_dmx1 = fopen( "dft_dmx1.txt", "w" ); + } + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, DFT_PRED_RES[2 * i], DFT_PRED_RES[2 * i + 1] ); + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + } +#endif + + if ( nchan_transport == 1 ) + { + if ( b == 0 ) + { + i = 0; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + ( hStereoDft->mixer_mat_smooth[0][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + ( hStereoDft->mixer_mat_smooth[1][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + ( hStereoDft->mixer_mat_smooth[0][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + ( hStereoDft->mixer_mat_smooth[1][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + } + else if ( nchan_transport >= 2 ) + { + if ( b == 0 ) + { + i = 0; + + DFT_W = pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + pDFT_DMX1[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + pDFT_DMX1[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) + { + DFT_W = pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + pDFT_DMX1[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + pDFT_DMX1[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + } + else + { + assert( "nhcan_transport must be 1 or 1!" ); + } + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_L = 0, *f_R = 0; + + if ( f_L == 0 ) + { + f_L = fopen( "dft_L.txt", "w" ); + f_R = fopen( "dft_R.txt", "w" ); + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_L, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + fprintf( f_R, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + } + for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_L, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + fprintf( f_R, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + } + } +#endif + } +#endif else { + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_dmx = 0, *f_res = 0, *f_dmx1 = 0; + + if ( f_dmx == 0 ) + { + f_dmx = fopen( "dft_dmx.txt", "w" ); + f_res = fopen( "dft_pred_res.txt", "w" ); + if ( nchan_transport == 2 ) + { + f_dmx1 = fopen( "dft_dmx1.txt", "w" ); + } + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, DFT_PRED_RES[2 * i], DFT_PRED_RES[2 * i + 1] ); + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + } +#endif + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) { + tmp = g * pDFT_DMX[2 * i] + pDFT_RES[2 * i] + DFT_PRED_RES[2 * i]; DFT_L[2 * i] = pDFT_DMX[2 * i] + tmp; diff --git a/lib_dec/ivas_stereo_switching_dec.c b/lib_dec/ivas_stereo_switching_dec.c index 44b47bc5cbb99c5406ec688c9678886c1dcd2841..c8b373ce5c1429c5f9be8611a6c221b774a6b732 100644 --- a/lib_dec/ivas_stereo_switching_dec.c +++ b/lib_dec/ivas_stereo_switching_dec.c @@ -421,7 +421,12 @@ ivas_error stereo_memory_dec( deallocate_CoreCoder( hCPE->hCoreCoder[1] ); /* allocate DFT stereo data structure */ - if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 ) ) != IVAS_ERR_OK ) + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 +#ifdef DFT_STEREO_SPAR_MIXING + , + nchan_transport +#endif + ) ) != IVAS_ERR_OK ) { return error; } @@ -1115,7 +1120,14 @@ void synchro_synthesis( delay_signal( output[0], output_frame, hCPE->hCoreCoder[0]->hTcxDec->FBTCXdelayBuf, delay_diff ); } +#ifdef DFT_STEREO_SPAR_MIXING + if ( hCPE->element_mode != IVAS_CPE_MDCT ) + { + ivas_post_proc( NULL, hCPE, 0, output[0], output, output_frame, sba_dirac_stereo_flag ); + } +#else ivas_post_proc( NULL, hCPE, 0, output[0], output, output_frame, sba_dirac_stereo_flag ); +#endif /* zero padding in order to synchronize the upmixed DFT stereo synthesis with the TD/MDCT stereo synthesis */ for ( n = 0; n < hCPE->nchan_out; n++ ) @@ -1182,7 +1194,10 @@ void synchro_synthesis( /*----------------------------------------------------------------* * TD/MDCT stereo synchro *----------------------------------------------------------------*/ - +#ifdef DFT_STEREO_SPAR_MIXING + if ( sba_dirac_stereo_flag ) + return; +#endif if ( hCPE->element_mode == IVAS_CPE_TD || hCPE->element_mode == IVAS_CPE_MDCT ) { /* handling of DFT->TD switching */