From b3a78e75cf7f487903170dbb6ac977082105b275 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Thu, 15 Dec 2022 14:35:12 +0100 Subject: [PATCH 01/20] implement delay harmonization and complexity reduction for SBA-to-stereo processing in the decoder --- lib_com/delay_comp.c | 12 +- lib_com/ivas_cnst.h | 6 + lib_com/ivas_prot.h | 27 +- lib_com/options.h | 6 + lib_com/prot.h | 4 + lib_dec/ivas_core_dec.c | 23 ++ lib_dec/ivas_cpe_dec.c | 54 ++- lib_dec/ivas_dec.c | 29 +- lib_dec/ivas_init_dec.c | 43 +- lib_dec/ivas_mct_dec.c | 32 ++ lib_dec/ivas_post_proc.c | 11 + lib_dec/ivas_rom_dec.c | 22 +- lib_dec/ivas_rom_dec.h | 9 +- lib_dec/ivas_sba_dec.c | 4 + lib_dec/ivas_sba_dirac_stereo_dec.c | 605 +++++++++++++++++++++++----- lib_dec/ivas_spar_md_dec.c | 2 +- lib_dec/ivas_stat_dec.h | 12 + lib_dec/ivas_stereo_dft_dec.c | 308 +++++++++++++- lib_dec/ivas_stereo_switching_dec.c | 18 +- lib_dec/lib_dec.c | 4 + lib_enc/lib_enc.c | 4 + 21 files changed, 1093 insertions(+), 142 deletions(-) diff --git a/lib_com/delay_comp.c b/lib_com/delay_comp.c index 4bcac1555e..c220184aaa 100644 --- a/lib_com/delay_comp.c +++ b/lib_com/delay_comp.c @@ -56,7 +56,11 @@ int32_t get_delay( const IVAS_FORMAT ivas_format, /* i : IVAS format */ HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ - const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ + const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t sba_dirac_stereo_flag +#endif ) { int32_t delay = 0; @@ -99,6 +103,12 @@ int32_t get_delay( { delay += IVAS_FB_DEC_DELAY_NS; } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( sba_dirac_stereo_flag ) + { + delay += 0; + } +#endif /* compensate for Binaural renderer HRTF delay */ { diff --git a/lib_com/ivas_cnst.h b/lib_com/ivas_cnst.h index 95ce270957..422729dfd0 100644 --- a/lib_com/ivas_cnst.h +++ b/lib_com/ivas_cnst.h @@ 
-574,7 +574,13 @@ typedef enum #define NO_SYMB_GR_PRED_G 8 #define STEREO_DFT_RES_BW_MAX 66 /*Maximum number of bin for residual signal in each frame (res_cod_band_max == 6 in 48kHz)*/ + +#ifdef DFT_STEREO_SPAR_MIXING +#define SBA_DIRAC_STEREO_NUM_BANDS 12 +#else #define SBA_DIRAC_STEREO_NUM_BANDS 5 +#endif + #define SBA_DIRAC_NRG_SMOOTH_LONG 10 #define SBA_DIRAC_NRG_SMOOTH_SHORT 3 diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 6cb365bfad..8885c60f3e 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1016,6 +1016,10 @@ ivas_error stereo_dft_dec_create( const int32_t element_brate, /* i : element bitrate */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ); void stereo_dft_dec_reset( @@ -1055,10 +1059,21 @@ void stereo_dft_dec_synthesize( void stereo_dft_dec( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ Decoder_State *st0, /* i/o: decoder state structure */ - float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ +#ifdef DFT_STEREO_SPAR_MIXING + float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ +#else + float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ +#endif float *input_mem, /* i/o: mem of buffer DFT analysis */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs, /* i: Fs for delay calculation */ + int16_t nchan_transport /* i: number of transport channels */ +#endif ); void stereo_dft_res_ecu( @@ -3174,6 +3189,10 @@ void ivas_sba_dirac_stereo_dec( Decoder_Struct *st_ivas, /* i/o: IVAS decoder 
structure */ float output[CPE_CHANNELS][L_FRAME48k], /* o : output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ); void ivas_sba_dirac_stereo_config( @@ -3182,6 +3201,12 @@ void ivas_sba_dirac_stereo_config( void ivas_sba_dirac_stereo_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i/o: encoder DFT stereo handle */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs /* i: Fs for delay calculation */ +#endif ); ivas_error ivas_sba_get_hoa_dec_matrix( diff --git a/lib_com/options.h b/lib_com/options.h index b462e5ec50..55034562c4 100755 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -144,6 +144,7 @@ #define DISABLE_ADAP_RES_COD_TMP /* temporary fix for IVAS-403, disables adaptive residual coding */ /*#define ITD_WINNER_GAIN_MODIFY */ /* ITD optimization - WORK IN PROGRESS */ /*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ + #define FIX_I1_113 /* under review : MCT bit distribution optimization for SBA high bitrates*/ #define FIX_124_DONT_ALLOC_PLCINFO_IN_IVAS /* Issue 124: do not allocate unused plc struct in IVAS modes which is only used in EVS mono */ #define SBA_BR_SWITCHING_2 /* Issue 114: Changes for sba bit rate switching with reconfigurations*/ @@ -163,7 +164,12 @@ #define IMPROVE_CMDLINE_ROBUSTNESS /* Issue 233: Improve robustness of command-line parameters */ #define FIX_MDCT_AND_MC_MONO_ISSUES /* Issue 242: Fix some issues with TCX-LTP and delay alignement for mono output */ +#define DFT_STEREO_SPAR_MIXING /* For SBA to stereo output, perform SPAR upmix in DFT domain */ +#ifdef DFT_STEREO_SPAR_MIXING +/*#define DFT_STEREO_SPAR_MIXING_DEBUG*/ /* more debugging output for DFT_STEREO_SPAR_MIXING */ +#define DISABLE_RES_CHANNELS_MCT /* decode only W 
and residual for Y when outputting to stereo */ +#endif /* ################## End DEVELOPMENT switches ######################### */ /* clang-format on */ #endif diff --git a/lib_com/prot.h b/lib_com/prot.h index 973f15ef5a..525a28700d 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -719,6 +719,10 @@ int32_t get_delay( HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t sba_dirac_stereo_flag +#endif ); void decision_matrix_enc( diff --git a/lib_dec/ivas_core_dec.c b/lib_dec/ivas_core_dec.c index e4f587a16e..671c5c5050 100644 --- a/lib_dec/ivas_core_dec.c +++ b/lib_dec/ivas_core_dec.c @@ -464,7 +464,11 @@ ivas_error ivas_core_dec( *---------------------------------------------------------------------*/ /* save synth and output in case of SBA DirAC stereo output as core switching is done outside of core decoder */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( sba_dirac_stereo_flag && st->element_mode != IVAS_CPE_MDCT && !( st->core_brate == SID_2k40 && st->cng_type == FD_CNG ) ) +#else if ( sba_dirac_stereo_flag && !( st->core_brate == SID_2k40 && st->cng_type == FD_CNG ) ) +#endif { mvr2r( synth[n], hSCE->save_synth, output_frame ); } @@ -683,7 +687,11 @@ ivas_error ivas_core_dec( } } +#ifdef DFT_STEREO_SPAR_MIXING + if ( sba_dirac_stereo_flag && st->element_mode != IVAS_CPE_MDCT ) +#else if ( sba_dirac_stereo_flag ) +#endif { /* for SBA DirAC stereo output DFT Stereo core switching and updates are done in ivas_sba_dirac_stereo_dec() as hCPE is not available at this point */ break; @@ -698,13 +706,28 @@ ivas_error ivas_core_dec( if ( st->element_mode != IVAS_CPE_DFT ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( st->element_mode != IVAS_CPE_MDCT || sba_dirac_stereo_flag ) +#else if ( st->element_mode != IVAS_CPE_MDCT ) +#endif { +#ifdef DFT_STEREO_SPAR_MIXING + 
ivas_post_proc( hSCE, hCPE, n, synth[n], NULL, output_frame, sba_dirac_stereo_flag ); +#else ivas_post_proc( hSCE, hCPE, n, synth[n], NULL, output_frame, 0 ); +#endif } /* update OLA buffers - needed for switching to DFT stereo */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( !sba_dirac_stereo_flag ) + { + stereo_td2dft_update( hCPE, n, output[n], synth[n], hb_synth[n], output_frame ); + } +#else stereo_td2dft_update( hCPE, n, output[n], synth[n], hb_synth[n], output_frame ); +#endif } else /* IVAS_CPE_DFT */ { diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c index 41b8c339a2..2b4e0bcfa5 100644 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -361,7 +361,11 @@ ivas_error ivas_cpe_dec( if ( hCPE->element_mode != IVAS_CPE_DFT || ( hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( error = ivas_core_dec( st_ivas, NULL, hCPE, st_ivas->hMCT, n_channels, output, outputHB, NULL, st_ivas->sba_dirac_stereo_flag ) ) != IVAS_ERR_OK ) +#else if ( ( error = ivas_core_dec( st_ivas, NULL, hCPE, st_ivas->hMCT, n_channels, output, outputHB, NULL, 0 ) ) != IVAS_ERR_OK ) +#endif { return error; } @@ -406,7 +410,12 @@ ivas_error ivas_cpe_dec( } else { - stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 ); + stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 +#ifdef DFT_STEREO_SPAR_MIXING + , + 0,0,0,0 +#endif + ); } /* synthesis iFFT */ @@ -460,8 +469,12 @@ ivas_error ivas_cpe_dec( /*----------------------------------------------------------------* * Synthesis synchronization between CPE modes *----------------------------------------------------------------*/ - - synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); +#ifdef DFT_STEREO_SPAR_MIXING + if (!st_ivas->sba_dirac_stereo_flag) +#endif + { + synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); + } #ifdef 
FIX_MDCT_AND_MC_MONO_ISSUES if ( hCPE->element_mode == IVAS_CPE_MDCT && hCPE->nchan_out == 1 && ( is_DTXrate( ivas_total_brate ) == 0 || ( is_DTXrate( ivas_total_brate ) == 1 && is_DTXrate( st_ivas->hDecoderConfig->last_ivas_total_brate ) == 0 ) ) ) @@ -676,13 +689,32 @@ ivas_error create_cpe_dec( } } +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_dirac_stereo_flag && st_ivas->nchan_transport >= 2 ) + { + if ( ( hCPE->input_mem[2] = (float *) count_malloc( sizeof( float ) * NS2SA( output_Fs, STEREO_DFT32MS_OVL_NS ) ) ) == NULL ) + { + return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DFT stereo memory\n" ) ); + } + set_zero( hCPE->input_mem[2], NS2SA( output_Fs, STEREO_DFT32MS_OVL_NS ) ); + } + else + { + hCPE->input_mem[2] = NULL; + } +#endif + /*-----------------------------------------------------------------* * CoreCoder, 2 instances: allocate and initialize *-----------------------------------------------------------------*/ for ( n = 0; n < CPE_CHANNELS; n++ ) { - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { /* for SBA DirAC stereo output CPE element is only used for upmix, core coder is found in SCE element used for core decoding */ break; @@ -716,7 +748,12 @@ ivas_error create_cpe_dec( if ( hCPE->element_mode == IVAS_CPE_DFT || st_ivas->sba_dirac_stereo_flag ) { - if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag ) ) != IVAS_ERR_OK ) + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + , + st_ivas->nchan_transport +#endif + ) ) != IVAS_ERR_OK ) { return error; } @@ -908,6 +945,13 @@ void destroy_cpe_dec( } count_free( hCPE->input_mem_BPF[0] ); hCPE->input_mem_BPF[0] = NULL; +#ifdef DFT_STEREO_SPAR_MIXING + if ( hCPE->input_mem[2] != NULL ) + { + 
count_free( hCPE->input_mem[2] ); + hCPE->input_mem[2] = NULL; + } +#endif } if ( hCPE->hStereoCng != NULL ) diff --git a/lib_dec/ivas_dec.c b/lib_dec/ivas_dec.c index c3deb7103f..2286dc0b4b 100644 --- a/lib_dec/ivas_dec.c +++ b/lib_dec/ivas_dec.c @@ -216,7 +216,11 @@ ivas_error ivas_dec( } } } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT ) || st_ivas->sba_dirac_stereo_flag ) +#else else if ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT ) +#endif { set_s( nb_bits_metadata, 0, MAX_SCE ); @@ -288,7 +292,20 @@ ivas_error ivas_dec( if ( st_ivas->sba_dirac_stereo_flag ) { nchan_remapped = CPE_CHANNELS; - ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame ); + +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_mode == SBA_MODE_SPAR ) + { + ivas_spar_dec_gen_umx_mat( st_ivas->hSpar->hMdDec, st_ivas->nchan_transport, IVAS_MAX_NUM_BANDS, st_ivas->bfi ); + } +#endif + + ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + st_ivas->ivas_format == MC_FORMAT +#endif + ); } else if ( st_ivas->ivas_format == MASA_FORMAT && ivas_total_brate < MASA_STEREO_MIN_BITRATE && ( ivas_total_brate > IVAS_SID_5k2 || ( ivas_total_brate <= IVAS_SID_5k2 && st_ivas->nCPE > 0 && st_ivas->hCPE[0]->nchan_out == 1 ) ) ) { @@ -343,6 +360,9 @@ ivas_error ivas_dec( } } else /* SBA_MODE_SPAR */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( ! 
st_ivas->sba_dirac_stereo_flag ) +#endif { ivas_sba_upmixer_renderer( st_ivas, output, output_frame ); /* Note: ivas_sba_linear_renderer() or ivas_dirac_dec() are called internally */ } @@ -505,7 +525,12 @@ ivas_error ivas_dec( if ( st_ivas->sba_dirac_stereo_flag ) /* use the flag to trigger the DFT upmix */ { - ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame ); + ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + 1 +#endif + ); } /* HP filtering */ diff --git a/lib_dec/ivas_init_dec.c b/lib_dec/ivas_init_dec.c index 3855b296c3..4904803e91 100644 --- a/lib_dec/ivas_init_dec.c +++ b/lib_dec/ivas_init_dec.c @@ -838,6 +838,9 @@ ivas_error ivas_init_decoder( ivas_dirac_config_bands( band_grouping, IVAS_MAX_NUM_BANDS, (int16_t) ( st_ivas->hDecoderConfig->output_Fs * INV_CLDFB_BANDWIDTH + 0.5f ), st_ivas->hSpar->dirac_to_spar_md_bands, st_ivas->hQMetaData->useLowerBandRes, st_ivas->hSpar->enc_param_start_band, 0 ); } +#ifdef DFT_STEREO_SPAR_MIXING + st_ivas->sba_dirac_stereo_flag = ( output_config == AUDIO_CONFIG_STEREO ); +#endif } else { @@ -905,7 +908,11 @@ ivas_error ivas_init_decoder( } /* create CPE element for DFT Stereo like upmix */ - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { if ( ( error = create_cpe_dec( st_ivas, cpe_id, ivas_total_brate / ( st_ivas->nSCE + st_ivas->nCPE ) ) ) != IVAS_ERR_OK ) { @@ -1282,7 +1289,11 @@ ivas_error ivas_init_decoder( } /* CLDFB Interpolation weights */ - if ( st_ivas->ivas_format == SBA_FORMAT && st_ivas->sba_mode == SBA_MODE_SPAR ) + if ( st_ivas->ivas_format == SBA_FORMAT && st_ivas->sba_mode == SBA_MODE_SPAR +#ifdef DFT_STEREO_SPAR_MIXING + && !st_ivas->sba_dirac_stereo_flag +#endif + ) { ivas_spar_get_cldfb_gains( st_ivas->hSpar, st_ivas->cldfbAnaDec[0], st_ivas->cldfbSynDec[0], hDecoderConfig ); } @@ -1833,19 +1844,29 @@ void 
ivas_init_dec_get_num_cldfb_instances( case RENDERER_BINAURAL_FASTCONV_ROOM: if ( st_ivas->sba_mode == SBA_MODE_SPAR ) { - *numCldfbAnalyses = st_ivas->hSpar->hFbMixer->fb_cfg->num_in_chans; - - if ( st_ivas->hOutSetup.is_loudspeaker_setup && st_ivas->renderer_type == RENDERER_DIRAC ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_dirac_stereo_flag ) { - *numCldfbSyntheses = st_ivas->hOutSetup.nchan_out_woLFE; - } - else if ( st_ivas->hDecoderConfig->output_config == AUDIO_CONFIG_FOA ) - { - *numCldfbSyntheses = st_ivas->hSpar->hFbMixer->fb_cfg->num_out_chans; + *numCldfbAnalyses = 0; + *numCldfbSyntheses = 0; } else +#endif { - *numCldfbSyntheses = MAX_OUTPUT_CHANNELS; + *numCldfbAnalyses = st_ivas->hSpar->hFbMixer->fb_cfg->num_in_chans; + + if ( st_ivas->hOutSetup.is_loudspeaker_setup && st_ivas->renderer_type == RENDERER_DIRAC ) + { + *numCldfbSyntheses = st_ivas->hOutSetup.nchan_out_woLFE; + } + else if ( st_ivas->hDecoderConfig->output_config == AUDIO_CONFIG_FOA ) + { + *numCldfbSyntheses = st_ivas->hSpar->hFbMixer->fb_cfg->num_out_chans; + } + else + { + *numCldfbSyntheses = MAX_OUTPUT_CHANNELS; + } } } else if ( st_ivas->mc_mode == MC_MODE_PARAMMC ) diff --git a/lib_dec/ivas_mct_dec.c b/lib_dec/ivas_mct_dec.c index b4a4da3e3c..71b018a349 100644 --- a/lib_dec/ivas_mct_dec.c +++ b/lib_dec/ivas_mct_dec.c @@ -158,6 +158,20 @@ ivas_error ivas_mct_dec( /* MCT core decoder */ ivas_mct_core_dec( hMCT, st_ivas->hCPE, nCPE, output ); +#ifdef DISABLE_RES_CHANNELS_MCT + /* for sba to stereo output disable any further processing for TCs > 2 as it is not needed*/ + if ( st_ivas->sba_dirac_stereo_flag ) + { + for ( cpe_id = 1; cpe_id < nCPE; cpe_id++ ) + { + for ( n = 0; n < CPE_CHANNELS; n++ ) + { + st_ivas->hCPE[cpe_id]->hCoreCoder[n]->mct_chan_mode = MCT_CHAN_MODE_IGNORE; + } + } + } +#endif + /* MCT reconstruction and CoreCoder updates */ for ( cpe_id = 0; cpe_id < nCPE; cpe_id++ ) { @@ -217,8 +231,19 @@ ivas_error ivas_mct_dec( break; } +#ifdef 
DFT_STEREO_SPAR_MIXING + if ( st_ivas->sba_dirac_stereo_flag ) + { + ivas_post_proc( NULL, hCPE, n, synth[n], NULL, output_frame, 1 ); + } +#endif + /* Postprocessing for ACELP/MDCT core switching and synchronization */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( error = core_switching_post_dec( sts[n], synth[n], output[cpe_id * CPE_CHANNELS + n], hCPE->output_mem[1], 0, output_frame, 0 /*core_switching_flag*/, st_ivas->sba_dirac_stereo_flag, -1, hCPE->last_element_mode ) ) != IVAS_ERR_OK ) +#else if ( ( error = core_switching_post_dec( sts[n], synth[n], output[cpe_id * CPE_CHANNELS + n], hCPE->output_mem[1], 0, output_frame, 0 /*core_switching_flag*/, 0, -1, hCPE->last_element_mode ) ) != IVAS_ERR_OK ) +#endif { return error; } @@ -239,7 +264,14 @@ ivas_error ivas_mct_dec( /* synthesis synchronization between stereo modes */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( !st_ivas->sba_dirac_stereo_flag ) + { + synchro_synthesis( ivas_total_brate, hCPE, output + cpe_id * CPE_CHANNELS, output_frame, 0 ); + } +#else synchro_synthesis( ivas_total_brate, hCPE, output + cpe_id * CPE_CHANNELS, output_frame, 0 ); +#endif #ifdef DEBUG_PLOT for ( n = 0; n < CPE_CHANNELS; n++ ) diff --git a/lib_dec/ivas_post_proc.c b/lib_dec/ivas_post_proc.c index 1dce7978c2..e77fdaa046 100644 --- a/lib_dec/ivas_post_proc.c +++ b/lib_dec/ivas_post_proc.c @@ -81,7 +81,11 @@ void ivas_post_proc( output_Fs = sts[0]->output_Fs; +#ifdef DFT_STEREO_SPAR_MIXING + if ( ( sts[n]->element_mode != IVAS_CPE_DFT && !( sba_dirac_stereo_flag && sts[n]->element_mode != IVAS_CPE_MDCT ) ) || ( sts[n]->element_mode == IVAS_CPE_DFT && hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) +#else if ( ( sts[n]->element_mode != IVAS_CPE_DFT && !sba_dirac_stereo_flag ) || ( sts[n]->element_mode == IVAS_CPE_DFT && hCPE->nchan_out == 1 && hCPE->hStereoDft->hConfig->res_cod_mode == STEREO_DFT_RES_COD_OFF ) ) +#endif { if ( sts[n]->hTcxLtpDec != NULL ) { @@ -102,6 +106,13 @@ void 
ivas_post_proc( mvr2r( sts[n]->prev_synth_buffer, sts[n]->hTcxDec->FBTCXdelayBuf, 0 ); mvr2r( sts[n]->delay_buf_out, sts[n]->hTcxDec->FBTCXdelayBuf + 0, delay_comp ); } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( sba_dirac_stereo_flag && sts[n]->element_mode == IVAS_CPE_MDCT ) + { + int16_t numZeros = (int16_t)(NS2SA( output_Fs, N_ZERO_MDCT_NS )); + mvr2r( sts[n]->hHQ_core->old_out + numZeros, sts[n]->hTcxDec->FBTCXdelayBuf, delay_comp ); + } +#endif tcx_ltp_post( sts[n], hTcxLtpDec, sts[n]->core, output_frame, NS2SA( output_Fs, ACELP_LOOK_NS ) + delay_comp, synth, sts[n]->hTcxDec->FBTCXdelayBuf ); } diff --git a/lib_dec/ivas_rom_dec.c b/lib_dec/ivas_rom_dec.c index c79bc38019..72b26319a0 100644 --- a/lib_dec/ivas_rom_dec.c +++ b/lib_dec/ivas_rom_dec.c @@ -231,11 +231,31 @@ const int16_t cna_init_bands[MAX_CNA_NBANDS + 1] = 1, 4, 14, 33, 67, 171, 320 }; +#ifdef DFT_STEREO_SPAR_MIXING +const float max_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.98f, 0.97f, 0.95f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f, 0.9f +}; + +const float min_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f +}; + +const float max_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.75f, 0.9f, 0.9f, 0.9f}; + +const float min_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS] = +{ + 0.5f, 0.5f, 0.5, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.3f, 0.3f, 0.3f +}; +#else const float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS] = { 0.98f, 0.97f, 0.95f, 0.9f, 0.9f }; - +#endif /*------------------------------------------------------------------------- * ECLVQ Stereo ROM tables diff --git a/lib_dec/ivas_rom_dec.h b/lib_dec/ivas_rom_dec.h index febc60e2e5..bc69159f83 100644 --- a/lib_dec/ivas_rom_dec.h +++ b/lib_dec/ivas_rom_dec.h @@ -70,8 +70,15 @@ extern const float dft_win232ms_48k[450]; extern const float dft_win_8k[70]; extern const int16_t cna_init_bands[MAX_CNA_NBANDS + 1]; -extern const 
float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS]; +#ifdef DFT_STEREO_SPAR_MIXING +extern const float min_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float max_smooth_gains1[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float min_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS]; +extern const float max_smooth_gains2[SBA_DIRAC_STEREO_NUM_BANDS]; +#else +extern const float max_smooth_gains[SBA_DIRAC_STEREO_NUM_BANDS]; +#endif /*----------------------------------------------------------------------------------* * ECLVQ Stereo ROM tables diff --git a/lib_dec/ivas_sba_dec.c b/lib_dec/ivas_sba_dec.c index 4e96c041bd..60aeb2286c 100644 --- a/lib_dec/ivas_sba_dec.c +++ b/lib_dec/ivas_sba_dec.c @@ -797,6 +797,10 @@ ivas_error ivas_sba_dec_reconfigure( sba_order_internal = min( st_ivas->sba_analysis_order, IVAS_MAX_SBA_ORDER ); ivas_spar_config( hDecoderConfig->ivas_total_brate, sba_order_internal, &st_ivas->nchan_transport, &st_ivas->nSCE, &st_ivas->nCPE, &st_ivas->hSpar->core_nominal_brate, st_ivas->sid_format ); +#ifdef DFT_STEREO_SPAR_MIXING + st_ivas->sba_dirac_stereo_flag = ( hDecoderConfig->output_config == AUDIO_CONFIG_STEREO ); +#endif + if ( ( error = ivas_dirac_sba_config( st_ivas->hQMetaData, &st_ivas->nchan_transport, &st_ivas->nSCE, &st_ivas->nCPE, &st_ivas->element_mode_init, hDecoderConfig->ivas_total_brate, st_ivas->sba_analysis_order, st_ivas->sba_mode, IVAS_MAX_NUM_BANDS - SPAR_DIRAC_SPLIT_START_BAND ) ) != IVAS_ERR_OK ) { return error; diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index e4ab91a7d7..bb3b4825f4 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -91,6 +91,10 @@ static int16_t ivas_sba_dirac_stereo_band_config( int16_t *band_limits, /* o : DFT band limits */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t NFFT /* i : analysis/synthesis window length */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t spar_flag /* i : SPAR or DirAC 
band grouping */ +#endif ) { int16_t i; @@ -98,6 +102,18 @@ static int16_t ivas_sba_dirac_stereo_band_config( int16_t nbands, num_cldfb_bands; nbands = SBA_DIRAC_STEREO_NUM_BANDS; + +#ifdef DFT_STEREO_SPAR_MIXING + if ( spar_flag ) + { + nbands = IVAS_MAX_NUM_BANDS; + } + else + { + nbands = 5; + } +#endif + num_cldfb_bands = (int16_t) ( output_Fs * INV_CLDFB_BANDWIDTH + 0.5f ); bins_per_cldfb_band = NFFT / ( 2 * num_cldfb_bands ); @@ -105,7 +121,18 @@ static int16_t ivas_sba_dirac_stereo_band_config( band_limits[0] = 1; for ( i = 1; i < nbands; i++ ) { +#ifdef DFT_STEREO_SPAR_MIXING + if ( spar_flag ) + { + band_limits[i] = DirAC_band_grouping_12[i] * bins_per_cldfb_band; + } + else + { + band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; + } +#else band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; +#endif } band_limits[nbands] = NFFT / 2; @@ -153,38 +180,47 @@ static float get_panning( *-------------------------------------------------------------------*/ static void map_params_dirac_to_stereo( - STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ + STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const IVAS_QMETADATA_HANDLE hQMetaData, /* i : frame of MASA q_metadata */ float synth[], /* i : decoded downmix signal */ - float DFT[STEREO_DFT_BUF_MAX], /* i/o: DFT buffer */ - const uint8_t b_wide_panning, /* i : flag indicating wider panning */ - const int16_t L_frame /* i : core signal length */ - + float DFT[STEREO_DFT_BUF_MAX], /* i/o: DFT buffer */ + const uint8_t b_wide_panning, /* i : flag indicating wider panning */ + const int16_t L_frame /* i : core signal length */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ) { - int16_t i, b, k, block; + int16_t i, b, k; int16_t k_offset; - int16_t nbands, nBlocks, block_len; + int16_t nbands, nBlocks; + int16_t block; + int16_t block_len; int16_t azimuth[MAX_PARAM_SPATIAL_SUBFRAMES][SBA_DIRAC_STEREO_NUM_BANDS]; int16_t 
elevation[MAX_PARAM_SPATIAL_SUBFRAMES][SBA_DIRAC_STEREO_NUM_BANDS]; float diffuseness[SBA_DIRAC_STEREO_NUM_BANDS]; + float block_nrg[MAX_PARAM_SPATIAL_SUBFRAMES]; + float nrg_norm1, nrg_norm2; + float *pSynth; float surrCoh[SBA_DIRAC_STEREO_NUM_BANDS]; + float *pDFT; float subframe_band_nrg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; float smooth_long_avg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; float smooth_short_avg[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; - float block_nrg[MAX_PARAM_SPATIAL_SUBFRAMES]; - float nrg_norm1, nrg_norm2; + float *side_gain, *res_pred_gain; - float *pSynth, *pDFT; IVAS_QDIRECTION *q_direction; - nBlocks = MAX_PARAM_SPATIAL_SUBFRAMES; +#ifdef DFT_STEREO_SPAR_MIXING + nbands = !mcmasa ? SBA_DIRAC_STEREO_NUM_BANDS : 5; +#else nbands = SBA_DIRAC_STEREO_NUM_BANDS; +#endif k_offset = STEREO_DFT_OFFSET; side_gain = hStereoDft->side_gain + k_offset * STEREO_DFT_BAND_MAX; res_pred_gain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; - q_direction = &( hQMetaData->q_direction[0] ); /* gain smoothing factor */ @@ -233,99 +269,109 @@ static void map_params_dirac_to_stereo( } /* apply upper bounds depending on band */ +#ifdef DFT_STEREO_SPAR_MIXING + hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) ); + hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); +#else hStereoDft->smooth_fac[0][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ); hStereoDft->smooth_fac[1][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ); +#endif } pDFT += STEREO_DFT32MS_N_MAX; } - /* calculate block energies for side gain weighting (combine angles of 2 DirAC blocks to side gain for 1 DFT Stereo subframe; 4 blocks and 2 subframes overall) */ - pSynth = synth; - block_len = L_frame / nBlocks; - for ( block = 0; block < nBlocks; block++ ) - { - block_nrg[block] = 0.f; - for ( i = 
0; i < block_len; i++ ) - { - block_nrg[block] += pSynth[i] * pSynth[i]; - } - block_nrg[block] = sqrtf( block_nrg[block] ); - pSynth += block_len; - } - nrg_norm1 = 1 / ( block_nrg[0] + block_nrg[1] + EPSILON ); - nrg_norm2 = 1 / ( block_nrg[2] + block_nrg[3] + EPSILON ); - - /* extract DirAC parameters from metadata */ - for ( b = 0; b < nbands; b++ ) +#ifdef DFT_STEREO_SPAR_MIXING + if ( mcmasa ) +#endif { - diffuseness[b] = 1.0f - q_direction->band_data[b].energy_ratio[0]; - if ( hQMetaData->surcoh_band_data != NULL ) - { - surrCoh[b] = hQMetaData->surcoh_band_data[b].surround_coherence[0] / 255.0f; - } - else - { - surrCoh[b] = 0.0f; - } - + /* calculate block energies for side gain weighting (combine angles of 2 DirAC blocks to side gain for 1 DFT Stereo subframe; 4 blocks and 2 subframes overall) */ + pSynth = synth; + block_len = L_frame / nBlocks; for ( block = 0; block < nBlocks; block++ ) { - int16_t block_metadata; - - if ( hQMetaData->useLowerRes ) - { - block_metadata = 0; - } - else - { - block_metadata = block; - } - if ( q_direction->band_data[b].azimuth[block_metadata] < 0.f ) + block_nrg[block] = 0.f; + for ( i = 0; i < block_len; i++ ) { - q_direction->band_data[b].azimuth[block_metadata] += 360.f; + block_nrg[block] += pSynth[i] * pSynth[i]; } - azimuth[block][b] = (int16_t) q_direction->band_data[b].azimuth[block_metadata]; - elevation[block][b] = (int16_t) q_direction->band_data[b].elevation[block_metadata]; + block_nrg[block] = sqrtf( block_nrg[block] ); + pSynth += block_len; } - } + nrg_norm1 = 1 / ( block_nrg[0] + block_nrg[1] + EPSILON ); + nrg_norm2 = 1 / ( block_nrg[2] + block_nrg[3] + EPSILON ); - /* map angles (azi, ele), surround coherence, and diffuseness to DFT Stereo side and prediction gains */ - for ( b = 0; b < hStereoDft->nbands; b++ ) - { - /* combine angles of first 2 blocks to side gain of first subframe */ - side_gain[b] = 0.f; - for ( block = 0; block < nBlocks / 2; block++ ) + /* extract DirAC parameters from metadata 
*/ + for ( b = 0; b < nbands; b++ ) { - if ( b_wide_panning == 1 ) + diffuseness[b] = 1.0f - q_direction->band_data[b].energy_ratio[0]; + if ( hQMetaData->surcoh_band_data != NULL ) { - /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ - side_gain[b] += nrg_norm1 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + surrCoh[b] = hQMetaData->surcoh_band_data[b].surround_coherence[0] / 255.0f; } else { - side_gain[b] += nrg_norm1 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + surrCoh[b] = 0.0f; + } + + for ( block = 0; block < nBlocks; block++ ) + { + int16_t block_metadata; + + if ( hQMetaData->useLowerRes ) + { + block_metadata = 0; + } + else + { + block_metadata = block; + } + if ( q_direction->band_data[b].azimuth[block_metadata] < 0.f ) + { + q_direction->band_data[b].azimuth[block_metadata] += 360.f; + } + azimuth[block][b] = (int16_t) q_direction->band_data[b].azimuth[block_metadata]; + elevation[block][b] = (int16_t) q_direction->band_data[b].elevation[block_metadata]; } } - /* combine angles of last 2 blocks to side gain of second subframe */ - side_gain[b + STEREO_DFT_BAND_MAX] = 0.f; - for ( block = nBlocks / 2; block < nBlocks; block++ ) + /* map angles (azi, ele), surround coherence, and diffuseness to DFT Stereo side and prediction gains */ + for ( b = 0; b < hStereoDft->nbands; b++ ) { - if ( b_wide_panning == 1 ) + /* combine angles of first 2 blocks to side gain of first subframe */ + side_gain[b] = 0.f; + for ( block = 0; block < nBlocks / 2; block++ ) { - /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ - side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + if ( b_wide_panning == 1 ) + { + /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ + side_gain[b] += nrg_norm1 * 
block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + } + else + { + side_gain[b] += nrg_norm1 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + } } - else + + /* combine angles of last 2 blocks to side gain of second subframe */ + side_gain[b + STEREO_DFT_BAND_MAX] = 0.f; + for ( block = nBlocks / 2; block < nBlocks; block++ ) { - side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + if ( b_wide_panning == 1 ) + { + /* panning between left and ride, saturate at the stereo ls positions (+/- 30deg azi) */ + side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * get_panning( azimuth[block][b], elevation[block][b] ); + } + else + { + side_gain[b + STEREO_DFT_BAND_MAX] += nrg_norm2 * block_nrg[block] * sinf( azimuth[block][b] * EVS_PI / 180 ) * cosf( elevation[block][b] * EVS_PI / 180 ); + } } - } - side_gain[b] *= sqrtf( 1.f - diffuseness[b] ); - side_gain[b + STEREO_DFT_BAND_MAX] *= sqrtf( 1.f - diffuseness[b] ); - res_pred_gain[b] = diffuseness[b] * ( 1.0f - surrCoh[b] ); - res_pred_gain[b + STEREO_DFT_BAND_MAX] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + side_gain[b] *= sqrtf( 1.f - diffuseness[b] ); + side_gain[b + STEREO_DFT_BAND_MAX] *= sqrtf( 1.f - diffuseness[b] ); + res_pred_gain[b] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + res_pred_gain[b + STEREO_DFT_BAND_MAX] = diffuseness[b] * ( 1.0f - surrCoh[b] ); + } } hStereoDft->frame_nodata = 0; @@ -422,24 +468,68 @@ static void ivas_sba_dirac_stereo_compute_hb_gain( static void ivas_sba_dirac_stereo_upmix_hb( float hb_stereo_synth[CPE_CHANNELS][L_FRAME48k], /* i/o: stereo HB synthesis signal */ float hb_synth[L_FRAME48k], /* i : HB signal */ - float hb_gain[NB_DIV], /* i : side gains for HB signal */ - const int16_t output_frame /* i : output frame length per channel */ + float hb_gain[NB_DIV], /* i : side gains for 
HB signal */ + const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa, + const STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i : Stereo DFT handle for mixing matrix */ +#endif ) { int16_t i; - for ( i = 0; i < output_frame / 2; i++ ) +#ifdef DFT_STEREO_SPAR_MIXING + if (!mcmasa) { - hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[0] * hb_synth[i]; - hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[0] * hb_synth[i]; + for ( i = 0; i < output_frame / 2; i++ ) + { + float gp = hStereoDft->mixer_mat_smooth[0][0][8] + hStereoDft->mixer_mat_smooth[1][0][8] + + hStereoDft->mixer_mat_smooth[0][0][9] + hStereoDft->mixer_mat_smooth[1][0][9] + + hStereoDft->mixer_mat_smooth[0][0][10] + hStereoDft->mixer_mat_smooth[1][0][10] + + hStereoDft->mixer_mat_smooth[0][0][11] + hStereoDft->mixer_mat_smooth[1][0][11]; + + float gm = hStereoDft->mixer_mat_smooth[0][0][8] - hStereoDft->mixer_mat_smooth[1][0][8] + + hStereoDft->mixer_mat_smooth[0][0][9] - hStereoDft->mixer_mat_smooth[1][0][9] + + hStereoDft->mixer_mat_smooth[0][0][10] - hStereoDft->mixer_mat_smooth[1][0][10] + + hStereoDft->mixer_mat_smooth[0][0][11] - hStereoDft->mixer_mat_smooth[1][0][11]; + + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] * 0.25f * gp; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] * 0.25f * gm; + } + for ( i = output_frame / 2; i < output_frame; i++ ) + { + float gp = hStereoDft->mixer_mat_smooth[0][0][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][8 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][9 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][10 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][11 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][0][11 + IVAS_MAX_NUM_BANDS]; + + float gm = hStereoDft->mixer_mat_smooth[0][0][8 + 
IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][8 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][9 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][9 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][10 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][10 + IVAS_MAX_NUM_BANDS] + + hStereoDft->mixer_mat_smooth[0][0][11 + IVAS_MAX_NUM_BANDS] - hStereoDft->mixer_mat_smooth[1][0][11 + IVAS_MAX_NUM_BANDS]; + + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] * 0.25f * gp; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] * 0.25f * gm; + } } - - for ( i = output_frame / 2; i < output_frame; i++ ) + else +#endif { - hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[1] * hb_synth[i]; - hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[1] * hb_synth[i]; + for ( i = 0; i < output_frame / 2; i++ ) + { + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[0] * hb_synth[i]; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[0] * hb_synth[i]; + } + + for ( i = output_frame / 2; i < output_frame; i++ ) + { + hb_stereo_synth[0][i] = 0.5f * hb_synth[i] + 0.5f * hb_gain[1] * hb_synth[i]; + hb_stereo_synth[1][i] = 0.5f * hb_synth[i] - 0.5f * hb_gain[1] * hb_synth[i]; + } } + return; } @@ -454,6 +544,10 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ float output[CPE_CHANNELS][L_FRAME48k], /* i/o: output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t spar_flag +#endif ) { int16_t i; @@ -462,7 +556,71 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( float tmp; const float *win_dft; - if ( max( hStereoDft->td_gain[0], hStereoDft->td_gain[1] ) > 0 ) +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + + static FILE *f_stefi = 0; + + if ( f_stefi == 0 ) + { + f_stefi = fopen( "stefi.txt", "w" ); + } + +#endif + +#ifdef DFT_STEREO_SPAR_MIXING + if ( 
spar_flag ) + { + win_dft = hStereoDft->win32ms; + dftOvlLen = hStereoDft->dft32ms_ovl; + + float g_W_1, g_Y_1; + float g_W_2, g_Y_2; + float g_L, g_R; + float stefi_L, stefi_R; + + g_W_1 = ( hStereoDft->mixer_mat_smooth[0][1][8] + hStereoDft->mixer_mat_smooth[0][2][8] + hStereoDft->mixer_mat_smooth[0][3][8] ) + ( hStereoDft->mixer_mat_smooth[0][1][9] + hStereoDft->mixer_mat_smooth[0][2][9] + hStereoDft->mixer_mat_smooth[0][3][9] ) + ( hStereoDft->mixer_mat_smooth[0][1][10] + hStereoDft->mixer_mat_smooth[0][2][10] + hStereoDft->mixer_mat_smooth[0][3][10] ); + + g_Y_1 = ( hStereoDft->mixer_mat_smooth[1][1][8] + hStereoDft->mixer_mat_smooth[1][2][8] + hStereoDft->mixer_mat_smooth[1][3][8] ) + ( hStereoDft->mixer_mat_smooth[1][1][9] + hStereoDft->mixer_mat_smooth[1][2][9] + hStereoDft->mixer_mat_smooth[1][3][9] ) + ( hStereoDft->mixer_mat_smooth[1][1][10] + hStereoDft->mixer_mat_smooth[1][2][10] + hStereoDft->mixer_mat_smooth[1][3][10] ); + + g_W_2 = ( hStereoDft->mixer_mat_smooth[0][1][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][8 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[0][1][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][9 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[0][1][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][10 + IVAS_MAX_NUM_BANDS] ); + + g_Y_2 = ( hStereoDft->mixer_mat_smooth[1][1][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][8 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][8 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[1][1][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][9 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][9 + IVAS_MAX_NUM_BANDS] ) + ( hStereoDft->mixer_mat_smooth[1][1][10 + IVAS_MAX_NUM_BANDS] + 
hStereoDft->mixer_mat_smooth[1][2][10 + IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][10 + IVAS_MAX_NUM_BANDS] ); + + g_L = 0.16f * ( g_W_1 + g_W_2 - g_Y_1 - g_Y_2 ); + g_R = 0.16f * ( g_W_1 + g_W_2 + g_Y_1 + g_Y_2 ); + + for ( i = 0; i < dftOvlLen; i++ ) + { + win_in = win_dft[STEREO_DFT32MS_STEP * i] * win_dft[STEREO_DFT32MS_STEP * i]; + win_out = 1 - win_in; + + stefi_L = ( win_out * hStereoDft->g_L_prev + win_in * g_L ) * 0.5f * hStereoDft->hb_stefi_sig[i]; + stefi_R = ( win_out * hStereoDft->g_R_prev + win_in * g_R ) * 0.5f * hStereoDft->hb_stefi_sig[i]; + + output[0][i] += stefi_L; + output[1][i] += stefi_R; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", stefi_L, stefi_R ); +#endif + } + for ( i = dftOvlLen; i < output_frame; i++ ) + { + + stefi_L = g_L * 0.5f * hStereoDft->hb_stefi_sig[i]; + stefi_R = g_R * 0.5f * hStereoDft->hb_stefi_sig[i]; + + output[0][i] += stefi_L; + output[1][i] += stefi_R; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", stefi_L, stefi_R ); +#endif + } + hStereoDft->g_L_prev = g_L; + hStereoDft->g_R_prev = g_R; + } + else +#endif + if ( max( hStereoDft->td_gain[0], hStereoDft->td_gain[1] ) > 0 ) { win_dft = hStereoDft->win32ms; dftOvlLen = hStereoDft->dft32ms_ovl; @@ -475,12 +633,18 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( output[0][i] += tmp; output[1][i] -= tmp; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", +tmp, -tmp ); +#endif } for ( i = dftOvlLen; i < output_frame; i++ ) { tmp = hStereoDft->td_gain[0] * 0.5f * hStereoDft->hb_stefi_sig[i]; output[0][i] += tmp; output[1][i] -= tmp; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + fprintf( f_stefi, "%f %f\n", +tmp, -tmp ); +#endif } } @@ -496,19 +660,34 @@ static void ivas_sba_dirac_stereo_apply_td_stefi( void ivas_sba_dirac_stereo_smooth_parameters( STEREO_DFT_DEC_DATA_HANDLE hStereoDft /* i/o: decoder DFT stereo handle */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i/o: SPAR 
MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs /* i: Fs for delay calculation */ +#endif ) { - int16_t k_offset, b; + +#ifdef DFT_STEREO_SPAR_MIXING + int16_t i, j, k, i_sf; +#endif + + int16_t b; + int16_t k_offset; float *side_gain, *prev_side_gain; float *res_pred_gain, *prev_res_pred_gain; k_offset = STEREO_DFT_OFFSET; - prev_side_gain = hStereoDft->side_gain; side_gain = hStereoDft->side_gain + k_offset * STEREO_DFT_BAND_MAX; prev_res_pred_gain = hStereoDft->res_pred_gain; res_pred_gain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; +#ifdef DFT_STEREO_SPAR_MIXING + if (!hMdDec) +#endif + { /* Smoothing of side and prediction gains between ftrames */ for ( b = hStereoDft->res_pred_band_min; b < hStereoDft->nbands; b++ ) { @@ -525,6 +704,118 @@ void ivas_sba_dirac_stereo_smooth_parameters( res_pred_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * res_pred_gain[b + STEREO_DFT_BAND_MAX]; } } + } + +#ifdef DFT_STEREO_SPAR_MIXING + if ( hMdDec != 0 ) + { + float xfade_start_ns; + int16_t xfade_delay_subframes; + int16_t i_hist; + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + static FILE *f_smoothing = 0; + if ( f_smoothing == 0 ) + { + f_smoothing = fopen( "stereo_param_smoothing.txt", "w" ); + } +#endif + + xfade_start_ns = cross_fade_start_offset / (float) output_Fs * 1000000000.f - IVAS_FB_ENC_DELAY_NS; + xfade_delay_subframes = (int16_t) ( xfade_start_ns / ( FRAME_SIZE_NS / MAX_PARAM_SPATIAL_SUBFRAMES ) ); + + i_hist = 4 - xfade_delay_subframes; + + for ( k = 0; k < 2; k++ ) + { + for ( i_sf = k * 2; i_sf < ( k + 1 ) * 2; i_sf++ ) + { + if ( hStereoDft->first_frame ) + { + for ( i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = hMdDec->mixer_mat[i][j][b]; + } + } + } + } + else + { + for ( 
i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + float beta = hStereoDft->smooth_fac[k][b]; + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = + beta * hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] + ( 1 - beta ) * hMdDec->mixer_mat_prev[i_hist][i][j][b]; + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + if ( i == 0 && j == 0 ) + { + fprintf( f_smoothing, "%d %f\n", b, beta ); + } +#endif + } + } + } + } // first_frame + + mvr2r( hMdDec->mixer_mat_prev[1][0][0], hMdDec->mixer_mat_prev[0][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[2][0][0], hMdDec->mixer_mat_prev[1][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[3][0][0], hMdDec->mixer_mat_prev[2][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + mvr2r( hMdDec->mixer_mat_prev[4][0][0], hMdDec->mixer_mat_prev[3][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); + + for ( i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + hMdDec->mixer_mat_prev[4][i][j][b] = hMdDec->mixer_mat[i][j][b + i_sf * IVAS_MAX_NUM_BANDS]; + } + } + } + } // i_sf + } // k ( DFT block ) + hStereoDft->first_frame = 0; + + { + static FILE *f_mat = 0; + + if ( f_mat == 0 ) + f_mat = fopen( "mixer_mat_stereo_smooth", "w" ); + + for ( i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < 12; b++ ) + { + fprintf( f_mat, "%f\n", hStereoDft->mixer_mat_smooth[i][j][b] ); + } + } + } + + for ( i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + for ( b = 0; b < 12; b++ ) + { + fprintf( f_mat, "%f\n", hStereoDft->mixer_mat_smooth[i][j][b + IVAS_MAX_NUM_BANDS] ); + } + } + } + } // debug output + + } // hMdDec != 0 +#endif return; } @@ -540,6 +831,10 @@ void 
ivas_sba_dirac_stereo_dec( Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */ float output[CPE_CHANNELS][L_FRAME48k], /* i/o: output synthesis signal */ const int16_t output_frame /* i : output frame length per channel */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t mcmasa +#endif ) { int16_t dtx_flag, fd_cng_flag; @@ -548,7 +843,11 @@ void ivas_sba_dirac_stereo_dec( float tmp_synth[L_FRAME16k]; float hb_gain[NB_DIV]; float hb_synth_stereo[CPE_CHANNELS][L_FRAME48k]; +#ifdef DFT_STEREO_SPAR_MIXING + float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX]; +#else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX]; +#endif SCE_DEC_HANDLE hSCE; CPE_DEC_HANDLE hCPE; STEREO_DFT_DEC_DATA_HANDLE hStereoDft; @@ -556,30 +855,92 @@ void ivas_sba_dirac_stereo_dec( hSCE = st_ivas->hSCE[0]; hCPE = st_ivas->hCPE[0]; hStereoDft = hCPE->hStereoDft; - dtx_flag = ( hSCE->hCoreCoder[0]->core_brate <= SID_2k40 ); - fd_cng_flag = ( dtx_flag && hSCE->hCoreCoder[0]->cng_type == FD_CNG ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport > 1 && ! mcmasa) + { + dtx_flag = 0; + fd_cng_flag = 0; + } + else +#endif + { + dtx_flag = ( hSCE->hCoreCoder[0]->core_brate <= SID_2k40 ); + fd_cng_flag = ( dtx_flag && hSCE->hCoreCoder[0]->cng_type == FD_CNG ); + } + memOffset = NS2SA( output_frame * FRAMES_PER_SEC, IVAS_DEC_DELAY_NS - DELAY_BWE_TOTAL_NS ); ivas_sba_dirac_stereo_config( hStereoDft->hConfig ); - hStereoDft->nbands = ivas_sba_dirac_stereo_band_config( hStereoDft->band_limits, st_ivas->hDecoderConfig->output_Fs, hStereoDft->NFFT ); + hStereoDft->nbands = ivas_sba_dirac_stereo_band_config( + hStereoDft->band_limits, + st_ivas->hDecoderConfig->output_Fs, + hStereoDft->NFFT +#ifdef DFT_STEREO_SPAR_MIXING + , + (st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) ? 
1 : 0 +#endif + ); stereo_dft_dec_update( hStereoDft, output_frame, 1 /*st_ivas->sba_dirac_stereo_flag*/ ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport > 2 ) + { + stereo_dft_dec_analyze( hCPE, output[0], DFT, 0, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + stereo_dft_dec_analyze( hCPE, output[1], DFT, 1, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; + } + else if ( st_ivas->nchan_transport == 2 ) + { + stereo_dft_dec_analyze( hCPE, output[0], DFT, 0, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + stereo_dft_dec_analyze( hCPE, output[1], DFT, 2, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); + hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; + } + else +#endif + { + /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ + ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft, hSCE->save_hb_synth, hSCE->hCoreCoder[0]->core, output_frame, fd_cng_flag ); - /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ - ivas_sba_dirac_stereo_compute_td_stefi_nrgs( hStereoDft, hSCE->save_hb_synth, hSCE->hCoreCoder[0]->core, output_frame, fd_cng_flag ); - - /* do DFT Stereo core switching (including DFT analysis) here as CPE element was not available in SCE decoder */ - mvr2r( hSCE->save_synth, tmp_synth, hSCE->hCoreCoder[0]->L_frame ); - stereo_dft_dec_core_switching( hCPE, output[0] /*hSCE->save_output*/, hSCE->save_synth, hSCE->save_hb_synth, DFT, output_frame, 0, dtx_flag ); + /* do DFT Stereo core switching (including DFT analysis) here as CPE element was not available in SCE decoder */ + mvr2r( hSCE->save_synth, tmp_synth, hSCE->hCoreCoder[0]->L_frame ); + stereo_dft_dec_core_switching( hCPE, output[0] /*hSCE->save_output*/, hSCE->save_synth, hSCE->save_hb_synth, DFT, output_frame, 0, dtx_flag ); - /* do updates here after skipping this in SCE decoder (needs to 
be done after core switching) */ - updt_dec_common( hSCE->hCoreCoder[0], NORMAL_HQ_CORE, -1, hSCE->save_synth ); + /* do updates here after skipping this in SCE decoder (needs to be done after core switching) */ + updt_dec_common( hSCE->hCoreCoder[0], NORMAL_HQ_CORE, -1, hSCE->save_synth ); + } /* mapping of DirAC parameters (azimuth, elevation, diffuseness) to DFT Stereo parameters (side gain, prediction gain) */ - map_params_dirac_to_stereo( hStereoDft, st_ivas->hQMetaData, tmp_synth, DFT[0], st_ivas->ivas_format == MC_FORMAT, hSCE->hCoreCoder[0]->L_frame ); + map_params_dirac_to_stereo( + hStereoDft, + st_ivas->hQMetaData, + tmp_synth, + DFT[0], + st_ivas->ivas_format == MC_FORMAT, +#ifdef DFT_STEREO_SPAR_MIXING + mcmasa ? hSCE->hCoreCoder[0]->L_frame : 0, + mcmasa +#else + hSCE->hCoreCoder[0]->L_frame +#endif + ); + +#ifdef DFT_STEREO_SPAR_MIXING + if (!mcmasa) + { + set_f( hStereoDft->res_pred_gain, 1.f, 3 * STEREO_DFT_BAND_MAX ); + } +#endif /* DFT Stereo upmix */ - stereo_dft_dec( hStereoDft, hCPE->hCoreCoder[0], DFT, NULL, NULL, 1 /*st_ivas->sba_dirac_stereo_flag*/ ); + stereo_dft_dec( hStereoDft, hCPE->hCoreCoder[0], DFT, NULL, NULL, 1 /*st_ivas->sba_dirac_stereo_flag*/ +#ifdef DFT_STEREO_SPAR_MIXING + , + (st_ivas->hSpar != NULL && !mcmasa) ? st_ivas->hSpar->hMdDec : 0, + (st_ivas->hSpar != NULL && !mcmasa) ? 
st_ivas->hSpar->hFbMixer->cross_fade_start_offset : 0, + st_ivas->hDecoderConfig->output_Fs, + st_ivas->nchan_transport +#endif + ); /* DFT synthesis */ stereo_dft_dec_synthesize( hCPE, DFT, 0, output[0], output_frame ); @@ -592,23 +953,49 @@ void ivas_sba_dirac_stereo_dec( v_multc( output[1], 0.5f, output[1], output_frame ); /* delay HB synth */ - mvr2r( hSCE->save_hb_synth + output_frame - memOffset, tmp_buf, memOffset ); - mvr2r( hSCE->save_hb_synth, hSCE->save_hb_synth + memOffset, output_frame - memOffset ); - mvr2r( hSCE->prev_hb_synth, hSCE->save_hb_synth, memOffset ); - mvr2r( tmp_buf, hSCE->prev_hb_synth, memOffset ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport == 1 ) +#endif + { + mvr2r( hSCE->save_hb_synth + output_frame - memOffset, tmp_buf, memOffset ); + mvr2r( hSCE->save_hb_synth, hSCE->save_hb_synth + memOffset, output_frame - memOffset ); + mvr2r( hSCE->prev_hb_synth, hSCE->save_hb_synth, memOffset ); + mvr2r( tmp_buf, hSCE->prev_hb_synth, memOffset ); + } if ( ( hCPE->hCoreCoder[0]->core == ACELP_CORE || hCPE->hCoreCoder[0]->last_core == ACELP_CORE ) && !fd_cng_flag ) { /* upmix ACELP BWE */ ivas_sba_dirac_stereo_compute_hb_gain( hStereoDft, hb_gain ); - ivas_sba_dirac_stereo_upmix_hb( hb_synth_stereo, hSCE->save_hb_synth, hb_gain, output_frame ); + +#ifdef DFT_STEREO_SPAR_MIXING + if ( st_ivas->nchan_transport == 1 ) +#endif + { + ivas_sba_dirac_stereo_upmix_hb( + hb_synth_stereo, + hSCE->save_hb_synth, + hb_gain, + output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + mcmasa, + hStereoDft +#endif + ); + } /* add HB to ACELP core */ v_add( output[0], hb_synth_stereo[0], output[0], output_frame ); v_add( output[1], hb_synth_stereo[1], output[1], output_frame ); /* apply TD Stereo Filling as is done in ICBWE */ - ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame ); + ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame +#ifdef DFT_STEREO_SPAR_MIXING + , + ( st_ivas->sba_mode == SBA_MODE_SPAR && 
!mcmasa) ? 1 : 0 +#endif + ); } return; diff --git a/lib_dec/ivas_spar_md_dec.c b/lib_dec/ivas_spar_md_dec.c index f316c746cf..b0df83ef14 100644 --- a/lib_dec/ivas_spar_md_dec.c +++ b/lib_dec/ivas_spar_md_dec.c @@ -622,7 +622,7 @@ void ivas_spar_md_dec_process( ivas_spar_dec_parse_md_bs( hMdDec, st0, &nB, &bw, &dtx_vad, st_ivas->hDecoderConfig->ivas_total_brate, ivas_spar_br_table_consts[hMdDec->table_idx].usePlanarCoeff, st_ivas->hQMetaData->sba_inactive_mode ); -#ifdef DEBUG_SBA_MD_DUMP +#if 0 { char f_name[100]; int16_t num_bands = nB; diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index de79f876d6..1a136f634c 100644 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -229,6 +229,14 @@ typedef struct stereo_dft_dec_data_struct float smooth_buf[SBA_DIRAC_STEREO_NUM_BANDS][SBA_DIRAC_NRG_SMOOTH_LONG + 1]; float smooth_fac[NB_DIV][SBA_DIRAC_STEREO_NUM_BANDS]; +#ifdef DFT_STEREO_SPAR_MIXING + int16_t first_frame; + float mixer_mat_smooth[4][4][2*IVAS_MAX_NUM_BANDS]; + float g_L_prev; + float g_R_prev; + const float *max_smooth_gains, *min_smooth_gains; +#endif + } STEREO_DFT_DEC_DATA, *STEREO_DFT_DEC_DATA_HANDLE; @@ -950,7 +958,11 @@ typedef struct cpe_dec_data_structure float prev_synth[CPE_CHANNELS][NS2SA( 48000, IVAS_DEC_DELAY_NS - STEREO_DFT32MS_OVL_NS )]; /* DFT stereo I/O channel buffer memories that need to be updated for TD->DFT stereo switching */ +#ifdef DFT_STEREO_SPAR_MIXING + float *input_mem[CPE_CHANNELS+1]; +#else float *input_mem[CPE_CHANNELS]; +#endif float *input_mem_LB[CPE_CHANNELS]; float *input_mem_BPF[1]; float *output_mem[CPE_CHANNELS]; diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index 32d3510f80..0e5681e61f 100644 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -76,7 +76,12 @@ * Local function prototypes *-------------------------------------------------------------------------*/ -static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const 
int32_t output_Fs ); +static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const int32_t output_Fs +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif +); static void stereo_dft_compute_td_stefi_params( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const float samp_ratio ); @@ -252,6 +257,10 @@ ivas_error stereo_dft_dec_create( const int32_t element_brate, /* i : element bitrate */ const int32_t output_Fs, /* i : output sampling rate */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ) { STEREO_DFT_DEC_DATA_HANDLE hStereoDft_loc; @@ -293,7 +302,12 @@ ivas_error stereo_dft_dec_create( stereo_dft_config( hStereoDft_loc->hConfig, element_brate, &tmpS, &tmpS ); } - stereo_dft_dec_open( hStereoDft_loc, output_Fs ); + stereo_dft_dec_open( hStereoDft_loc, output_Fs +#ifdef DFT_STEREO_SPAR_MIXING + , + nchan_transport +#endif + ); *hStereoDft = hStereoDft_loc; @@ -310,6 +324,10 @@ ivas_error stereo_dft_dec_create( static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const int32_t output_Fs /* i : output sampling rate */ +#ifdef DFT_STEREO_SPAR_MIXING + , + const int16_t nchan_transport +#endif ) { @@ -365,6 +383,19 @@ static void stereo_dft_dec_open( hStereoDft->hb_stefi_delay = NS2SA( output_Fs, STEREO_DFT_TD_STEFI_DELAY_NS ); +#ifdef DFT_STEREO_SPAR_MIXING + if ( nchan_transport > 2 ) + { + hStereoDft->min_smooth_gains = min_smooth_gains2; + hStereoDft->max_smooth_gains = max_smooth_gains2; + } + else + { + hStereoDft->min_smooth_gains = min_smooth_gains1; + hStereoDft->max_smooth_gains = max_smooth_gains1; + } +#endif + /* reset DFT stereo memories */ stereo_dft_dec_reset( hStereoDft ); @@ -383,6 +414,9 @@ void stereo_dft_dec_reset( ) { int16_t i; +#ifdef DFT_STEREO_SPAR_MIXING + int16_t j, b; +#endif /*Configuration*/ set_s( hStereoDft->prm_res, 
hStereoDft->hConfig->prm_res, STEREO_DFT_DEC_DFT_NB ); @@ -492,6 +526,25 @@ void stereo_dft_dec_reset( hStereoDft->ipd_xfade_prev = 0.0f; #endif +#ifdef DFT_STEREO_SPAR_MIXING + for ( b = 0; b < hStereoDft->nbands; b++ ) + { + for ( i = 0; i < 4; i++ ) + { + for ( j = 0; j < 4; j++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b] = 0.0f; + } + } + hStereoDft->mixer_mat_smooth[0][0][b] = 0.0f; + hStereoDft->mixer_mat_smooth[1][1][b] = 0.0f; + } + hStereoDft->first_frame = 1; + hStereoDft->g_L_prev = 0.f; + hStereoDft->g_R_prev = 0.f; +#endif + + return; } @@ -1093,12 +1146,23 @@ void stereo_dft_dec_synthesize( *-------------------------------------------------------------------------*/ void stereo_dft_dec( - STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ - Decoder_State *st0, /* i/o: decoder state structure */ + STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ + Decoder_State *st0, /* i/o: decoder state structure */ +#ifdef DFT_STEREO_SPAR_MIXING + float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ +#else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ - float *input_mem, /* i/o: mem of buffer DFT analysis */ - STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ - const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#endif + float *input_mem, /* i/o: mem of buffer DFT analysis */ + STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ + const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ +#ifdef DFT_STEREO_SPAR_MIXING + , + ivas_spar_md_dec_state_t *hMdDec, /* i: SPAR MD handle for upmixing */ + int16_t cross_fade_start_offset, /* i: SPAR mixer delay compensation */ + int32_t output_Fs, /* i: Fs for delay calculation */ + int16_t nchan_transport /* i: number of transport channels */ +#endif ) { int16_t i, k, b, N_div, stop; @@ -1106,6 +1170,9 @@ void stereo_dft_dec( float
DFT_R[STEREO_DFT32MS_N_MAX]; float DFT_PRED_RES[STEREO_DFT32MS_N_32k]; float *pDFT_DMX; +#ifdef DFT_STEREO_SPAR_MIXING + float *pDFT_DMX1; +#endif float *pDFT_RES; float g, tmp; float *pPredGain; @@ -1129,6 +1196,10 @@ void stereo_dft_dec( HANDLE_FD_CNG_COM hFdCngCom = hFdCngDec->hFdCngCom; int16_t *cna_seed = &( hFdCngCom->seed ); +#ifdef DFT_STEREO_SPAR_MIXING + float DFT_W, DFT_Y; +#endif + output_frame = (int16_t) ( st0->output_Fs / FRAMES_PER_SEC ); /*------------------------------------------------------------------* @@ -1170,7 +1241,14 @@ void stereo_dft_dec( /* Smoothing for the current frame */ if ( sba_dirac_stereo_flag ) { - ivas_sba_dirac_stereo_smooth_parameters( hStereoDft ); + ivas_sba_dirac_stereo_smooth_parameters( hStereoDft +#ifdef DFT_STEREO_SPAR_MIXING + , + hMdDec, + cross_fade_start_offset, + output_Fs +#endif + ); } else { @@ -1199,6 +1277,17 @@ void stereo_dft_dec( { pDFT_DMX = DFT[0] + k * STEREO_DFT32MS_N_MAX; pDFT_RES = DFT[1] + k * STEREO_DFT32MS_N_MAX; +#ifdef DFT_STEREO_SPAR_MIXING + pDFT_DMX1 = 0; + if ( nchan_transport > 2 ) + { + pDFT_DMX1 = DFT[1] + k * STEREO_DFT32MS_N_MAX; + } + else if ( nchan_transport == 2 ) + { + pDFT_DMX1 = DFT[2] + k * STEREO_DFT32MS_N_MAX; + } +#endif /*Apply Stereo*/ if ( hStereoDft->hConfig->dmx_active ) @@ -1242,7 +1331,14 @@ void stereo_dft_dec( hStereoDft->past_DMX_pos = ( hStereoDft->past_DMX_pos + STEREO_DFT_PAST_MAX - 1 ) % STEREO_DFT_PAST_MAX; } +#ifdef DFT_STEREO_SPAR_MIXING + if ( !( sba_dirac_stereo_flag && nchan_transport >= 2 ) ) + { + stereo_dft_generate_res_pred( hStereoDft, samp_ratio, pDFT_DMX, DFT_PRED_RES, pPredGain, k, DFT[1] + k * STEREO_DFT32MS_N_MAX, &stop, st0->bfi ); + } +#else stereo_dft_generate_res_pred( hStereoDft, samp_ratio, pDFT_DMX, DFT_PRED_RES, pPredGain, k, DFT[1] + k * STEREO_DFT32MS_N_MAX, &stop, st0->bfi ); +#endif if ( hStereoDft->res_cod_band_max > 0 ) { @@ -1364,10 +1460,206 @@ void stereo_dft_dec( } } } +#ifdef DFT_STEREO_SPAR_MIXING + else if ( 
sba_dirac_stereo_flag && hMdDec ) + { +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_dmx = 0, *f_res = 0, *f_dmx1 = 0; + + if ( f_dmx == 0 ) + { + f_dmx = fopen( "dft_dmx.txt", "w" ); + f_res = fopen( "dft_pred_res.txt", "w" ); + if ( nchan_transport >= 2 ) + { + f_dmx1 = fopen( "dft_dmx1.txt", "w" ); + } + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, DFT_PRED_RES[2 * i], DFT_PRED_RES[2 * i + 1] ); + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + if ( nchan_transport >= 2 ) + { + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + } + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + } +#endif + + if ( nchan_transport == 1 ) + { + if ( b == 0 ) + { + i = 0; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + DFT_W = 
hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + ( hStereoDft->mixer_mat_smooth[0][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + ( hStereoDft->mixer_mat_smooth[1][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + ( hStereoDft->mixer_mat_smooth[0][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[0][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + ( hStereoDft->mixer_mat_smooth[1][1][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][2][b + k * IVAS_MAX_NUM_BANDS] + hStereoDft->mixer_mat_smooth[1][3][b + k * IVAS_MAX_NUM_BANDS] ) * DFT_PRED_RES[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = hStereoDft->mixer_mat_smooth[0][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + } + else if ( 
nchan_transport >= 2 ) + { + if ( b == 0 ) + { + i = 0; + + DFT_W = pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + pDFT_DMX1[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + pDFT_DMX1[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) + { + DFT_W = pDFT_DMX[2 * i]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i] + pDFT_DMX1[2 * i]; + + DFT_L[2 * i] = DFT_W + DFT_Y; + DFT_R[2 * i] = DFT_W - DFT_Y; + + DFT_W = pDFT_DMX[2 * i + 1]; + DFT_Y = hStereoDft->mixer_mat_smooth[1][0][b + k * IVAS_MAX_NUM_BANDS] * pDFT_DMX[2 * i + 1] + pDFT_DMX1[2 * i + 1]; + + DFT_L[2 * i + 1] = DFT_W + DFT_Y; + DFT_R[2 * i + 1] = DFT_W - DFT_Y; + } + } + else + { + assert( "nhcan_transport must be 1 or 1!" 
); + } + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_L = 0, *f_R = 0; + + if ( f_L == 0 ) + { + f_L = fopen( "dft_L.txt", "w" ); + f_R = fopen( "dft_R.txt", "w" ); + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_L, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + fprintf( f_R, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + } + for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_L, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + fprintf( f_R, "%d %f %f\n", i, DFT_L[2 * i], DFT_L[2 * i + 1] ); + } + } +#endif + } +#endif else { + +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG + { + static FILE *f_dmx = 0, *f_res = 0, *f_dmx1 = 0; + + if ( f_dmx == 0 ) + { + f_dmx = fopen( "dft_dmx.txt", "w" ); + f_res = fopen( "dft_pred_res.txt", "w" ); + if ( nchan_transport == 2 ) + { + f_dmx1 = fopen( "dft_dmx1.txt", "w" ); + } + } + + if ( b == 0 ) + { + i = 0; + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, DFT_PRED_RES[2 * i], DFT_PRED_RES[2 * i + 1] ); + } + for ( ; i < hStereoDft->band_limits[b + 1]; i++ ) + { + fprintf( f_dmx, "%d %f %f\n", i, pDFT_DMX[2 * i], pDFT_DMX[2 * i + 1] ); + fprintf( f_dmx1, "%d %f %f\n", i, pDFT_DMX1[2 * i], pDFT_DMX1[2 * i + 1] ); + fprintf( f_res, "%d %f %f\n", i, 0.0f, 0.0f ); + } + } +#endif + for ( i = hStereoDft->band_limits[b]; i < min( stop, hStereoDft->band_limits[b + 1] ); i++ ) { + tmp = g * pDFT_DMX[2 * i] + pDFT_RES[2 * i] + DFT_PRED_RES[2 * i]; DFT_L[2 * i] = pDFT_DMX[2 * i] + tmp; diff --git a/lib_dec/ivas_stereo_switching_dec.c 
b/lib_dec/ivas_stereo_switching_dec.c index 8db6ae3ac3..ce200c5633 100755 --- a/lib_dec/ivas_stereo_switching_dec.c +++ b/lib_dec/ivas_stereo_switching_dec.c @@ -416,7 +416,12 @@ ivas_error stereo_memory_dec( deallocate_CoreCoder( hCPE->hCoreCoder[1] ); /* allocate DFT stereo data structure */ - if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 ) ) != IVAS_ERR_OK ) + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 +#ifdef DFT_STEREO_SPAR_MIXING + , + nchan_transport +#endif + ) ) != IVAS_ERR_OK ) { return error; } @@ -1073,7 +1078,14 @@ void synchro_synthesis( delay_signal( output[0], output_frame, hCPE->hCoreCoder[0]->hTcxDec->FBTCXdelayBuf, delay_diff ); } +#ifdef DFT_STEREO_SPAR_MIXING + if ( hCPE->element_mode != IVAS_CPE_MDCT ) + { + ivas_post_proc( NULL, hCPE, 0, output[0], output, output_frame, sba_dirac_stereo_flag ); + } +#else ivas_post_proc( NULL, hCPE, 0, output[0], output, output_frame, sba_dirac_stereo_flag ); +#endif /* zero padding in order to synchronize the upmixed DFT stereo synthesis with the TD/MDCT stereo synthesis */ for ( n = 0; n < hCPE->nchan_out; n++ ) @@ -1140,7 +1152,9 @@ void synchro_synthesis( /*----------------------------------------------------------------* * TD/MDCT stereo synchro *----------------------------------------------------------------*/ - +#ifdef DFT_STEREO_SPAR_MIXING + if( sba_dirac_stereo_flag ) return; +#endif if ( hCPE->element_mode == IVAS_CPE_TD || hCPE->element_mode == IVAS_CPE_MDCT ) { /* handling of DFT->TD switching */ diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index d45527b740..d675cf263d 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -1055,7 +1055,11 @@ ivas_error IVAS_DEC_GetDelay( st_ivas = hIvasDec->st_ivas; hDecoderConfig = st_ivas->hDecoderConfig; +#ifdef DFT_STEREO_SPAR_MIXING + *nSamples = NS2SA( hDecoderConfig->output_Fs, get_delay( DEC, hDecoderConfig->output_Fs, st_ivas->ivas_format, 
st_ivas->cldfbAnaDec[0], st_ivas->renderer_type, st_ivas->binaural_latency_ns, st_ivas->sba_dirac_stereo_flag ) ); +#else *nSamples = NS2SA( hDecoderConfig->output_Fs, get_delay( DEC, hDecoderConfig->output_Fs, st_ivas->ivas_format, st_ivas->cldfbAnaDec[0], st_ivas->renderer_type, st_ivas->binaural_latency_ns ) ); +#endif *timeScale = hDecoderConfig->output_Fs; diff --git a/lib_enc/lib_enc.c b/lib_enc/lib_enc.c index b4eb85915e..df5c6f9f7b 100755 --- a/lib_enc/lib_enc.c +++ b/lib_enc/lib_enc.c @@ -951,7 +951,11 @@ ivas_error IVAS_ENC_GetDelay( return IVAS_ERR_UNEXPECTED_NULL_POINTER; } +#ifdef DFT_STEREO_SPAR_MIXING + *delay = NS2SA( hEncoderConfig->input_Fs, get_delay( ENC, hEncoderConfig->input_Fs, hEncoderConfig->ivas_format, NULL, RENDERER_DISABLE, 0, 0 ) ); +#else *delay = NS2SA( hEncoderConfig->input_Fs, get_delay( ENC, hEncoderConfig->input_Fs, hEncoderConfig->ivas_format, NULL, RENDERER_DISABLE, 0 ) ); +#endif *delay *= hEncoderConfig->nchan_inp; -- GitLab From ecdd7c1f04bcb08dc152fb8fe316f43e9045ea10 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 16 Dec 2022 09:44:53 +0100 Subject: [PATCH 02/20] deallocation of CPE structure for > 1 TC --- lib_dec/ivas_init_dec.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_init_dec.c b/lib_dec/ivas_init_dec.c index 4904803e91..9a37cf7e71 100644 --- a/lib_dec/ivas_init_dec.c +++ b/lib_dec/ivas_init_dec.c @@ -908,11 +908,11 @@ ivas_error ivas_init_decoder( } /* create CPE element for DFT Stereo like upmix */ - if ( st_ivas->sba_dirac_stereo_flag + if ( st_ivas->sba_dirac_stereo_flag #ifdef DFT_STEREO_SPAR_MIXING - && st_ivas->nchan_transport == 1 + && st_ivas->nchan_transport == 1 #endif - ) + ) { if ( ( error = create_cpe_dec( st_ivas, cpe_id, ivas_total_brate / ( st_ivas->nSCE + st_ivas->nCPE ) ) ) != IVAS_ERR_OK ) { @@ -1291,7 +1291,7 @@ ivas_error ivas_init_decoder( /* CLDFB Interpolation weights */ if ( st_ivas->ivas_format == SBA_FORMAT && st_ivas->sba_mode 
== SBA_MODE_SPAR #ifdef DFT_STEREO_SPAR_MIXING - && !st_ivas->sba_dirac_stereo_flag + && !st_ivas->sba_dirac_stereo_flag #endif ) { @@ -1573,7 +1573,11 @@ void ivas_destroy_dec( if ( st_ivas->hCPE[i] != NULL ) { /* set pointer to NULL as core coder already deallocated in destroy_sce_dec() */ - if ( st_ivas->sba_dirac_stereo_flag ) + if ( st_ivas->sba_dirac_stereo_flag +#ifdef DFT_STEREO_SPAR_MIXING + && st_ivas->nchan_transport == 1 +#endif + ) { st_ivas->hCPE[i]->hCoreCoder[0] = NULL; st_ivas->hCPE[i]->hCoreCoder[1] = NULL; -- GitLab From afdf23ef2c9077659a04d6db10004c855b737806 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 16 Dec 2022 11:09:54 +0100 Subject: [PATCH 03/20] remove code to add zero to the delay --- lib_com/delay_comp.c | 10 ---------- lib_com/prot.h | 4 ---- lib_dec/lib_dec.c | 4 ---- lib_enc/lib_enc.c | 4 ---- 4 files changed, 22 deletions(-) diff --git a/lib_com/delay_comp.c b/lib_com/delay_comp.c index c220184aaa..c3e6aa1ac7 100644 --- a/lib_com/delay_comp.c +++ b/lib_com/delay_comp.c @@ -57,10 +57,6 @@ int32_t get_delay( HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ -#ifdef DFT_STEREO_SPAR_MIXING - , - const int16_t sba_dirac_stereo_flag -#endif ) { int32_t delay = 0; @@ -103,12 +99,6 @@ int32_t get_delay( { delay += IVAS_FB_DEC_DELAY_NS; } -#ifdef DFT_STEREO_SPAR_MIXING - else if ( sba_dirac_stereo_flag ) - { - delay += 0; - } -#endif /* compensate for Binaural renderer HRTF delay */ { diff --git a/lib_com/prot.h b/lib_com/prot.h index 525a28700d..973f15ef5a 100644 --- a/lib_com/prot.h +++ b/lib_com/prot.h @@ -719,10 +719,6 @@ int32_t get_delay( HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ RENDERER_TYPE renderer_type, /* i : IVAS rendering type */ const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ -#ifdef 
DFT_STEREO_SPAR_MIXING - , - const int16_t sba_dirac_stereo_flag -#endif ); void decision_matrix_enc( diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index d675cf263d..d45527b740 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -1055,11 +1055,7 @@ ivas_error IVAS_DEC_GetDelay( st_ivas = hIvasDec->st_ivas; hDecoderConfig = st_ivas->hDecoderConfig; -#ifdef DFT_STEREO_SPAR_MIXING - *nSamples = NS2SA( hDecoderConfig->output_Fs, get_delay( DEC, hDecoderConfig->output_Fs, st_ivas->ivas_format, st_ivas->cldfbAnaDec[0], st_ivas->renderer_type, st_ivas->binaural_latency_ns, st_ivas->sba_dirac_stereo_flag ) ); -#else *nSamples = NS2SA( hDecoderConfig->output_Fs, get_delay( DEC, hDecoderConfig->output_Fs, st_ivas->ivas_format, st_ivas->cldfbAnaDec[0], st_ivas->renderer_type, st_ivas->binaural_latency_ns ) ); -#endif *timeScale = hDecoderConfig->output_Fs; diff --git a/lib_enc/lib_enc.c b/lib_enc/lib_enc.c index df5c6f9f7b..b4eb85915e 100755 --- a/lib_enc/lib_enc.c +++ b/lib_enc/lib_enc.c @@ -951,11 +951,7 @@ ivas_error IVAS_ENC_GetDelay( return IVAS_ERR_UNEXPECTED_NULL_POINTER; } -#ifdef DFT_STEREO_SPAR_MIXING - *delay = NS2SA( hEncoderConfig->input_Fs, get_delay( ENC, hEncoderConfig->input_Fs, hEncoderConfig->ivas_format, NULL, RENDERER_DISABLE, 0, 0 ) ); -#else *delay = NS2SA( hEncoderConfig->input_Fs, get_delay( ENC, hEncoderConfig->input_Fs, hEncoderConfig->ivas_format, NULL, RENDERER_DISABLE, 0 ) ); -#endif *delay *= hEncoderConfig->nchan_inp; -- GitLab From 6ce011ef9a15cb8bb313f2345376f3b0e7f479d2 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 16 Dec 2022 11:35:51 +0100 Subject: [PATCH 04/20] remove unused buffer hCPE->input_mem[2] --- lib_dec/ivas_cpe_dec.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c index 2b4e0bcfa5..492509e86d 100644 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -689,21 +689,6 @@ ivas_error create_cpe_dec( } } -#ifdef 
DFT_STEREO_SPAR_MIXING - if ( st_ivas->sba_dirac_stereo_flag && st_ivas->nchan_transport >= 2 ) - { - if ( ( hCPE->input_mem[2] = (float *) count_malloc( sizeof( float ) * NS2SA( output_Fs, STEREO_DFT32MS_OVL_NS ) ) ) == NULL ) - { - return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DFT stereo memory\n" ) ); - } - set_zero( hCPE->input_mem[2], NS2SA( output_Fs, STEREO_DFT32MS_OVL_NS ) ); - } - else - { - hCPE->input_mem[2] = NULL; - } -#endif - /*-----------------------------------------------------------------* * CoreCoder, 2 instances: allocate and initialize *-----------------------------------------------------------------*/ @@ -945,13 +930,6 @@ void destroy_cpe_dec( } count_free( hCPE->input_mem_BPF[0] ); hCPE->input_mem_BPF[0] = NULL; -#ifdef DFT_STEREO_SPAR_MIXING - if ( hCPE->input_mem[2] != NULL ) - { - count_free( hCPE->input_mem[2] ); - hCPE->input_mem[2] = NULL; - } -#endif } if ( hCPE->hStereoCng != NULL ) -- GitLab From d9705e4881c01af473b99e05160a28ac05cb4009 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 16 Dec 2022 11:38:43 +0100 Subject: [PATCH 05/20] wrap SBA-to-stereo debug output into DFT_STEREO_SPAR_MIXING_DEBUG --- lib_dec/ivas_sba_dirac_stereo_dec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index bb3b4825f4..3099d8ac0c 100644 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -785,6 +785,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( } // k ( DFT block ) hStereoDft->first_frame = 0; +#ifdef DFT_STEREO_SPAR_MIXING_DEBUG { static FILE *f_mat = 0; @@ -813,6 +814,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( } } } // debug output +#endif } // hMdDec != 0 #endif -- GitLab From 0a030ae890a989667e6221ed3a31fbc39c0e194c Mon Sep 17 00:00:00 2001 From: rhb Date: Fri, 16 Dec 2022 16:36:38 +0100 Subject: [PATCH 06/20] remove some obsolete special cases for nchan_transport == 2 --- 
lib_dec/ivas_sba_dirac_stereo_dec.c | 13 +------------ lib_dec/ivas_stat_dec.h | 4 ---- lib_dec/ivas_stereo_dft_dec.c | 6 +----- 3 files changed, 2 insertions(+), 21 deletions(-) mode change 100644 => 100755 lib_dec/ivas_sba_dirac_stereo_dec.c mode change 100644 => 100755 lib_dec/ivas_stat_dec.h mode change 100644 => 100755 lib_dec/ivas_stereo_dft_dec.c diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c old mode 100644 new mode 100755 index 3099d8ac0c..ddaeb02681 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -845,11 +845,7 @@ void ivas_sba_dirac_stereo_dec( float tmp_synth[L_FRAME16k]; float hb_gain[NB_DIV]; float hb_synth_stereo[CPE_CHANNELS][L_FRAME48k]; -#ifdef DFT_STEREO_SPAR_MIXING - float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX]; -#else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX]; -#endif SCE_DEC_HANDLE hSCE; CPE_DEC_HANDLE hCPE; STEREO_DFT_DEC_DATA_HANDLE hStereoDft; @@ -885,19 +881,12 @@ void ivas_sba_dirac_stereo_dec( stereo_dft_dec_update( hStereoDft, output_frame, 1 /*st_ivas->sba_dirac_stereo_flag*/ ); #ifdef DFT_STEREO_SPAR_MIXING - if ( st_ivas->nchan_transport > 2 ) + if ( st_ivas->nchan_transport > 1 ) { stereo_dft_dec_analyze( hCPE, output[0], DFT, 0, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); stereo_dft_dec_analyze( hCPE, output[1], DFT, 1, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; } - else if ( st_ivas->nchan_transport == 2 ) - { - stereo_dft_dec_analyze( hCPE, output[0], DFT, 0, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); - stereo_dft_dec_analyze( hCPE, output[1], DFT, 2, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); - hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; - } - else #endif { /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h old mode 
100644 new mode 100755 index 1a136f634c..79d58b0165 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -958,11 +958,7 @@ typedef struct cpe_dec_data_structure float prev_synth[CPE_CHANNELS][NS2SA( 48000, IVAS_DEC_DELAY_NS - STEREO_DFT32MS_OVL_NS )]; /* DFT stereo I/O channel buffer memories that need to be updated for TD->DFT stereo switching */ -#ifdef DFT_STEREO_SPAR_MIXING - float *input_mem[CPE_CHANNELS+1]; -#else float *input_mem[CPE_CHANNELS]; -#endif float *input_mem_LB[CPE_CHANNELS]; float *input_mem_BPF[1]; float *output_mem[CPE_CHANNELS]; diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c old mode 100644 new mode 100755 index 0e5681e61f..ccf9d559a5 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -1279,14 +1279,10 @@ void stereo_dft_dec( pDFT_RES = DFT[1] + k * STEREO_DFT32MS_N_MAX; #ifdef DFT_STEREO_SPAR_MIXING pDFT_DMX1 = 0; - if ( nchan_transport > 2 ) + if ( nchan_transport > 1 ) { pDFT_DMX1 = DFT[1] + k * STEREO_DFT32MS_N_MAX; } - else if ( nchan_transport == 2 ) - { - pDFT_DMX1 = DFT[2] + k * STEREO_DFT32MS_N_MAX; - } #endif /*Apply Stereo*/ -- GitLab From 0250a94468c6cbc7015b895361554bfa97322b1e Mon Sep 17 00:00:00 2001 From: rhb Date: Mon, 19 Dec 2022 14:17:36 +0100 Subject: [PATCH 07/20] fix mistake in previous commit --- lib_dec/ivas_sba_dirac_stereo_dec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index ddaeb02681..83e96b0b40 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -887,6 +887,7 @@ void ivas_sba_dirac_stereo_dec( stereo_dft_dec_analyze( hCPE, output[1], DFT, 1, output_frame, output_frame, DFT_STEREO_DEC_ANA_FB, 0, 0 ); hStereoDft->core_hist[0] = hCPE->hCoreCoder[0]->core; } + else #endif { /* nrg calculation for TD Stereo Filling, as done in ICBWE which is not used in this case */ -- GitLab From 62053ebdd8931b3486350c56aaec9ae30ce63238 Mon Sep 17 
00:00:00 2001 From: rhb Date: Mon, 19 Dec 2022 16:04:35 +0100 Subject: [PATCH 08/20] avoid allocating more than 1 hStereoDft struct for modes with more than 1 CPE --- lib_dec/ivas_cpe_dec.c | 4 ++++ 1 file changed, 4 insertions(+) mode change 100644 => 100755 lib_dec/ivas_cpe_dec.c diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c old mode 100644 new mode 100755 index 492509e86d..311b015cd6 --- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -731,7 +731,11 @@ ivas_error create_cpe_dec( * DFT stereo initialization *-----------------------------------------------------------------*/ +#ifdef DFT_STEREO_SPAR_MIXING + if ( hCPE->element_mode == IVAS_CPE_DFT || ( st_ivas->sba_dirac_stereo_flag && hCPE->cpe_id == 0 ) ) +#else if ( hCPE->element_mode == IVAS_CPE_DFT || st_ivas->sba_dirac_stereo_flag ) +#endif { if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag #ifdef DFT_STEREO_SPAR_MIXING -- GitLab From f3e3e3914744d86434cc7f929d6d7a51427d319f Mon Sep 17 00:00:00 2001 From: rhb Date: Fri, 13 Jan 2023 15:19:57 +0100 Subject: [PATCH 09/20] reduce buffer mixer_mat_smooth by half --- lib_dec/ivas_sba_dirac_stereo_dec.c | 10 +++++----- lib_dec/ivas_stat_dec.h | 2 +- lib_dec/ivas_stereo_dft_dec.c | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 83e96b0b40..7bba219cda 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -732,7 +732,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( { if ( hStereoDft->first_frame ) { - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { @@ -745,7 +745,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( } else { - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { @@ -771,7 +771,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( mvr2r( 
hMdDec->mixer_mat_prev[3][0][0], hMdDec->mixer_mat_prev[2][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); mvr2r( hMdDec->mixer_mat_prev[4][0][0], hMdDec->mixer_mat_prev[3][0][0], IVAS_MAX_FB_MIXER_OUT_CH * IVAS_MAX_SPAR_FB_MIXER_IN_CH * IVAS_MAX_NUM_BANDS ); - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { @@ -792,7 +792,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( if ( f_mat == 0 ) f_mat = fopen( "mixer_mat_stereo_smooth", "w" ); - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { @@ -803,7 +803,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( } } - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index 79d58b0165..34ef96d17f 100755 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -231,7 +231,7 @@ typedef struct stereo_dft_dec_data_struct #ifdef DFT_STEREO_SPAR_MIXING int16_t first_frame; - float mixer_mat_smooth[4][4][2*IVAS_MAX_NUM_BANDS]; + float mixer_mat_smooth[2][4][2*IVAS_MAX_NUM_BANDS]; float g_L_prev; float g_R_prev; const float *max_smooth_gains, *min_smooth_gains; diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index ccf9d559a5..8571c099b4 100755 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -529,15 +529,13 @@ void stereo_dft_dec_reset( #ifdef DFT_STEREO_SPAR_MIXING for ( b = 0; b < hStereoDft->nbands; b++ ) { - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 2; i++ ) { for ( j = 0; j < 4; j++ ) { hStereoDft->mixer_mat_smooth[i][j][b] = 0.0f; } } - hStereoDft->mixer_mat_smooth[0][0][b] = 0.0f; - hStereoDft->mixer_mat_smooth[1][1][b] = 0.0f; } hStereoDft->first_frame = 1; hStereoDft->g_L_prev = 0.f; -- GitLab From b37983deb1b5223f66ef4f6fa7db4037304435d1 Mon Sep 17 00:00:00 2001 From: rhb Date: Thu, 19 Jan 2023 12:39:38 +0100 Subject: [PATCH 10/20] fix bug that accidentally 
zeroed the second channel for modes with more than 2 TCs --- lib_dec/ivas_stereo_dft_dec.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index 8571c099b4..fb837f4bb4 100755 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -1404,7 +1404,11 @@ void stereo_dft_dec( #endif /* No residual coding in inactive frames, instead pDFT_RES is used for the second channel */ +#ifdef DFT_STEREO_SPAR_MIXING + if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !sba_dirac_stereo_flag ) +#else if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata ) +#endif { /*filter non-coded frequencies. It removes some MDCT frequency aliasing*/ for ( i = hStereoDft->band_limits[b]; i < hStereoDft->band_limits[b + 1]; i++ ) -- GitLab From b3cebc19828e9950f59ee528c5ad6a1cbdc79b33 Mon Sep 17 00:00:00 2001 From: rhb Date: Thu, 19 Jan 2023 16:25:19 +0100 Subject: [PATCH 11/20] fix broken low bitrates where SPAR isn't used right now --- lib_dec/ivas_sba_dirac_stereo_dec.c | 51 +++++++++++++++-------------- lib_dec/ivas_stereo_dft_dec.c | 6 +--- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 7bba219cda..be65633126 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -270,8 +270,11 @@ static void map_params_dirac_to_stereo( /* apply upper bounds depending on band */ #ifdef DFT_STEREO_SPAR_MIXING - hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) ); - hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); + if ( !mcmasa ) + { + hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) 
); + hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); + } #else hStereoDft->smooth_fac[0][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ); hStereoDft->smooth_fac[1][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ); @@ -688,23 +691,23 @@ void ivas_sba_dirac_stereo_smooth_parameters( if (!hMdDec) #endif { - /* Smoothing of side and prediction gains between ftrames */ - for ( b = hStereoDft->res_pred_band_min; b < hStereoDft->nbands; b++ ) - { - if ( hStereoDft->attackPresent ) + /* Smoothing of side and prediction gains between ftrames */ + for ( b = hStereoDft->res_pred_band_min; b < hStereoDft->nbands; b++ ) { - res_pred_gain[b] *= 0.8f; - res_pred_gain[b + STEREO_DFT_BAND_MAX] *= 0.8f; - } - else - { - side_gain[b] = hStereoDft->smooth_fac[0][b] * prev_side_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * side_gain[b]; - side_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * side_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * side_gain[b + STEREO_DFT_BAND_MAX]; - res_pred_gain[b] = hStereoDft->smooth_fac[0][b] * prev_res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * res_pred_gain[b]; - res_pred_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * res_pred_gain[b + STEREO_DFT_BAND_MAX]; + if ( hStereoDft->attackPresent ) + { + res_pred_gain[b] *= 0.8f; + res_pred_gain[b + STEREO_DFT_BAND_MAX] *= 0.8f; + } + else + { + side_gain[b] = hStereoDft->smooth_fac[0][b] * prev_side_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * side_gain[b]; + side_gain[b + STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * side_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * side_gain[b + STEREO_DFT_BAND_MAX]; + res_pred_gain[b] = hStereoDft->smooth_fac[0][b] * prev_res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[0][b] ) * res_pred_gain[b]; + res_pred_gain[b + 
STEREO_DFT_BAND_MAX] = hStereoDft->smooth_fac[1][b] * res_pred_gain[b] + ( 1.f - hStereoDft->smooth_fac[1][b] ) * res_pred_gain[b + STEREO_DFT_BAND_MAX]; + } } } - } #ifdef DFT_STEREO_SPAR_MIXING if ( hMdDec != 0 ) @@ -854,7 +857,7 @@ void ivas_sba_dirac_stereo_dec( hCPE = st_ivas->hCPE[0]; hStereoDft = hCPE->hStereoDft; #ifdef DFT_STEREO_SPAR_MIXING - if ( st_ivas->nchan_transport > 1 && ! mcmasa) + if ( st_ivas->nchan_transport > 1 && !mcmasa) { dtx_flag = 0; fd_cng_flag = 0; @@ -875,7 +878,7 @@ void ivas_sba_dirac_stereo_dec( hStereoDft->NFFT #ifdef DFT_STEREO_SPAR_MIXING , - (st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) ? 1 : 0 + (st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) #endif ); @@ -907,17 +910,17 @@ void ivas_sba_dirac_stereo_dec( st_ivas->hQMetaData, tmp_synth, DFT[0], - st_ivas->ivas_format == MC_FORMAT, + st_ivas->ivas_format == MC_FORMAT, #ifdef DFT_STEREO_SPAR_MIXING - mcmasa ? hSCE->hCoreCoder[0]->L_frame : 0, - mcmasa + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) ? hSCE->hCoreCoder[0]->L_frame : 0, + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) #else hSCE->hCoreCoder[0]->L_frame #endif ); #ifdef DFT_STEREO_SPAR_MIXING - if (!mcmasa) + if ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) { set_f( hStereoDft->res_pred_gain, 1.f, 3 * STEREO_DFT_BAND_MAX ); } @@ -971,7 +974,7 @@ void ivas_sba_dirac_stereo_dec( output_frame #ifdef DFT_STEREO_SPAR_MIXING , - mcmasa, + (st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa), hStereoDft #endif ); @@ -985,7 +988,7 @@ void ivas_sba_dirac_stereo_dec( ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame #ifdef DFT_STEREO_SPAR_MIXING , - ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) ? 
1 : 0 + ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) #endif ); } diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index fb837f4bb4..f82c5c3cab 100755 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -1146,11 +1146,7 @@ void stereo_dft_dec_synthesize( void stereo_dft_dec( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ Decoder_State *st0, /* i/o: decoder state structure */ -#ifdef DFT_STEREO_SPAR_MIXING - float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ -#else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ -#endif float *input_mem, /* i/o: mem of buffer DFT analysis */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ @@ -1405,7 +1401,7 @@ void stereo_dft_dec( /* No residual coding in inactive frames, instead pDFT_RES is used for the second channel */ #ifdef DFT_STEREO_SPAR_MIXING - if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !sba_dirac_stereo_flag ) + if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !(sba_dirac_stereo_flag && hMdDec) ) #else if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata ) #endif -- GitLab From 878de2336091d8e83c148b49196f893badae10e2 Mon Sep 17 00:00:00 2001 From: rhb Date: Thu, 19 Jan 2023 16:29:26 +0100 Subject: [PATCH 12/20] revert one accidental change from previous commit --- lib_dec/ivas_sba_dirac_stereo_dec.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index be65633126..c4cff5abbd 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -270,11 +270,8 @@ static void map_params_dirac_to_stereo( /* apply upper bounds depending on band */ #ifdef DFT_STEREO_SPAR_MIXING - if ( !mcmasa ) - { - 
hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) ); - hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); - } + hStereoDft->smooth_fac[0][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ) ); + hStereoDft->smooth_fac[1][b] = max( hStereoDft->min_smooth_gains[b], min( hStereoDft->max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ) ); #else hStereoDft->smooth_fac[0][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[0][b] ); hStereoDft->smooth_fac[1][b] = min( max_smooth_gains[b], hStereoDft->smooth_fac[1][b] ); -- GitLab From b0e5db9456c515f159fc477e78dab8fbd91591ae Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 10 Feb 2023 13:00:05 +0100 Subject: [PATCH 13/20] fix formatting in a few files --- lib_com/delay_comp.c | 2 +- lib_dec/ivas_dec.c | 4 ++-- lib_dec/ivas_post_proc.c | 2 +- lib_dec/ivas_sba_dirac_stereo_dec.c | 30 ++++++++++++++--------------- lib_dec/ivas_stat_dec.h | 2 +- lib_dec/ivas_stereo_dft_dec.c | 14 +++++++------- 6 files changed, 27 insertions(+), 27 deletions(-) diff --git a/lib_com/delay_comp.c b/lib_com/delay_comp.c index e7713f3b47..194b4b2919 100644 --- a/lib_com/delay_comp.c +++ b/lib_com/delay_comp.c @@ -55,7 +55,7 @@ int32_t get_delay( const int32_t io_fs, /* i : input/output sampling frequency */ const IVAS_FORMAT ivas_format, /* i : IVAS format */ HANDLE_CLDFB_FILTER_BANK hCldfb, /* i : Handle of Cldfb analysis */ - const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ + const int32_t binaural_latency_ns /* i : binaural renderer HRTF delay in ns */ ) { int32_t delay = 0; diff --git a/lib_dec/ivas_dec.c b/lib_dec/ivas_dec.c index 6004d165e8..bb750eab00 100644 --- a/lib_dec/ivas_dec.c +++ b/lib_dec/ivas_dec.c @@ -317,7 +317,7 @@ ivas_error ivas_dec( } #endif - 
ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame + ivas_sba_dirac_stereo_dec( st_ivas, output, output_frame #ifdef DFT_STEREO_SPAR_MIXING , st_ivas->ivas_format == MC_FORMAT @@ -378,7 +378,7 @@ ivas_error ivas_dec( } else /* SBA_MODE_SPAR */ #ifdef DFT_STEREO_SPAR_MIXING - if ( ! st_ivas->sba_dirac_stereo_flag ) + if ( !st_ivas->sba_dirac_stereo_flag ) #endif { ivas_sba_upmixer_renderer( st_ivas, output, output_frame ); /* Note: ivas_sba_linear_renderer() or ivas_dirac_dec() are called internally */ diff --git a/lib_dec/ivas_post_proc.c b/lib_dec/ivas_post_proc.c index 20624c5bd0..849f4b79c0 100644 --- a/lib_dec/ivas_post_proc.c +++ b/lib_dec/ivas_post_proc.c @@ -109,7 +109,7 @@ void ivas_post_proc( #ifdef DFT_STEREO_SPAR_MIXING else if ( sba_dirac_stereo_flag && sts[n]->element_mode == IVAS_CPE_MDCT ) { - int16_t numZeros = (int16_t)(NS2SA( output_Fs, N_ZERO_MDCT_NS )); + int16_t numZeros = (int16_t) ( NS2SA( output_Fs, N_ZERO_MDCT_NS ) ); mvr2r( sts[n]->hHQ_core->old_out + numZeros, sts[n]->hTcxDec->FBTCXdelayBuf, delay_comp ); } #endif diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 08087ca0be..2c77ab1b19 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -180,12 +180,12 @@ static float get_panning( *-------------------------------------------------------------------*/ static void map_params_dirac_to_stereo( - STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ + STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ const IVAS_QMETADATA_HANDLE hQMetaData, /* i : frame of MASA q_metadata */ float synth[], /* i : decoded downmix signal */ - float DFT[STEREO_DFT_BUF_MAX], /* i/o: DFT buffer */ - const uint8_t b_wide_panning, /* i : flag indicating wider panning */ - const int16_t L_frame /* i : core signal length */ + float DFT[STEREO_DFT_BUF_MAX], /* i/o: DFT buffer */ + const uint8_t b_wide_panning, /* i : flag indicating 
wider panning */ + const int16_t L_frame /* i : core signal length */ #ifdef DFT_STEREO_SPAR_MIXING , const int16_t mcmasa @@ -468,8 +468,8 @@ static void ivas_sba_dirac_stereo_compute_hb_gain( static void ivas_sba_dirac_stereo_upmix_hb( float hb_stereo_synth[CPE_CHANNELS][L_FRAME48k], /* i/o: stereo HB synthesis signal */ float hb_synth[L_FRAME48k], /* i : HB signal */ - float hb_gain[NB_DIV], /* i : side gains for HB signal */ - const int16_t output_frame /* i : output frame length per channel */ + float hb_gain[NB_DIV], /* i : side gains for HB signal */ + const int16_t output_frame /* i : output frame length per channel */ #ifdef DFT_STEREO_SPAR_MIXING , const int16_t mcmasa, @@ -480,7 +480,7 @@ static void ivas_sba_dirac_stereo_upmix_hb( int16_t i; #ifdef DFT_STEREO_SPAR_MIXING - if (!mcmasa) + if ( !mcmasa ) { for ( i = 0; i < output_frame / 2; i++ ) { @@ -685,7 +685,7 @@ void ivas_sba_dirac_stereo_smooth_parameters( res_pred_gain = hStereoDft->res_pred_gain + k_offset * STEREO_DFT_BAND_MAX; #ifdef DFT_STEREO_SPAR_MIXING - if (!hMdDec) + if ( !hMdDec ) #endif { /* Smoothing of side and prediction gains between ftrames */ @@ -854,7 +854,7 @@ void ivas_sba_dirac_stereo_dec( hCPE = st_ivas->hCPE[0]; hStereoDft = hCPE->hStereoDft; #ifdef DFT_STEREO_SPAR_MIXING - if ( st_ivas->nchan_transport > 1 && !mcmasa) + if ( st_ivas->nchan_transport > 1 && !mcmasa ) { dtx_flag = 0; fd_cng_flag = 0; @@ -875,7 +875,7 @@ void ivas_sba_dirac_stereo_dec( hStereoDft->NFFT #ifdef DFT_STEREO_SPAR_MIXING , - (st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) + ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) #endif ); @@ -917,7 +917,7 @@ void ivas_sba_dirac_stereo_dec( ); #ifdef DFT_STEREO_SPAR_MIXING - if ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) + if ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) { set_f( hStereoDft->res_pred_gain, 1.f, 3 * STEREO_DFT_BAND_MAX ); } @@ -927,8 +927,8 @@ void ivas_sba_dirac_stereo_dec( stereo_dft_dec( hStereoDft, hCPE->hCoreCoder[0], DFT, 
NULL, NULL, 1 /*st_ivas->sba_dirac_stereo_flag*/ #ifdef DFT_STEREO_SPAR_MIXING , - (st_ivas->hSpar != NULL && !mcmasa) ? st_ivas->hSpar->hMdDec : 0, - (st_ivas->hSpar != NULL && !mcmasa) ? st_ivas->hSpar->hFbMixer->cross_fade_start_offset : 0, + ( st_ivas->hSpar != NULL && !mcmasa ) ? st_ivas->hSpar->hMdDec : 0, + ( st_ivas->hSpar != NULL && !mcmasa ) ? st_ivas->hSpar->hFbMixer->cross_fade_start_offset : 0, st_ivas->hDecoderConfig->output_Fs, st_ivas->nchan_transport #endif @@ -971,7 +971,7 @@ void ivas_sba_dirac_stereo_dec( output_frame #ifdef DFT_STEREO_SPAR_MIXING , - (st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa), + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ), hStereoDft #endif ); @@ -985,7 +985,7 @@ void ivas_sba_dirac_stereo_dec( ivas_sba_dirac_stereo_apply_td_stefi( hStereoDft, output, output_frame #ifdef DFT_STEREO_SPAR_MIXING , - ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa) + ( st_ivas->sba_mode == SBA_MODE_SPAR && !mcmasa ) #endif ); } diff --git a/lib_dec/ivas_stat_dec.h b/lib_dec/ivas_stat_dec.h index 08a6562300..907ad1b049 100755 --- a/lib_dec/ivas_stat_dec.h +++ b/lib_dec/ivas_stat_dec.h @@ -232,7 +232,7 @@ typedef struct stereo_dft_dec_data_struct #ifdef DFT_STEREO_SPAR_MIXING int16_t first_frame; - float mixer_mat_smooth[2][4][2*IVAS_MAX_NUM_BANDS]; + float mixer_mat_smooth[2][4][2 * IVAS_MAX_NUM_BANDS]; float g_L_prev; float g_R_prev; const float *max_smooth_gains, *min_smooth_gains; diff --git a/lib_dec/ivas_stereo_dft_dec.c b/lib_dec/ivas_stereo_dft_dec.c index 5919d99c71..3be3c59428 100755 --- a/lib_dec/ivas_stereo_dft_dec.c +++ b/lib_dec/ivas_stereo_dft_dec.c @@ -76,7 +76,7 @@ * Local function prototypes *-------------------------------------------------------------------------*/ -static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const int32_t output_Fs +static void stereo_dft_dec_open( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, const int32_t output_Fs #ifdef DFT_STEREO_SPAR_MIXING , const int16_t nchan_transport @@ 
-1142,12 +1142,12 @@ void stereo_dft_dec_synthesize( *-------------------------------------------------------------------------*/ void stereo_dft_dec( - STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ - Decoder_State *st0, /* i/o: decoder state structure */ + STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ + Decoder_State *st0, /* i/o: decoder state structure */ float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ - float *input_mem, /* i/o: mem of buffer DFT analysis */ - STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ - const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ + float *input_mem, /* i/o: mem of buffer DFT analysis */ + STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ + const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ #ifdef DFT_STEREO_SPAR_MIXING , ivas_spar_md_dec_state_t *hMdDec, /* i: SPAR MD handle for upmixing */ @@ -1395,7 +1395,7 @@ void stereo_dft_dec( /* No residual coding in inactive frames, instead pDFT_RES is used for the second channel */ #ifdef DFT_STEREO_SPAR_MIXING - if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !(sba_dirac_stereo_flag && hMdDec) ) + if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata && !( sba_dirac_stereo_flag && hMdDec ) ) #else if ( b >= hStereoDft->res_cod_band_max && !hStereoDft->frame_sid_nodata ) #endif -- GitLab From 47083ed328a7563b65cd2796f3cd578106a02664 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 10 Feb 2023 13:16:12 +0100 Subject: [PATCH 14/20] fix formatting of lib_dec/ivas_cpe_dec.c and lib_dec/ivas_stereo_switching_dec.c --- lib_dec/ivas_cpe_dec.c | 18 +++++++++--------- lib_dec/ivas_stereo_switching_dec.c | 9 +++++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lib_dec/ivas_cpe_dec.c b/lib_dec/ivas_cpe_dec.c index 25e9dd019f..b656f9255b 100755
--- a/lib_dec/ivas_cpe_dec.c +++ b/lib_dec/ivas_cpe_dec.c @@ -410,10 +410,10 @@ ivas_error ivas_cpe_dec( } else { - stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 + stereo_dft_dec( hCPE->hStereoDft, sts[0], DFT, hCPE->input_mem[1], hCPE->hStereoCng, 0 #ifdef DFT_STEREO_SPAR_MIXING , - 0,0,0,0 + 0, 0, 0, 0 #endif ); } @@ -464,7 +464,7 @@ ivas_error ivas_cpe_dec( * Synthesis synchronization between CPE modes *----------------------------------------------------------------*/ #ifdef DFT_STEREO_SPAR_MIXING - if (!st_ivas->sba_dirac_stereo_flag) + if ( !st_ivas->sba_dirac_stereo_flag ) #endif { synchro_synthesis( ivas_total_brate, hCPE, output, output_frame, 0 ); @@ -683,11 +683,11 @@ ivas_error create_cpe_dec( for ( n = 0; n < CPE_CHANNELS; n++ ) { - if ( st_ivas->sba_dirac_stereo_flag + if ( st_ivas->sba_dirac_stereo_flag #ifdef DFT_STEREO_SPAR_MIXING - && st_ivas->nchan_transport == 1 -#endif - ) + && st_ivas->nchan_transport == 1 +#endif + ) { /* for SBA DirAC stereo output CPE element is only used for upmix, core coder is found in SCE element used for core decoding */ break; @@ -725,12 +725,12 @@ ivas_error create_cpe_dec( if ( hCPE->element_mode == IVAS_CPE_DFT || st_ivas->sba_dirac_stereo_flag ) #endif { - if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, st_ivas->sba_dirac_stereo_flag #ifdef DFT_STEREO_SPAR_MIXING , st_ivas->nchan_transport #endif - ) ) != IVAS_ERR_OK ) + ) ) != IVAS_ERR_OK ) { return error; } diff --git a/lib_dec/ivas_stereo_switching_dec.c b/lib_dec/ivas_stereo_switching_dec.c index 67980fab3e..c8b373ce5c 100644 --- a/lib_dec/ivas_stereo_switching_dec.c +++ b/lib_dec/ivas_stereo_switching_dec.c @@ -421,12 +421,12 @@ ivas_error stereo_memory_dec( deallocate_CoreCoder( hCPE->hCoreCoder[1] ); /* allocate DFT stereo data structure */ - if ( ( 
error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 + if ( ( error = stereo_dft_dec_create( &( hCPE->hStereoDft ), hCPE->element_brate, output_Fs, 0 #ifdef DFT_STEREO_SPAR_MIXING - , + , nchan_transport #endif - ) ) != IVAS_ERR_OK ) + ) ) != IVAS_ERR_OK ) { return error; } @@ -1195,7 +1195,8 @@ void synchro_synthesis( * TD/MDCT stereo synchro *----------------------------------------------------------------*/ #ifdef DFT_STEREO_SPAR_MIXING - if( sba_dirac_stereo_flag ) return; + if ( sba_dirac_stereo_flag ) + return; #endif if ( hCPE->element_mode == IVAS_CPE_TD || hCPE->element_mode == IVAS_CPE_MDCT ) { -- GitLab From 08d8046d487639d23ab0728ad01468e36705b1d7 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Fri, 10 Feb 2023 15:35:02 +0100 Subject: [PATCH 15/20] fix memory bug due to wrong band config at lower SRs --- lib_dec/ivas_sba_dirac_stereo_dec.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 2c77ab1b19..9560ab38ea 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -130,6 +130,10 @@ static int16_t ivas_sba_dirac_stereo_band_config( { band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; } + if ( band_limits[i] > NFFT/2 ) + { + band_limits[i] = NFFT / 2; + } #else band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; #endif @@ -909,7 +913,7 @@ void ivas_sba_dirac_stereo_dec( DFT[0], st_ivas->ivas_format == MC_FORMAT, #ifdef DFT_STEREO_SPAR_MIXING - ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) ? hSCE->hCoreCoder[0]->L_frame : 0, + ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) ? 
hSCE->hCoreCoder[0]->L_frame : output_frame, ( st_ivas->sba_mode != SBA_MODE_SPAR || mcmasa ) #else hSCE->hCoreCoder[0]->L_frame -- GitLab From f2358f9f41494f0c2d06850e171ac0e7ec8c274c Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 13 Feb 2023 14:04:22 +0100 Subject: [PATCH 16/20] fix wrong declaration in lib_com/ivas_prot.h --- lib_com/ivas_prot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index a472ba2116..756ec00bff 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1103,7 +1103,7 @@ void stereo_dft_dec( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ Decoder_State *st0, /* i/o: decoder state structure */ #ifdef DFT_STEREO_SPAR_MIXING - float DFT[CPE_CHANNELS + 1][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ + float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ #else float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ #endif -- GitLab From 80b611e2864ed3565cc8c8a9d84e0688a6bfd0a3 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 13 Feb 2023 14:14:05 +0100 Subject: [PATCH 17/20] remove unnecessary condition from ivas_dec --- lib_dec/ivas_dec.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib_dec/ivas_dec.c b/lib_dec/ivas_dec.c index bb750eab00..384baa0bb2 100644 --- a/lib_dec/ivas_dec.c +++ b/lib_dec/ivas_dec.c @@ -233,11 +233,7 @@ ivas_error ivas_dec( #endif } } -#ifdef DFT_STEREO_SPAR_MIXING - else if ( ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT ) || st_ivas->sba_dirac_stereo_flag ) -#else else if ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == MASA_FORMAT ) -#endif { set_s( nb_bits_metadata, 0, MAX_SCE ); -- GitLab From 427542d6050a73ea8b0b9a5bbb55b9223842b23b Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Mon, 13 Feb 2023 16:22:18 +0100 Subject: [PATCH 18/20] fix clang-format errors in lib_dec/ivas_sba_dirac_stereo_dec.c --- 
lib_dec/ivas_sba_dirac_stereo_dec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 9560ab38ea..86a7353651 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -130,7 +130,7 @@ static int16_t ivas_sba_dirac_stereo_band_config( { band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; } - if ( band_limits[i] > NFFT/2 ) + if ( band_limits[i] > NFFT / 2 ) { band_limits[i] = NFFT / 2; } -- GitLab From ae8f755145670b83112ba812ea969dbedaff9bea Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Tue, 14 Feb 2023 12:54:45 +0100 Subject: [PATCH 19/20] improve setup of band_limits in ivas_sba_dirac_stereo_band_config --- lib_dec/ivas_sba_dirac_stereo_dec.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib_dec/ivas_sba_dirac_stereo_dec.c b/lib_dec/ivas_sba_dirac_stereo_dec.c index 86a7353651..bbd612b7ec 100755 --- a/lib_dec/ivas_sba_dirac_stereo_dec.c +++ b/lib_dec/ivas_sba_dirac_stereo_dec.c @@ -130,9 +130,10 @@ static int16_t ivas_sba_dirac_stereo_band_config( { band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; } - if ( band_limits[i] > NFFT / 2 ) + if ( band_limits[i] >= NFFT / 2 ) { - band_limits[i] = NFFT / 2; + nbands = i; + break; } #else band_limits[i] = DirAC_band_grouping_5[i] * bins_per_cldfb_band; @@ -218,7 +219,7 @@ static void map_params_dirac_to_stereo( nBlocks = MAX_PARAM_SPATIAL_SUBFRAMES; #ifdef DFT_STEREO_SPAR_MIXING - nbands = !mcmasa ? 
SBA_DIRAC_STEREO_NUM_BANDS : 5; + nbands = hStereoDft->nbands; #else nbands = SBA_DIRAC_STEREO_NUM_BANDS; #endif @@ -744,6 +745,10 @@ void ivas_sba_dirac_stereo_smooth_parameters( { hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = hMdDec->mixer_mat[i][j][b]; } + for ( ; b < IVAS_MAX_NUM_BANDS; b++ ) + { + hStereoDft->mixer_mat_smooth[i][j][b + k * IVAS_MAX_NUM_BANDS] = 0.f; + } } } } -- GitLab From 25872f96d2c24afd150284e685c90ce437590749 Mon Sep 17 00:00:00 2001 From: Dominik Weckbecker Date: Tue, 14 Feb 2023 12:56:31 +0100 Subject: [PATCH 20/20] remove unnecessary ifdef in ivas_prot.h --- lib_com/ivas_prot.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib_com/ivas_prot.h b/lib_com/ivas_prot.h index 756ec00bff..59907577a6 100644 --- a/lib_com/ivas_prot.h +++ b/lib_com/ivas_prot.h @@ -1102,11 +1102,7 @@ void stereo_dft_dec_synthesize( void stereo_dft_dec( STEREO_DFT_DEC_DATA_HANDLE hStereoDft, /* i/o: decoder DFT stereo handle */ Decoder_State *st0, /* i/o: decoder state structure */ -#ifdef DFT_STEREO_SPAR_MIXING - float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ -#else - float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ -#endif + float DFT[CPE_CHANNELS][STEREO_DFT_BUF_MAX], /* i/o: DFT buffers */ float *input_mem, /* i/o: mem of buffer DFT analysis */ STEREO_CNG_DEC_HANDLE hStereoCng, /* i/o: Stereo CNG data structure */ const int16_t sba_dirac_stereo_flag /* i : signal stereo output for SBA DirAC */ -- GitLab