diff --git a/apps/decoder.c b/apps/decoder.c index 752fc401d12799cc534522f71dc3d1c5eb62a21e..f4d3a61bea8ba69b0d19581932496e9aec6eee57 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -2923,12 +2923,19 @@ static ivas_error decodeG192( goto cleanup; } - /* Write current frame */ - if ( ( error = AudioFileWriter_write( afWriter, pcmBuf, nSamplesFlushed * nOutChannels ) ) != IVAS_ERR_OK ) +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + if ( !isSplitCoded ) { - fprintf( stderr, "\nOutput audio file writer error\n" ); - goto cleanup; +#endif + /* Write current frame */ + if ( ( error = AudioFileWriter_write( afWriter, pcmBuf, nSamplesFlushed * nOutChannels ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nOutput audio file writer error\n" ); + goto cleanup; + } +#ifdef FIX_1342_PROPER_FLUSH_IN_SR } +#endif /* Write ISM metadata to external file(s) */ if ( decodedGoodFrame && arg.outputConfig == IVAS_AUDIO_CONFIG_EXTERNAL ) diff --git a/lib_com/options.h b/lib_com/options.h index 6d9d98192d4f87e54492ddad3e63b8d410068ef7..aac391c61d450b899eaa9a3eee5b5d12aa8a5593 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -161,7 +161,8 @@ /* only BE switches wrt wrt. TS 26.258 V3.0 */ /*#define FIX_I4_OL_PITCH*/ /* fix open-loop pitch used for EVS core switching */ -#define TMP_1342_WORKAROUND_DEC_FLUSH_BROKEN_IN_SR /* FhG: Temporary workaround for incorrect implementation of decoder flush with split rendering */ +/*#define TMP_1342_WORKAROUND_DEC_FLUSH_BROKEN_IN_SR*/ /* FhG: Temporary workaround for incorrect implementation of decoder flush with split rendering; disabled, superseded by FIX_1342_PROPER_FLUSH_IN_SR */ +#define FIX_1342_PROPER_FLUSH_IN_SR /* FhG: Proper implementation of decoder flush with split rendering, using isar_render_poses + isar_generate_metadata_and_bitstream */ #define NONBE_1122_KEEP_EVS_MODE_UNCHANGED /* FhG: Disables fix for issue 1122 in EVS mode to keep BE tests green. This switch should be removed once the 1122 fix is added to EVS via a CR. */ #define FIX_FLOAT_1539_G192_FORMAT_SWITCH /* Nokia: reintroduce format switching for g192 bitstreams */ diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index 7199ae4e64e49bff16aebe96ebb2d6bfd8069597..d4a631ffdbfb497c909638461f8243b52925aebe 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -2176,7 +2176,11 @@ static ivas_error isar_generate_metadata_and_bitstream( Quaternion, st_ivas->hRenderConfig->split_rend_config.splitRendBitRate, st_ivas->hRenderConfig->split_rend_config.codec, +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + (int16_t) ( (int32_t) nSamples * 1000 / (int32_t) st_ivas->hDecoderConfig->output_Fs ), +#else st_ivas->hRenderConfig->split_rend_config.isar_frame_size_ms, +#endif st_ivas->hRenderConfig->split_rend_config.codec_frame_size_ms, splitRendBits, p_Cldfb_RealBuffer_Binaural, @@ -4339,7 +4343,71 @@ ivas_error IVAS_DEC_Flush( error = IVAS_ERR_OK; if ( nSamplesToRender > 0 && hIvasDec->st_ivas->ivas_format != MONO_FORMAT ) { - error = ivas_dec_render( hIvasDec->st_ivas, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcm_type_API_to_internal( pcmType ), pcmBuf ); +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + if ( is_split_rendering_enabled( hIvasDec->st_ivas->hDecoderConfig, hIvasDec->st_ivas->hRenderConfig ) ) + { + int16_t i, nOutSamples = 0; + bool needNewFrame; + const int16_t splitFrameSize = isar_get_frame_size( hIvasDec->st_ivas ); + float head_pose_buf[BINAURAL_CHANNELS * MAX_HEAD_ROT_POSES][L_FRAME48k]; + float *p_head_pose_buf[BINAURAL_CHANNELS * MAX_HEAD_ROT_POSES]; + + /* Zero-initialise so that ivas_limiter_dec never reads uninitialised memory + (e.g. when the ring buffer provides fewer samples than the nominal frame size) */ + for ( i = 0; i < BINAURAL_CHANNELS * MAX_HEAD_ROT_POSES; ++i ) + { + set_zero( head_pose_buf[i], L_FRAME48k ); + p_head_pose_buf[i] = head_pose_buf[i]; + } + + /* Render remaining buffered audio into the SR ring buffers using the + correctly-sized internal float buffer to avoid writing beyond the end of the pcmBuf */ + if ( ( error = isar_render_poses( hIvasDec, splitFrameSize, &nOutSamples, &needNewFrame ) ) != IVAS_ERR_OK ) + { + return error; + } + + *nSamplesFlushed = nOutSamples; + + /* Flush the remaining audio output in SR mode. + For BINAURAL_SPLIT_PCM, generate metadata and binaural PCM from the ring buffer. + The ISAR metadata output is discarded (flush frames are not written to the metadata file). */ + if ( hIvasDec->st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_BINAURAL_SPLIT_PCM && + hIvasDec->hasBeenFedFirstGoodFrame && nOutSamples > 0 ) + { + Decoder_Struct *st_ivas_flush = hIvasDec->st_ivas; + ISAR_SPLIT_REND_BITS_DATA flushSplitRendBits; + uint8_t flushBitsBuf[ISAR_MAX_SPLIT_REND_BITS_BUFFER_SIZE_IN_BYTES]; + + flushSplitRendBits.bits_buf = flushBitsBuf; + flushSplitRendBits.bits_read = 0; + flushSplitRendBits.bits_written = 0; + flushSplitRendBits.buf_len = ISAR_MAX_SPLIT_REND_BITS_BUFFER_SIZE_IN_BYTES; + flushSplitRendBits.codec = ISAR_SPLIT_REND_CODEC_DEFAULT; + flushSplitRendBits.pose_correction = ISAR_SPLIT_REND_POSE_CORRECTION_MODE_NONE; + flushSplitRendBits.codec_frame_size_ms = 0; + flushSplitRendBits.isar_frame_size_ms = 0; + flushSplitRendBits.lc3plus_highres = 0; + + /* Pop from ring buffer and generate binaural PCM into p_head_pose_buf */ + if ( ( error = isar_generate_metadata_and_bitstream( st_ivas_flush, p_head_pose_buf, nOutSamples, &flushSplitRendBits ) ) != IVAS_ERR_OK ) + { + return error; + } + +#ifndef DISABLE_LIMITER + ivas_limiter_dec( st_ivas_flush->hLimiter, p_head_pose_buf, st_ivas_flush->hDecoderConfig->nchan_out, nOutSamples, st_ivas_flush->BER_detect ); +#endif + ivas_syn_output( p_head_pose_buf, nOutSamples, st_ivas_flush->hDecoderConfig->nchan_out, (int16_t *) pcmBuf ); + } + } + else + { +#endif + error = ivas_dec_render( hIvasDec->st_ivas, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcm_type_API_to_internal( pcmType ), pcmBuf ); +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + } +#endif } else { diff --git a/lib_isar/isar_prot.h b/lib_isar/isar_prot.h index 1ba51ecd5462d98dcea730c326b4cc48fcaa0d69..020881f2903b7f9448bbd2994cb3c4f4ddf3ee19 100644 --- a/lib_isar/isar_prot.h +++ b/lib_isar/isar_prot.h @@ -277,6 +277,10 @@ void isar_rend_CldfbSplitPreRendProcess( const int32_t target_md_bits, /* i : ISAR MD bitrate */ const int16_t low_res_pre_rend_rot, /* i : low time resolution pre-renderer flag */ const int16_t ro_md_flag /* i : real only metadata for yaw flag */ +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + , + const int16_t num_cldfb_slots /* i : actual number of valid CLDFB columns */ +#endif ); ivas_error isar_renderMultiTDBinToSplitBinaural( diff --git a/lib_isar/isar_splitRendererPre.c b/lib_isar/isar_splitRendererPre.c index 242d29ac457f6435c486e71737d7d7a34efb4a56..6e8690669fc260d3f10d67d9c455fd8dc3a15ac4 100644 --- a/lib_isar/isar_splitRendererPre.c +++ b/lib_isar/isar_splitRendererPre.c @@ -53,7 +53,11 @@ * Local function declarations *---------------------------------------------------------------------*/ +#ifdef FIX_1342_PROPER_FLUSH_IN_SR +static void isar_SplitRenderer_GetRotMd( ISAR_BIN_HR_SPLIT_PRE_REND_HANDLE hBinHrSplitPreRend, MULTI_BIN_REND_POSE_DATA *pMultiBinPoseData, float *Cldfb_RealBuffer_Ref_Binaural[][CLDFB_NO_COL_MAX], float *Cldfb_ImagBuffer_Ref_Binaural[][CLDFB_NO_COL_MAX], const int16_t low_res, const int16_t ro_md_flag, const int16_t num_cldfb_slots ); +#else static void isar_SplitRenderer_GetRotMd( ISAR_BIN_HR_SPLIT_PRE_REND_HANDLE hBinHrSplitPreRend, MULTI_BIN_REND_POSE_DATA *pMultiBinPoseData, float *Cldfb_RealBuffer_Ref_Binaural[][CLDFB_NO_COL_MAX], float *Cldfb_ImagBuffer_Ref_Binaural[][CLDFB_NO_COL_MAX], const int16_t low_res, const int16_t ro_md_flag ); +#endif /*------------------------------------------------------------------------- @@ -1351,6 +1355,10 @@ static void isar_SplitRenderer_GetRotMd( float *Cldfb_ImagBuffer_Ref_Binaural[][CLDFB_NO_COL_MAX], /* o : Reference Binaural signals */ const int16_t low_res, const int16_t ro_md_flag /* i : Flag to indicate real only metadata for yaw */ +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + , + const int16_t num_cldfb_slots /* i : actual number of valid CLDFB columns in the input buffers */ +#endif ) { float cov_ii_re[BINAURAL_CHANNELS][BINAURAL_CHANNELS]; @@ -1371,7 +1379,11 @@ static void isar_SplitRenderer_GetRotMd( if ( low_res ) { +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + num_slots = num_cldfb_slots; +#else num_slots = CLDFB_NO_COL_MAX; +#endif num_subframes = 1; } else @@ -1384,6 +1396,12 @@ static void isar_SplitRenderer_GetRotMd( for ( sf_idx = 0; sf_idx < num_subframes; sf_idx++ ) { start_slot_idx = sf_idx * num_slots; +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + if ( start_slot_idx >= num_cldfb_slots ) + { + break; + } +#endif for ( b = 0; b < num_md_bands; b++ ) { if ( ( b < SPLIT_REND_RO_MD_BAND_THRESH ) || ( !ro_md_flag && b < COMPLEX_MD_BAND_THRESH ) ) @@ -1439,11 +1457,19 @@ void isar_rend_CldfbSplitPreRendProcess( const int32_t target_md_bits, /* i : ISAR MD bitrate */ const int16_t low_res_pre_rend_rot, /* i : low time resolution pre-renderer flag */ const int16_t ro_md_flag /* i : real only metadata for yaw flag */ +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + , + const int16_t num_cldfb_slots /* i : actual number of valid CLDFB columns */ +#endif ) { push_wmops( "isar_rend_CldfbSplitPreRendProcess" ); +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + isar_SplitRenderer_GetRotMd( hBinHrSplitPreRend, pMultiBinPoseData, Cldfb_In_BinReal, Cldfb_In_BinImag, low_res_pre_rend_rot, ro_md_flag, num_cldfb_slots ); +#else isar_SplitRenderer_GetRotMd( hBinHrSplitPreRend, pMultiBinPoseData, Cldfb_In_BinReal, Cldfb_In_BinImag, low_res_pre_rend_rot, ro_md_flag ); +#endif isar_SplitRenderer_quant_code( hBinHrSplitPreRend, headPosition, pMultiBinPoseData, pBits, low_res_pre_rend_rot, ro_md_flag, target_md_bits ); @@ -1897,7 +1923,7 @@ ivas_error isar_renderMultiTDBinToSplitBinaural( int16_t j; float *p_Cldfb_In_BinReal[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX]; float *p_Cldfb_In_BinImag[MAX_HEAD_ROT_POSES * BINAURAL_CHANNELS][CLDFB_NO_COL_MAX]; - int32_t num_slots; + int32_t num_slots = 0; push_wmops( "isar_renderMultiTDBinToSplitBinaural" ); @@ -1925,7 +1951,21 @@ ivas_error isar_renderMultiTDBinToSplitBinaural( /* Artificially delay input to head pose correction analysis by LC3plus coding delay, so that audio and metadata are in sync after decoding */ mvr2r( hSplitBin->lc3plusDelayBuffers[i] + frame_size, hSplitBin->lc3plusDelayBuffers[i], (int16_t) hSplitBin->lc3plusDelaySamples ); in_delayed[i] = hSplitBin->lc3plusDelayBuffers[i]; +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + { + /* During flush the actual number of new samples may be less than the nominal frame size. + * Only copy the available samples and zero-fill the rest to avoid reading uninitialised memory. */ + int16_t actual_samples_lc3 = (int16_t) ( (int32_t) hSplitBin->hLc3plusEnc->config.samplerate * isar_frame_size_ms / 1000 ); + mvr2r( in[i], hSplitBin->lc3plusDelayBuffers[i] + hSplitBin->lc3plusDelaySamples, actual_samples_lc3 ); + if ( actual_samples_lc3 < frame_size ) + { + set_zero( hSplitBin->lc3plusDelayBuffers[i] + hSplitBin->lc3plusDelaySamples + actual_samples_lc3, + frame_size - actual_samples_lc3 ); + } + } +#else mvr2r( in[i], hSplitBin->lc3plusDelayBuffers[i] + hSplitBin->lc3plusDelaySamples, frame_size ); +#endif } } else @@ -1938,7 +1978,14 @@ ivas_error isar_renderMultiTDBinToSplitBinaural( if ( ( hSplitBin->multiBinPoseData.poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB ) || ( !useLc3plus && !pcm_out_flag ) ) { +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + /* During flush, use only the actual number of valid CLDFB slots derived from isar_frame_size_ms */ + num_slots = ( hSplitBin->multiBinPoseData.poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB ) + ? (int32_t) ( (int32_t) isar_frame_size_ms * 1000000L / CLDFB_SLOT_NS ) + : ( hSplitBin->hSplitBinLCLDEnc->iNumBlocks * hSplitBin->hSplitBinLCLDEnc->iNumIterations ); +#else num_slots = ( hSplitBin->multiBinPoseData.poseCorrectionMode == ISAR_SPLIT_REND_POSE_CORRECTION_MODE_CLDFB ) ? CLDFB_NO_COL_MAX : ( hSplitBin->hSplitBinLCLDEnc->iNumBlocks * hSplitBin->hSplitBinLCLDEnc->iNumIterations ); +#endif num_cldfb_bands = hSplitBin->hCldfbHandles->cldfbAna[0]->no_channels; /* CLDFB Analysis*/ @@ -1985,7 +2032,12 @@ ivas_error isar_renderMultiTDBinToSplitBinaural( pBits, target_md_bits, low_res_pre_rend_rot, - ro_md_flag ); + ro_md_flag +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + , + (int16_t) num_slots +#endif + ); } if ( pcm_out_flag == 0 ) diff --git a/lib_isar/lib_isar_pre_rend.c b/lib_isar/lib_isar_pre_rend.c index 6ba81ec709e2ec53d95eab28aad6965fe04df78e..66b71eb9d733d3cef0b0b17cc82132e2a5852baa 100644 --- a/lib_isar/lib_isar_pre_rend.c +++ b/lib_isar/lib_isar_pre_rend.c @@ -341,7 +341,11 @@ ivas_error ISAR_PRE_REND_MultiBinToSplitBinaural( target_md_bits = isar_get_split_rend_md_target_brate( SplitRendBitRate, pcm_out_flag ) * L_FRAME48k / 48000; +#ifdef FIX_1342_PROPER_FLUSH_IN_SR + isar_rend_CldfbSplitPreRendProcess( hSplitBin->hBinHrSplitPreRend, headPosition, &hSplitBin->multiBinPoseData, Cldfb_In_BinReal, Cldfb_In_BinImag, pBits, target_md_bits, low_res_pre_rend_rot, ro_md_flag, (int16_t) ( isar_frame_size_ms * 1000000 / CLDFB_SLOT_NS ) ); +#else isar_rend_CldfbSplitPreRendProcess( hSplitBin->hBinHrSplitPreRend, headPosition, &hSplitBin->multiBinPoseData, Cldfb_In_BinReal, Cldfb_In_BinImag, pBits, target_md_bits, low_res_pre_rend_rot, ro_md_flag ); +#endif } if ( pcm_out_flag == 0 )