From f8905d106bf6df36bbf39da16d35d97fb5a78ebf Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Tue, 3 Jun 2025 13:52:19 +0200 Subject: [PATCH 1/3] port MR 1223 from float repo fix for JBM control getting the wrong info for non-20ms rendering framesize --- apps/decoder.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ lib_com/options.h | 2 + lib_dec/lib_dec.c | 93 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 189 insertions(+), 1 deletion(-) diff --git a/apps/decoder.c b/apps/decoder.c index b54ad4b92..5525c46a0 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -2905,7 +2905,14 @@ static ivas_error decodeVoIP( } vec_pos_update = ( vec_pos_update + 1 ) % vec_pos_len; frame++; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + if ( vec_pos_update == 0 ) + { + systemTime_ms += vec_pos_len * systemTimeInc_ms; + } +#else systemTime_ms += systemTimeInc_ms; +#endif #ifdef WMOPS update_mem(); @@ -2913,6 +2920,94 @@ static ivas_error decodeVoIP( #endif } + +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t nSamplesFlushed = 0; + + /* decode and get samples */ +#ifdef SPLIT_REND_WITH_HEAD_ROT + if ( ( error = IVAS_DEC_Flush( hIvasDec, nOutSamples, IVAS_DEC_PCM_INT16, (void *) pcmBuf, &nSamplesFlushed ) ) != IVAS_ERR_OK ) +#else + if ( ( error = IVAS_DEC_Flush( hIvasDec, nOutSamples, pcmBuf, &nSamplesFlushed ) ) != IVAS_ERR_OK ) +#endif + { + fprintf( stderr, "\nError in IVAS_DEC_VoIP_Flush: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + if ( nSamplesFlushed ) + { + /* Write current frame */ + if ( ( error = AudioFileWriter_write( afWriter, pcmBuf, nSamplesFlushed * nOutChannels ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nOutput audio file writer error\n" ); + goto cleanup; + } + + /* Write ISm metadata to external file(s) */ + if ( decodedGoodFrame && arg.outputConfig == IVAS_AUDIO_CONFIG_EXTERNAL ) + { + if ( bsFormat == IVAS_DEC_BS_OBJ || bsFormat == IVAS_DEC_BS_MASA_ISM || bsFormat == IVAS_DEC_BS_SBA_ISM ) + { + if ( ( error = IVAS_DEC_GetNumObjects( hIvasDec, &numObj ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetNumObjects: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + for ( i = 0; i < numObj; ++i ) + { + IVAS_ISM_METADATA IsmMetadata; + + if ( ( error = IVAS_DEC_GetObjectMetadata( hIvasDec, &IsmMetadata, 0, i ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetObjectMetadata: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + if ( ( IsmFileWriter_writeFrame( IsmMetadata, ismWriters[i] ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError writing ISM metadata to file %s\n", IsmFileWriter_getFilePath( ismWriters[i] ) ); + goto cleanup; + } + } + } + + if ( bsFormat == IVAS_DEC_BS_MASA || bsFormat == IVAS_DEC_BS_MASA_ISM ) + { + IVAS_MASA_DECODER_EXT_OUT_META_HANDLE hMasaExtOutMeta; +#ifdef NONBE_FIX_984_OMASA_EXT_OUTPUT + int16_t fullDelayNumSamples[3]; + float delayMs; + + /* delayNumSamples is zeroed, and delayNumSamples_orig is updated only on first good frame, so need to re-fetch delay info */ + if ( ( error = IVAS_DEC_GetDelay( hIvasDec, fullDelayNumSamples, &delayTimeScale ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nUnable to get delay of decoder: %s\n", ivas_error_to_string( error ) ); + } +#endif + if ( ( error = IVAS_DEC_GetMasaMetadata( hIvasDec, &hMasaExtOutMeta, 0 ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetMasaMetadata: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + +#ifdef NONBE_FIX_984_OMASA_EXT_OUTPUT + delayMs = (float) ( fullDelayNumSamples[0] ) / (float) ( delayTimeScale ); + if ( ( error = MasaFileWriter_writeFrame( masaWriter, hMasaExtOutMeta, &delayMs ) ) != IVAS_ERR_OK ) +#else + if ( ( error = MasaFileWriter_writeFrame( masaWriter, hMasaExtOutMeta ) ) != IVAS_ERR_OK ) +#endif + { + fprintf( stderr, "\nError writing MASA metadata to file: %s\n", MasaFileWriter_getFilePath( masaWriter ) ); + goto cleanup; + } + } + } + } +#endif + + /*------------------------------------------------------------------------------------------* * Add zeros at the end to have equal length of synthesized signals *------------------------------------------------------------------------------------------*/ diff --git a/lib_com/options.h b/lib_com/options.h index 16880c701..9b03463a1 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -143,6 +143,8 @@ /* ################### Start FIXES switches ########################### */ +#define NONBE_FIX_864_JBM_RENDER_FRAMESIZE /* FhG: issue #864: fix different behaviour of JBM TSM with different render frame sizes */ + /* #################### End FIXES switches ############################ */ diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index d7f3e5f6e..e8d51ca01 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -57,6 +57,9 @@ struct IVAS_DEC_VOIP uint16_t lastDecodedWasActive; JB4_DATAUNIT_HANDLE hCurrentDataUnit; /* Points to the currently processed data unit */ uint16_t *bs_conversion_buf; /* Buffer for bitstream conversion from packed to serial */ +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t nSamplesRendered20ms; /* how many samples have been rendered since the last 20ms render border*/ +#endif #ifdef SUPPORT_JBM_TRACEFILE IVAS_JBM_TRACE_DATA JbmTraceData; #endif @@ -77,6 +80,9 @@ struct IVAS_DEC int16_t bitstreamformat; /* Bitstream format flag (G.192/MIME/VOIP_G192_RTP/VOIP_RTPDUMP) */ bool Opt_VOIP; /* flag indicating VOIP mode with JBM */ int16_t tsm_scale; /* scale for TSM operation */ +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t timeScalingDone; /* have we done already one TSM in a 20ms frame? */ +#endif int16_t tsm_max_scaling; float tsm_quality; float *apaExecBuffer; /* Buffer for APA scaling */ @@ -112,6 +118,9 @@ static ivas_error IVAS_DEC_RendererFeedTcSamples( IVAS_DEC_HANDLE hIvasDec, cons static ivas_error IVAS_DEC_GetRenderedSamples( IVAS_DEC_HANDLE hIvasDec, const uint16_t nSamplesForRendering, uint16_t *nSamplesRendered, uint16_t *nSamplesAvailableNext, int16_t *pcmBuf ); static ivas_error IVAS_DEC_GetBufferedNumberOfSamples( IVAS_DEC_HANDLE hIvasDec, int16_t *nSamplesBuffered ); static int16_t get_render_frame_size_ms( IVAS_RENDER_FRAMESIZE render_framesize ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE +static void update_voip_rendered20ms( IVAS_DEC_HANDLE hIvasDec, const int16_t nSamplesRendered ); +#endif /*---------------------------------------------------------------------* @@ -149,6 +158,9 @@ ivas_error IVAS_DEC_Open( hIvasDec->tsm_scale = 100; hIvasDec->tsm_max_scaling = 0; hIvasDec->tsm_quality = 1.0f; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->timeScalingDone = 0; +#endif hIvasDec->needNewFrame = false; hIvasDec->nTransportChannelsOld = 0; hIvasDec->nSamplesAvailableNext = 0; @@ -651,6 +663,9 @@ ivas_error IVAS_DEC_EnableVoIP( hIvasDec->hVoIP->lastDecodedWasActive = 0; hIvasDec->hVoIP->hCurrentDataUnit = NULL; hIvasDec->hVoIP->nSamplesFrame = (uint16_t) ( hDecoderConfig->output_Fs / FRAMES_PER_SEC ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->hVoIP->nSamplesRendered20ms = 0; +#endif #define WMC_TOOL_SKIP /* Bitstream conversion is not counted towards complexity and memory usage */ @@ -887,17 +902,26 @@ ivas_error IVAS_DEC_GetSamples( assert( nTimeScalerOutSamples <= APA_BUF ); nSamplesTcsScaled = nTimeScalerOutSamples / nTransportChannels; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->timeScalingDone = 1; +#endif } else { nSamplesTcsScaled = hIvasDec->nSamplesFrame; } +#ifdef DEBUG_MODE_JBM + dbgwrite( &nTimeScalerOutSamples, sizeof( uint16_t ), 1, 1, "./res/JBM_nTimeScaleOutSamples.dat" ); +#endif /* Feed decoded transport channels samples to the renderer */ if ( ( error = IVAS_DEC_RendererFeedTcSamples( hIvasDec, nSamplesTcsScaled, &nResidualSamples, hIvasDec->apaExecBuffer ) ) != IVAS_ERR_OK ) { return error; } +#ifdef DEBUG_MODE_JBM + dbgwrite( &nResidualSamples, sizeof( int16_t ), 1, 1, "./res/JBM_nResidualSamples.dat" ); +#endif if ( hIvasDec->st_ivas->hDecoderConfig->Opt_tsm ) { @@ -1131,6 +1155,9 @@ static ivas_error IVAS_DEC_GetBufferedNumberOfSamples( if ( hIvasDec->st_ivas->hTcBuffer != NULL ) { *nSamplesBuffered = hIvasDec->st_ivas->hTcBuffer->n_samples_buffered - hIvasDec->st_ivas->hTcBuffer->n_samples_rendered; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + *nSamplesBuffered += hIvasDec->hVoIP->nSamplesRendered20ms; +#endif } return IVAS_ERR_OK; @@ -2117,6 +2144,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( uint32_t extBufferedTime_ms, scale, maxScaling; JB4_DATAUNIT_HANDLE dataUnit; uint16_t extBufferedSamples; +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t timeScalingDone; +#endif int16_t timeScalingDone; int16_t result; ivas_error error; @@ -2126,6 +2156,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( st_ivas = hIvasDec->st_ivas; hDecoderConfig = st_ivas->hDecoderConfig; hVoIP = hIvasDec->hVoIP; +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + timeScalingDone = 0; +#endif timeScalingDone = 0; nOutChannels = (uint8_t) st_ivas->hDecoderConfig->nchan_out; nSamplesRendered = 0; @@ -2150,7 +2183,11 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } } +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + extBufferedSamples = nSamplesBuffered; +#else extBufferedSamples = nSamplesRendered + nSamplesBuffered; +#endif extBufferedTime_ms = extBufferedSamples * 1000 / hDecoderConfig->output_Fs; dataUnit = NULL; @@ -2162,17 +2199,30 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } maxScaling = maxScaling * hDecoderConfig->output_Fs / 1000; +#ifdef DEBUG_MODE_JBM + dbgwrite( &extBufferedSamples, sizeof( uint16_t ), 1, 1, "./res/JBM_extBufferedSamples.dat" ); + dbgwrite( &systemTimestamp_ms, sizeof( uint32_t ), 1, 1, "./res/JBM_systemTimestamp.dat" ); + dbgwrite( &scale, sizeof( uint32_t ), 1, 1, "./res/JBM_scale.dat" ); + dbgwrite( &maxScaling, sizeof( uint32_t ), 1, 1, "./res/JBM_maxScale.dat" ); +#endif + /* avoid time scaling multiple times in one sound card slot */ if ( scale != 100U ) { +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + if ( hIvasDec->timeScalingDone ) +#else if ( timeScalingDone ) +#endif { scale = 100; } +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE else { timeScalingDone = 1; } +#endif } /* limit scale to range supported by time scaler */ @@ -2257,6 +2307,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( nSamplesRendered += nSamplesToZero; hIvasDec->nSamplesRendered += nSamplesToZero; hIvasDec->nSamplesAvailableNext -= nSamplesToZero; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + update_voip_rendered20ms( hIvasDec, nSamplesToZero ); +#endif } else { @@ -2271,12 +2324,38 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } nSamplesRendered += nSamplesRendered_loop; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + update_voip_rendered20ms( hIvasDec, nSamplesRendered_loop ); +#endif } } return IVAS_ERR_OK; } +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + +/*---------------------------------------------------------------------* + * update_voip_rendered20ms( ) + * + * Function to flush remaining audio in VoIP + *---------------------------------------------------------------------*/ + +static void update_voip_rendered20ms( + IVAS_DEC_HANDLE hIvasDec, + const int16_t nSamplesRendered ) +{ + int16_t nSamplesRenderedTotal; + nSamplesRenderedTotal = hIvasDec->hVoIP->nSamplesRendered20ms + nSamplesRendered; + /* we have crossed a 20ms border, reset the time scaling done flag */ + if ( nSamplesRenderedTotal >= hIvasDec->hVoIP->nSamplesFrame ) + { + hIvasDec->timeScalingDone = 0; + } + hIvasDec->hVoIP->nSamplesRendered20ms = nSamplesRenderedTotal % hIvasDec->hVoIP->nSamplesFrame; +} + +#endif /*---------------------------------------------------------------------* * IVAS_DEC_VoIP_Flush( ) @@ -2300,7 +2379,19 @@ ivas_error IVAS_DEC_Flush( nSamplesToRender = (uint16_t) *nSamplesFlushed; /* render IVAS frames */ - error = IVAS_DEC_GetRenderedSamples( hIvasDec, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcmBuf ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + error = IVAS_ERR_OK; + if ( nSamplesToRender > 0 && hIvasDec->st_ivas->ivas_format != MONO_FORMAT ) + { +#endif + error = IVAS_DEC_GetRenderedSamples( hIvasDec, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcmBuf ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + } + else + { + *nSamplesFlushed = 0; + } +#endif return error; } -- GitLab From 572c9813c9a2f94351989fe1e8a938bb965c868e Mon Sep 17 00:00:00 2001 From: Kacper Sagnowski Date: Thu, 5 Jun 2025 10:00:09 +0200 Subject: [PATCH 2/3] Remove duplicate variable --- lib_dec/lib_dec.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index e8d51ca01..3a8253448 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -2147,7 +2147,6 @@ ivas_error IVAS_DEC_VoIP_GetSamples( #ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE int16_t timeScalingDone; #endif - int16_t timeScalingDone; int16_t result; ivas_error error; int16_t nSamplesRendered; @@ -2159,7 +2158,6 @@ ivas_error IVAS_DEC_VoIP_GetSamples( #ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE timeScalingDone = 0; #endif - timeScalingDone = 0; nOutChannels = (uint8_t) st_ivas->hDecoderConfig->nchan_out; nSamplesRendered = 0; -- GitLab From 89843a5f1d4498ab00b138bbf55f1cc4b047c706 Mon Sep 17 00:00:00 2001 From: Kacper Sagnowski Date: Thu, 5 Jun 2025 10:00:31 +0200 Subject: [PATCH 3/3] Whitespace changes to minimise diff wrt original changes --- lib_dec/lib_dec.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index 3a8253448..64f1baa80 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -80,10 +80,10 @@ struct IVAS_DEC int16_t bitstreamformat; /* Bitstream format flag (G.192/MIME/VOIP_G192_RTP/VOIP_RTPDUMP) */ bool Opt_VOIP; /* flag indicating VOIP mode with JBM */ int16_t tsm_scale; /* scale for TSM operation */ + int16_t tsm_max_scaling; #ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE int16_t timeScalingDone; /* have we done already one TSM in a 20ms frame? */ #endif - int16_t tsm_max_scaling; float tsm_quality; float *apaExecBuffer; /* Buffer for APA scaling */ PCMDSP_APA_HANDLE hTimeScaler; @@ -122,7 +122,6 @@ static int16_t get_render_frame_size_ms( IVAS_RENDER_FRAMESIZE render_framesize static void update_voip_rendered20ms( IVAS_DEC_HANDLE hIvasDec, const int16_t nSamplesRendered ); #endif - /*---------------------------------------------------------------------* * IVAS_DEC_Open() * @@ -910,19 +909,19 @@ ivas_error IVAS_DEC_GetSamples( { nSamplesTcsScaled = hIvasDec->nSamplesFrame; } + #ifdef DEBUG_MODE_JBM dbgwrite( &nTimeScalerOutSamples, sizeof( uint16_t ), 1, 1, "./res/JBM_nTimeScaleOutSamples.dat" ); #endif - /* Feed decoded transport channels samples to the renderer */ if ( ( error = IVAS_DEC_RendererFeedTcSamples( hIvasDec, nSamplesTcsScaled, &nResidualSamples, hIvasDec->apaExecBuffer ) ) != IVAS_ERR_OK ) { return error; } + #ifdef DEBUG_MODE_JBM dbgwrite( &nResidualSamples, sizeof( int16_t ), 1, 1, "./res/JBM_nResidualSamples.dat" ); #endif - if ( hIvasDec->st_ivas->hDecoderConfig->Opt_tsm ) { /* feed residual samples to TSM for the next call */ @@ -2189,6 +2188,7 @@ ivas_error IVAS_DEC_VoIP_GetSamples( extBufferedTime_ms = extBufferedSamples * 1000 / hDecoderConfig->output_Fs; dataUnit = NULL; + /* pop one access unit from the jitter buffer */ result = JB4_PopDataUnit( hVoIP->hJBM, systemTimestamp_ms, extBufferedTime_ms, &dataUnit, &scale, &maxScaling ); if ( result != 0 ) @@ -2332,7 +2332,6 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } #ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE - /*---------------------------------------------------------------------* * update_voip_rendered20ms( ) * @@ -2390,7 +2389,6 @@ ivas_error IVAS_DEC_Flush( *nSamplesFlushed = 0; } #endif - return error; } -- GitLab