diff --git a/apps/decoder.c b/apps/decoder.c index b54ad4b92e4eff6053d767b70425a6c701403bfb..5525c46a0b5a35accf639bce03899991997a3fb4 100644 --- a/apps/decoder.c +++ b/apps/decoder.c @@ -2905,7 +2905,14 @@ static ivas_error decodeVoIP( } vec_pos_update = ( vec_pos_update + 1 ) % vec_pos_len; frame++; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + if ( vec_pos_update == 0 ) + { + systemTime_ms += vec_pos_len * systemTimeInc_ms; + } +#else systemTime_ms += systemTimeInc_ms; +#endif #ifdef WMOPS update_mem(); @@ -2913,6 +2920,94 @@ static ivas_error decodeVoIP( #endif } + +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t nSamplesFlushed = 0; + + /* decode and get samples */ +#ifdef SPLIT_REND_WITH_HEAD_ROT + if ( ( error = IVAS_DEC_Flush( hIvasDec, nOutSamples, IVAS_DEC_PCM_INT16, (void *) pcmBuf, &nSamplesFlushed ) ) != IVAS_ERR_OK ) +#else + if ( ( error = IVAS_DEC_Flush( hIvasDec, nOutSamples, pcmBuf, &nSamplesFlushed ) ) != IVAS_ERR_OK ) +#endif + { + fprintf( stderr, "\nError in IVAS_DEC_VoIP_Flush: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + if ( nSamplesFlushed ) + { + /* Write current frame */ + if ( ( error = AudioFileWriter_write( afWriter, pcmBuf, nSamplesFlushed * nOutChannels ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nOutput audio file writer error\n" ); + goto cleanup; + } + + /* Write ISm metadata to external file(s) */ + if ( decodedGoodFrame && arg.outputConfig == IVAS_AUDIO_CONFIG_EXTERNAL ) + { + if ( bsFormat == IVAS_DEC_BS_OBJ || bsFormat == IVAS_DEC_BS_MASA_ISM || bsFormat == IVAS_DEC_BS_SBA_ISM ) + { + if ( ( error = IVAS_DEC_GetNumObjects( hIvasDec, &numObj ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetNumObjects: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + for ( i = 0; i < numObj; ++i ) + { + IVAS_ISM_METADATA IsmMetadata; + + if ( ( error = IVAS_DEC_GetObjectMetadata( hIvasDec, &IsmMetadata, 0, i ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetObjectMetadata: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + + if ( ( IsmFileWriter_writeFrame( IsmMetadata, ismWriters[i] ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError writing ISM metadata to file %s\n", IsmFileWriter_getFilePath( ismWriters[i] ) ); + goto cleanup; + } + } + } + + if ( bsFormat == IVAS_DEC_BS_MASA || bsFormat == IVAS_DEC_BS_MASA_ISM ) + { + IVAS_MASA_DECODER_EXT_OUT_META_HANDLE hMasaExtOutMeta; +#ifdef NONBE_FIX_984_OMASA_EXT_OUTPUT + int16_t fullDelayNumSamples[3]; + float delayMs; + + /* delayNumSamples is zeroed, and delayNumSamples_orig is updated only on first good frame, so need to re-fetch delay info */ + if ( ( error = IVAS_DEC_GetDelay( hIvasDec, fullDelayNumSamples, &delayTimeScale ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nUnable to get delay of decoder: %s\n", ivas_error_to_string( error ) ); + } +#endif + if ( ( error = IVAS_DEC_GetMasaMetadata( hIvasDec, &hMasaExtOutMeta, 0 ) ) != IVAS_ERR_OK ) + { + fprintf( stderr, "\nError in IVAS_DEC_GetMasaMetadata: %s\n", IVAS_DEC_GetErrorMessage( error ) ); + goto cleanup; + } + +#ifdef NONBE_FIX_984_OMASA_EXT_OUTPUT + delayMs = (float) ( fullDelayNumSamples[0] ) / (float) ( delayTimeScale ); + if ( ( error = MasaFileWriter_writeFrame( masaWriter, hMasaExtOutMeta, &delayMs ) ) != IVAS_ERR_OK ) +#else + if ( ( error = MasaFileWriter_writeFrame( masaWriter, hMasaExtOutMeta ) ) != IVAS_ERR_OK ) +#endif + { + fprintf( stderr, "\nError writing MASA metadata to file: %s\n", MasaFileWriter_getFilePath( masaWriter ) ); + goto cleanup; + } + } + } + } +#endif + + /*------------------------------------------------------------------------------------------* * Add zeros at the end to have equal length of synthesized signals *------------------------------------------------------------------------------------------*/ diff --git a/lib_com/options.h b/lib_com/options.h index 16880c701277224351efb1904b4368cf5a671066..9b03463a181acdc36ccd7594187aac3a70b3d32e 100644 --- a/lib_com/options.h +++ b/lib_com/options.h @@ -143,6 +143,8 @@ /* ################### Start FIXES switches ########################### */ +#define NONBE_FIX_864_JBM_RENDER_FRAMESIZE /* FhG: issue #864: fix different behaviour of JBM TSM with different render frame sizes */ + /* #################### End FIXES switches ############################ */ diff --git a/lib_dec/lib_dec.c b/lib_dec/lib_dec.c index d7f3e5f6ec9f26db9b0372565edbddb3040eddc5..64f1baa800c737f585fca733c1cbb65a6794e782 100644 --- a/lib_dec/lib_dec.c +++ b/lib_dec/lib_dec.c @@ -57,6 +57,9 @@ struct IVAS_DEC_VOIP uint16_t lastDecodedWasActive; JB4_DATAUNIT_HANDLE hCurrentDataUnit; /* Points to the currently processed data unit */ uint16_t *bs_conversion_buf; /* Buffer for bitstream conversion from packed to serial */ +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t nSamplesRendered20ms; /* how many samples have been rendered since the last 20ms render border*/ +#endif #ifdef SUPPORT_JBM_TRACEFILE IVAS_JBM_TRACE_DATA JbmTraceData; #endif @@ -78,6 +81,9 @@ struct IVAS_DEC bool Opt_VOIP; /* flag indicating VOIP mode with JBM */ int16_t tsm_scale; /* scale for TSM operation */ int16_t tsm_max_scaling; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + int16_t timeScalingDone; /* have we done already one TSM in a 20ms frame? */ +#endif float tsm_quality; float *apaExecBuffer; /* Buffer for APA scaling */ PCMDSP_APA_HANDLE hTimeScaler; @@ -112,7 +118,9 @@ static ivas_error IVAS_DEC_RendererFeedTcSamples( IVAS_DEC_HANDLE hIvasDec, cons static ivas_error IVAS_DEC_GetRenderedSamples( IVAS_DEC_HANDLE hIvasDec, const uint16_t nSamplesForRendering, uint16_t *nSamplesRendered, uint16_t *nSamplesAvailableNext, int16_t *pcmBuf ); static ivas_error IVAS_DEC_GetBufferedNumberOfSamples( IVAS_DEC_HANDLE hIvasDec, int16_t *nSamplesBuffered ); static int16_t get_render_frame_size_ms( IVAS_RENDER_FRAMESIZE render_framesize ); - +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE +static void update_voip_rendered20ms( IVAS_DEC_HANDLE hIvasDec, const int16_t nSamplesRendered ); +#endif /*---------------------------------------------------------------------* * IVAS_DEC_Open() @@ -149,6 +157,9 @@ ivas_error IVAS_DEC_Open( hIvasDec->tsm_scale = 100; hIvasDec->tsm_max_scaling = 0; hIvasDec->tsm_quality = 1.0f; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->timeScalingDone = 0; +#endif hIvasDec->needNewFrame = false; hIvasDec->nTransportChannelsOld = 0; hIvasDec->nSamplesAvailableNext = 0; @@ -651,6 +662,9 @@ ivas_error IVAS_DEC_EnableVoIP( hIvasDec->hVoIP->lastDecodedWasActive = 0; hIvasDec->hVoIP->hCurrentDataUnit = NULL; hIvasDec->hVoIP->nSamplesFrame = (uint16_t) ( hDecoderConfig->output_Fs / FRAMES_PER_SEC ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->hVoIP->nSamplesRendered20ms = 0; +#endif #define WMC_TOOL_SKIP /* Bitstream conversion is not counted towards complexity and memory usage */ @@ -887,18 +901,27 @@ ivas_error IVAS_DEC_GetSamples( assert( nTimeScalerOutSamples <= APA_BUF ); nSamplesTcsScaled = nTimeScalerOutSamples / nTransportChannels; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + hIvasDec->timeScalingDone = 1; +#endif } else { nSamplesTcsScaled = hIvasDec->nSamplesFrame; } +#ifdef DEBUG_MODE_JBM + dbgwrite( &nTimeScalerOutSamples, sizeof( uint16_t ), 1, 1, "./res/JBM_nTimeScaleOutSamples.dat" ); +#endif /* Feed decoded transport channels samples to the renderer */ if ( ( error = IVAS_DEC_RendererFeedTcSamples( hIvasDec, nSamplesTcsScaled, &nResidualSamples, hIvasDec->apaExecBuffer ) ) != IVAS_ERR_OK ) { return error; } +#ifdef DEBUG_MODE_JBM + dbgwrite( &nResidualSamples, sizeof( int16_t ), 1, 1, "./res/JBM_nResidualSamples.dat" ); +#endif if ( hIvasDec->st_ivas->hDecoderConfig->Opt_tsm ) { /* feed residual samples to TSM for the next call */ @@ -1131,6 +1154,9 @@ static ivas_error IVAS_DEC_GetBufferedNumberOfSamples( if ( hIvasDec->st_ivas->hTcBuffer != NULL ) { *nSamplesBuffered = hIvasDec->st_ivas->hTcBuffer->n_samples_buffered - hIvasDec->st_ivas->hTcBuffer->n_samples_rendered; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + *nSamplesBuffered += hIvasDec->hVoIP->nSamplesRendered20ms; +#endif } return IVAS_ERR_OK; @@ -2117,7 +2143,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( uint32_t extBufferedTime_ms, scale, maxScaling; JB4_DATAUNIT_HANDLE dataUnit; uint16_t extBufferedSamples; +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE int16_t timeScalingDone; +#endif int16_t result; ivas_error error; int16_t nSamplesRendered; @@ -2126,7 +2154,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( st_ivas = hIvasDec->st_ivas; hDecoderConfig = st_ivas->hDecoderConfig; hVoIP = hIvasDec->hVoIP; +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE timeScalingDone = 0; +#endif nOutChannels = (uint8_t) st_ivas->hDecoderConfig->nchan_out; nSamplesRendered = 0; @@ -2150,10 +2180,15 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } } +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + extBufferedSamples = nSamplesBuffered; +#else extBufferedSamples = nSamplesRendered + nSamplesBuffered; +#endif extBufferedTime_ms = extBufferedSamples * 1000 / hDecoderConfig->output_Fs; dataUnit = NULL; + /* pop one access unit from the jitter buffer */ result = JB4_PopDataUnit( hVoIP->hJBM, systemTimestamp_ms, extBufferedTime_ms, &dataUnit, &scale, &maxScaling ); if ( result != 0 ) @@ -2162,17 +2197,30 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } maxScaling = maxScaling * hDecoderConfig->output_Fs / 1000; +#ifdef DEBUG_MODE_JBM + dbgwrite( &extBufferedSamples, sizeof( uint16_t ), 1, 1, "./res/JBM_extBufferedSamples.dat" ); + dbgwrite( &systemTimestamp_ms, sizeof( uint32_t ), 1, 1, "./res/JBM_systemTimestamp.dat" ); + dbgwrite( &scale, sizeof( uint32_t ), 1, 1, "./res/JBM_scale.dat" ); + dbgwrite( &maxScaling, sizeof( uint32_t ), 1, 1, "./res/JBM_maxScale.dat" ); +#endif + /* avoid time scaling multiple times in one sound card slot */ if ( scale != 100U ) { +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + if ( hIvasDec->timeScalingDone ) +#else if ( timeScalingDone ) +#endif { scale = 100; } +#ifndef NONBE_FIX_864_JBM_RENDER_FRAMESIZE else { timeScalingDone = 1; } +#endif } /* limit scale to range supported by time scaler */ @@ -2257,6 +2305,9 @@ ivas_error IVAS_DEC_VoIP_GetSamples( nSamplesRendered += nSamplesToZero; hIvasDec->nSamplesRendered += nSamplesToZero; hIvasDec->nSamplesAvailableNext -= nSamplesToZero; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + update_voip_rendered20ms( hIvasDec, nSamplesToZero ); +#endif } else { @@ -2271,12 +2322,37 @@ ivas_error IVAS_DEC_VoIP_GetSamples( } nSamplesRendered += nSamplesRendered_loop; +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + update_voip_rendered20ms( hIvasDec, nSamplesRendered_loop ); +#endif } } return IVAS_ERR_OK; } +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE +/*---------------------------------------------------------------------* + * update_voip_rendered20ms( ) + * + * Function to flush remaining audio in VoIP + *---------------------------------------------------------------------*/ + +static void update_voip_rendered20ms( + IVAS_DEC_HANDLE hIvasDec, + const int16_t nSamplesRendered ) +{ + int16_t nSamplesRenderedTotal; + nSamplesRenderedTotal = hIvasDec->hVoIP->nSamplesRendered20ms + nSamplesRendered; + /* we have crossed a 20ms border, reset the time scaling done flag */ + if ( nSamplesRenderedTotal >= hIvasDec->hVoIP->nSamplesFrame ) + { + hIvasDec->timeScalingDone = 0; + } + hIvasDec->hVoIP->nSamplesRendered20ms = nSamplesRenderedTotal % hIvasDec->hVoIP->nSamplesFrame; +} + +#endif /*---------------------------------------------------------------------* * IVAS_DEC_VoIP_Flush( ) @@ -2300,8 +2376,19 @@ ivas_error IVAS_DEC_Flush( nSamplesToRender = (uint16_t) *nSamplesFlushed; /* render IVAS frames */ - error = IVAS_DEC_GetRenderedSamples( hIvasDec, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcmBuf ); - +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + error = IVAS_ERR_OK; + if ( nSamplesToRender > 0 && hIvasDec->st_ivas->ivas_format != MONO_FORMAT ) + { +#endif + error = IVAS_DEC_GetRenderedSamples( hIvasDec, nSamplesToRender, &nSamplesFlushedLocal, &hIvasDec->nSamplesAvailableNext, pcmBuf ); +#ifdef NONBE_FIX_864_JBM_RENDER_FRAMESIZE + } + else + { + *nSamplesFlushed = 0; + } +#endif return error; }