Loading apps/encoder.c +16 −7 Original line number Diff line number Diff line Loading @@ -1640,8 +1640,18 @@ static void usage_enc( void ) fprintf( stdout, " *VBR mode (average bitrate),\n" ); fprintf( stdout, " for AMR-WB IO modes R = (6600, 8850, 12650, 14250, 15850, 18250,\n" ); fprintf( stdout, " 19850, 23050, 23850) \n" ); #ifdef ISM_HIGHEST_BITRATE fprintf( stdout, " for IVAS stereo R = (13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000) \n" ); fprintf( stdout, " for IVAS ISM R = 13200 for 1 ISM, 16400 for 1 ISM and 2 ISM, \n" ); fprintf( stdout, " (24400, 32000, 48000, 64000, 80000, 96000, 128000) \n" ); fprintf( stdout, " for 2 ISM, 3 ISM and 4 ISM also 160000, 192000, 256000) \n" ); fprintf( stdout, " for 3 ISM and 4 ISM also 384000 \n" ); fprintf( stdout, " for 4 ISM also 512000 \n" ); #else fprintf( stdout, " for IVAS stereo & ISm R =(13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000) \n" ); #endif fprintf( stdout, " for IVAS SBA, MASA, MC R=(13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000, 384000, 512000) \n" ); fprintf( stdout, " Alternatively, R can be a bitrate switching file which consists of R values\n" ); Loading @@ -1657,16 +1667,16 @@ static void usage_enc( void ) fprintf( stdout, "EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, -masa, -mc\n" ); fprintf( stdout, "-stereo [Mode] : Stereo format, default is unified stereo \n" ); fprintf( stdout, " optional for Mode: 1: DFT Stereo, 2: TD Stereo, 3: MDCT Stereo\n" ); fprintf( stdout, "-ism Channels Files : ISm format \n" ); fprintf( stdout, " where Channels specifies the number of ISms (1-4)\n" ); fprintf( stdout, "-ism Channels Files : ISM format \n" ); fprintf( stdout, " where Channels specifies the number of ISMs (1-4)\n" ); fprintf( stdout, " and Files specify input files 
containing metadata, one file per object\n" ); fprintf( stdout, " (use NULL for no input metadata)\n" ); fprintf( stdout, "-sba +/-Order : Scene Based Audio input format (Ambisonics ACN/SN3D),\n" ); fprintf( stdout, " where Order specifies the Ambisionics order (1-3),\n" ); fprintf( stdout, " where positive (+) means full 3D and negative (-) only 2D/planar components to be coded\n" ); fprintf( stdout, "-masa Ch File : MASA format \n" ); fprintf( stdout, " where Ch specifies the number of input/transport channels (1 or 2): \n" ); fprintf( stdout, " and File specifies input file containing parametric metadata \n" ); fprintf( stdout, "-masa Channels File : MASA format \n" ); fprintf( stdout, " where Channels specifies the number of input/transport channels (1 or 2): \n" ); fprintf( stdout, " and File specifies input file containing parametric MASA metadata \n" ); fprintf( stdout, "-mc InputConf : Multi-channel format\n" ); fprintf( stdout, " where InputConf specifies the channel configuration: 5_1, 7_1, 5_1_2, 5_1_4, 7_1_4\n" ); fprintf( stdout, " Loudspeaker positions are assumed to have azimuth and elevation as per \n" ); Loading @@ -1676,8 +1686,7 @@ static void usage_enc( void ) fprintf( stdout, " where 0 = adaptive, 3-100 = fixed in number of frames,\n" ); fprintf( stdout, " default is deactivated\n" ); fprintf( stdout, "-dtx : Activate DTX mode with a SID update rate of 8 frames\n" ); fprintf( stdout, " Note: DTX is currently supported in EVS, stereo, 1 ISm, \n" ); fprintf( stdout, " SBA (up to 128kbps) and MASA (up to 128kbps)\n" ); fprintf( stdout, " Note: DTX is supported in EVS, stereo, ISM, SBA up to 80kbps and MASA up to 128kbps \n" ); fprintf( stdout, "-rf p o : Activate channel-aware mode for WB and SWB signal at 13.2kbps, \n" ); fprintf( stdout, " where FEC indicator, p: LO or HI, and FEC offset, o: 2, 3, 5, or 7 in number of frames.\n" ); fprintf( stdout, " Alternatively p and o can be replaced by a rf configuration file with each line \n" ); Loading 
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,7 @@ #define BINAURALIZATION_DELAY_REPORT /* VA: Issue 255 - Changes the way the decoder delay is reported */ #define FIX_351_HRTF_COMMAND /* VA: Issue 354 - improve "-hrtf" command-line option */ #define FIX_94_VERIFY_WAV_NUM_CHANNELS /* FhG: Issue 94 - Check if number of channels in input wav file matches encoder/renderer configuration */ #define ISM_HIGHEST_BITRATE /* VA: Issue 284: Update highest bitrate limit in ISM format */ #define TUNE_360_OBJECT_WITH_NOISE /* VA: issue 360: consider objects being speech+noise for active speech coding */ Loading lib_enc/lib_enc.c +20 −3 Original line number Diff line number Diff line Loading @@ -1985,24 +1985,41 @@ static ivas_error sanitizeBandwidth( static ivas_error sanitizeBitrateISM( const ENCODER_CONFIG_HANDLE hEncoderConfig ) { #ifdef ISM_HIGHEST_BITRATE if ( hEncoderConfig->ivas_total_brate > IVAS_128k && hEncoderConfig->nchan_inp == 1 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 1 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate > IVAS_256k && hEncoderConfig->nchan_inp == 2 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 2 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate > IVAS_384k && hEncoderConfig->nchan_inp == 3 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 3 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } #else if ( hEncoderConfig->ivas_total_brate > IVAS_256k ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } #endif if ( hEncoderConfig->ivas_total_brate < IVAS_16k4 && hEncoderConfig->nchan_inp == 2 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 2 ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); 
return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 2 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate < IVAS_24k4 && hEncoderConfig->nchan_inp == 3 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 3 ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 3 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate < IVAS_24k4 && hEncoderConfig->nchan_inp == 4 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 4 ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 4 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } return IVAS_ERR_OK; Loading readme.txt +74 −53 Original line number Diff line number Diff line Loading @@ -160,8 +160,13 @@ R : Bitrate in bps, *VBR mode (average bitrate), for AMR-WB IO modes R = (6600, 8850, 12650, 14250, 15850, 18250, 19850, 23050, 23850) for IVAS stereo & ISm R =(13200, 16400, 24400, 32000, 48000, 64000, 80000, for IVAS stereo R = (13200, 16400, 24400, 32000, 48000, 64000, 80000, 96000, 128000, 160000, 192000, 256000) for IVAS ISM R = 13200 for 1 ISM, 16400 for 1 ISM and 2 ISM, (24400, 32000, 48000, 64000, 80000, 96000, 128000) for 2 ISM, 3 ISM and 4 ISM also 160000, 192000, 256000 for 3 ISM and 4 ISM also 384000 for 4 ISM also 512000 for IVAS SBA, MASA, MC R=(13200, 16400, 24400, 32000, 48000, 64000, 80000, 96000, 128000, 160000, 192000, 256000, 384000, 512000) Alternatively, R can be a bitrate switching file which consists of R values Loading @@ -176,16 +181,16 @@ Options: EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, -masa, -mc -stereo [Mode] : Stereo format, default is unified stereo optional for Mode: 1: DFT Stereo, 2: TD Stereo, 3: MDCT Stereo -ism Channels Files : ISm format 
where Channels specifies the number of ISms (1-4) -ism Channels Files : ISM format where Channels specifies the number of ISMs (1-4) and Files specify input files containing metadata, one file per object (use NULL for no input metadata) -sba +/-Order : Scene Based Audio input format (Ambisonics ACN/SN3D), where Order specifies the Ambisonics order (1-3), where positive (+) means full 3D and negative (-) only 2D/planar components to be coded -masa Ch File : MASA format where Ch specifies the number of input/transport channels (1 or 2): and File specifies input file containing parametric metadata -masa Channels File : MASA format where Channels specifies the number of input/transport channels (1 or 2): and File specifies input file containing parametric MASA metadata -mc InputConf : Multi-channel format where InputConf specifies the channel configuration: 5_1, 7_1, 5_1_2, 5_1_4, 7_1_4 Loudspeaker positions are assumed to have azimuth and elevation as per Loading @@ -195,8 +200,7 @@ EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, where 0 = adaptive, 3-100 = fixed in number of frames, default is deactivated -dtx : Activate DTX mode with a SID update rate of 8 frames Note: DTX is currently supported in EVS, stereo, 1 ISm, SBA (up to 128kbps) and MASA (up to 128kbps) Note: DTX is supported in EVS, stereo, ISM, SBA up to 80kbps and MASA up to 128kbps -rf p o : Activate channel-aware mode for WB and SWB signal at 13.2kbps, where FEC indicator, p: LO or HI, and FEC offset, o: 2, 3, 5, or 7 in number of frames. Alternatively p and o can be replaced by a rf configuration file with each line Loading Loading @@ -292,6 +296,7 @@ The output channel ordering is 0, 1, ... N-1. The third row contains an index "L specifying the output channel to which the LFE input will be routed if present. If the third row is omitted, the LFE input is downmixed to all channels with a factor of 1/N. Position is not considered for the LFE channel. 
An example custom loudspeaker layout file is available: ls_setup_16ch_8+4+4.txt Loading @@ -306,55 +311,56 @@ points or complete coverage. Documentation on the self_test.py can be found as a part of scripts/README.md. Note: Running the self_test.py requires the input vectors in the folder scripts/testv. The audio files could unfortunately not be shared, and they need to be replaced in order to run the self_test.py. To complement the test vector set, please replace the empty *.pcm-files in the self_test folder with 16 bit PCM files following the specification below. stv1ISM48s.pcm - 1 channel (1 audio object), 48000 Hz, 1440000 samples stv2ISM48s.pcm - 2 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv2OA32c.pcm - 9 channels (2nd order Ambisonics ACN/SN3D), 32000 Hz stv2OA48c.pcm - 9 channels (2nd order Ambisonics ACN/SN3D), 48000 Hz stv3ISM48s.pcm - 3 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv3OA32c.pcm - 16 channels (3rd order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stv3OA48c.pcm - 16 channels (3rd order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stv4ISM48s.pcm - 4 channel (discrete audio objects), 48000 Hz, 1440000 samples per channel stv8c.pcm - 1 channel, 8000 Hz, clean speech/audio stv8n.pcm - 1 channel, 8000 Hz, noisy speech stv16c.pcm - 1 channel, 16000 Hz, 610307 samples, clean speech stv16n.pcm - 1 channel, 16000 Hz, 257024 samples, noisy speech stv32c.pcm - 1 channel, 32000 Hz, 1220613 samples, clean speech/audio stv32n.pcm - 1 channel, 32000 Hz, 514048 samples, noisy speech stv48c.pcm - 1 channel, 48000 Hz, 1830919 samples, clean speech/audio stv51MC48c.pcm - 6 channels (5.1 1..6 where 4th channel is LFE), 3231233 samples per channel, 48000 Hz, movie excerpt stv512MC48c.pcm - 8 channels (5.1+2 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz, movie excerpt stv714MC48c.pcm - 12 channels (7.1+4 1..12 where 4th channel is LFE), 144000 samples 
per channel, 48000 Hz, movie excerpt stvFOA16c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 16000 Hz, stvFOA32c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stvFOA48c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stvST16c.pcm - 2 channels, 16000 Hz, 329601 samples per channel, clean speech/audio stvST16n.pcm - 2 channels, 16000 Hz, 310401 samples per channel, noisy speech stvST32c.pcm - 2 channels, 32000 Hz, 659200 samples per channel, clean speech/audio stvST32n.pcm - 2 channels, 32000 Hz, 620800 samples per channel, noisy speech stvST48c.pcm - 2 channels, 48000 Hz, 988800 samples per channel, clean speech/audio stvST48n.pcm - 2 channels, 48000 Hz, 931200 samples per channel, noisy speech stv_IVASMASA_1dir1TC.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 144000 samples stv_IVASMASA_1dir1TC_DTX.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 963840 samples stv_IVASMASA_1dir2TC.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 288000 samples per channel stv_IVASMASA_1dir2TC_DTX.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 963840 samples per channel stv_IVASMASA_2dir1TC.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 288000 stv_IVASMASA_2dir2TC.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 144000 samples per channel Note: Running the self_test.py requires the input vectors in the folder scripts/testv. 
stv1ISM48s.wav - 1 channel (1 audio object), 48000 Hz, 1440000 samples stv2ISM48s.wav - 2 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv2OA32c.wav - 9 channels (2nd order Ambisonics ACN/SN3D), 32000 Hz stv2OA48c.wav - 9 channels (2nd order Ambisonics ACN/SN3D), 48000 Hz stv3ISM48s.wav - 3 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv3OA32c.wav - 16 channels (3rd order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stv3OA48c.wav - 16 channels (3rd order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stv4ISM48s.wav - 4 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv4ISM48n.wav - 4 channels (discrete audio objects), 48000 Hz, noisy speech stv8c.wav - 1 channel, 8000 Hz, clean speech/audio stv8n.wav - 1 channel, 8000 Hz, noisy speech stv16c.wav - 1 channel, 16000 Hz, 610307 samples, clean speech stv16n.wav - 1 channel, 16000 Hz, 257024 samples, noisy speech stv32c.wav - 1 channel, 32000 Hz, 1220613 samples, clean speech/audio stv32n.wav - 1 channel, 32000 Hz, 514048 samples, noisy speech stv48c.wav - 1 channel, 48000 Hz, 960000 samples, clean speech/audio stv48n.wav - 1 channel, 48000 Hz, 931200 samples, noisy speech stv51MC48c.wav - 6 channels (5.1 1..6 where 4th channel is LFE), 960000 samples per channel, 48000 Hz stv512MC48c.wav - 8 channels (5.1+2 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv514MC48c.wav - 10 channels (5.1+4 1..10 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv71MC48c.wav - 8 channels (7.1 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv714MC48c.wav - 12 channels (7.1+4 1..12 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stvFOA16c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 16000 Hz stvFOA32c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stvFOA48c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 
48000 Hz, 433408 samples per channel stvST16c.wav - 2 channels, 16000 Hz, 329601 samples per channel, clean speech/audio stvST16n.wav - 2 channels, 16000 Hz, 310401 samples per channel, noisy speech stvST32c.wav - 2 channels, 32000 Hz, 659200 samples per channel, clean speech/audio stvST32n.wav - 2 channels, 32000 Hz, 620800 samples per channel, noisy speech stvST48c.wav - 2 channels, 48000 Hz, 988800 samples per channel, clean speech/audio stvST48n.wav - 2 channels, 48000 Hz, 931200 samples per channel, noisy speech stv1MASA1TC48c.wav - 1 channel (1 MASA transport channel), 48000 Hz, 144000 samples stv1MASA1TC48n.wav - 1 channel (1 MASA transport channel), 48000 Hz, 963840 samples stv1MASA2TC48c.wav - 2 channels (2 MASA transport channels), 48000 Hz, 288000 samples per channel stv1MASA2TC48n.wav - 2 channels (2 MASA transport channels), 48000 Hz, 963840 samples per channel stv2MASA1TC48c.wav - 1 channel (1 MASA transport channel), 48000 Hz, 288000 samples stv2MASA2TC48c.wav - 2 channels (2 MASA transport channels), 48000 Hz, 144000 samples per channel For the MASA operation modes, in addition, the following metadata files are required: stv_IVASMASA_1dir1TC.met stv_IVASMASA_1dir1TC_DTX.met stv_IVASMASA_1dir2TC.met stv_IVASMASA_1dir2TC_DTX.met stv_IVASMASA_2dir1TC.met stv_IVASMASA_2dir2TC.met stv1MASA1TC48c.met stv1MASA1TC48n.met stv1MASA2TC48c.met stv1MASA2TC48n.met stv2MASA1TC48c.met stv2MASA2TC48c.met It is strongly recommended to align these files to the corresponding PCM audio files. 
The MASA metadata files can be generated with the Loading Loading @@ -389,6 +395,21 @@ with the following meaning: ----------------------------------------------------------------------------------- For the Head rotation operation modes, external trajectory files are available: headrot.csv headrot_case00_3000_q.csv headrot_case01_3000_q.csv headrot_case02_3000_q.csv headrot_case03_3000_q.csv For the Renderer configuration option operation modes, external configuration files are available: rend_config_hospital_patientroom.cfg config_recreation.cfg config_renderer.cfg ADDITIONAL SCRIPTS ================== Loading scripts/config/ivas_modes.json +21 −30 Original line number Diff line number Diff line Loading @@ -1828,10 +1828,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "swb": [ 13200, Loading @@ -1842,10 +1839,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "fb": [ 32000, Loading @@ -1853,10 +1847,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ] } }, Loading Loading @@ -1892,10 +1883,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "swb": [ 13200, Loading @@ -1906,10 +1894,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "fb": [ 32000, Loading @@ -1917,10 +1902,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ] } } Loading Loading @@ -2021,7 +2003,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ], "swb": [ 24400, Loading @@ -2033,7 +2016,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ], "fb": [ 32000, Loading @@ -2044,7 +2028,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ] } }, Loading Loading @@ -2118,7 +2103,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ], "swb": [ 24400, Loading @@ -2130,7 +2117,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ], "fb": [ 32000, Loading @@ -2141,7 +2130,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ] } }, Loading Loading
apps/encoder.c +16 −7 Original line number Diff line number Diff line Loading @@ -1640,8 +1640,18 @@ static void usage_enc( void ) fprintf( stdout, " *VBR mode (average bitrate),\n" ); fprintf( stdout, " for AMR-WB IO modes R = (6600, 8850, 12650, 14250, 15850, 18250,\n" ); fprintf( stdout, " 19850, 23050, 23850) \n" ); #ifdef ISM_HIGHEST_BITRATE fprintf( stdout, " for IVAS stereo R = (13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000) \n" ); fprintf( stdout, " for IVAS ISM R = 13200 for 1 ISM, 16400 for 1 ISM and 2 ISM, \n" ); fprintf( stdout, " (24400, 32000, 48000, 64000, 80000, 96000, 128000) \n" ); fprintf( stdout, " for 2 ISM, 3 ISM and 4 ISM also 160000, 192000, 256000) \n" ); fprintf( stdout, " for 3 ISM and 4 ISM also 384000 \n" ); fprintf( stdout, " for 4 ISM also 512000 \n" ); #else fprintf( stdout, " for IVAS stereo & ISm R =(13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000) \n" ); #endif fprintf( stdout, " for IVAS SBA, MASA, MC R=(13200, 16400, 24400, 32000, 48000, 64000, 80000, \n" ); fprintf( stdout, " 96000, 128000, 160000, 192000, 256000, 384000, 512000) \n" ); fprintf( stdout, " Alternatively, R can be a bitrate switching file which consists of R values\n" ); Loading @@ -1657,16 +1667,16 @@ static void usage_enc( void ) fprintf( stdout, "EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, -masa, -mc\n" ); fprintf( stdout, "-stereo [Mode] : Stereo format, default is unified stereo \n" ); fprintf( stdout, " optional for Mode: 1: DFT Stereo, 2: TD Stereo, 3: MDCT Stereo\n" ); fprintf( stdout, "-ism Channels Files : ISm format \n" ); fprintf( stdout, " where Channels specifies the number of ISms (1-4)\n" ); fprintf( stdout, "-ism Channels Files : ISM format \n" ); fprintf( stdout, " where Channels specifies the number of ISMs (1-4)\n" ); fprintf( stdout, " and Files specify input files containing 
metadata, one file per object\n" ); fprintf( stdout, " (use NULL for no input metadata)\n" ); fprintf( stdout, "-sba +/-Order : Scene Based Audio input format (Ambisonics ACN/SN3D),\n" ); fprintf( stdout, " where Order specifies the Ambisionics order (1-3),\n" ); fprintf( stdout, " where positive (+) means full 3D and negative (-) only 2D/planar components to be coded\n" ); fprintf( stdout, "-masa Ch File : MASA format \n" ); fprintf( stdout, " where Ch specifies the number of input/transport channels (1 or 2): \n" ); fprintf( stdout, " and File specifies input file containing parametric metadata \n" ); fprintf( stdout, "-masa Channels File : MASA format \n" ); fprintf( stdout, " where Channels specifies the number of input/transport channels (1 or 2): \n" ); fprintf( stdout, " and File specifies input file containing parametric MASA metadata \n" ); fprintf( stdout, "-mc InputConf : Multi-channel format\n" ); fprintf( stdout, " where InputConf specifies the channel configuration: 5_1, 7_1, 5_1_2, 5_1_4, 7_1_4\n" ); fprintf( stdout, " Loudspeaker positions are assumed to have azimuth and elevation as per \n" ); Loading @@ -1676,8 +1686,7 @@ static void usage_enc( void ) fprintf( stdout, " where 0 = adaptive, 3-100 = fixed in number of frames,\n" ); fprintf( stdout, " default is deactivated\n" ); fprintf( stdout, "-dtx : Activate DTX mode with a SID update rate of 8 frames\n" ); fprintf( stdout, " Note: DTX is currently supported in EVS, stereo, 1 ISm, \n" ); fprintf( stdout, " SBA (up to 128kbps) and MASA (up to 128kbps)\n" ); fprintf( stdout, " Note: DTX is supported in EVS, stereo, ISM, SBA up to 80kbps and MASA up to 128kbps \n" ); fprintf( stdout, "-rf p o : Activate channel-aware mode for WB and SWB signal at 13.2kbps, \n" ); fprintf( stdout, " where FEC indicator, p: LO or HI, and FEC offset, o: 2, 3, 5, or 7 in number of frames.\n" ); fprintf( stdout, " Alternatively p and o can be replaced by a rf configuration file with each line \n" ); Loading
lib_com/options.h +1 −0 Original line number Diff line number Diff line Loading @@ -163,6 +163,7 @@ #define BINAURALIZATION_DELAY_REPORT /* VA: Issue 255 - Changes the way the decoder delay is reported */ #define FIX_351_HRTF_COMMAND /* VA: Issue 354 - improve "-hrtf" command-line option */ #define FIX_94_VERIFY_WAV_NUM_CHANNELS /* FhG: Issue 94 - Check if number of channels in input wav file matches encoder/renderer configuration */ #define ISM_HIGHEST_BITRATE /* VA: Issue 284: Update highest bitrate limit in ISM format */ #define TUNE_360_OBJECT_WITH_NOISE /* VA: issue 360: consider objects being speech+noise for active speech coding */ Loading
lib_enc/lib_enc.c +20 −3 Original line number Diff line number Diff line Loading @@ -1985,24 +1985,41 @@ static ivas_error sanitizeBandwidth( static ivas_error sanitizeBitrateISM( const ENCODER_CONFIG_HANDLE hEncoderConfig ) { #ifdef ISM_HIGHEST_BITRATE if ( hEncoderConfig->ivas_total_brate > IVAS_128k && hEncoderConfig->nchan_inp == 1 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 1 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate > IVAS_256k && hEncoderConfig->nchan_inp == 2 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 2 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate > IVAS_384k && hEncoderConfig->nchan_inp == 3 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for 3 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } #else if ( hEncoderConfig->ivas_total_brate > IVAS_256k ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too high bitrate for ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } #endif if ( hEncoderConfig->ivas_total_brate < IVAS_16k4 && hEncoderConfig->nchan_inp == 2 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 2 ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 2 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate < IVAS_24k4 && hEncoderConfig->nchan_inp == 3 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 3 ISm specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 3 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } if ( hEncoderConfig->ivas_total_brate < IVAS_24k4 && hEncoderConfig->nchan_inp == 4 ) { return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 4 ISm 
specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); return IVAS_ERROR( IVAS_ERR_INVALID_BITRATE, "Too low bitrate for 4 ISM specified in IVAS: %d", hEncoderConfig->ivas_total_brate ); } return IVAS_ERR_OK; Loading
readme.txt +74 −53 Original line number Diff line number Diff line Loading @@ -160,8 +160,13 @@ R : Bitrate in bps, *VBR mode (average bitrate), for AMR-WB IO modes R = (6600, 8850, 12650, 14250, 15850, 18250, 19850, 23050, 23850) for IVAS stereo & ISm R =(13200, 16400, 24400, 32000, 48000, 64000, 80000, for IVAS stereo R = (13200, 16400, 24400, 32000, 48000, 64000, 80000, 96000, 128000, 160000, 192000, 256000) for IVAS ISM R = 13200 for 1 ISM, 16400 for 1 ISM and 2 ISM, (24400, 32000, 48000, 64000, 80000, 96000, 128000) for 2 ISM, 3 ISM and 4 ISM also 160000, 192000, 256000 for 3 ISM and 4 ISM also 384000 for 4 ISM also 512000 for IVAS SBA, MASA, MC R=(13200, 16400, 24400, 32000, 48000, 64000, 80000, 96000, 128000, 160000, 192000, 256000, 384000, 512000) Alternatively, R can be a bitrate switching file which consists of R values Loading @@ -176,16 +181,16 @@ Options: EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, -masa, -mc -stereo [Mode] : Stereo format, default is unified stereo optional for Mode: 1: DFT Stereo, 2: TD Stereo, 3: MDCT Stereo -ism Channels Files : ISm format where Channels specifies the number of ISms (1-4) -ism Channels Files : ISM format where Channels specifies the number of ISMs (1-4) and Files specify input files containing metadata, one file per object (use NULL for no input metadata) -sba +/-Order : Scene Based Audio input format (Ambisonics ACN/SN3D), where Order specifies the Ambisionics order (1-3), where positive (+) means full 3D and negative (-) only 2D/planar components to be coded -masa Ch File : MASA format where Ch specifies the number of input/transport channels (1 or 2): and File specifies input file containing parametric metadata -masa Channels File : MASA format where Channels specifies the number of input/transport channels (1 or 2): and File specifies input file containing parametric MASA metadata -mc InputConf : Multi-channel format where InputConf specifies the channel configuration: 5_1, 
7_1, 5_1_2, 5_1_4, 7_1_4 Loudspeaker positions are assumed to have azimuth and elevation as per Loading @@ -195,8 +200,7 @@ EVS mono is default, for IVAS choose one of the following: -stereo, -ism, -sba, where 0 = adaptive, 3-100 = fixed in number of frames, default is deactivated -dtx : Activate DTX mode with a SID update rate of 8 frames Note: DTX is currently supported in EVS, stereo, 1 ISm, SBA (up to 128kbps) and MASA (up to 128kbps) Note: DTX is supported in EVS, stereo, ISM, SBA up to 80kbps and MASA up to 128kbps -rf p o : Activate channel-aware mode for WB and SWB signal at 13.2kbps, where FEC indicator, p: LO or HI, and FEC offset, o: 2, 3, 5, or 7 in number of frames. Alternatively p and o can be replaced by a rf configuration file with each line Loading Loading @@ -292,6 +296,7 @@ The output channel ordering is 0, 1, ... N-1. The third row contains an index "L specifying the output channel to which the LFE input will be routed if present. If the third row is omitted, the LFE input is downmixed to all channels with a factor of 1/N. Position is not considered for the LFE channel. An example custom loudspeaker layout file is available: ls_setup_16ch_8+4+4.txt Loading @@ -306,55 +311,56 @@ points or complete coverage. Documentation on the self_test.py can be found as a part of scripts/README.md. Note: Running the self_test.py requires the input vectors in the folder scripts/testv. The audio files could unfortunately not be shared, and they need to be replaced in order to run the self_test.py. To complement the test vector set, please replace the empty *.pcm-files in the self_test folder with 16 bit PCM files following the specification below. 
stv1ISM48s.pcm - 1 channel (1 audio object), 48000 Hz, 1440000 samples stv2ISM48s.pcm - 2 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv2OA32c.pcm - 9 channels (2nd order Ambisonics ACN/SN3D), 32000 Hz stv2OA48c.pcm - 9 channels (2nd order Ambisonics ACN/SN3D), 48000 Hz stv3ISM48s.pcm - 3 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv3OA32c.pcm - 16 channels (3rd order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stv3OA48c.pcm - 16 channels (3rd order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stv4ISM48s.pcm - 4 channel (discrete audio objects), 48000 Hz, 1440000 samples per channel stv8c.pcm - 1 channel, 8000 Hz, clean speech/audio stv8n.pcm - 1 channel, 8000 Hz, noisy speech stv16c.pcm - 1 channel, 16000 Hz, 610307 samples, clean speech stv16n.pcm - 1 channel, 16000 Hz, 257024 samples, noisy speech stv32c.pcm - 1 channel, 32000 Hz, 1220613 samples, clean speech/audio stv32n.pcm - 1 channel, 32000 Hz, 514048 samples, noisy speech stv48c.pcm - 1 channel, 48000 Hz, 1830919 samples, clean speech/audio stv51MC48c.pcm - 6 channels (5.1 1..6 where 4th channel is LFE), 3231233 samples per channel, 48000 Hz, movie excerpt stv512MC48c.pcm - 8 channels (5.1+2 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz, movie excerpt stv714MC48c.pcm - 12 channels (7.1+4 1..12 where 4th channel is LFE), 144000 samples per channel, 48000 Hz, movie excerpt stvFOA16c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 16000 Hz, stvFOA32c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stvFOA48c.pcm - 4 channels (1st order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stvST16c.pcm - 2 channels, 16000 Hz, 329601 samples per channel, clean speech/audio stvST16n.pcm - 2 channels, 16000 Hz, 310401 samples per channel, noisy speech stvST32c.pcm - 2 channels, 32000 Hz, 659200 samples per channel, clean speech/audio stvST32n.pcm - 2 
channels, 32000 Hz, 620800 samples per channel, noisy speech stvST48c.pcm - 2 channels, 48000 Hz, 988800 samples per channel, clean speech/audio stvST48n.pcm - 2 channels, 48000 Hz, 931200 samples per channel, noisy speech stv_IVASMASA_1dir1TC.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 144000 samples stv_IVASMASA_1dir1TC_DTX.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 963840 samples stv_IVASMASA_1dir2TC.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 288000 samples per channel stv_IVASMASA_1dir2TC_DTX.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 963840 samples per channel stv_IVASMASA_2dir1TC.pcm - 1 channel (1 MASA transport channel), 48000 Hz, 48000 Hz, 288000 stv_IVASMASA_2dir2TC.pcm - 2 channels (2 MASA transport channel), 48000 Hz, 48000 Hz, 144000 samples per channel Note: Running the self_test.py requires the input vectors in the folder scripts/testv. stv1ISM48s.wav - 1 channel (1 audio object), 48000 Hz, 1440000 samples stv2ISM48s.wav - 2 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv2OA32c.wav - 9 channels (2nd order Ambisonics ACN/SN3D), 32000 Hz stv2OA48c.wav - 9 channels (2nd order Ambisonics ACN/SN3D), 48000 Hz stv3ISM48s.wav - 3 channels (discrete audio objects), 48000 Hz, 1440000 samples per channel stv3OA32c.wav - 16 channels (3rd order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stv3OA48c.wav - 16 channels (3rd order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stv4ISM48s.wav - 4 channel (discrete audio objects), 48000 Hz, 1440000 samples per channel stv4ISM48n.wav - 4 channel (discrete audio objects), 48000 Hz, noisy speech stv8c.wav - 1 channel, 8000 Hz, clean speech/audio stv8n.wav - 1 channel, 8000 Hz, noisy speech stv16c.wav - 1 channel, 16000 Hz, 610307 samples, clean speech stv16n.wav - 1 channel, 16000 Hz, 257024 samples, noisy speech stv32c.wav - 1 channel, 32000 Hz, 1220613 samples, clean speech/audio 
stv32n.wav - 1 channel, 32000 Hz, 514048 samples, noisy speech stv48c.wav - 1 channel, 48000 Hz, 960000 samples, clean speech/audio stv48n.wav - 1 channel, 48000 Hz, 931200 samples, noisy speech stv51MC48c.wav - 6 channels (5.1 1..6 where 4th channel is LFE), 960000 samples per channel, 48000 Hz stv512MC48c.wav - 8 channels (5.1+2 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv514MC48c.wav - 10 channels (5.1+4 1..10 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv71MC48c.wav - 8 channels (7.1 1..8 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stv714MC48c.wav - 12 channels (7.1+4 1..12 where 4th channel is LFE), 144000 samples per channel, 48000 Hz stvFOA16c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 16000 Hz stvFOA32c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 32000 Hz, 288939 samples per channel stvFOA48c.wav - 4 channels (1st order Ambisonics ACN/SN3D), 48000 Hz, 433408 samples per channel stvST16c.wav - 2 channels, 16000 Hz, 329601 samples per channel, clean speech/audio stvST16n.wav - 2 channels, 16000 Hz, 310401 samples per channel, noisy speech stvST32c.wav - 2 channels, 32000 Hz, 659200 samples per channel, clean speech/audio stvST32n.wav - 2 channels, 32000 Hz, 620800 samples per channel, noisy speech stvST48c.wav - 2 channels, 48000 Hz, 988800 samples per channel, clean speech/audio stvST48n.wav - 2 channels, 48000 Hz, 931200 samples per channel, noisy speech stv1MASA1TC48c.wav - 1 channel (1 MASA transport channel), 48000 Hz, 144000 samples stv1MASA1TC48n.wav - 1 channel (1 MASA transport channel), 48000 Hz, 963840 samples stv1MASA2TC48c.wav - 2 channels (2 MASA transport channels), 48000 Hz, 288000 samples per channel stv1MASA2TC48n.wav - 2 channels (2 MASA transport channels), 48000 Hz, 963840 samples per channel stv2MASA1TC48c.wav - 1 channel (1 MASA transport channel), 48000 Hz, 288000 samples stv2MASA2TC48c.wav - 2 channels (2 
MASA transport channels), 48000 Hz, 144000 samples per channel For the MASA operation modes, in addition the following metadata files are required: stv_IVASMASA_1dir1TC.met stv_IVASMASA_1dir1TC_DTX.met stv_IVASMASA_1dir2TC.met stv_IVASMASA_1dir2TC_DTX.met stv_IVASMASA_2dir1TC.met stv_IVASMASA_2dir2TC.met stv1MASA1TC48c.met stv1MASA1TC48n.met stv1MASA2TC48c.met stv1MASA2TC48n.met stv2MASA1TC48c.met stv2MASA2TC48c.met It is strongly recommended to align these files to the corresponding PCM audio files. The MASA metadata files can be generated with the Loading Loading @@ -389,6 +395,21 @@ with the following meaning: ----------------------------------------------------------------------------------- For the Head rotation operation modes, external trajectory files are available: headrot.csv headrot_case00_3000_q.csv headrot_case01_3000_q.csv headrot_case02_3000_q.csv headrot_case03_3000_q.csv For the Renderer configuration option operation modes, external configuration files are available: rend_config_hospital_patientroom.cfg config_recreation.cfg config_renderer.cfg ADDITIONAL SCRIPTS ================== Loading
scripts/config/ivas_modes.json +21 −30 Original line number Diff line number Diff line Loading @@ -1828,10 +1828,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "swb": [ 13200, Loading @@ -1842,10 +1839,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "fb": [ 32000, Loading @@ -1853,10 +1847,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ] } }, Loading Loading @@ -1892,10 +1883,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "swb": [ 13200, Loading @@ -1906,10 +1894,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ], "fb": [ 32000, Loading @@ -1917,10 +1902,7 @@ 64000, 80000, 96000, 128000, 160000, 192000, 256000 128000 ] } } Loading Loading @@ -2021,7 +2003,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ], "swb": [ 24400, Loading @@ -2033,7 +2016,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ], "fb": [ 32000, Loading @@ -2044,7 +2028,8 @@ 128000, 160000, 192000, 256000 256000, 384000 ] } }, Loading Loading @@ -2118,7 +2103,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ], "swb": [ 24400, Loading @@ -2130,7 +2117,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ], "fb": [ 32000, Loading @@ -2141,7 +2130,9 @@ 128000, 160000, 192000, 256000 256000, 384000, 512000 ] } }, Loading