initial revision (74998303) · Commits · IVAS Codec Public Collaboration / IVAS Processing Scripts

LICENSE.md

0 → 100755

+31 −0

Original line number	Diff line number	Diff line
		/******************************************************************************************************

		(C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository. All Rights Reserved.

		This software is protected by copyright law and by international treaties.
		The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		contributors to this repository retain full ownership rights in their respective contributions in
		the software. This notice grants no license of any kind, including but not limited to patent
		license, nor is any license granted by implication, estoppel or otherwise.

		Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		contributions.

		This software is provided "AS IS", without any express or implied warranties. The software is in the
		development stage. It is intended exclusively for experts who have experience with such software and
		solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		and fitness for a particular purpose are hereby disclaimed and excluded.

		Any dispute, controversy or claim arising under or in relation to providing this software shall be
		submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		the United Nations Convention on Contracts on the International Sales of Goods.

		*******************************************************************************************************/

README.md

0 → 100755

+0 −0

File added.

Preview size limit exceeded, changes collapsed.

examples/TEMPLATE.yml

0 → 100755

+244 −0

Original line number	Diff line number	Diff line
		---
		################################################
		# General configuration
		################################################
		### Name of test; default = YYYYMMDD_HH.MM.SS_listening_test
		# name: test SBA
		### Date; default = YYYYMMDD_HH.MM.SS
		# date: 2023.06.30
		### git commit SHA; default = git rev-parse HEAD
		# git_sha: abc123

		### Whether to use multiprocessing; default = true
		# multiprocessing: false
		### Deletion of temporary directories containing
		### intermediate processing files, bitstreams etc.; default = false
		# delete_tmp: true

		### Any relative paths will be interpreted relative to the working directory the script is called from!
		### Usage of absolute paths is recommended.
		### Do not use file names with dots "." in them! This is not supported, use "_" instead
		### For Windows users: please use double backslashes '\\' in paths and add '.exe' to executable definitions
		### REQUIRED: Input path or file
		input_path: "~/ivas/items/HOA3"
		### REQUIRED: Output path or file
		output_path: "./tmp_output"
		### Metadata path or file(s)
		### If input format is ISM{1-4} a path for the metadata files can be specified;
		### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherwise ignored)
		# metadata_path:
		### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder)
		# all_items: ".../metadata_folder"
		### Path can be set for all items individually with 'item{1-4}' keys
		### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm}
		### Either list individual files for all objects or name folder for automatic search for one item
		# item1:
		# - ".../meta_all_obj"
		# item2:
		# - ".../meta_obj1.csv"
		# - ".../meta_obj2.csv"
		# noise.wav:
		# - ".../metadata_folder_for_noise_item"

		### Select only a subset of items
		### searches for the specified substring in found filenames; default = null
		# input_select:
		# - "48kHz"

		### Horizontally concatenate input items into one long file; default = false
		# concatenate_input: true
		### Specify preamble duration in ms; default = 0
		# preamble: 40
		### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence)
		# pad_noise_preamble: true

		################################################
		### Input configuration
		################################################
		input:
		### REQUIRED: Input format
		fmt: "HOA3"
		### Input sampling rate in Hz needed for headerless audio files; default = 48000
		# fs: 32000

		################################################
		### Pre-processing
		################################################
		### Pre-processing step performed prior to core processing for all conditions
		### If not defined, preprocessing step is skipped
		# preprocessing:
		### Target format used in rendering from input format; default = null (no rendering)
		# fmt: "7_1_4"
		### Flag for application of 50Hz high-pass filter; default = false
		# hp50: true
		### Target sampling rate in Hz for resampling; default = null (no resampling)
		# fs: 16000
		### Target loudness in LKFS; default = null (no loudness change applied)
		# loudness: -26
		### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
		### default = null (uses preprocessing fmt if possible)
		# loudness_fmt: "BINAURAL"
		### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
		# trim:
		# - 50
		# - -50
		### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence)
		# pad_noise: true
		### Value for application of delay (ms) (negative values advance); default = 0
		# delay: 20
		### Length of window used at start/end of signal (ms); default = 0
		# window: 100

		#################################################
		### Bitstream processing
		#################################################
		### Bitstream processing (transport simulation) done after encoding and before decoding
		### e.g. frame error insertion or transport simulation for JBM testing
		# tx:
		### REQUIRED: Path to network simulation binary
		# bs_proc_bin: ".../ivas_python_testscripts/networkSimulator_g192.exe"
		### Path to error pattern (mandatory if no information for generating the error pattern is given)
		# error_pattern: ".../dly_error_profile.dat"
		### options for the binary, possible placeholders are {error_pattern} for the error pattern,
		### {bitstream} for the bitstream to process and {processed_bitstream} for the processed bitstream
		# bs_proc_opts: [ "{error_pattern}", "{bitstream}", "{processed_bitstream}", "{processed_bitstream}_tracefile_sim", "2", "0" ]

		################################################
		### Configuration for conditions under test
		################################################
		### List of conditions to generate
		### Name of the key will be used as output directory name
		### conditions must specify the "type" key which may be one of the following options:
		### ref generate the reference condition
		### lp3k5 generate a low-pass anchor with cut-off frequency 3.5 kHz
		### lp7k generate a low-pass anchor with cut-off frequency 7 kHz
		### mnru generate MNRU condition
		### esdru generate ESDRU condition
		### mono_dmx generate mono downmix condition
		### evs generate an EVS coded condition (see below examples for additional required keys) (currently uses IVAS EVS mode)
		### ivas generate an IVAS coded condition (see below examples for additional required keys)
		conditions_to_generate:
		### Reference and anchor conditions ##########################
		c01:
		### REQUIRED: type of condition
		type: ref
		### optional low-pass cut-off frequency in Hz; default = null
		# out_fc: 22500
		c02:
		### REQUIRED: type of condition
		type: lp3k5
		c03:
		### REQUIRED: type of condition
		type: mnru
		### REQUIRED: the ratio of speech power to modulated noise power in dB
		q: 20
		c04:
		### REQUIRED: type of condition
		type: esdru
		### REQUIRED: spatial degradation value between 0 and 1
		alpha: 0.5
		c05:
		### REQUIRED: type of condition
		type: mono_dmx

		### IVAS condition ###############################
		c06:
		### REQUIRED: type of condition
		type: ivas
		### REQUIRED: Bitrates to use for coding
		bitrates:
		- 160000
		# - 32000
		### Encoder options
		cod:
		### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_cod
		### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
		# fs: 32000
		### Additional commandline options; default = null
		# opts: ["-q", "-dtx", 4]
		### Decoder options
		dec:
		### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_dec
		### Decoder output format; default = postprocessing fmt
		fmt: "HOA3"
		### Decoder output sampling rate; default = null (same as input)
		# fs: 48000
		### Additional commandline options; default = null
		# opts: ["-q", "-no_delay_cmp"]

		### IVAS condition ###############################
		c07:
		### REQUIRED: type of condition
		type: ivas
		### REQUIRED: Bitrates to use for coding
		bitrates:
		- 160000
		# - 32000
		### Encoder options
		cod:
		### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_cod
		### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
		# fs: 32000
		### Additional commandline options; default = null
		# opts: ["-q", "-dtx", 4]
		### Decoder options
		dec:
		### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_dec
		### Decoder output format; default = postprocessing fmt
		fmt: "CICP19"
		### Decoder output sampling rate; default = null (same as input)
		# fs: 48000
		### Additional commandline options; default = null
		# opts: ["-q", "-no_delay_cmp"]

		### EVS condition ################################
		c08:
		### REQUIRED: type of condition
		type: evs
		### REQUIRED: Bitrates to use for coding
		### For EVS mono, this may be a per-channel bitrate configuration (must match input/preprocessing format!)
		### the last value will be repeated if too few are specified
		bitrates:
		# - 9600
		- [13200, 13200, 8000, 13200, 9600]
		cod:
		### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_cod
		### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
		# fs: 32000
		dec:
		### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
		bin: ~/git/ivas-codec/IVAS_dec
		### Decoder output sampling rate; default = null (same as input)
		# fs: 48000

		################################################
		### Post-processing
		################################################
		### Post-processing step performed after core processing for all conditions
		### Post-processing is required and cannot be omitted
		postprocessing:
		### REQUIRED: Target format for output
		fmt: "BINAURAL"
		### Target sampling rate in Hz for resampling; default = null (no resampling)
		# fs: 16000
		### Low-pass cut-off frequency in Hz; default = null (no filtering)
		# lp_cutoff: 24000
		### Target loudness in LKFS; default = null (no loudness change applied)
		# loudness: -26
		### Spatial audio format in which loudness is adjusted (only used if postprocessing loudness is not null);
		### default = null (uses postprocessing fmt if possible)
		# loudness_fmt: null
		### Name of custom binaural dataset (without prefix or suffix);
		### default = null (ORANGE53 for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM)
		# bin_dataset: SADIE
		### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null
		# bin_lfe_gain: 1
		### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true
		# limit: false
		### Head-tracking trajectory file for binaural output; default = null
		# trajectory: "path/to/file"

examples/audiotools.ipynb

0 → 100755

+0 −0

File added.

Preview size limit exceeded, changes collapsed.

ivas_processing_scripts/init.py

0 → 100755

+152 −0

Original line number	Diff line number	Diff line
		#!/usr/bin/env python3

		#
		# (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository. All Rights Reserved.
		#
		# This software is protected by copyright law and by international treaties.
		# The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
		# Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
		# Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
		# Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
		# contributors to this repository retain full ownership rights in their respective contributions in
		# the software. This notice grants no license of any kind, including but not limited to patent
		# license, nor is any license granted by implication, estoppel or otherwise.
		#
		# Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
		# contributions.
		#
		# This software is provided "AS IS", without any express or implied warranties. The software is in the
		# development stage. It is intended exclusively for experts who have experience with such software and
		# solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
		# and fitness for a particular purpose are hereby disclaimed and excluded.
		#
		# Any dispute, controversy or claim arising under or in relation to providing this software shall be
		# submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
		# accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
		# the United Nations Convention on Contracts on the International Sales of Goods.
		#

		import logging
		from itertools import repeat

		import yaml

		from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata
		from ivas_processing_scripts.constants import (
		LOGGER_DATEFMT,
		LOGGER_FORMAT,
		LOGGER_SUFFIX,
		)
		from ivas_processing_scripts.processing import chains, config
		from ivas_processing_scripts.processing.processing import (
		concat_setup,
		concat_teardown,
		preprocess,
		process_item,
		)
		from ivas_processing_scripts.utils import DirManager, apply_func_parallel


		def logging_init(args, cfg):
		    """Configure the "__main__" logger for a test run and return it.

		    Two handlers are attached: a console handler that prints bare messages,
		    and a file handler writing to ``<cfg.output_path>/<cfg.name><LOGGER_SUFFIX>``
		    (opened in "w" mode). Both handlers emit DEBUG records when ``args.debug``
		    is truthy and INFO records otherwise. A short summary of the run
		    (configuration file, test name, input and output paths) is logged
		    before returning.

		    :param args: parsed command-line arguments; ``debug`` and ``config`` are read
		    :param cfg: test configuration; ``name``, ``input_path`` and ``output_path`` are read
		    :return: the configured logger
		    """
		    log = logging.getLogger("__main__")
		    # the logger itself lets everything through; the handlers do the filtering
		    log.setLevel(logging.DEBUG)

		    handler_level = logging.DEBUG if args.debug else logging.INFO

		    # console output: message text only
		    to_console = logging.StreamHandler()
		    to_console.setFormatter(logging.Formatter("%(message)s"))
		    to_console.setLevel(handler_level)
		    log.addHandler(to_console)

		    # main log file inside the output directory
		    log_file = cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}")
		    to_file = logging.FileHandler(log_file, mode="w")
		    to_file.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
		    to_file.setLevel(handler_level)
		    log.addHandler(to_file)

		    # summary of the run
		    log.info(f"Processing test configuration file {args.config}")
		    log.info(f"Test name: {cfg.name}")
		    log.info(f"Input path: {cfg.input_path.absolute()}")
		    log.info(f"Output path: {cfg.output_path.absolute()}")

		    return log


		def main(args):
		    """Run a complete processing job described by a configuration file.

		    Steps, in order: parse ``args.config`` into a test configuration, set up
		    the processing chains, create the output/temporary directories, set up
		    logging, resolve ISM metadata (for input formats starting with "ISM"),
		    run the optional preprocessing and concatenation steps, process every
		    item for every condition, undo the concatenation if it was applied, and
		    finally dump the configuration to ``<output_path>/<name>.yml``.

		    :param args: parsed command-line arguments; ``config`` is read here and the
		        whole object is forwarded to ``logging_init``
		    """
		    # parse configuration and derive the processing chains from it
		    cfg = config.TestConfig(args.config)
		    chains.init_processing_chains(cfg)

		    # context manager to create output directories and clean up temporary directories
		    dirs_to_create = cfg.out_dirs + cfg.tmp_dirs
		    dirs_to_remove = cfg.tmp_dirs if cfg.delete_tmp else []
		    with DirManager(dirs_to_create, dirs_to_remove):
		        logger = logging_init(args, cfg)

		        if cfg.input["fmt"].startswith("ISM"):
		            # the fourth character of the format name is used as the object count
		            metadata = check_ISM_metadata(
		                cfg.metadata_path,
		                num_objects=int(cfg.input["fmt"][3]),
		                num_items=len(cfg.items_list),
		                item_names=cfg.items_list,
		            )
		            # report which metadata files were found and will be used per item
		            for idx, item_metadata in enumerate(metadata):
		                metadata_str = [str(entry) for entry in item_metadata]
		                logger.info(
		                    f" ISM metadata files item {cfg.items_list[idx]}: {', '.join(metadata_str)}"
		                )
		        else:
		            # one placeholder per item for formats that are not ISM
		            metadata = [None] * len(cfg.items_list)

		        cfg.metadata_path = metadata

		        # preprocessing is run only once, before any condition is generated
		        if hasattr(cfg, "preprocessing"):
		            preprocess(cfg, cfg.metadata_path, logger)

		        # concatenate items if required
		        if cfg.concatenate_input:
		            concat_setup(cfg, logger)

		        for proc_chain, out_dir, tmp_dir in zip(
		            cfg.proc_chains, cfg.out_dirs, cfg.tmp_dirs
		        ):
		            processes = proc_chain["processes"]

		            logger.info(f" Generating condition: {proc_chain['name']}")

		            # one argument tuple per item; everything except the item and its
		            # metadata is identical for all items of this condition
		            item_args = zip(
		                cfg.items_list,
		                repeat(tmp_dir),
		                repeat(out_dir),
		                repeat(processes),
		                repeat(logger),
		                cfg.metadata_path,
		            )
		            apply_func_parallel(
		                process_item,
		                item_args,
		                None,
		                "mp" if cfg.multiprocessing else None,
		            )

		        # write out the splits, optionally remove file
		        if cfg.concatenate_input:
		            concat_teardown(cfg, logger)

		    # copy configuration to output directory
		    config_copy = cfg.output_path.joinpath(f"{cfg.name}.yml")
		    with open(config_copy, "w") as f:
		        yaml.safe_dump(cfg._yaml_dump, f)