Commit 74998303 authored by multrus's avatar multrus
Browse files

initial revision

parents
Loading
Loading
Loading
Loading

LICENSE.md

0 → 100755
+31 −0
Original line number Diff line number Diff line
/******************************************************************************************************

   (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
   Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
   Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
   Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
   contributors to this repository. All Rights Reserved.

   This software is protected by copyright law and by international treaties.
   The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
   Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
   Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
   Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
   contributors to this repository retain full ownership rights in their respective contributions in
   the software. This notice grants no license of any kind, including but not limited to patent
   license, nor is any license granted by implication, estoppel or otherwise.

   Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
   contributions.

   This software is provided "AS IS", without any express or implied warranties. The software is in the
   development stage. It is intended exclusively for experts who have experience with such software and
   solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
   and fitness for a particular purpose are hereby disclaimed and excluded.

   Any dispute, controversy or claim arising under or in relation to providing this software shall be
   submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
   accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
   the United Nations Convention on Contracts on the International Sales of Goods.

*******************************************************************************************************/

README.md

0 → 100755
+0 −0

File added.

Preview size limit exceeded, changes collapsed.

examples/TEMPLATE.yml

0 → 100755
+244 −0
Original line number Diff line number Diff line
---
################################################
# General configuration
################################################
### Name of test; default = YYYYMMDD_HH.MM.SS_listening_test
# name: test SBA
### Date; default = YYYYMMDD_HH.MM.SS
# date: 2023.06.30
### git commit SHA; default = git rev-parse HEAD
# git_sha: abc123

### Whether to use multiprocessing; default = true
# multiprocessing: false
### Deletion of temporary directories containing 
### intermediate processing files, bitstreams etc.; default = false
# delete_tmp: true

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
### Do not use file names with dots "." in them! This is not supported, use "_" instead
### For Windows users: please use double backslashes '\\' in paths and add '.exe' to executable definitions
### REQUIRED: Input path or file
input_path: "~/ivas/items/HOA3"
### REQUIRED: Output path or file
output_path: "./tmp_output"
### Metadata path or file(s)
### If input format is ISM{1-4} a path for the metadata files can be specified;
### default = null (for ISM search for item_name.{wav, raw, pcm}.{0-3}.csv in input folder, otherwise ignored)
# metadata_path:
    ### Path can be set for all items with the 'all_items' key (automatic search for item_name.{wav, raw, pcm}.{0-3}.csv within this folder)
    # all_items: ".../metadata_folder"
    ### Path can be set for all items individually with 'item{1-4}' keys
    ### 'item{1-4}' keys can also be renamed to the input file names including extension {wav, raw, pcm}
    ### Either list individual files for all objects or name folder for automatic search for one item
    # item1:
        # - ".../meta_all_obj"
    # item2:
        # - ".../meta_obj1.csv"
        # - ".../meta_obj2.csv"
    # noise.wav:
        # - ".../metadata_folder_for_noise_item"

### Select only a subset of items
### searches for the specified substring in found filenames; default = null
# input_select:
#  - "48kHz"

### Horizontally concatenate input items into one long file; default = false
# concatenate_input: true
### Specify preamble duration in ms; default = 0
# preamble: 40
### Flag whether to use noise (amplitude +-4) for the preamble or silence; default = false (silence)
# pad_noise_preamble: true

################################################
### Input configuration
################################################
input:
    ### REQUIRED: Input format
    fmt: "HOA3"
    ### Input sampling rate in Hz needed for headerless audio files; default = 48000
    # fs: 32000

################################################
### Pre-processing
################################################
### Pre-processing step performed prior to core processing for all conditions
### If not defined, preprocessing step is skipped
# preprocessing:
    ### Target format used in rendering from input format; default = null (no rendering)
    # fmt: "7_1_4"
    ### Flag for application of 50Hz high-pass filter; default = false
    # hp50: true
    ### Target sampling rate in Hz for resampling; default = null (no resampling)
    # fs: 16000
    ### Target loudness in LKFS; default = null (no loudness change applied)
    # loudness: -26
    ### Spatial audio format in which loudness is adjusted (only used if preprocessing loudness is not null);
    ### default = null (uses preprocessing fmt if possible)
    # loudness_fmt: "BINAURAL"
    ### Pre-/post-trim individual signal(s) (ms) (negative values pad silence); default = 0
    # trim:
        # - 50
        # - -50
    ### Flag for using noise (amplitude +-4) instead of silence for padding; default = false (silence)
    # pad_noise: true
    ### Value for application of delay (ms) (negative values advance); default = 0
    # delay: 20
    ### Length of window used at start/end of signal (ms); default = 0
    # window: 100

#################################################
### Bitstream processing
#################################################
### Bitstream processing (transport simulation) done after encoding and before decoding
### e.g. frame error insertion or transport simulation for JBM testing
# tx:
    ### REQUIRED: Path to network simulation binary
    # bs_proc_bin: ".../ivas_python_testscripts/networkSimulator_g192.exe"
    ### Path to error pattern (mandatory if no information for generating the error pattern is given)
    # error_pattern: ".../dly_error_profile.dat"
    ### options for the binary, possible placeholders are {error_pattern} for the error pattern,
    ### {bitstream} for the bitstream to process and {bitstream_processed} for the processed bitstream
    # bs_proc_opts: [ "{error_pattern}",  "{bitstream}",  "{processed_bitstream}",  "{processed_bitstream}_tracefile_sim", "2", "0" ]

################################################
### Configuration for conditions under test
################################################
### List of conditions to generate
### Name of the key will be used as output directory name
###     conditions must specify the "type" key which may be one of the following options:
###     ref        generate the reference condition
###     lp3k5      generate a low-pass anchor with cut-off frequency 3.5 kHz
###     lp7k       generate a low-pass anchor with cut-off frequency 7 kHz
###     mnru       generate MNRU condition
###     esdru      generate ESDRU condition
###     mono_dmx   generate mono downmix condition
###     evs        generate an EVS coded condition (see below examples for additional required keys) (currently uses IVAS EVS mode)
###     ivas       generate an IVAS coded condition (see below examples for additional required keys)
conditions_to_generate:
  ### Reference and anchor conditions ##########################
  c01:
      ### REQUIRED: type of condition
      type: ref
      ### optional low-pass cut-off frequency in Hz; default = null
      # out_fc: 22500
  c02:
      ### REQUIRED: type of condition
      type: lp3k5
  c03:
      ### REQUIRED: type of condition
      type: mnru
      ### REQUIRED: the ratio of speech power to modulated noise power in dB
      q: 20
  c04:
      ### REQUIRED: type of condition
      type: esdru
      ### REQUIRED: spatial degradation value between 0 and 1
      alpha: 0.5
  c05:
      ### REQUIRED: type of condition
      type: mono_dmx
      
  ### IVAS condition ###############################
  c06:
      ### REQUIRED: type of condition
      type: ivas
      ### REQUIRED: Bitrates to use for coding
      bitrates:
          - 160000
          # - 32000
      ### Encoder options
      cod:
          ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_cod
          ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
          # fs: 32000
          ### Additional commandline options; default = null
          # opts: ["-q", "-dtx", 4]
      ### Decoder options
      dec:
          ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_dec
          ### Decoder output format; default = postprocessing fmt
          fmt: "HOA3"
          ### Decoder output sampling rate; default = null (same as input)
          # fs: 48000
          ### Additional commandline options; default = null
          # opts: ["-q", "-no_delay_cmp"]

  ### IVAS condition ###############################
  c07:
      ### REQUIRED: type of condition
      type: ivas
      ### REQUIRED: Bitrates to use for coding
      bitrates:
          - 160000
          # - 32000
      ### Encoder options
      cod:
          ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_cod
          ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
          # fs: 32000
          ### Additional commandline options; default = null
          # opts: ["-q", "-dtx", 4]
      ### Decoder options
      dec:
          ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_dec
          ### Decoder output format; default = postprocessing fmt
          fmt: "CICP19"
          ### Decoder output sampling rate; default = null (same as input)
          # fs: 48000
          ### Additional commandline options; default = null
          # opts: ["-q", "-no_delay_cmp"]
            
  ### EVS condition ################################
  c08:
      ### REQUIRED: type of condition
      type: evs
      ### REQUIRED: Bitrates to use for coding
      ### For EVS mono, this may be a per-channel bitrate configuration (must match input/preprocessing format!)
      ### the last value will be repeated if too few are specified
      bitrates:
          # - 9600
          - [13200, 13200, 8000, 13200, 9600]
      cod:
          ### Path to encoder binary; default search for IVAS_cod in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_cod
          ### Encoder input sampling rate in Hz (resampling performed in case of mismatch); default = null (no resampling)
          # fs: 32000
      dec:
          ### Path to decoder binary; default search for IVAS_dec in bin folder (primary) and PATH (secondary)
          bin: ~/git/ivas-codec/IVAS_dec
          ### Decoder output sampling rate; default = null (same as input)
          # fs: 48000

################################################
### Post-processing
################################################
### Post-processing step performed after core processing for all conditions
### Post-processing is required and cannot be omitted
postprocessing:
    ### REQUIRED: Target format for output
    fmt: "BINAURAL"
    ### Target sampling rate in Hz for resampling; default = null (no resampling)
    # fs: 16000
    ### Low-pass cut-off frequency in Hz; default = null (no filtering)
    # lp_cutoff: 24000
    ### Target loudness in LKFS; default = null (no loudness change applied)
    # loudness: -26
    ### Spatial audio format in which loudness is adjusted (only used if postprocessing loudness is not null);
    ### default = null (uses postprocessing fmt if possible)
    # loudness_fmt: null
    ### Name of custom binaural dataset (without prefix or suffix);
    ### default = null (ORANGE53 for BINAURAL, IISofficialMPEG222UC for BINAURAL_ROOM)
    # bin_dataset: SADIE
    ### Render LFE to binaural output with the specified gain (only valid for channel-based input); default = null
    # bin_lfe_gain: 1
    ### Flag whether output should be limited to avoid clipping (can alter target loudness); default = true
    # limit: false
    ### Head-tracking trajectory file for binaural output; default = null
    # trajectory: "path/to/file"
+0 −0

File added.

Preview size limit exceeded, changes collapsed.

+152 −0
Original line number Diff line number Diff line
#!/usr/bin/env python3

#
#  (C) 2022-2023 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository. All Rights Reserved.
#
#  This software is protected by copyright law and by international treaties.
#  The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
#  Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
#  Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
#  Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
#  contributors to this repository retain full ownership rights in their respective contributions in
#  the software. This notice grants no license of any kind, including but not limited to patent
#  license, nor is any license granted by implication, estoppel or otherwise.
#
#  Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
#  contributions.
#
#  This software is provided "AS IS", without any express or implied warranties. The software is in the
#  development stage. It is intended exclusively for experts who have experience with such software and
#  solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
#  and fitness for a particular purpose are hereby disclaimed and excluded.
#
#  Any dispute, controversy or claim arising under or in relation to providing this software shall be
#  submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
#  accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
#  the United Nations Convention on Contracts on the International Sales of Goods.
#

import logging
from itertools import repeat

import yaml

from ivas_processing_scripts.audiotools.metadata import check_ISM_metadata
from ivas_processing_scripts.constants import (
    LOGGER_DATEFMT,
    LOGGER_FORMAT,
    LOGGER_SUFFIX,
)
from ivas_processing_scripts.processing import chains, config
from ivas_processing_scripts.processing.processing import (
    concat_setup,
    concat_teardown,
    preprocess,
    process_item,
)
from ivas_processing_scripts.utils import DirManager, apply_func_parallel


def logging_init(args, cfg):
    """Configure the "__main__" logger with console and log-file output.

    The logger itself accepts everything from DEBUG upwards; each handler
    filters at DEBUG when ``args.debug`` is truthy and at INFO otherwise.
    The log file is opened in write mode inside ``cfg.output_path`` and is
    named ``cfg.name`` followed by ``LOGGER_SUFFIX``.

    Before returning, a short run summary (configuration file, test name,
    absolute input and output paths) is logged at INFO level.

    Returns the configured logger.
    """
    log = logging.getLogger("__main__")
    log.setLevel(logging.DEBUG)

    # terminal output carries the bare message only
    to_console = logging.StreamHandler()
    to_console.setFormatter(logging.Formatter("%(message)s"))
    verbosity = logging.DEBUG if args.debug else logging.INFO
    to_console.setLevel(verbosity)
    log.addHandler(to_console)

    # main log file, formatted with the project-wide record layout
    log_file = cfg.output_path.joinpath(f"{cfg.name}{LOGGER_SUFFIX}")
    to_file = logging.FileHandler(log_file, mode="w")
    to_file.setFormatter(logging.Formatter(LOGGER_FORMAT, datefmt=LOGGER_DATEFMT))
    to_file.setLevel(verbosity)
    log.addHandler(to_file)

    # run summary
    log.info(f"Processing test configuration file {args.config}")
    log.info(f"Test name: {cfg.name}")
    log.info(f"Input path: {cfg.input_path.absolute()}")
    log.info(f"Output path: {cfg.output_path.absolute()}")

    return log


def main(args):
    """Run one complete processing pass for the configuration in ``args.config``.

    Steps, in order:
      1. parse the configuration and initialise the processing chains,
      2. create output/temporary directories via ``DirManager`` (temporary
         directories are handed over for clean-up only if ``cfg.delete_tmp``),
      3. set up logging,
      4. resolve ISM metadata when the input format starts with "ISM",
         otherwise use one ``None`` entry per item,
      5. run preprocessing once if the configuration has a ``preprocessing``
         attribute, and set up concatenation if requested,
      6. process every item for every condition,
      7. tear down concatenation if it was requested,
      8. dump the configuration as YAML into the output directory.
    """
    # read the test configuration and build the per-condition chains
    cfg = config.TestConfig(args.config)
    chains.init_processing_chains(cfg)

    # DirManager creates the listed directories and cleans up the second list
    with DirManager(
        cfg.out_dirs + cfg.tmp_dirs, cfg.tmp_dirs if cfg.delete_tmp else []
    ):
        logger = logging_init(args, cfg)

        if not cfg.input["fmt"].startswith("ISM"):
            # no object metadata needed: one placeholder per item
            metadata = [None] * len(cfg.items_list)
        else:
            # the 4th character of the format string is used as object count
            metadata = check_ISM_metadata(
                cfg.metadata_path,
                num_objects=int(cfg.input["fmt"][3]),
                num_items=len(cfg.items_list),
                item_names=cfg.items_list,
            )
            # report which metadata files were picked up for each item
            for idx, item_files in enumerate(metadata):
                found = [str(md_file) for md_file in item_files]
                logger.info(
                    f"  ISM metadata files item {cfg.items_list[idx]}: {', '.join(found)}"
                )

        # from here on the config carries the resolved per-item metadata
        cfg.metadata_path = metadata

        # preprocessing is optional and runs a single time for all conditions
        if hasattr(cfg, "preprocessing"):
            preprocess(cfg, cfg.metadata_path, logger)

        # optional concatenation of the input items
        if cfg.concatenate_input:
            concat_setup(cfg, logger)

        per_condition = zip(cfg.proc_chains, cfg.out_dirs, cfg.tmp_dirs)
        for condition, out_dir, tmp_dir in per_condition:
            chain = condition["processes"]

            logger.info(f"  Generating condition: {condition['name']}")

            # one argument tuple per item; everything but the item name and
            # its metadata is shared across the items of this condition
            item_jobs = zip(
                cfg.items_list,
                repeat(tmp_dir),
                repeat(out_dir),
                repeat(chain),
                repeat(logger),
                cfg.metadata_path,
            )
            parallel_mode = "mp" if cfg.multiprocessing else None
            apply_func_parallel(process_item, item_jobs, None, parallel_mode)

        # write out the splits, optionally remove file
        if cfg.concatenate_input:
            concat_teardown(cfg, logger)

    # keep a copy of the configuration next to the generated output
    cfg_copy = cfg.output_path.joinpath(f"{cfg.name}.yml")
    with open(cfg_copy, "w") as out_file:
        yaml.safe_dump(cfg._yaml_dump, out_file)