Commit f3c848c2 authored by malenovsky's avatar malenovsky
Browse files

Merge branch '96-missing-support-for-mc-format-in-item-generation-scripts' into 'main'

Resolve "Missing support for MC format in item generation scripts"

See merge request !188
parents 29c30d78 c7b33450
Loading
Loading
Loading
Loading
+177 −0
Original line number Diff line number Diff line
---
################################################
# Item generation - General configuration
################################################

### Any relative paths will be interpreted relative to the working directory the script is called from!
### Usage of absolute paths is recommended.
### Do not use file names with dots "." in them! This is not supported, use "_" instead
### For Windows users: please use double backslashes '\\' in paths and add '.exe' to executable definitions

### Output format
### NOTE(review): presumably a channel-based (multichannel) layout name - confirm against the list of supported formats
format: "5_1_4"
# masa_tc: 1        # applicable only to MASA/OMASA format
# masa_dirs: 1      # applicable only to MASA/OMASA format
# sba_order: 2      # applicable only to OSBA format

### Output sampling rate in Hz
fs: 48000

### Generate BINAURAL output (_BINAURAL will be appended to the output filename)
binaural_output: true

### Normalize target loudness to X LKFS
loudness: -26

### Apply pre-amble and post-amble of X seconds
preamble: 0.0
postamble: 0.0

### Apply fade-in and fade-out of X seconds
fade_in_out: 0.5

### Trim the output such that the total duration is X seconds
duration: 8

### Add low-level random background noise (amplitude +-4) instead of silence; default = false (silence)
add_low_level_random_noise: false

### Process with parallel streams
multiprocessing: false

################################################
### Item generation - Filename conventions
################################################

### Naming convention for the input mono files
### The input filenames are represented by:
###   lLLeeettszz.wav
### where: 
###   l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) 
###   LL stands for the language: JP, FR, GE, MA, DA, EN
###   eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
###   tt stands for the talker ID: f1, f2, f3, m1, m2, m3
###   s stands for 'sample' and zz is the sample number; 01, ..., 14

### Naming convention for the generated output files
### The output filenames are represented by:
###   leeeayszz.wav
### The filenames of the accompanying output metadata files (applicable to metadata-assisted spatial audio, object-based audio) are represented by:
###   leeeayszz.met for metadata-assisted spatial audio
###   leeeayszz.wav.o.csv for object-based audio
### where: 
###   l stands for the listening lab designator: a (Force Technology), b (HEAD acoustics), c (MQ University), d (Mesaqin.com) 
###   eee stands for the experiment designator: p01, p02, p04, p05, p06, p07, p08, p09
###   a stands for 'audio'
###   y is the per-experiment category according to IVAS-8a: 01, 02, 03, 04, 05, 06
###   s stands for sample and zz is the sample number; 01, 02, 03, 04, 05, 06, 07 (07 is the preliminary sample)
###   o stands for the object number; 0, 1, 2, 3

### File designators, default is "l" for listening lab, "EN" for language, "p07" for experiment and "g" for company
### NOTE(review): 'exp' and 'provider' below are set to "p01" and "va", which differ from the defaults stated above - confirm this is intentional
### NOTE(review): 'provider' is presumably the "company" designator mentioned above - verify
listening_lab: "l"
language: "EN"
exp: "p01"
provider: "va"

### Insert prefix for all input filenames (default: "")
### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment' 
### the number of consecutive letters defines the length of each field
# use_input_prefix: "lLLeee"

### Insert prefix for all output filenames (default: "")
### l stands for the 'listening_lab' designator, L stands for the 'language', e stands for the 'experiment' 
### the number of consecutive letters defines the length of each field
# use_output_prefix: "leee"

################################################
### Item generation - Scene description
################################################

### Each scene shall be described using the following parameters/properties:
###   output:      output filename
###   description: textual description of the scene
###   input:       input filename(s)
###   IR:          filename(s) of the input IRs
###   azimuth:     azimuth in the range [-180,180]; positive values point to the left
###   elevation:   elevation in the range [-90,90]; positive values indicate up
###   shift:       time adjustment of the input signal (negative value delays the signal)
###   background:  background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
###   background_level:  normalize the background noise loudness to X LKFS
###
### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
### Note 1: use brackets [val1, val2, ...] when specifying multiple values 
### Note 2: use the "start:step:stop" notation for moving sources, where step will be applied in 20ms frames
### Note 3: we're using a right-handed coordinate system with azimuth = 0 pointing from the nose to the screen

### Scene definitions, keyed by a two-digit scene ID ("01" .. "08").
### Every scene below lists two input files, two IR files and two shift values,
### and uses the same background file at a background level of -46.
### Scenes "02" and "08" use a shift of +1.0 for the second input and are described as overlapping;
### all other scenes use -1.0 and are described as non-overlapping.
### NOTE(review): the descriptions of scenes "06", "07" and "08" omit "car noise" although a background file is configured - confirm whether intentional
### NOTE(review): both inputs of scene "03" are labelled "Talker2" - confirm the intended talker pairing
scenes:
    "01": 
        output: "out/s01.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46

    "02": 
        output: "out/s02.wav"
        description: "Car with AB microphone pickup, overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        shift: [0.0, +1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
        
    "03": 
        output: "out/s03.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
        IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
        
    "04": 
        output: "out/s04.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
        
    "05": 
        output: "out/s05.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
        
    "06": 
        output: "out/s06.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers."
        input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46

    "07": 
        output: "out/s07.wav"
        description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
        input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
        shift: [0.0, -1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
         
    "08": 
        output: "out/s08.wav"
        description: "Car with AB microphone pickup, overlap between the talkers."
        input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        shift: [0.0, +1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46
+11 −0
Original line number Diff line number Diff line
@@ -32,6 +32,10 @@

import logging

from ivas_processing_scripts.audiotools.constants import (
    CHANNEL_BASED_AUDIO_ALTNAMES,
    CHANNEL_BASED_AUDIO_FORMATS,
)
from ivas_processing_scripts.constants import (
    LOGGER_DATEFMT,
    LOGGER_FORMAT,
@@ -41,6 +45,7 @@ from ivas_processing_scripts.generation import (
    config,
    generate_ismN_items,
    generate_masa_items,
    generate_mc_items,
    generate_omasa_items,
    generate_osba_items,
    generate_sba_items,
@@ -100,5 +105,11 @@ def main(args):
    elif "OSBA" in cfg.format:
        # generate OSBA items from FOA/HOA2/HOA3 and MONO items according to scene description
        generate_osba_items.generate_osba_items(cfg, logger)
    elif (
        cfg.format in CHANNEL_BASED_AUDIO_FORMATS.keys()
        or cfg.format in CHANNEL_BASED_AUDIO_ALTNAMES.keys()
    ):
        # generate MC items from MONO items according to scene description
        generate_mc_items.generate_mc_items(cfg, logger)

    logger.handlers.clear()
+485 −0

File added.

Preview size limit exceeded, changes collapsed.