Commit 90de7c0b authored by janssontoftg's avatar janssontoftg
Browse files

Merge branch 'ericsson/review-item-creation-stereo' into 'main'

Review of stereo item generation for P800-1 and P800-2

See merge request !92
parents 5eb4e32e ad6fa9f7
Loading
Loading
Loading
Loading
+92 −73
Original line number Diff line number Diff line
@@ -30,12 +30,31 @@ output_path: "experiments/selection/P800-1/proc_input"
loudness: -26

### Pre-amble and Post-amble length in seconds (default = 0.0)
preamble: 1.0
preamble: 0.5
postamble: 1.0

### Flag for adding low-level random background noise (uniformly distributed integer amplitude between -4 and +4) instead of silence; default = false (silence)
add_low_level_random_noise: true

### File designators
listening_lab: "a"
language: "JP"
exp: "p01"

################################################
### Input files
################################################

### <LL><Lang><Exp><TalkerID>s<Sample>.wav
###
### With
###
### <LL>                = a (Force Technology), b (HEAD acoustics), 
###                       c (MQ University),    d (Mesaqin.com) 
### <Lang>              = JP, FR, GE, MA, DA, EN
### <Exp>               = p01, p02, p04, p05, p06, p07, p08, p09
### <TalkerID>          = f1, f2, f3, m1, m2, m3
### <Sample>            = 01, …, 14

################################################
### Scene description
@@ -64,254 +83,254 @@ add_low_level_random_noise: true

scenes:
    cat1_1: 
        name: "lp01a1s01"
        name: "a1s01"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f1_s1.wav", "m1_s1.wav"]
        source: ["f1s01.wav", "m1s01.wav"]
        IR: ["SAABP01.wav", "SAABP07.wav"]
        overlap: 1.0
        
    cat1_2: 
        name: "lp01a1s02"
        name: "a1s02"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m2_s1.wav", "f2_s1.wav"]
        source: ["m2s01.wav", "f2s01.wav"]
        IR: ["SAABP05.wav", "SAABP03.wav"]
        overlap: 1.0
        
    cat1_3: 
        name: "lp01a1s03"
        name: "a1s03"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f3_s1.wav", "m3_s1.wav"]
        source: ["f3s01.wav", "m3s01.wav"]
        IR: ["SAABP02.wav", "SAABP06.wav"]
        overlap: 1.0
        
    cat1_4: 
        name: "lp01a1s04"
        name: "a1s04"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m1_s2.wav", "f1_s2.wav"]
        source: ["m1s02.wav", "f1s02.wav"]
        IR: ["SAABP04.wav", "SAABP01.wav"]
        overlap: 1.0
        
    cat1_5: 
        name: "lp01a1s05"
        name: "a1s05"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f2_s2.wav", "m2_s2.wav"]
        source: ["f2s02.wav", "m2s02.wav"]
        IR: ["SAABP03.wav", "SAABP04.wav"]
        overlap: 1.0
        
    cat1_6: 
        name: "lp01a1s06"
        name: "a1s06"
        description: "Small anechoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m3_s2.wav", "f3_s2.wav"]
        source: ["m3s02.wav", "f3s02.wav"]
        IR: ["SAABP07.wav", "SAABP02.wav"]
        overlap: 1.0
         
    cat2_1: 
        name: "lp01a2s01"
        name: "a2s01"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["m3_s3.wav", "f3_s3.wav"]
        source: ["m3s03.wav", "f3s03.wav"]
        IR: ["LAABP05.wav", "LAABP11.wav"]
        overlap: -1.0
        
    cat2_2: 
        name: "lp01a2s02"
        name: "a2s02"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["f1_s3.wav", "m1.wav"]
        source: ["f1s03.wav", "m1s03.wav"]
        IR: ["LAABP01.wav", "LAABP06.wav"]
        overlap: -1.0
        
    cat2_3: 
        name: "lp01a2s03"
        name: "a2s03"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["m2_s3.wav", "f2_s3.wav"]
        source: ["m2s03.wav", "f2s03.wav"]
        IR: ["LAABP03.wav", "LAABP07.wav"]
        overlap: -1.0
        
    cat2_4: 
        name: "lp01a2s04"
        name: "a2s04"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["f3_s4.wav", "m3_s4.wav"]
        source: ["f3s04.wav", "m3s04.wav"]
        IR: ["LAABP05.wav", "LAABP08.wav"]
        overlap: -1.0
        
    cat2_5: 
        name: "lp01a2s05"
        name: "a2s05"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["m1_s4.wav", "f1_s4.wav"]
        source: ["m1s04.wav", "f1s04.wav"]
        IR: ["LAABP09.wav", "LAABP07.wav"]
        overlap: -1.0
        
    cat2_6: 
        name: "lp01a2s06"
        name: "a2s06"
        description: "Large anechoic room with AB microphone pickup, no overlap between the talkers."
        source: ["f2_s4.wav", "m2_s4.wav"]
        source: ["f2s04.wav", "m2s04.wav"]
        IR: ["LAABP10.wav", "LAABP09.wav"]
        overlap: -1.0
 
    cat3_1: 
        name: "lp01a3s01"
        name: "a3s01"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["f2_s5.wav", "m2_s5.wav"]
        source: ["f2s05.wav", "m2s05.wav"]
        IR: ["SAMSP01.wav", "SAMSP07.wav"]
        overlap: -1.0
        
    cat3_2: 
        name: "lp01a3s02"
        name: "a3s02"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["m3_s5.wav", "f3_s5.wav"]
        source: ["m3s05.wav", "f3s05.wav"]
        IR: ["SAMSP05.wav", "SAMSP03.wav"]
        overlap: -1.0
        
    cat3_3: 
        name: "lp01a3s03"
        name: "a3s03"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["f1_s5.wav", "m1_s5.wav"]
        source: ["f1s05.wav", "m1s05.wav"]
        IR: ["SAMSP02.wav", "SAMSP06.wav"]
        overlap: -1.0
        
    cat3_4: 
        name: "lp01a3s04"
        name: "a3s04"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["m2_s6.wav", "f2_s6.wav"]
        source: ["m2s06.wav", "f2s06.wav"]
        IR: ["SAMSP04.wav", "SAMSP01.wav"]
        overlap: -1.0
        
    cat3_5: 
        name: "lp01a3s05"
        name: "a3s05"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["f3_s6.wav", "m3_s6.wav"]
        source: ["f3s06.wav", "m3s06.wav"]
        IR: ["SAMSP03.wav", "SAMSP04.wav"]
        overlap: -1.0
        
    cat3_6: 
        name: "lp01a3s06"
        name: "a3s06"
        description: "Small anechoic room with MS microphone pickup, no overlap between the talkers."
        source: ["m1_s6.wav", "f1_s6.wav"]
        source: ["m1s06.wav", "f1s06.wav"]
        IR: ["SAMSP07.wav", "SAMSP02.wav"]
        overlap: -1.0
        
    cat4_1: 
        name: "lp01a4s01"
        name: "a4s01"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m1_s7.wav", "f1_s7.wav"]
        source: ["m1s07.wav", "f1s07.wav"]
        IR: ["SEABP01.wav", "SEABP07.wav"]
        overlap: 1.0
        
    cat4_2: 
        name: "lp01a4s02"
        name: "a4s02"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f2_s7.wav", "m2_s7.wav"]
        source: ["f2s07.wav", "m2s07.wav"]
        IR: ["SEABP05.wav", "SEABP03.wav"]
        overlap: 1.0
        
    cat4_3: 
        name: "lp01a4s03"
        name: "a4s03"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m3_s7.wav", "f3_s7.wav"]
        source: ["m3s07.wav", "f3s07.wav"]
        IR: ["SEABP02.wav", "SEABP06.wav"]
        overlap: 1.0
        
    cat4_4: 
        name: "lp01a4s04"
        name: "a4s04"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f1_s8.wav", "m1_s8.wav"]
        source: ["f1s08.wav", "m1s08.wav"]
        IR: ["SEABP04.wav", "SEABP01.wav"]
        overlap: 1.0
        
    cat4_5: 
        name: "lp01a4s05"
        name: "a4s05"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m2_s8.wav", "f2_s8.wav"]
        source: ["m2s08.wav", "f2s08.wav"]
        IR: ["SEABP03.wav", "SEABP04.wav"]
        overlap: 1.0
        
    cat4_6: 
        name: "lp01a4s06"
        name: "a4s06"
        description: "Small echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f3_s8.wav", "m3_s8.wav"]
        source: ["f3s08.wav", "m3s08.wav"]
        IR: ["SEABP07.wav", "SEABP02.wav"]
        overlap: 1.0

    cat5_1: 
        name: "lp01a5s01"
        name: "a5s01"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f3_s9.wav", "m3_s9.wav"]
        source: ["f3s09.wav", "m3s09.wav"]
        IR: ["LEABP02.wav", "LEABP08.wav"]
        overlap: 1.0
        
    cat5_2: 
        name: "lp01a5s02"
        name: "a5s02"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m1_s9.wav", "f1_s9.wav"]
        source: ["m1s09.wav", "f1s09.wav"]
        IR: ["LEABP09.wav", "LEABP04.wav"]
        overlap: 1.0
        
    cat5_3: 
        name: "lp01a5s03"
        name: "a5s03"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f2_s9.wav", "m2_s9.wav"]
        source: ["f2s09.wav", "m2s09.wav"]
        IR: ["LEABP06.wav", "LEABP10.wav"]
        overlap: 1.0
        
    cat5_4: 
        name: "lp01a5s04"
        name: "a5s04"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m3_s10.wav", "f3_s10.wav"]
        source: ["m3s10.wav", "f3s10.wav"]
        IR: ["LEABP11.wav", "LEABP08.wav"]
        overlap: 1.0
        
    cat5_5: 
        name: "lp01a5s05"
        name: "a5s05"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["f1_s10.wav", "m1_s10.wav"]
        source: ["f1s10.wav", "m1s10.wav"]
        IR: ["LEABP10.wav", "LEABP12.wav"]
        overlap: 1.0
        
    cat5_6: 
        name: "lp01a5s06"
        name: "a5s06"
        description: "Large echoic room with AB microphone pickup, partial overlap between the talkers."
        source: ["m2_s10.wav", "f2_s10.wav"]
        source: ["m2s10.wav", "f2s10.wav"]
        IR: ["LEABP12.wav", "LEABP01.wav"]
        overlap: 1.0
        
    cat6_1: 
        name: "lp01a6s01"
        name: "a6s01"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["m2_s11.wav", "f2_s11.wav"]
        source: ["m2s11.wav", "f2s11.wav"]
        IR: ["SEABP01.wav", "SEABP07.wav"]
        overlap: -1.0
        
    cat6_2: 
        name: "lp01a6s02"
        name: "a6s02"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["f3_s11.wav", "m3_s11.wav"]
        source: ["f3s11.wav", "m3s11.wav"]
        IR: ["SEABP05.wav", "SEABP03.wav"]
        overlap: -1.0
        
    cat6_3: 
        name: "lp01a6s03"
        name: "a6s03"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["m1_s11.wav", "f1_s11.wav"]
        source: ["m1s11.wav", "f1s11.wav"]
        IR: ["SEABP02.wav", "SEABP06.wav"]
        overlap: -1.0
        
    cat6_4: 
        name: "lp01a6s04"
        name: "a6s04"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["f2_s12.wav", "m2_s12.wav"]
        source: ["f2s12.wav", "m2s12.wav"]
        IR: ["SEABP04.wav", "SEABP01.wav"]
        overlap: -1.0
        
    cat6_5: 
        name: "lp01a6s05"
        name: "a6s05"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["m3_s12.wav", "f3_s12.wav"]
        source: ["m3s12.wav", "f3s12.wav"]
        IR: ["SEABP03.wav", "SEABP04.wav"]
        overlap: -1.0
        
    cat6_6: 
        name: "lp01a6s06"
        name: "a6s06"
        description: "Small echoic room with binaural microphone pickup, no overlap between the talkers."
        source: ["f1_s12.wav", "m1_s12.wav"]
        source: ["f1s12.wav", "m1s12.wav"]
        IR: ["SEABP07.wav", "SEABP02.wav"]
        overlap: -1.0
+94 −75

File changed.

Preview size limit exceeded, changes collapsed.

+132 −124
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@

import logging
import os
from itertools import repeat
from math import floor

import numpy as np
@@ -40,6 +41,7 @@ from ivas_processing_scripts.audiotools import audio, audiofile
from ivas_processing_scripts.audiotools.wrappers.bs1770 import get_loudness
from ivas_processing_scripts.audiotools.wrappers.reverb import reverb_stereo
from ivas_processing_scripts.generation import config
from ivas_processing_scripts.utils import apply_func_parallel

SEED_RANDOM_NOISE = 0

@@ -56,9 +58,6 @@ def generate_stereo_items(
):
    """Generate STEREO items from mono items based on scene description"""

    # get the number of scenes
    N_scenes = len(cfg.scenes)

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26
@@ -86,10 +85,24 @@ def generate_stereo_items(
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # repeat for all source files
    for scene_name, scene in cfg.scenes.items():
    # set multiprocessing
    if "multiprocessing" not in cfg.__dict__:
        cfg.multiprocessing = True

    apply_func_parallel(
        generate_stereo_scene,
        zip(cfg.scenes.keys(), cfg.scenes.values(), repeat(cfg), repeat(logger)),
        None,
        "mp" if cfg.multiprocessing else None,
    )
    return


def generate_stereo_scene(
    scene_name: str, scene: dict, cfg: config.TestConfig, logger: logging.Logger
):
    logger.info(
            f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene['name']}"
        f"Processing scene: {scene_name} out of {len(cfg.scenes)} scenes, name: {scene['name']}"
    )

    # extract the number of audio sources
@@ -106,21 +119,18 @@ def generate_stereo_items(

    y = audio.ChannelBasedAudio("STEREO")
    for i in range(N_sources):
        source_prefix = cfg.listening_lab + cfg.language + cfg.exp
        # parse parameters from the scene description
            source_file = np.atleast_1d(scene["source"])[i]
        source_file = source_prefix + np.atleast_1d(scene["source"])[i]
        IR_file = np.atleast_1d(scene["IR"])[i]

        # log which source file / impulse response pair is about to be convolved
        logger.info(f"Convolving {source_file} with {IR_file}")

        # read source file
            x = audio.fromfile(
                "MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs
            )
        x = audio.fromfile("MONO", os.path.join(cfg.input_path, source_file), fs=cfg.fs)

        # read the IR file
            IR = audio.fromfile(
                "STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs
            )
        IR = audio.fromfile("STEREO", os.path.join(cfg.IR_path, IR_file), fs=cfg.IR_fs)

        # convolve with stereo IR
        x = reverb_stereo(x, IR)
@@ -141,15 +151,6 @@ def generate_stereo_items(
            pre = np.zeros((N_delay, x.audio.shape[1]))
            x.audio = np.concatenate([pre, x.audio])

            # pad with zeros to ensure that the signal length is a multiple of 20ms
            N_frame = x.fs / 50
            if len(x.audio) % N_frame != 0:
                N_pad = int(N_frame - len(x.audio) % N_frame)

                # insert all-zero preamble
                pre = np.zeros((N_pad, x.audio.shape[1]))
                x.audio = np.concatenate([pre, x.audio])

        # add source signal to the array of source signals
        y.fs = x.fs
        if y.audio is None:
@@ -201,21 +202,28 @@ def generate_stereo_items(
        post = np.zeros((N_post, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

    # pad with zeros to ensure that the signal length is a multiple of 20ms
    N_frame = y.fs / 50
    if y.audio.shape[0] % N_frame != 0:
        N_pad = int(N_frame - y.audio.shape[0] % N_frame)

        # insert all-zero postamble
        post = np.zeros((N_pad, y.audio.shape[1]))
        y.audio = np.concatenate([y.audio, post])

    # add random noise
    if cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
            noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype(
                "float"
            )
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")

        # superimpose
        y.audio += noise

    # write the reverberated audio into output file
        output_filename = scene["name"]
    output_filename = cfg.listening_lab + cfg.exp + scene["name"] + ".wav"
    audiofile.write(
            os.path.join(cfg.output_path, output_filename), y.audio, y.fs
        os.path.join(cfg.output_path, scene_name.split("_")[0], output_filename),
        y.audio,
        y.fs,
    )  # !!!! TBD: replace all os.path.xxx operations with the Path object

    return