Commit dc3a06ca authored by Vladimir Malenovsky's avatar Vladimir Malenovsky
Browse files

fix incorrect shifting of audio signal when creating overlap between items (SBA)

parent cd2f9f63
Loading
Loading
Loading
Loading
Loading
+53 −43
Original line number Diff line number Diff line
@@ -95,6 +95,8 @@ use_output_prefix: "leee"
###   azimuth:     azimuth in the range [-180,180]; positive values point to the left
###   elevation:   elevation in the range [-90,90]; positive values indicate up
###   shift:       time adjustment of the input signal (negative value delays the signal)
###   background:  background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
###   background_level:  target loudness of the background noise in dB LKFS; the noise is normalized to this level (defaults to -26 when omitted)
###
### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
### Note 1: use brackets [val1, val2, ...] when specifying multiple values 
@@ -109,52 +111,60 @@ scenes:
        input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_01_01_FOA.wav", "IRs/IR_do_p04_e_02_01_FOA.wav"]
        shift: [0.0, -1.0]
        
    "02": 
        output: "out/s02.wav"
        description: "Car with AB microphone pickup, overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        shift: [0.0, +1.0]
        
    "03": 
        output: "out/s03.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
        IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
        shift: [0.0, -1.0]
        
    "04": 
        output: "out/s04.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
        shift: [0.0, -1.0]
        
    "05": 
        output: "out/s05.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        shift: [0.0, -1.0]
        
    "06": 
        output: "out/s06.wav"
        description: "Car with AB microphone pickup, no overlap between the talkers."
        input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        shift: [0.0, -1.0]
         
    "07": 
        output: "out/s07.wav"
        description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
        input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
        shift: [0.0, -1.0]
         
    "08": 
        output: "out/s08.wav"
        description: "Car with AB microphone pickup, overlap between the talkers."
        input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
        IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        shift: [0.0, +1.0]
        background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        background_level: -46

    # "02": 
        # output: "out/s02.wav"
        # description: "Car with AB microphone pickup, overlap between the talkers, car noise."
        # input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        # shift: [0.0, +1.0]
        # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        # background_level: -46
        
    # "03": 
        # output: "out/s03.wav"
        # description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        # input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
        # IR: ["IRs/IR_do_p04_e_05_01_FOA.wav", "IRs/IR_do_p04_e_06_01_FOA.wav"]
        # shift: [0.0, -1.0]
        
    # "04": 
        # output: "out/s04.wav"
        # description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        # input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_08_01_FOA.wav"]
        # shift: [0.0, -1.0]
        # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        # background_level: -46
        
    # "05": 
        # output: "out/s05.wav"
        # description: "Car with AB microphone pickup, no overlap between the talkers, car noise."
        # input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_07_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        # shift: [0.0, -1.0]
        # background: "items_background/Dolby_BG_do_p05_a_01_FOA.wav"
        # background_level: -46
        
    # "06": 
        # output: "out/s06.wav"
        # description: "Car with AB microphone pickup, no overlap between the talkers."
        # input: ["items_mono/untrimmed/m3s8a_Talker2.wav", "items_mono/untrimmed/m4s13a_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_03_01_FOA.wav", "IRs/IR_do_p04_e_01_01_FOA.wav"]
        # shift: [0.0, -1.0]
         
    # "07": 
        # output: "out/s07.wav"
        # description: "Preliminary: Car with AB microphone pickup, no overlap between the talkers."
        # input: ["items_mono/untrimmed/f1s20a_Talker2.wav", "items_mono/untrimmed/f5s15b_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_02_01_FOA.wav", "IRs/IR_do_p04_e_07_01_FOA.wav"]
        # shift: [0.0, -1.0]
         
    # "08": 
        # output: "out/s08.wav"
        # description: "Car with AB microphone pickup, overlap between the talkers."
        # input: ["items_mono/untrimmed/m2s6b_Talker2.wav", "items_mono/untrimmed/f5s14a_Talker1.wav"]
        # IR: ["IRs/IR_do_p04_e_08_01_FOA.wav", "IRs/IR_do_p04_e_04_01_FOA.wav"]
        # shift: [0.0, +1.0]
+60 −7
Original line number Diff line number Diff line
@@ -339,13 +339,6 @@ def generate_sba_scene(
                y.audio, y.fs, limits=[-preamble, -postamble], samples=True
            )

    # add random noise
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
        y.audio += noise

    # adjust the length of the output signal
    if "duration" in cfg.__dict__:
        # trim the output signal such that the total duration is X seconds
@@ -366,6 +359,66 @@ def generate_sba_scene(
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

    # add background noise in FOA/HOA2/HOA3 format
    if "background" in scene.keys():
        # check if [] are used in the background noise file name
        if isinstance(scene["background"], list):
            # if so, use the first element
            background_filename = scene["background"][0]
        else:
            background_filename = scene["background"]

        # read the background noise file
        background_filename = Path(scene["background"]).parent / (
            cfg.use_input_prefix + Path(scene["background"]).name
        )
        logger.info(f"-- Adding background noise from {background_filename}")
        background = audio.fromfile(cfg.format, background_filename)

        # resample to the target fs if necessary
        if background.fs != cfg.fs:
            logger.warning(
                f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!"
            )
            resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs)
            background.audio = resampled_audio
            background.fs = cfg.fs

        # adjust the length of the background noise signal
        if len(background.audio) != len(y.audio):
            background.audio = audioarray.trim(
                background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True
            )

        # adjust the loudness of the background noise signal
        if "background_level" in scene.keys():
            logger.info(
                f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS"
            )

            # check if [] are used in the background level
            if isinstance(scene["background_level"], list):
                # if so, use the first element
                scene["background_level"] = scene["background_level"][0]

            # convert to float if the background level was entered in string format
            if not isinstance(scene["background_level"], (int, float)):
                scene["background_level"] = float(scene["background_level"])
        else:
            logger.warning(
                "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS"
            )
            scene["background_level"] = -26
        background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO")

        # add the background noise to the output signal
        y.audio += background.audio
    elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
        y.audio += noise

    # apply fade-in and fade-out
    if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0:
        logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds")
+1 −1

File changed.

Contains only whitespace changes.