Commit cd2f9f63 authored by Vladimir Malenovsky's avatar Vladimir Malenovsky
Browse files

support addition of custom background noise file for each generated item

parent 974f029f
Loading
Loading
Loading
Loading
+13 −1
Original line number Diff line number Diff line
@@ -95,6 +95,8 @@ provider: "g"
###   azimuth:     azimuth in the range [-180,180]; positive values point to the left
###   elevation:   elevation in the range [-90,90]; positive values indicate up
###   shift:       time adjustment of the input signal (negative value delays the signal)
###   background:  background noise filename (if used, the 'add_low_level_random_noise' parameter is ignored)
###   background_level:  target loudness of the background noise in dB LKFS; the noise is normalized to this level before mixing (-26 is used if omitted)
###
### Note 0: you can use relative paths in filenames (the program assumes that the root directory is the parent directory of the ivas_processing_scripts subfolder)
### Note 1: use brackets [val1, val2, ...] when specifying multiple values 
@@ -109,6 +111,8 @@ scenes:
        input: ["items_mono/untrimmed/f1s4b_Talker2.wav", "items_mono/untrimmed/f2s1a_Talker1.wav"]
        IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos2_Stereo_M5_SinSweep_2chn.wav"]
        shift: [0.0, -1.0]
        background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
        background_level: -66
        
    "02": 
        output: "out/a1s02.wav"
@@ -116,6 +120,8 @@ scenes:
        input: ["items_mono/untrimmed/f1s6a_Talker2.wav", "items_mono/untrimmed/f2s3b_Talker1.wav"]
        IR: ["IRs/Car_TalkPos3_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos4_Stereo_M5_SinSweep_2chn.wav"]
        shift: [0.0, +1.0]
        background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
        background_level: -66
        
    "03": 
        output: "out/a1s03.wav"
@@ -123,6 +129,8 @@ scenes:
        input: ["items_mono/untrimmed/f3s3a_Talker2.wav", "items_mono/untrimmed/f3s10b_Talker2.wav"]
        IR: ["IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav", "IRs/Car_TalkPos1_Stereo_M5_SinSweep_2chn.wav"]
        shift: [0.0, -1.0]
        background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
        background_level: -66
        
    "04": 
        output: "out/a1s04.wav"
@@ -130,6 +138,8 @@ scenes:
        input: ["items_mono/untrimmed/f2s7b_Talker1.wav", "items_mono/untrimmed/f5s15a_Talker1.wav"]
        IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos1.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos2.wav"]
        shift: [0.0, -1.0]
        background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
        background_level: -66

    "05": 
        output: "out/a1s05.wav"
@@ -137,6 +147,8 @@ scenes:
        input: ["items_mono/untrimmed/m2s15a_Talker2.wav", "items_mono/untrimmed/m1s4a_Talker1.wav"]
        IR: ["IRs/FreeField_IR_Python_AB_20cm_Pos3.wav", "IRs/FreeField_IR_Python_AB_20cm_Pos4.wav"]
        shift: [0.0, -1.0]
        background: "items_mono/347224__rayjensen__ambience-in-car_stereo.wav"
        background_level: -66
        
    "06": 
        output: "out/a1s06.wav"
+60 −7
Original line number Diff line number Diff line
@@ -340,13 +340,6 @@ def generate_stereo_scene(
                y.audio, y.fs, limits=[-preamble, -postamble], samples=True
            )

    # add random noise
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
        y.audio += noise

    # adjust the length of the output signal
    if "duration" in cfg.__dict__:
        # trim the output signal such that the total duration is X seconds
@@ -367,6 +360,66 @@ def generate_stereo_scene(
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO")

    # add background noise in STEREO format
    if "background" in scene.keys():
        # check if [] are used in the background noise file name
        if isinstance(scene["background"], list):
            # if so, use the first element
            background_filename = scene["background"][0]
        else:
            background_filename = scene["background"]

        # read the background noise file
        background_filename = Path(scene["background"]).parent / (
            cfg.use_input_prefix + Path(scene["background"]).name
        )
        logger.info(f"-- Adding background noise from {background_filename}")
        background = audio.fromfile("STEREO", background_filename)

        # resample to the target fs if necessary
        if background.fs != cfg.fs:
            logger.warning(
                f"Warning: Sample rate of the background noise is {background.fs} Hz and needs to be resampled to {cfg.fs}!"
            )
            resampled_audio = audioarray.resample(background.audio, background.fs, cfg.fs)
            background.audio = resampled_audio
            background.fs = cfg.fs

        # adjust the length of the background noise signal
        if len(background.audio) != len(y.audio):
            background.audio = audioarray.trim(
                background.audio, background.fs, limits=[0, len(background.audio) - len(y.audio)], samples=True
            )

        # adjust the loudness of the background noise signal
        if "background_level" in scene.keys():
            logger.info(
                f"-- Rescaling background noise to target loudness: {scene['background_level']} LUFS"
            )

            # check if [] are used in the background level
            if isinstance(scene["background_level"], list):
                # if so, use the first element
                scene["background_level"] = scene["background_level"][0]

            # convert to float if the background level was entered in string format
            if not isinstance(scene["background_level"], (int, float)):
                scene["background_level"] = float(scene["background_level"])
        else:
            logger.warning(
                "-- Warning: No target loudness for background noise specified, using default value of -26 LUFS"
            )
            scene["background_level"] = -26
        background.audio, _ = loudness_norm(background, scene['background_level'], loudness_format="STEREO")

        # add the background noise to the output signal
        y.audio += background.audio
    elif "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
        y.audio += noise        

    # apply fade-in and fade-out
    if "fade_in_out" in cfg.__dict__ and cfg.fade_in_out > 0:
        logger.info(f"-- Applying fade-in and fade-out with {cfg.fade_in_out} seconds")