Commit caced321 authored by Vladimir Malenovsky
Browse files

skip pre-amble and post-amble if not specified in the .yml file (saves processing time)

parent 1d40ba98
Loading
Loading
Loading
Loading
+12 −21
Original line number Diff line number Diff line
@@ -76,10 +76,6 @@ def generate_ambi_items(
):
    """Generate FOA/HOA2/HOA3 items from mono items based on scene description"""

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000
@@ -88,17 +84,6 @@ def generate_ambi_items(
    if "IR_fs" not in cfg.__dict__:
        cfg.IR_fs = 48000

    # set the pre-amble and post-amble
    if "preamble" not in cfg.__dict__:
        cfg.preamble = 0.0

    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0

    # set the low-level random noise flag
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # set the listening lab designator
    if "listening_lab" not in cfg.__dict__:
        cfg.listening_lab = "l"
@@ -325,12 +310,17 @@ def generate_ambi_scene(
            y.audio += x.audio

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        logger.info(
            f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
        )
        if any([cfg.preamble, cfg.postamble]):
            preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)

    # add random noise
    if cfg.add_low_level_random_noise:
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
@@ -348,6 +338,7 @@ def generate_ambi_scene(
        y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

+12 −14
Original line number Diff line number Diff line
@@ -82,10 +82,6 @@ def generate_ismN_items(
):
    """Generate ISMN items with metadata from mono items based on scene description"""

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000
@@ -97,10 +93,6 @@ def generate_ismN_items(
    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0

    # set the low-level random noise flag
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # set the listening lab designator
    if "listening_lab" not in cfg.__dict__:
        cfg.listening_lab = "l"
@@ -392,12 +384,17 @@ def generate_ismN_scene(
        y.metadata_files.insert(i, str(output_filename.with_suffix(f".{i}.csv")))

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        logger.info(
            f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
        )
        if any([cfg.preamble, cfg.postamble]):
            preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

    # add random noise
    if cfg.add_low_level_random_noise:
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
@@ -415,6 +412,7 @@ def generate_ismN_scene(
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

+12 −21
Original line number Diff line number Diff line
@@ -80,25 +80,10 @@ def generate_omasa_items(
):
    """Generate OMASA items with metadata from FOA/HO2 and ISMn items based on scene description"""

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000

    # set the pre-amble and post-amble
    if "preamble" not in cfg.__dict__:
        cfg.preamble = 0.0

    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0

    # set the low-level random noise flag
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # set the listening lab designator
    if "listening_lab" not in cfg.__dict__:
        cfg.listening_lab = "l"
@@ -439,12 +424,17 @@ def generate_OMASA_scene(
            y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        logger.info(
            f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
        )
        if any([cfg.preamble, cfg.postamble]):
            preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

    # add random noise
    if cfg.add_low_level_random_noise:
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
@@ -462,6 +452,7 @@ def generate_OMASA_scene(
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

+12 −21
Original line number Diff line number Diff line
@@ -78,25 +78,10 @@ def generate_osba_items(
):
    """Generate OSBA items from FOA/HOA2/HOA3 and ISMn items based on scene description"""

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000

    # set the pre-amble and post-amble
    if "preamble" not in cfg.__dict__:
        cfg.preamble = 0.0

    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0

    # set the low-level random noise flag
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # set the listening lab designator
    if "listening_lab" not in cfg.__dict__:
        cfg.listening_lab = "l"
@@ -421,12 +406,17 @@ def generate_OSBA_scene(
            y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        logger.info(
            f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
        )
        if any([cfg.preamble, cfg.postamble]):
            preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            metadata.trim_meta(y, limits=[-preamble, -postamble], samples=True)

    # add random noise
    if cfg.add_low_level_random_noise:
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
@@ -444,6 +434,7 @@ def generate_OSBA_scene(
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="BINAURAL")

+12 −21
Original line number Diff line number Diff line
@@ -78,10 +78,6 @@ def generate_stereo_items(
):
    """Generate STEREO items from mono items based on scene description"""

    # set the target level
    if "loudness" not in cfg.__dict__:
        cfg.loudness = -26

    # set the fs
    if "fs" not in cfg.__dict__:
        cfg.fs = 48000
@@ -90,21 +86,10 @@ def generate_stereo_items(
    if "IR_fs" not in cfg.__dict__:
        cfg.IR_fs = 48000

    # set the pre-amble and post-amble
    if "preamble" not in cfg.__dict__:
        cfg.preamble = 0.0

    if "postamble" not in cfg.__dict__:
        cfg.postamble = 0.0

    # set the IR path
    if "IR_path" not in cfg.__dict__:
        cfg.IR_path = os.path.join(os.path.dirname(__file__), "IRs")

    # set the low-level random noise flag
    if "add_low_level_random_noise" not in cfg.__dict__:
        cfg.add_low_level_random_noise = False

    # set the listening lab designator
    if "listening_lab" not in cfg.__dict__:
        cfg.listening_lab = "l"
@@ -326,12 +311,17 @@ def generate_stereo_scene(
            y.audio += x.audio

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        logger.info(
            f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
        )
        if any([cfg.preamble, cfg.postamble]):
            preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
            y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)

    # add random noise
    if cfg.add_low_level_random_noise:
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
        # create uniformly distributed noise between -4 and 4
        np.random.seed(SEED_RANDOM_NOISE)
        noise = np.random.randint(low=-4, high=5, size=y.audio.shape).astype("float")
@@ -349,6 +339,7 @@ def generate_stereo_scene(
        y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
        logger.info(f"-- Rescaling to target loudness: {cfg.loudness} LUFS")
        y.audio, _ = loudness_norm(y, cfg.loudness, loudness_format="STEREO")