Commit de1f23bf authored by Vladimir Malenovsky
Browse files

reformatting

parent 1eb0cfed
Loading
Loading
Loading
Loading
Loading
+43 −17
Original line number Diff line number Diff line
@@ -205,12 +205,12 @@ def generate_ambi_scene(
        source_file = (
            scene["input"][i] if isinstance(scene["input"], list) else scene["input"]
        )
        IR_file = (
            scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
        )
        IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]

        # get input filename and IR filename
        input_filename = Path(source_file).parent / (cfg.use_input_prefix + Path(source_file).name)
        input_filename = Path(source_file).parent / (
            cfg.use_input_prefix + Path(source_file).name
        )
        IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name)

        # read the overlap length
@@ -241,7 +241,9 @@ def generate_ambi_scene(
        else:
            level = -26

        logger.info(f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds")
        logger.info(
            f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
        )

        # read source file
        x = audio.fromfile("MONO", input_filename)
@@ -274,7 +276,9 @@ def generate_ambi_scene(
            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, -N_pad], samples=True
                )

        # add the convolved FOA/HOA2/HOA3 audio source signal to the output signal
        if y.audio is None:
@@ -283,7 +287,9 @@ def generate_ambi_scene(

            if source_shift < 0:
                # insert zeros to the new audio source signal to shift it right
                y.audio = audioarray.trim_meta(y.audio, y.fs, limits=[source_shift, 0], samples=True)
                y.audio = audioarray.trim_meta(
                    y.audio, y.fs, limits=[source_shift, 0], samples=True
                )
            else:
                offset = source_shift
        else:
@@ -291,33 +297,47 @@ def generate_ambi_scene(
            delta_offset = source_shift - offset
            if delta_offset > 0:
                # insert zeros to the existing output signal to shift it right
                y.audio = audioarray.trim(y.audio, y.fs, limits=[0, -delta_offset], samples=True)
                y.audio = audioarray.trim(
                    y.audio, y.fs, limits=[0, -delta_offset], samples=True
                )
                offset = source_shift
            else:
                # insert zeros to the new audio source signal to shift it right
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, delta_offset], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, delta_offset], samples=True
                )

            # adjust the length of the audio source signal
            delta_length = len(x.audio) - len(y.audio)
            if delta_length > 0:
                # pad zeros to the existing output signal
                y.audio = audioarray.trim(y.audio, y.fs, limits=[0, -delta_length], samples=True)
                y.audio = audioarray.trim(
                    y.audio, y.fs, limits=[0, -delta_length], samples=True
                )
            else:
                # pad zeros to the new audio source signal
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, delta_length], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, delta_length], samples=True
                )

            # superimpose
            y.audio += x.audio

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        preamble = int(
            np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        postamble = int(
            np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        if preamble != 0 or postamble != 0:
            logger.info(
                f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
            )
            y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)
            y.audio = audioarray.trim(
                y.audio, y.fs, limits=[-preamble, -postamble], samples=True
            )

    # add random noise
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
@@ -333,9 +353,13 @@ def generate_ambi_scene(
    else:
        # do not change the length of the audio signal
        duration = len(y.audio)
    duration = int(np.floor(duration / frame_len) * frame_len)  # ensure multiple of 20ms
    duration = int(
        np.floor(duration / frame_len) * frame_len
    )  # ensure multiple of 20ms
    if len(y.audio) != duration:
        y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)
        y.audio = audioarray.trim(
            y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True
        )

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__:
@@ -355,7 +379,9 @@ def generate_ambi_scene(
        binaural_output_filename = output_filename.with_name(
            output_filename.stem + "_BINAURAL" + output_filename.suffix
        )
        logger.info(f"-- Converting to BINAURAL output file: {binaural_output_filename}")
        logger.info(
            f"-- Converting to BINAURAL output file: {binaural_output_filename}"
        )
        binaudio = audio.fromtype("BINAURAL")
        binaudio.fs = y.fs
        convert_scenebased(y, binaudio)
+30 −10
Original line number Diff line number Diff line
@@ -208,14 +208,18 @@ def generate_ismN_scene(
        # read azimuth and elevation information
        if "azimuth" in scene.keys():
            source_azi = (
                scene["azimuth"][i] if isinstance(scene["azimuth"], list) else scene["azimuth"]
                scene["azimuth"][i]
                if isinstance(scene["azimuth"], list)
                else scene["azimuth"]
            )
        else:
            source_azi = 0.0

        if "elevation" in scene.keys():
            source_ele = (
                scene["elevation"][i] if isinstance(scene["elevation"], list) else scene["elevation"]
                scene["elevation"][i]
                if isinstance(scene["elevation"], list)
                else scene["elevation"]
            )
        else:
            source_ele = 0.0
@@ -223,7 +227,9 @@ def generate_ismN_scene(
        # read the source shift length (in seconds)
        if "shift" in scene.keys():
            source_shift = (
                scene["shift"][i] if isinstance(scene["shift"], list) else scene["shift"]
                scene["shift"][i]
                if isinstance(scene["shift"], list)
                else scene["shift"]
            )
        else:
            source_shift = 0.0
@@ -239,12 +245,16 @@ def generate_ismN_scene(
        # read the level
        if "level" in scene.keys():
            level = (
                scene["level"][i] if isinstance(scene["level"], list) else scene["level"]
                scene["level"][i]
                if isinstance(scene["level"], list)
                else scene["level"]
            )
        else:
            level = -26

        logger.info(f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds")
        logger.info(
            f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
        )

        # read source file
        x = audio.fromtype("ISM1")
@@ -271,7 +281,9 @@ def generate_ismN_scene(
            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, -N_pad], samples=True
                )

        # get the number of frames (multiple of 20ms)
        N_frames = int(len(x.audio) / frame_len)
@@ -385,8 +397,12 @@ def generate_ismN_scene(

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        preamble = int(
            np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        postamble = int(
            np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        if preamble != 0 or postamble != 0:
            logger.info(
                f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
@@ -407,7 +423,9 @@ def generate_ismN_scene(
    else:
        # do not change the length of the audio signal
        duration = len(y.audio)
    duration = int(np.floor(duration / frame_len) * frame_len)  # ensure multiple of 20ms
    duration = int(
        np.floor(duration / frame_len) * frame_len
    )  # ensure multiple of 20ms
    if len(y.audio) != duration:
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

@@ -430,7 +448,9 @@ def generate_ismN_scene(
        binaural_output_filename = output_filename.with_name(
            output_filename.stem + "_BINAURAL" + output_filename.suffix
        )
        logger.info(f"-- Converting to BINAURAL output file: {binaural_output_filename}")
        logger.info(
            f"-- Converting to BINAURAL output file: {binaural_output_filename}"
        )
        binaudio = audio.fromtype("BINAURAL")
        binaudio.fs = y.fs
        convert_objectbased(y, binaudio)
+30 −10
Original line number Diff line number Diff line
@@ -203,14 +203,18 @@ def generate_OMASA_scene(
        # read azimuth and elevation information
        if "azimuth" in scene.keys():
            source_azi = (
                scene["azimuth"][i] if isinstance(scene["azimuth"], list) else scene["azimuth"]
                scene["azimuth"][i]
                if isinstance(scene["azimuth"], list)
                else scene["azimuth"]
            )
        else:
            source_azi = 0.0

        if "elevation" in scene.keys():
            source_ele = (
                scene["elevation"][i] if isinstance(scene["elevation"], list) else scene["elevation"]
                scene["elevation"][i]
                if isinstance(scene["elevation"], list)
                else scene["elevation"]
            )
        else:
            source_ele = 0.0
@@ -218,7 +222,9 @@ def generate_OMASA_scene(
        # read the source shift length (in seconds)
        if "shift" in scene.keys():
            source_shift = (
                scene["shift"][i] if isinstance(scene["shift"], list) else scene["shift"]
                scene["shift"][i]
                if isinstance(scene["shift"], list)
                else scene["shift"]
            )
        else:
            source_shift = 0.0
@@ -241,7 +247,9 @@ def generate_OMASA_scene(
        else:
            level = -26

        logger.info(f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds")
        logger.info(
            f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
        )

        # get the number of channels from the .wav file header
        wav_header = audiofile.parse_wave_header(input_filename)
@@ -286,7 +294,9 @@ def generate_OMASA_scene(
            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, -N_pad], samples=True
                )

        # get the number of frames (multiple of 20ms)
        N_frames = int(len(x.audio) / frame_len)
@@ -421,12 +431,18 @@ def generate_OMASA_scene(
            y.object_pos.extend(x.object_pos)

            # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
            y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))
            y.metadata_files.insert(
                i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))
            )

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        preamble = int(
            np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        postamble = int(
            np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        if preamble != 0 or postamble != 0:
            logger.info(
                f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
@@ -447,7 +463,9 @@ def generate_OMASA_scene(
    else:
        # do not change the length of the audio signal
        duration = len(y.audio)
    duration = int(np.floor(duration / frame_len) * frame_len)  # ensure multiple of 20ms
    duration = int(
        np.floor(duration / frame_len) * frame_len
    )  # ensure multiple of 20ms
    if len(y.audio) != duration:
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

@@ -470,7 +488,9 @@ def generate_OMASA_scene(
        binaural_output_filename = output_filename.with_name(
            output_filename.stem + "_BINAURAL" + output_filename.suffix
        )
        logger.info(f"-- Converting to BINAURAL output file: {binaural_output_filename}")
        logger.info(
            f"-- Converting to BINAURAL output file: {binaural_output_filename}"
        )
        binaudio = audio.fromtype("BINAURAL")
        binaudio.fs = y.fs
        convert_omasa(y, binaudio)
+24 −8
Original line number Diff line number Diff line
@@ -194,7 +194,9 @@ def generate_OSBA_scene(
        )

        # get input filename
        input_filename = Path(source_file).parent / (cfg.use_input_prefix + Path(source_file).name)
        input_filename = Path(source_file).parent / (
            cfg.use_input_prefix + Path(source_file).name
        )

        # read azimuth and elevation information
        source_azi = (
@@ -236,7 +238,9 @@ def generate_OSBA_scene(
        else:
            level = -26

        logger.info(f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds")
        logger.info(
            f"-- Encoding {source_file} at position(s) {source_azi},{source_ele} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
        )

        # get the number of channels from the .wav file header
        wav_header = audiofile.parse_wave_header(input_filename)
@@ -281,7 +285,9 @@ def generate_OSBA_scene(
            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, -N_pad], samples=True
                )

        # get the number of frames (multiple of 20ms)
        N_frames = int(len(x.audio) / frame_len)
@@ -403,12 +409,18 @@ def generate_OSBA_scene(
            y.object_pos.extend(x.object_pos)

            # add ISM metadata .csv filename (should end with .wav.0.csv, .wav.1.csv, ...)
            y.metadata_files.insert(i - 1, str(output_filename.with_suffix(f".{i - 1}.csv")))
            y.metadata_files.insert(
                i - 1, str(output_filename.with_suffix(f".{i - 1}.csv"))
            )

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        preamble = int(
            np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        postamble = int(
            np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        if preamble != 0 or postamble != 0:
            logger.info(
                f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
@@ -429,7 +441,9 @@ def generate_OSBA_scene(
    else:
        # do not change the length of the audio signal
        duration = len(y.audio)
    duration = int(np.floor(duration / frame_len) * frame_len)  # ensure multiple of 20ms
    duration = int(
        np.floor(duration / frame_len) * frame_len
    )  # ensure multiple of 20ms
    if len(y.audio) != duration:
        metadata.trim_meta(y, limits=[0, len(y.audio) - duration], samples=True)

@@ -452,7 +466,9 @@ def generate_OSBA_scene(
        binaural_output_filename = output_filename.with_name(
            output_filename.stem + "_BINAURAL" + output_filename.suffix
        )
        logger.info(f"-- Converting to BINAURAL output file: {binaural_output_filename}")
        logger.info(
            f"-- Converting to BINAURAL output file: {binaural_output_filename}"
        )
        binaudio = audio.fromtype("BINAURAL")
        binaudio.fs = y.fs
        convert_osba(y, binaudio)
+40 −16
Original line number Diff line number Diff line
@@ -211,12 +211,12 @@ def generate_stereo_scene(
        source_file = (
            scene["input"][i] if isinstance(scene["input"], list) else scene["input"]
        )
        IR_file = (
            scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]
        )
        IR_file = scene["IR"][i] if isinstance(scene["IR"], list) else scene["IR"]

        # get input filename and IR filename
        input_filename = Path(source_file).parent / (cfg.use_input_prefix + Path(source_file).name)
        input_filename = Path(source_file).parent / (
            cfg.use_input_prefix + Path(source_file).name
        )
        IR_filename = Path(IR_file).parent / (cfg.use_IR_prefix + Path(IR_file).name)

        # read the overlap length
@@ -247,7 +247,9 @@ def generate_stereo_scene(
        else:
            level = -26

        logger.info(f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds")
        logger.info(
            f"-- Convolving {source_file} with {IR_file} at {level} LUFS with shift of {source_shift_in_seconds} seconds"
        )

        # read source file
        x = audio.fromfile("MONO", input_filename)
@@ -275,7 +277,9 @@ def generate_stereo_scene(
            # pad with zeros to ensure that the signal length is a multiple of 20ms
            if len(x.audio) % frame_len != 0:
                N_pad = int(frame_len - len(x.audio) % frame_len)
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, -N_pad], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, -N_pad], samples=True
                )

        # add the convolved STEREO audio source signal to the output signal
        if y.audio is None:
@@ -284,7 +288,9 @@ def generate_stereo_scene(

            if source_shift < 0:
                # insert zeros to the new audio source signal to shift it right
                y.audio = audioarray.trim(y.audio, x.fs, limits=[source_shift, 0], samples=True)
                y.audio = audioarray.trim(
                    y.audio, x.fs, limits=[source_shift, 0], samples=True
                )
            else:
                offset = source_shift
        else:
@@ -292,33 +298,47 @@ def generate_stereo_scene(
            delta_offset = source_shift - offset
            if delta_offset > 0:
                # insert zeros to the existing output signal to shift it right
                y.audio = audioarray.trim(y.audio, y.fs, limits=[0, -delta_offset], samples=True)
                y.audio = audioarray.trim(
                    y.audio, y.fs, limits=[0, -delta_offset], samples=True
                )
                offset = source_shift
            else:
                # insert zeros to the new audio source signal to shift it right
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, delta_offset], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, delta_offset], samples=True
                )

            # adjust the length of the audio source signal
            delta_length = len(x.audio) - len(y.audio)
            if delta_length > 0:
                # pad zeros to the existing output signal
                y.audio = audioarray.trim(y.audio, y.fs, limits=[0, -delta_length], samples=True)
                y.audio = audioarray.trim(
                    y.audio, y.fs, limits=[0, -delta_length], samples=True
                )
            else:
                # pad zeros to the new audio source signal
                x.audio = audioarray.trim(x.audio, x.fs, limits=[0, delta_length], samples=True)
                x.audio = audioarray.trim(
                    x.audio, x.fs, limits=[0, delta_length], samples=True
                )

            # superimpose
            y.audio += x.audio

    # append pre-amble and post-amble
    if "preamble" in cfg.__dict__ or "postamble" in cfg.__dict__:
        preamble = int(np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        postamble = int(np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len)  # convert to samples and ensure multiple of 20ms
        preamble = int(
            np.floor(cfg.preamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        postamble = int(
            np.floor(cfg.postamble * cfg.fs / frame_len) * frame_len
        )  # convert to samples and ensure multiple of 20ms
        if preamble != 0 or postamble != 0:
            logger.info(
                f"-- Adding pre-amble of {cfg.preamble} seconds and post-amble of {cfg.postamble} seconds"
            )
            y.audio = audioarray.trim(y.audio, y.fs, limits=[-preamble, -postamble], samples=True)
            y.audio = audioarray.trim(
                y.audio, y.fs, limits=[-preamble, -postamble], samples=True
            )

    # add random noise
    if "add_low_level_random_noise" in cfg.__dict__ and cfg.add_low_level_random_noise:
@@ -334,9 +354,13 @@ def generate_stereo_scene(
    else:
        # do not change the length of the audio signal
        duration = len(y.audio)
    duration = int(np.floor(duration / frame_len) * frame_len)  # ensure multiple of 20ms
    duration = int(
        np.floor(duration / frame_len) * frame_len
    )  # ensure multiple of 20ms
    if len(y.audio) != duration:
        y.audio = audioarray.trim(y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True)
        y.audio = audioarray.trim(
            y.audio, y.fs, limits=[0, len(y.audio) - duration], samples=True
        )

    # adjust the loudness of the output signal
    if "loudness" in cfg.__dict__: