Commit 5abb8af5 authored by Anika Treffehn's avatar Anika Treffehn
Browse files

Merge branch 'fix_ISM_different_number_metadata_columns' into 'main'

adapted ISM metadata to 8 columns and enabled pass through to encoder

See merge request !101
parents 7de4ac6b 7d1faf1d
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -37,9 +37,10 @@ stages:
  # NOTE: CODEC_DIR has to be in PATH
  - cd $CODEC_DIR
  # make sure that we are at latest main
  # TODO: temporarily use the RC1a tag
  # TODO: temporarily use the RC1b tag
  - git restore .
  - git checkout 20230511-RC1a-listening-tests
  - git fetch
  - git checkout 20230516-RC1b-listening-tests
  - echo "--------------------------------------------"
  - echo "Building codec on commit $(git rev-parse HEAD --short)"
  - echo "--------------------------------------------"
+16 −10
Original line number Diff line number Diff line
@@ -42,8 +42,10 @@ from ivas_processing_scripts.audiotools.constants import (
    BINAURAL_AUDIO_FORMATS,
    CHANNEL_BASED_AUDIO_ALTNAMES,
    CHANNEL_BASED_AUDIO_FORMATS,
    DEFAULT_ISM_METADATA,
    IVAS_FRAME_LEN_MS,
    METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
    NUMBER_COLUMNS_ISM_METADATA,
    OBJECT_BASED_AUDIO_FORMATS,
    SCENE_BASED_AUDIO_FORMATS,
)
@@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio):
        return obj

    def init_metadata(self):
        # check if number of metadata files matches format
        if self.audio.shape[1] != len(self.metadata_files):
            raise ValueError(
                f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
@@ -305,15 +308,18 @@ class ObjectBasedAudio(Audio):
            pos = np.genfromtxt(f, delimiter=",")

            # check if metadata has right number of columns
            if pos.shape[1] < 5:
                raise ValueError("Metadata incomplete. Columns are missing.")
            elif pos.shape[1] > 5:
                if pos.shape[1] <= 8:
                    # TODO: FIXME
                    pos = pos[:, :5]
                else:
            num_columns = pos.shape[1]
            if num_columns < 2:
                raise ValueError(
                        "Too many columns in metadata (possibly old version with frame index used)"
                    "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory."
                )
            elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
                raise ValueError("Too many columns in metadata")

            # pad metadata to max number of columns
            if num_columns < NUMBER_COLUMNS_ISM_METADATA:
                pos = np.hstack(
                    [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]
                )

            # check if metadata is longer than file -> cut off
+13 −0
Original line number Diff line number Diff line
@@ -703,3 +703,16 @@ DELAY_COMPENSATION_FOR_FILTERING = {
    "HP50_32KHZ": 559,
    "HP50_48KHZ": 839,
}

# Default values for a single row of ISM (object) metadata, one entry per column.
# NOTE(review): the leading columns are presumably azimuth, elevation and radius
# (radius default 1.0); the meaning of the remaining columns is not visible
# here -- confirm against the ISM metadata format specification.
DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
# printf-style ("%"-operator) format strings for writing metadata as CSV text.
# Entry p is meant to format column p, so this list should keep the same length
# and column order as DEFAULT_ISM_METADATA.
FORMAT_ISM_METADATA_CSV = [
    "%+07.2f",
    "%+06.2f",
    "%05.2f",
    "%06.2f",
    "%04.2f",
    "%+07.2f",
    "%+06.2f",
    "%1.0f",
]
# Full number of columns in an ISM metadata row; derived from the defaults so
# the two can never disagree (currently 8).
NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)
+50 −13
Original line number Diff line number Diff line
@@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.audio import fromtype
from ivas_processing_scripts.audiotools.audioarray import trim
from ivas_processing_scripts.audiotools.audiofile import read
from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS
from ivas_processing_scripts.audiotools.constants import (
    DEFAULT_ISM_METADATA,
    FORMAT_ISM_METADATA_CSV,
    IVAS_FRAME_LEN_MS,
    NUMBER_COLUMNS_ISM_METADATA,
)


class Metadata:
@@ -201,16 +206,15 @@ def write_ISM_metadata_in_file(

    for i, csv_file in enumerate(file_names):
        number_frames = metadata[i].shape[0]
        number_columns = metadata[i].shape[1]
        with open(csv_file, "w", newline="") as file:
            writer = csv.writer(file)
            for k in range(number_frames):
                row_list = [
                    "%+07.2f" % np.round(metadata[i][k, 0], 2),
                    "%+06.2f" % np.round(metadata[i][k, 1], 2),
                    "01.00",
                    "000.00",
                    "1.00",
                ]
                row_list = []
                for p in range(number_columns):
                    row_list.append(
                        FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)
                    )
                writer.writerow(row_list)

    return file_names
@@ -374,7 +378,7 @@ def concat_meta_from_file(

    # add preamble
    if preamble:
        concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble)
        concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0)

    write_ISM_metadata_in_file(concat_meta_all_obj, out_file)

@@ -544,8 +548,10 @@ def metadata_search(
def add_remove_preamble(
    metadata,
    preamble,
    postamble,
    add: Optional[bool] = True,
):
    # preamble
    preamble_frames = preamble / IVAS_FRAME_LEN_MS
    if not preamble_frames.is_integer():
        raise ValueError(
@@ -555,18 +561,49 @@ def add_remove_preamble(
    for obj_idx in range(len(metadata)):
        if metadata is not None and metadata[obj_idx] is not None:
            if add:
                num_columns = metadata[obj_idx].shape[1]
                metadata[obj_idx] = np.vstack(
                    [
                        np.repeat(
                            np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
                            preamble_frames,
                            0,
                        ),
                        metadata[obj_idx],
                    ]
                )
            else:
                metadata[obj_idx] = trim(
                    metadata[obj_idx],
                    limits=(-int(preamble_frames), 0),
                    limits=(int(preamble_frames), 0),
                    samples=True,
                )

                # add radius 1
                metadata[obj_idx][: int(preamble_frames), 2] = 1
    # postamble
    postamble_frames = postamble / IVAS_FRAME_LEN_MS
    if not postamble_frames.is_integer():
        raise ValueError(
            f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. "
            f"Frame length: {IVAS_FRAME_LEN_MS}ms"
        )
    for obj_idx in range(len(metadata)):
        if metadata is not None and metadata[obj_idx] is not None:
            if add:
                num_columns = metadata[obj_idx].shape[1]
                metadata[obj_idx] = np.vstack(
                    [
                        metadata[obj_idx],
                        np.repeat(
                            np.array(DEFAULT_ISM_METADATA)[None, :num_columns],
                            postamble_frames,
                            0,
                        ),
                    ]
                )
            else:
                metadata[obj_idx] = trim(
                    metadata[obj_idx],
                    limits=(int(preamble_frames), 0),
                    limits=(0, int(postamble_frames)),
                    samples=True,
                )

+13 −6
Original line number Diff line number Diff line
@@ -64,8 +64,7 @@ class Preprocessing2(Processing):
            self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta
        )

        # add preamble
        # also apply preamble to ISM metadata
        # modify ISM metadata
        if self.in_fmt.startswith("ISM"):
            if not self.preamble:
                preamble = 0
@@ -75,16 +74,24 @@ class Preprocessing2(Processing):
            # read out old
            metadata = audio_object.object_pos

            # modify metadata
            metadata = add_remove_preamble(metadata, preamble)
            # add preamble
            metadata = add_remove_preamble(metadata, preamble, 0)

            # repeat signal
            if self.repeat_signal:
                metadata = [np.concatenate((m, m), axis=0) for m in metadata]

            # add postamble
            if self.postamble:
                metadata = add_remove_preamble(metadata, 0, self.postamble)

            meta_files = write_ISM_metadata_in_file(metadata, [out_file], True)

            # modify audio object
            audio_object.metadata_files = meta_files
            audio_object.obect_pos = metadata
            audio_object.object_pos = metadata

        # modify audio signal
        # add preamble
        if self.preamble > 0:
            logger.debug(f"Add preamble of length {self.preamble}ms")
@@ -111,7 +118,7 @@ class Preprocessing2(Processing):
                (audio_object.audio, audio_object.audio), axis=0
            )

        # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals
        # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals
        if self.postamble > 0:
            logger.debug(f"Add postamble of length {self.postamble}ms")
            audio_object.audio = trim(
Loading