Commit ff88325e authored by Anika Treffehn
Browse files

Adapted ISM metadata to 8 columns and enabled pass-through to the encoder

parent 5d9dd9d6
Loading
Loading
Loading
Loading
Loading
+14 −10
Original line number Diff line number Diff line
@@ -46,6 +46,8 @@ from ivas_processing_scripts.audiotools.constants import (
    METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS,
    OBJECT_BASED_AUDIO_FORMATS,
    SCENE_BASED_AUDIO_FORMATS,
    NUMBER_COLUMNS_ISM_METADATA,
    DEFAULT_ISM_METADATA
)

from .EFAP import wrap_angles
@@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio):
        return obj

    def init_metadata(self):
        # check if number of metadata files matches format
        if self.audio.shape[1] != len(self.metadata_files):
            raise ValueError(
                f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]"
@@ -305,17 +308,18 @@ class ObjectBasedAudio(Audio):
            pos = np.genfromtxt(f, delimiter=",")

            # check if metadata has right number of columns
            if pos.shape[1] < 5:
                raise ValueError("Metadata incomplete. Columns are missing.")
            elif pos.shape[1] > 5:
                if pos.shape[1] <= 8:
                    # TODO: FIXME
                    pos = pos[:, :5]
                else:
            num_columns = pos.shape[1]
            if num_columns < 2:
                raise ValueError("Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory.")
            elif num_columns > NUMBER_COLUMNS_ISM_METADATA:
                raise ValueError(
                        "Too many columns in metadata (possibly old version with frame index used)"
                    "Too many columns in metadata"
                )

            # pad metadata to max number of columns
            if num_columns < NUMBER_COLUMNS_ISM_METADATA:
                pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])])

            # check if metadata is longer than file -> cut off
            num_frames = int(
                np.ceil(self.audio.shape[0] / (self.fs * IVAS_FRAME_LEN_MS / 1000))
+4 −0
Original line number Diff line number Diff line
@@ -703,3 +703,7 @@ DELAY_COMPENSATION_FOR_FILTERING = {
    "HP50_32KHZ": 559,
    "HP50_48KHZ": 839,
}

# Default value for each ISM metadata column, in column order.
# ObjectBasedAudio.init_metadata pads loaded metadata that has fewer columns
# with the trailing entries of this list (DEFAULT_ISM_METADATA[num_columns:]).
# The first two columns are azimuth and elevation (the only mandatory ones);
# NOTE(review): the meaning of the remaining six columns is not visible here —
# confirm against the ISM metadata specification before documenting them.
DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
# printf-style format string for each column, applied per value when
# write_ISM_metadata_in_file writes a CSV row. It is indexed with
# range(NUMBER_COLUMNS_ISM_METADATA), so it must have exactly as many entries
# as DEFAULT_ISM_METADATA.
FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"]
# Number of columns in a full ISM metadata row; derived from the defaults so
# the two cannot drift apart. Metadata with more columns than this is rejected.
NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)
+4 −8
Original line number Diff line number Diff line
@@ -40,7 +40,7 @@ from ivas_processing_scripts.audiotools import audio
from ivas_processing_scripts.audiotools.audio import fromtype
from ivas_processing_scripts.audiotools.audioarray import trim
from ivas_processing_scripts.audiotools.audiofile import read
from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS
from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA


class Metadata:
@@ -204,13 +204,9 @@ def write_ISM_metadata_in_file(
        with open(csv_file, "w", newline="") as file:
            writer = csv.writer(file)
            for k in range(number_frames):
                row_list = [
                    "%+07.2f" % np.round(metadata[i][k, 0], 2),
                    "%+06.2f" % np.round(metadata[i][k, 1], 2),
                    "01.00",
                    "000.00",
                    "1.00",
                ]
                row_list = []
                for p in range(NUMBER_COLUMNS_ISM_METADATA):
                    row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2))
                writer.writerow(row_list)

    return file_names