Loading ivas_processing_scripts/audiotools/audio.py +8 −6 Original line number Diff line number Diff line Loading @@ -42,12 +42,12 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA ) from .EFAP import wrap_angles Loading Loading @@ -310,15 +310,17 @@ class ObjectBasedAudio(Audio): # check if metadata has right number of columns num_columns = pos.shape[1] if num_columns < 2: raise ValueError("Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory.") elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError( "Too many columns in metadata" "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." ) elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]) pos = np.hstack( [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] ) # check if metadata is longer than file -> cut off num_frames = int( Loading ivas_processing_scripts/audiotools/constants.py +10 −1 Original line number Diff line number Diff line Loading @@ -705,5 +705,14 @@ DELAY_COMPENSATION_FOR_FILTERING = { } DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"] FORMAT_ISM_METADATA_CSV = [ "%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f", ] NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA) ivas_processing_scripts/audiotools/metadata.py +50 −5 Original line number Diff line 
number Diff line Loading @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA from ivas_processing_scripts.audiotools.constants import ( DEFAULT_ISM_METADATA, FORMAT_ISM_METADATA_CSV, IVAS_FRAME_LEN_MS, NUMBER_COLUMNS_ISM_METADATA, ) class Metadata: Loading Loading @@ -201,12 +206,15 @@ def write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): row_list = [] for p in range(NUMBER_COLUMNS_ISM_METADATA): row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)) for p in range(number_columns): row_list.append( FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) ) writer.writerow(row_list) return file_names Loading Loading @@ -370,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) Loading Loading @@ -540,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, postamble, add: Optional[bool] = True, ): # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( Loading @@ -553,7 +563,14 @@ def add_remove_preamble( if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [np.repeat(np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0), metadata[obj_idx]] [ np.repeat( 
np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0, ), metadata[obj_idx], ] ) else: metadata[obj_idx] = trim( Loading @@ -562,4 +579,32 @@ def add_remove_preamble( samples=True, ) # postamble postamble_frames = postamble / IVAS_FRAME_LEN_MS if not postamble_frames.is_integer(): raise ValueError( f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. " f"Frame length: {IVAS_FRAME_LEN_MS}ms" ) for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ metadata[obj_idx], np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], postamble_frames, 0, ), ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(0, int(postamble_frames)), samples=True, ) return metadata ivas_processing_scripts/processing/preprocessing_2.py +13 −6 Original line number Diff line number Diff line Loading @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) # add preamble # also apply preamble to ISM metadata # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 Loading @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos # modify metadata metadata = add_remove_preamble(metadata, preamble) # add preamble metadata = add_remove_preamble(metadata, preamble, 0) # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] # add postamble if self.postamble: metadata = add_remove_preamble(metadata, 0, self.postamble) meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files audio_object.obect_pos = metadata audio_object.object_pos = metadata # modify audio signal # add preamble if self.preamble > 0: logger.debug(f"Add preamble of length 
{self.preamble}ms") Loading @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( Loading ivas_processing_scripts/processing/processing.py +19 −12 Original line number Diff line number Diff line Loading @@ -407,28 +407,36 @@ def remove_pre_and_postamble( ): # remove preamble for ISM metadata if out_fmt.startswith("ISM"): # remove postamble if postamble_len_ms: meta = add_remove_preamble(meta, 0, postamble_len_ms, add=False) # cut first half of the metadata if repeat_signal: meta = [m[int(len(m) / 2) :, :] for m in meta] # remove preamble if preamble_len_ms > 0: meta = add_remove_preamble(meta, preamble_len_ms, add=False) meta = add_remove_preamble(meta, preamble_len_ms, 0, add=False) # get number of samples to cut from start trim_len_samples = (preamble_len_ms * fs) // 1000 # remove postamble if postamble_len_ms: if logger: logger.debug("Remove postamble") postamble_len_samples = (postamble_len_ms * fs) // 1000 x = trim(x, fs, (0, postamble_len_samples), samples=True) # cut first half of signal if repeat_signal: if logger: logger.debug("Remove first half of signal") x = x[len(x) // 2 :, :] # need to subtract the postamble length before getting half of signal length - it was added after concatenation trim_len_samples += (len(x) - postamble_len_samples) // 2 if trim_len_samples > 0 and logger: # remove preamble if preamble_len_ms: if logger: logger.debug("Remove preamble") x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True) x = trim(x, fs, ((preamble_len_ms * fs) // 1000, 0), samples=True) return x, meta Loading Loading @@ -464,4 +472,3 @@ def 
preprocess_background_noise(cfg): ] = output_audio return Loading
ivas_processing_scripts/audiotools/audio.py +8 −6 Original line number Diff line number Diff line Loading @@ -42,12 +42,12 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA ) from .EFAP import wrap_angles Loading Loading @@ -310,15 +310,17 @@ class ObjectBasedAudio(Audio): # check if metadata has right number of columns num_columns = pos.shape[1] if num_columns < 2: raise ValueError("Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory.") elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError( "Too many columns in metadata" "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." ) elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: pos = np.hstack([pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])]) pos = np.hstack( [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] ) # check if metadata is longer than file -> cut off num_frames = int( Loading
ivas_processing_scripts/audiotools/constants.py +10 −1 Original line number Diff line number Diff line Loading @@ -705,5 +705,14 @@ DELAY_COMPENSATION_FOR_FILTERING = { } DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] FORMAT_ISM_METADATA_CSV = ["%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f"] FORMAT_ISM_METADATA_CSV = [ "%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f", ] NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)
ivas_processing_scripts/audiotools/metadata.py +50 −5 Original line number Diff line number Diff line Loading @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS, FORMAT_ISM_METADATA_CSV, NUMBER_COLUMNS_ISM_METADATA, DEFAULT_ISM_METADATA from ivas_processing_scripts.audiotools.constants import ( DEFAULT_ISM_METADATA, FORMAT_ISM_METADATA_CSV, IVAS_FRAME_LEN_MS, NUMBER_COLUMNS_ISM_METADATA, ) class Metadata: Loading Loading @@ -201,12 +206,15 @@ def write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): row_list = [] for p in range(NUMBER_COLUMNS_ISM_METADATA): row_list.append(FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2)) for p in range(number_columns): row_list.append( FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) ) writer.writerow(row_list) return file_names Loading Loading @@ -370,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) Loading Loading @@ -540,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, postamble, add: Optional[bool] = True, ): # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( Loading @@ -553,7 +563,14 @@ def add_remove_preamble( if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( 
[np.repeat(np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0), metadata[obj_idx]] [ np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0, ), metadata[obj_idx], ] ) else: metadata[obj_idx] = trim( Loading @@ -562,4 +579,32 @@ def add_remove_preamble( samples=True, ) # postamble postamble_frames = postamble / IVAS_FRAME_LEN_MS if not postamble_frames.is_integer(): raise ValueError( f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. " f"Frame length: {IVAS_FRAME_LEN_MS}ms" ) for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ metadata[obj_idx], np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], postamble_frames, 0, ), ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(0, int(postamble_frames)), samples=True, ) return metadata
ivas_processing_scripts/processing/preprocessing_2.py +13 −6 Original line number Diff line number Diff line Loading @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) # add preamble # also apply preamble to ISM metadata # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 Loading @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos # modify metadata metadata = add_remove_preamble(metadata, preamble) # add preamble metadata = add_remove_preamble(metadata, preamble, 0) # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] # add postamble if self.postamble: metadata = add_remove_preamble(metadata, 0, self.postamble) meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files audio_object.obect_pos = metadata audio_object.object_pos = metadata # modify audio signal # add preamble if self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") Loading @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals # add postamble - do after signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( Loading
ivas_processing_scripts/processing/processing.py +19 −12 Original line number Diff line number Diff line Loading @@ -407,28 +407,36 @@ def remove_pre_and_postamble( ): # remove preamble for ISM metadata if out_fmt.startswith("ISM"): # remove postamble if postamble_len_ms: meta = add_remove_preamble(meta, 0, postamble_len_ms, add=False) # cut first half of the metadata if repeat_signal: meta = [m[int(len(m) / 2) :, :] for m in meta] # remove preamble if preamble_len_ms > 0: meta = add_remove_preamble(meta, preamble_len_ms, add=False) meta = add_remove_preamble(meta, preamble_len_ms, 0, add=False) # get number of samples to cut from start trim_len_samples = (preamble_len_ms * fs) // 1000 # remove postamble if postamble_len_ms: if logger: logger.debug("Remove postamble") postamble_len_samples = (postamble_len_ms * fs) // 1000 x = trim(x, fs, (0, postamble_len_samples), samples=True) # cut first half of signal if repeat_signal: if logger: logger.debug("Remove first half of signal") x = x[len(x) // 2 :, :] # need to subtract the postamble length before getting half of signal length - it was added after concatenation trim_len_samples += (len(x) - postamble_len_samples) // 2 if trim_len_samples > 0 and logger: # remove preamble if preamble_len_ms: if logger: logger.debug("Remove preamble") x = trim(x, fs, (trim_len_samples, postamble_len_samples), samples=True) x = trim(x, fs, ((preamble_len_ms * fs) // 1000, 0), samples=True) return x, meta Loading Loading @@ -464,4 +472,3 @@ def preprocess_background_noise(cfg): ] = output_audio return