Loading .gitlab-ci.yml +3 −2 Original line number Diff line number Diff line Loading @@ -37,9 +37,10 @@ stages: # NOTE: CODEC_DIR has to be in PATH - cd $CODEC_DIR # make sure that we are at latest main # TODO: temporarily use the RC1a tag # TODO: temporarily use the RC1b tag - git restore . - git checkout 20230511-RC1a-listening-tests - git fetch - git checkout 20230516-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" - echo "--------------------------------------------" Loading ivas_processing_scripts/audiotools/audio.py +16 −10 Original line number Diff line number Diff line Loading @@ -42,8 +42,10 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, ) Loading Loading @@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio): return obj def init_metadata(self): # check if number of metadata files matches format if self.audio.shape[1] != len(self.metadata_files): raise ValueError( f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" Loading @@ -305,15 +308,18 @@ class ObjectBasedAudio(Audio): pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns if pos.shape[1] < 5: raise ValueError("Metadata incomplete. Columns are missing.") elif pos.shape[1] > 5: if pos.shape[1] <= 8: # TODO: FIXME pos = pos[:, :5] else: num_columns = pos.shape[1] if num_columns < 2: raise ValueError( "Too many columns in metadata (possibly old version with frame index used)" "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." ) elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: pos = np.hstack( [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] ) # check if metadata is longer than file -> cut off Loading ivas_processing_scripts/audiotools/constants.py +13 −0 Original line number Diff line number Diff line Loading @@ -703,3 +703,16 @@ DELAY_COMPENSATION_FOR_FILTERING = { "HP50_32KHZ": 559, "HP50_48KHZ": 839, } DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] FORMAT_ISM_METADATA_CSV = [ "%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f", ] NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA) ivas_processing_scripts/audiotools/metadata.py +50 −13 Original line number Diff line number Diff line Loading @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS from ivas_processing_scripts.audiotools.constants import ( DEFAULT_ISM_METADATA, FORMAT_ISM_METADATA_CSV, IVAS_FRAME_LEN_MS, NUMBER_COLUMNS_ISM_METADATA, ) class Metadata: Loading Loading @@ -201,16 +206,15 @@ def write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): row_list = [ "%+07.2f" % np.round(metadata[i][k, 0], 2), "%+06.2f" % np.round(metadata[i][k, 1], 2), "01.00", "000.00", "1.00", ] row_list = [] for p in range(number_columns): row_list.append( FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) ) writer.writerow(row_list) return file_names Loading Loading @@ -374,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) Loading Loading @@ -544,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, postamble, add: Optional[bool] = True, ): # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( Loading @@ -555,18 +561,49 @@ def add_remove_preamble( for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0, ), metadata[obj_idx], ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(-int(preamble_frames), 0), limits=(int(preamble_frames), 0), samples=True, ) # add radius 1 metadata[obj_idx][: int(preamble_frames), 2] = 1 # postamble postamble_frames = postamble / IVAS_FRAME_LEN_MS if not postamble_frames.is_integer(): raise ValueError( f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. " f"Frame length: {IVAS_FRAME_LEN_MS}ms" ) for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ metadata[obj_idx], np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], postamble_frames, 0, ), ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(int(preamble_frames), 0), limits=(0, int(postamble_frames)), samples=True, ) Loading ivas_processing_scripts/processing/preprocessing_2.py +13 −6 Original line number Diff line number Diff line Loading @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) # add preamble # also apply preamble to ISM metadata # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 Loading @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos # modify metadata metadata = add_remove_preamble(metadata, preamble) # add preamble metadata = add_remove_preamble(metadata, preamble, 0) # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] # add postable if self.postamble: metadata = add_remove_preamble(metadata, 0, self.postamble) meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files audio_object.obect_pos = metadata audio_object.object_pos = metadata # modify audio signal # add preamble if self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") Loading @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals # add postamble - do alter signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( Loading Loading
.gitlab-ci.yml +3 −2 Original line number Diff line number Diff line Loading @@ -37,9 +37,10 @@ stages: # NOTE: CODEC_DIR has to be in PATH - cd $CODEC_DIR # make sure that we are at latest main # TODO: temporarily use the RC1a tag # TODO: temporarily use the RC1b tag - git restore . - git checkout 20230511-RC1a-listening-tests - git fetch - git checkout 20230516-RC1b-listening-tests - echo "--------------------------------------------" - echo "Building codec on commit $(git rev-parse HEAD --short)" - echo "--------------------------------------------" Loading
ivas_processing_scripts/audiotools/audio.py +16 −10 Original line number Diff line number Diff line Loading @@ -42,8 +42,10 @@ from ivas_processing_scripts.audiotools.constants import ( BINAURAL_AUDIO_FORMATS, CHANNEL_BASED_AUDIO_ALTNAMES, CHANNEL_BASED_AUDIO_FORMATS, DEFAULT_ISM_METADATA, IVAS_FRAME_LEN_MS, METADATA_ASSISTED_SPATIAL_AUDIO_FORMATS, NUMBER_COLUMNS_ISM_METADATA, OBJECT_BASED_AUDIO_FORMATS, SCENE_BASED_AUDIO_FORMATS, ) Loading Loading @@ -295,6 +297,7 @@ class ObjectBasedAudio(Audio): return obj def init_metadata(self): # check if number of metadata files matches format if self.audio.shape[1] != len(self.metadata_files): raise ValueError( f"Mismatch between number of channels in file [{self.audio.shape[1]}], and metadata [{len(self.metadata_files)}]" Loading @@ -305,15 +308,18 @@ class ObjectBasedAudio(Audio): pos = np.genfromtxt(f, delimiter=",") # check if metadata has right number of columns if pos.shape[1] < 5: raise ValueError("Metadata incomplete. Columns are missing.") elif pos.shape[1] > 5: if pos.shape[1] <= 8: # TODO: FIXME pos = pos[:, :5] else: num_columns = pos.shape[1] if num_columns < 2: raise ValueError( "Too many columns in metadata (possibly old version with frame index used)" "Metadata incomplete. Columns are missing. Azimuth and elevation are mandatory." ) elif num_columns > NUMBER_COLUMNS_ISM_METADATA: raise ValueError("Too many columns in metadata") # pad metadata to max number of columns if num_columns < NUMBER_COLUMNS_ISM_METADATA: pos = np.hstack( [pos, np.array(pos.shape[0] * [DEFAULT_ISM_METADATA[num_columns:]])] ) # check if metadata is longer than file -> cut off Loading
ivas_processing_scripts/audiotools/constants.py +13 −0 Original line number Diff line number Diff line Loading @@ -703,3 +703,16 @@ DELAY_COMPENSATION_FOR_FILTERING = { "HP50_32KHZ": 559, "HP50_48KHZ": 839, } DEFAULT_ISM_METADATA = [0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0] FORMAT_ISM_METADATA_CSV = [ "%+07.2f", "%+06.2f", "%05.2f", "%06.2f", "%04.2f", "%+07.2f", "%+06.2f", "%1.0f", ] NUMBER_COLUMNS_ISM_METADATA = len(DEFAULT_ISM_METADATA)
ivas_processing_scripts/audiotools/metadata.py +50 −13 Original line number Diff line number Diff line Loading @@ -40,7 +40,12 @@ from ivas_processing_scripts.audiotools import audio from ivas_processing_scripts.audiotools.audio import fromtype from ivas_processing_scripts.audiotools.audioarray import trim from ivas_processing_scripts.audiotools.audiofile import read from ivas_processing_scripts.audiotools.constants import IVAS_FRAME_LEN_MS from ivas_processing_scripts.audiotools.constants import ( DEFAULT_ISM_METADATA, FORMAT_ISM_METADATA_CSV, IVAS_FRAME_LEN_MS, NUMBER_COLUMNS_ISM_METADATA, ) class Metadata: Loading Loading @@ -201,16 +206,15 @@ def write_ISM_metadata_in_file( for i, csv_file in enumerate(file_names): number_frames = metadata[i].shape[0] number_columns = metadata[i].shape[1] with open(csv_file, "w", newline="") as file: writer = csv.writer(file) for k in range(number_frames): row_list = [ "%+07.2f" % np.round(metadata[i][k, 0], 2), "%+06.2f" % np.round(metadata[i][k, 1], 2), "01.00", "000.00", "1.00", ] row_list = [] for p in range(number_columns): row_list.append( FORMAT_ISM_METADATA_CSV[p] % np.round(metadata[i][k, p], 2) ) writer.writerow(row_list) return file_names Loading Loading @@ -374,7 +378,7 @@ def concat_meta_from_file( # add preamble if preamble: concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble) concat_meta_all_obj = add_remove_preamble(concat_meta_all_obj, preamble, 0) write_ISM_metadata_in_file(concat_meta_all_obj, out_file) Loading Loading @@ -544,8 +548,10 @@ def metadata_search( def add_remove_preamble( metadata, preamble, postamble, add: Optional[bool] = True, ): # preamble preamble_frames = preamble / IVAS_FRAME_LEN_MS if not preamble_frames.is_integer(): raise ValueError( Loading @@ -555,18 +561,49 @@ def add_remove_preamble( for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], preamble_frames, 0, ), metadata[obj_idx], ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(-int(preamble_frames), 0), limits=(int(preamble_frames), 0), samples=True, ) # add radius 1 metadata[obj_idx][: int(preamble_frames), 2] = 1 # postamble postamble_frames = postamble / IVAS_FRAME_LEN_MS if not postamble_frames.is_integer(): raise ValueError( f"Application of postamble for ISM metadata is only possible if postamble length is multiple of frame length. " f"Frame length: {IVAS_FRAME_LEN_MS}ms" ) for obj_idx in range(len(metadata)): if metadata is not None and metadata[obj_idx] is not None: if add: num_columns = metadata[obj_idx].shape[1] metadata[obj_idx] = np.vstack( [ metadata[obj_idx], np.repeat( np.array(DEFAULT_ISM_METADATA)[None, :num_columns], postamble_frames, 0, ), ] ) else: metadata[obj_idx] = trim( metadata[obj_idx], limits=(int(preamble_frames), 0), limits=(0, int(postamble_frames)), samples=True, ) Loading
ivas_processing_scripts/processing/preprocessing_2.py +13 −6 Original line number Diff line number Diff line Loading @@ -64,8 +64,7 @@ class Preprocessing2(Processing): self.in_fmt, in_file, fs=self.in_fs, in_meta=in_meta ) # add preamble # also apply preamble to ISM metadata # modify ISM metadata if self.in_fmt.startswith("ISM"): if not self.preamble: preamble = 0 Loading @@ -75,16 +74,24 @@ class Preprocessing2(Processing): # read out old metadata = audio_object.object_pos # modify metadata metadata = add_remove_preamble(metadata, preamble) # add preamble metadata = add_remove_preamble(metadata, preamble, 0) # repeat signal if self.repeat_signal: metadata = [np.concatenate((m, m), axis=0) for m in metadata] # add postable if self.postamble: metadata = add_remove_preamble(metadata, 0, self.postamble) meta_files = write_ISM_metadata_in_file(metadata, [out_file], True) # modify audio object audio_object.metadata_files = meta_files audio_object.obect_pos = metadata audio_object.object_pos = metadata # modify audio signal # add preamble if self.preamble > 0: logger.debug(f"Add preamble of length {self.preamble}ms") Loading @@ -111,7 +118,7 @@ class Preprocessing2(Processing): (audio_object.audio, audio_object.audio), axis=0 ) # add postamble - do ater signal repetition as this is just for ensuring equal lengths between in- and output signals # add postamble - do alter signal repetition as this is just for ensuring equal lengths between in- and output signals if self.postamble > 0: logger.debug(f"Add postamble of length {self.postamble}ms") audio_object.audio = trim( Loading