Loading ivas_processing_scripts/generation/process_ism_items.py +26 −21 Original line number Diff line number Diff line Loading @@ -115,11 +115,16 @@ def generate_ism_items( # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) frame_len = int(x.fs / 50) # trim the samples from the end to ensure that the signal length is a multiple of 20ms x.audio = x.audio[:N_frames * frame_len] # adjust the level of the source file _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor # read azimuth information and create array if isinstance(source_azi, str): if ":" in source_azi: Loading Loading @@ -192,36 +197,34 @@ def generate_ism_items( # get the length of the first source file N_delay = len(y.audio[:, 0]) # add the shift N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) # add the shift value (ensure that the shift is a multiple of 20ms) N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) # insert all-zero preamble # insert all-zero signal pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # insert neutral position as a pre-amble N_delay = int(N_delay / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # insert neutral position as a pre-amble pre = np.tile( if len(x.audio) % frame_len != 0: # pad the source signal N_pad = int(frame_len - len(x.audio) % frame_len) post = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([x.audio, post]) # pad the metadata N_pad = int(len(x.audio) / frame_len) - len(x_meta) if N_pad > 0: post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) x_meta = np.concatenate([x_meta, post]) # add source signal to the array of all source signals y.fs = x.fs Loading Loading @@ -280,7 +283,7 @@ def generate_ism_items( # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms # ensure that pre-amble is a multiple of 20ms N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources Loading @@ -288,6 +291,7 @@ def generate_ism_items( y.audio = np.concatenate([pre, y.audio]) # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata Loading @@ -302,6 +306,7 @@ def generate_ism_items( y.audio = np.concatenate([y.audio, post]) # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) ) # !!!! TBD - check if we should insert netrual position or the last position of the metadata Loading @@ -319,7 +324,7 @@ def generate_ism_items( y.audio += noise # write individual ISM audio streams to the output file in an interleaved format output_filename = scene["name"] output_filename = scene_name audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object Loading ivas_processing_scripts/generation/process_stereo_items.py +3 −6 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ def generate_stereo_items( # repeat for all source files for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes") logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) Loading Loading @@ -135,9 +135,6 @@ def generate_stereo_items( # add the shift N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) # insert all-zero preamble pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) Loading Loading @@ -187,7 +184,7 @@ def generate_stereo_items( # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms # ensure that pre-amble is a multiple of 20ms N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources Loading @@ -214,7 +211,7 @@ def generate_stereo_items( y.audio += noise # write the reverberated audio into output file output_filename = scene["name"] output_filename = scene_name audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object Loading Loading
ivas_processing_scripts/generation/process_ism_items.py +26 −21 Original line number Diff line number Diff line Loading @@ -115,11 +115,16 @@ def generate_ism_items( # get the number of frames (multiple of 20ms) N_frames = int(len(x.audio) / x.fs * 50) frame_len = int(x.fs / 50) # trim the samples from the end to ensure that the signal length is a multiple of 20ms x.audio = x.audio[:N_frames * frame_len] # adjust the level of the source file _, scale_factor = get_loudness(x, cfg.loudness, "MONO") x.audio *= scale_factor # read azimuth information and create array if isinstance(source_azi, str): if ":" in source_azi: Loading Loading @@ -192,36 +197,34 @@ def generate_ism_items( # get the length of the first source file N_delay = len(y.audio[:, 0]) # add the shift N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) # add the shift value (ensure that the shift is a multiple of 20ms) N_delay += int(floor(-source_overlap * 50) / 50 * x.fs) # insert all-zero preamble # insert all-zero signal pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # insert neutral position as a pre-amble N_delay = int(N_delay / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_delay, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) # pad with zeros to ensure that the signal length is a multiple of 20ms N_frame = x.fs / 50 if len(x.audio) % N_frame != 0: N_pad = int(N_frame - len(x.audio) % N_frame) # insert all-zero preamble pre = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) # insert neutral position as a pre-amble pre = np.tile( if len(x.audio) % frame_len != 0: # pad the source signal N_pad = int(frame_len - len(x.audio) % frame_len) post = np.zeros((N_pad, x.audio.shape[1])) x.audio = np.concatenate([x.audio, post]) # pad the metadata N_pad = int(len(x.audio) / frame_len) - len(x_meta) if N_pad > 0: post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (N_pad, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata x_meta = np.concatenate([pre, x_meta]) x_meta = np.concatenate([x_meta, post]) # add source signal to the array of all source signals y.fs = x.fs Loading Loading @@ -280,7 +283,7 @@ def generate_ism_items( # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms # ensure that pre-amble is a multiple of 20ms N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources Loading @@ -288,6 +291,7 @@ def generate_ism_items( y.audio = np.concatenate([pre, y.audio]) # insert neutral position as a pre-amble to all sources N_pre = int(N_pre / frame_len) pre = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_pre, 1) ) # !!!! TBD - check if we should insert netrual position or the first position of the metadata Loading @@ -302,6 +306,7 @@ def generate_ism_items( y.audio = np.concatenate([y.audio, post]) # append neutral position as a post-amble to all sources N_post = int(N_post / frame_len) post = np.tile( [0.00, 0.00, 1.00, 0.00, 1.00], (y_meta.shape[0], N_post, 1) ) # !!!! TBD - check if we should insert netrual position or the last position of the metadata Loading @@ -319,7 +324,7 @@ def generate_ism_items( y.audio += noise # write individual ISM audio streams to the output file in an interleaved format output_filename = scene["name"] output_filename = scene_name audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object Loading
ivas_processing_scripts/generation/process_stereo_items.py +3 −6 Original line number Diff line number Diff line Loading @@ -88,7 +88,7 @@ def generate_stereo_items( # repeat for all source files for scene_name, scene in cfg.scenes.items(): logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes") logger.info(f"Processing scene: {scene_name} out of {N_scenes} scenes, name: {scene_name}") # extract the number of audio sources N_sources = len(np.atleast_1d(scene["source"])) Loading Loading @@ -135,9 +135,6 @@ def generate_stereo_items( # add the shift N_delay += int(-source_overlap * x.fs) # ensure delay is a multiple of 20ms # N_delay = int(floor(source_shift * 50) / 50 * x.fs) # insert all-zero preamble pre = np.zeros((N_delay, x.audio.shape[1])) x.audio = np.concatenate([pre, x.audio]) Loading Loading @@ -187,7 +184,7 @@ def generate_stereo_items( # append pre-amble and post-amble to all sources if cfg.preamble != 0.0: # ensure that pre-mable is a multiple of 20ms # ensure that pre-amble is a multiple of 20ms N_pre = int(floor(cfg.preamble * 50) / 50 * y.fs) # insert all-zero preamble to all sources Loading @@ -214,7 +211,7 @@ def generate_stereo_items( y.audio += noise # write the reverberated audio into output file output_filename = scene["name"] output_filename = scene_name audiofile.write( os.path.join(cfg.output_path, output_filename), y.audio, y.fs ) # !!!! TBD: replace all os.path.xxx operations with the Path object Loading