Loading ivas_processing_scripts/audiotools/audio.py +1 −0 Original line number Diff line number Diff line Loading @@ -214,6 +214,7 @@ class MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) self.metadata_files = [] @classmethod def _from_file( Loading ivas_processing_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat LFS +2 −2 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:1a88a3463513647455bcc38bd7180860edfb97195602a8ff832a6be1421474f8 size 14335861 oid sha256:bf86a03f0b13932c5c138af22584f864b75c5733df1b01ac3fdf7750a1bdbe5f size 14335913 ivas_processing_scripts/audiotools/binaural_datasets/README.txt +6 −3 Original line number Diff line number Diff line Files in this directory should contain impulse responses for use in rendering in Matlab .mat format A sampling rate of 48 kHz is assumed Files should adhere to the following naming scheme: {HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA3}.mat {HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA(1-3)}.mat - HRIR or BRIR specifies the type of impulse response which will be used Loading @@ -15,7 +16,9 @@ Files should adhere to the following naming scheme: FULL: all available measurements on the sphere LS: superset of supported loudspeaker layouts (see audiotools.constants.CHANNEL_BASED_AUDIO_FORMATS["LS"]) SBA3: impulse responses transformed to 3rd order ambisonics by external conversion SBA(1-3): impulse responses transformed to ambisonics by external conversion; if available, SBA1 is used for FOA, SBA2 for HOA2 and SBA3 for HOA3; if not available, SBA3 is used and truncated for all Ambisonic formats Each Matlab file should contain the following variables: - IR Loading @@ -24,7 +27,7 @@ Each Matlab file should contain the following variables: array of {azimuth, elevation, radius} of dimensions [n_channels x 3] required for FULL, optional otherwise - latency_s latency of the dataset in seconds latency of the dataset 
in samples optional, will be estimated if not provided LICENSES: Loading ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py +5 −8 Original line number Diff line number Diff line Loading @@ -161,18 +161,17 @@ def load_ir( else: dataset_suffix = "SBA3" IR, SourcePosition, latency_s = load_hrtf( Path(__file__).parent.joinpath( path_dataset = Path(__file__).parent.joinpath( f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" ) ) IR, SourcePosition, latency_s = load_hrtf(path_dataset) if latency_s is not None: latency_smp = int(latency_s * 48000) latency_smp = latency_s[0, 0] else: latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) warnings.warn( f"No latency of HRTF dataset specified in .mat file -> computed latency: {latency_smp}" f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" ) if in_fmt.startswith("STEREO"): Loading @@ -182,7 +181,6 @@ def load_ir( and not in_fmt.startswith("CUSTOM_LS") and not in_fmt.startswith("MOZART") ): # TODO update, use _get_audio_dict() instead of using fromtype object? 
# extract positions from the loudspeaker file in_fmt = fromtype(in_fmt) tmp_fmt = fromtype("LS") Loading @@ -200,7 +198,6 @@ def load_ir( if j != in_fmt.lfe_index[0]: IR[:, :, ir_index] = IR_tmp[:, :, i] ir_index += 1 # TODO: add custom ls support return IR, SourcePosition, latency_smp Loading ivas_processing_scripts/audiotools/convert/__init__.py +8 −2 Original line number Diff line number Diff line Loading @@ -62,7 +62,9 @@ def convert_file( in_meta: Optional[list] = None, logger: Optional[logging.Logger] = None, **kwargs, ): ) -> None: """Conversion function for one audio file""" if not in_fmt: raise ValueError("Input audio format must be specified!") Loading Loading @@ -149,7 +151,9 @@ def convert( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, **kwargs, ): ) -> None: """Perform pre-processing, conversion and post-processing""" """pre-processing""" process_audio( x=input, Loading Loading @@ -203,6 +207,8 @@ def process_audio( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" if fs is None: fs = x.fs Loading Loading
ivas_processing_scripts/audiotools/audio.py +1 −0 Original line number Diff line number Diff line Loading @@ -214,6 +214,7 @@ class MetadataAssistedSpatialAudio(Audio): raise ValueError( f"Unsupported metadata assisted spatial audio format {name}" ) self.metadata_files = [] @classmethod def _from_file( Loading
ivas_processing_scripts/audiotools/binaural_datasets/HRIR_ORANGE53_FULL.mat LFS +2 −2 Original line number Diff line number Diff line version https://git-lfs.github.com/spec/v1 oid sha256:1a88a3463513647455bcc38bd7180860edfb97195602a8ff832a6be1421474f8 size 14335861 oid sha256:bf86a03f0b13932c5c138af22584f864b75c5733df1b01ac3fdf7750a1bdbe5f size 14335913
ivas_processing_scripts/audiotools/binaural_datasets/README.txt +6 −3 Original line number Diff line number Diff line Files in this directory should contain impulse responses for use in rendering in Matlab .mat format A sampling rate of 48 kHz is assumed Files should adhere to the following naming scheme: {HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA3}.mat {HRIR|BRIR}_{DATASETNAME}_{FULL|LS|SBA(1-3)}.mat - HRIR or BRIR specifies the type of impulse response which will be used Loading @@ -15,7 +16,9 @@ Files should adhere to the following naming scheme: FULL: all available measurements on the sphere LS: superset of supported loudspeaker layouts (see audiotools.constants.CHANNEL_BASED_AUDIO_FORMATS["LS"]) SBA3: impulse responses transformed to 3rd order ambisonics by external conversion SBA(1-3): impulse responses transformed to ambisonics by external conversion; if available, SBA1 is used for FOA, SBA2 for HOA2 and SBA3 for HOA3; if not available, SBA3 is used and truncated for all Ambisonic formats Each Matlab file should contain the following variables: - IR Loading @@ -24,7 +27,7 @@ Each Matlab file should contain the following variables: array of {azimuth, elevation, radius} of dimensions [n_channels x 3] required for FULL, optional otherwise - latency_s latency of the dataset in seconds latency of the dataset in samples optional, will be estimated if not provided LICENSES: Loading
ivas_processing_scripts/audiotools/binaural_datasets/binaural_dataset.py +5 −8 Original line number Diff line number Diff line Loading @@ -161,18 +161,17 @@ def load_ir( else: dataset_suffix = "SBA3" IR, SourcePosition, latency_s = load_hrtf( Path(__file__).parent.joinpath( path_dataset = Path(__file__).parent.joinpath( f"{dataset_prefix}_{dataset}_{dataset_suffix}.mat" ) ) IR, SourcePosition, latency_s = load_hrtf(path_dataset) if latency_s is not None: latency_smp = int(latency_s * 48000) latency_smp = latency_s[0, 0] else: latency_smp = int(np.min(np.argmax(np.sum(np.abs(IR), axis=1), axis=0))) warnings.warn( f"No latency of HRTF dataset specified in .mat file -> computed latency: {latency_smp}" f"No latency of HRTF dataset specified in {path_dataset} file -> computed latency: {latency_smp} sample(s)" ) if in_fmt.startswith("STEREO"): Loading @@ -182,7 +181,6 @@ def load_ir( and not in_fmt.startswith("CUSTOM_LS") and not in_fmt.startswith("MOZART") ): # TODO update, use _get_audio_dict() instead of using fromtype object? # extract positions from the loudspeaker file in_fmt = fromtype(in_fmt) tmp_fmt = fromtype("LS") Loading @@ -200,7 +198,6 @@ def load_ir( if j != in_fmt.lfe_index[0]: IR[:, :, ir_index] = IR_tmp[:, :, i] ir_index += 1 # TODO: add custom ls support return IR, SourcePosition, latency_smp Loading
ivas_processing_scripts/audiotools/convert/__init__.py +8 −2 Original line number Diff line number Diff line Loading @@ -62,7 +62,9 @@ def convert_file( in_meta: Optional[list] = None, logger: Optional[logging.Logger] = None, **kwargs, ): ) -> None: """Conversion function for one audio file""" if not in_fmt: raise ValueError("Input audio format must be specified!") Loading Loading @@ -149,7 +151,9 @@ def convert( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, **kwargs, ): ) -> None: """Perform pre-processing, conversion and post-processing""" """pre-processing""" process_audio( x=input, Loading Loading @@ -203,6 +207,8 @@ def process_audio( esdru_alpha: Optional[float] = None, logger: Optional[logging.Logger] = None, ) -> None: """Perform (pre-/pos-) processing of audio""" if fs is None: fs = x.fs Loading