From a92612399662a5141482169da7a10a256e885ae6 Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Mon, 1 Sep 2025 17:51:40 +0200 Subject: [PATCH 1/2] add --inplace arg to create_short_testvectors.py needed for the short sanitizer test in BASO MR pipelines --- tests/create_short_testvectors.py | 41 +++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tests/create_short_testvectors.py b/tests/create_short_testvectors.py index a478e315e2..6131d2b8b8 100755 --- a/tests/create_short_testvectors.py +++ b/tests/create_short_testvectors.py @@ -95,18 +95,22 @@ def collect_files(use_ltv: bool = False, ltv_dir: Path = None): return files -def create_short_testvectors(cut_len=5.0, use_ltv: bool = False, ltv_dir: Path = None): +def create_short_testvectors( + cut_len=5.0, use_ltv: bool = False, ltv_dir: Path = None, inplace: bool = False +): files = collect_files(use_ltv, ltv_dir) for f in files: - suffix = "" if use_ltv else "_cut" + suffix = "" if use_ltv or inplace else "_cut" out_file = TEST_VECTOR_DIR.joinpath(f.stem + suffix + f.suffix) num_channels = audiofile.get_wav_file_info(f)["channels"] cut_samples(f, out_file, num_channels, CUT_FROM, f"{cut_len}", GAIN) if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + description="Cut all stv/ltv signals to a given shorter length" + ) def positive_float(x: str) -> float: x = float(x) @@ -114,8 +118,31 @@ if __name__ == "__main__": raise ValueError("Value for cut_len needs to be positive!") return x - parser.add_argument("--cut_len", type=positive_float, default=5.0) - parser.add_argument("--use_ltv", action="store_true", default=False) - parser.add_argument("--ltv_dir", type=Path) + parser.add_argument( + "--cut_len", type=positive_float, default=5.0, help="Length to cut files to" + ) + parser.add_argument( + "--use_ltv", + action="store_true", + default=False, + help="Operate on LTV files instead of the shorter ones. This implicitly also sets INPLACE (to be backwards-compatible).", + ) + parser.add_argument( + "--ltv_dir", + type=Path, + default=None, + help="Path to the LTV file directory. Needed when using USE_LTV.", + ) + parser.add_argument( + "--inplace", + action="store_true", + default=False, + help="Set this to operate inplace (i.e. overwrite the signals, instead of creating new ones with _cut suffix)", + ) args = parser.parse_args() - sys.exit(create_short_testvectors(args.cut_len, args.use_ltv, args.ltv_dir)) + + assert not (args.use_ltv and args.ltv_dir is None) + + sys.exit( + create_short_testvectors(args.cut_len, args.use_ltv, args.ltv_dir, args.inplace) + ) -- GitLab From cb81bb9aa46ba0372362f79d74c7da4bb832566c Mon Sep 17 00:00:00 2001 From: Jan Kiene Date: Wed, 17 Sep 2025 17:30:28 +0200 Subject: [PATCH 2/2] add --dry and --all arguments --- tests/create_short_testvectors.py | 47 ++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/tests/create_short_testvectors.py b/tests/create_short_testvectors.py index 6131d2b8b8..184386bc27 100755 --- a/tests/create_short_testvectors.py +++ b/tests/create_short_testvectors.py @@ -36,6 +36,7 @@ Create short (5sec) testvectors. import argparse import sys +import logging from pathlib import Path from cut_pcm import cut_samples @@ -81,30 +82,40 @@ FILE_IDS_LTV = [ ] -def collect_files(use_ltv: bool = False, ltv_dir: Path = None): - IDS = FILE_IDS_LTV if use_ltv else FILE_IDS - SEARCH_DIR = ltv_dir if use_ltv and ltv_dir else TEST_VECTOR_DIR +def collect_files(use_ltv: bool = False, ltv_dir: Path = None, all: bool = False): + ids = FILE_IDS_LTV if use_ltv else FILE_IDS + search_dir = ltv_dir if use_ltv and ltv_dir else TEST_VECTOR_DIR files = [ f.absolute() - for f in SEARCH_DIR.iterdir() + for f in search_dir.iterdir() if f.suffix == ".wav" - and any([id in f.name for id in IDS]) - and "_cut" not in f.name + and ((any([id in f.name for id in ids]) and "_cut" not in f.name) or all) ] return files def create_short_testvectors( - cut_len=5.0, use_ltv: bool = False, ltv_dir: Path = None, inplace: bool = False + cut_len=5.0, + use_ltv: bool = False, + ltv_dir: Path = None, + inplace: bool = False, + all: bool = False, + dry: bool = False, ): - files = collect_files(use_ltv, ltv_dir) + files = collect_files(use_ltv, ltv_dir, all) + + logging.info(f"Cutting {len(files)} files to {cut_len} seconds") for f in files: suffix = "" if use_ltv or inplace else "_cut" out_file = TEST_VECTOR_DIR.joinpath(f.stem + suffix + f.suffix) num_channels = audiofile.get_wav_file_info(f)["channels"] - cut_samples(f, out_file, num_channels, CUT_FROM, f"{cut_len}", GAIN) + + logging.info(f"{str(f)} -> {out_file}") + + if not dry: + cut_samples(f, out_file, num_channels, CUT_FROM, f"{cut_len}", GAIN) if __name__ == "__main__": @@ -139,10 +150,26 @@ if __name__ == "__main__": default=False, help="Set this to operate inplace (i.e. overwrite the signals, instead of creating new ones with _cut suffix)", ) + parser.add_argument( + "--all", + action="store_true", + default=False, + help="If given, operate on ALL .wav files in scripts/testv, not just the predefined set.", + ) + parser.add_argument( + "--dry", + action="store_true", + default=False, + help="Only print filenames that would be operated on, but don't actually do anything", + ) args = parser.parse_args() assert not (args.use_ltv and args.ltv_dir is None) + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + sys.exit( - create_short_testvectors(args.cut_len, args.use_ltv, args.ltv_dir, args.inplace) + create_short_testvectors( + args.cut_len, args.use_ltv, args.ltv_dir, args.inplace, args.all, args.dry + ) ) -- GitLab