"""
Script for collecting and renaming the files from a processing script run that
are intended to use in testing bitexactness against selection test binaries.

Usage:
    python3 rename_and_collect_testvectors.py

There is a variable "TEST_MODE" below. Set it to True to do a dry-run to detect
problems/missing things before actually copying the files. The dry-run version
will not copy any files, but just check that all expected files are present and
print out the renaming/copying actions it will perform. Also, it will raise an
AssertionError if there are duplicates in either the collected or the renamed
files.

The script expects:
- an existing folder "testv_out" to which it will copy the collected files
- folders for each experiment that contain the proc_output_* folders from the
  stripped-down run of the processing scripts. If one just copies the folders
  from experiments/selection, that will work.

All work happens in main(), which only runs when the file is executed as a
script; importing the module has no filesystem side effects.
"""

import shutil
from collections import Counter
from pathlib import Path

HERE = Path(__file__).parent
OUTPUT_FOLDER = HERE.joinpath("testv_out")

EXPERIMENTS_P800 = [f"P800-{i}" for i in range(1, 10)]
EXPERIMENTS_BS1534 = [f"BS1534-{i}{x}" for i in range(1, 8) for x in ["a", "b"]]
EXPERIMENTS = EXPERIMENTS_P800 + EXPERIMENTS_BS1534
N_ITEMS_MUSHRA = 16

# Experiment -> folder (inside a category folder) holding "error_pattern.192".
IN_FOL_FOR_PLC = {
    "P800-1": "tmp_c25",
    "P800-3": "tmp_c24",
    "P800-4": "tmp_c24",
    "P800-6": "tmp_c24",
    "P800-7": "tmp_c24",
    "P800-8": "tmp_c25",
}

# Experiment -> folder to collect the ".cod_fmt.wav" files from. Experiments
# not listed here are collected from "preprocessing_2" with suffix ".wav".
IN_FOL_FOR_FMT_CHANGE = {
    "P800-8": "tmp_c25",
    "P800-9": "tmp_c24",
    "BS1534-4a": "tmp_c06",
    "BS1534-4b": "tmp_c06",
    "BS1534-7a": "tmp_c07",
    "BS1534-7b": "tmp_c07",
}

# Global switch for dry-run
TEST_MODE = False

# Bookkeeping for the dry-run: every source and target path that would be used.
files_created = []
files_copied = []

# NOTE(review): this replaces the full experiment list built above, so only
# these two experiments are processed. It looks like a leftover restriction;
# confirm before removing it.
EXPERIMENTS = ["BS1534-4a", "BS1534-4b"]


def get_md_suffix_for_exp(exp):
    """Return the metadata file suffix for experiment *exp*.

    Returns ".csv" or ".met" for the experiments that have metadata files and
    the placeholder "NONE" (which matches no file suffix) for all others.
    """
    if exp in ("P800-6", "P800-7", "BS1534-6a", "BS1534-6b"):
        return ".csv"
    if exp in ("P800-8", "P800-9", "BS1534-7a", "BS1534-7b"):
        return ".met"
    return "NONE"


def _audio_files(folder, collection_suffix):
    """Return the files in *folder* whose name ends with *collection_suffix*."""
    # Sorted because iterdir() yields entries in arbitrary order; this keeps
    # the printed log reproducible between runs.
    return sorted(f for f in folder.iterdir() if f.name.endswith(collection_suffix))


def _metadata_files(folder, md_suffix, collection_suffix):
    """Return the metadata files in *folder* belonging to collected audio files."""
    return sorted(
        f
        for f in folder.iterdir()
        if f.suffix == md_suffix and collection_suffix in f.name
    )


def _metadata_out_path(audio_out, md_in, md_suffix):
    """Build the target path of metadata file *md_in* from its renamed audio file."""
    if md_suffix == ".csv":
        # Keep the last two dot-separated suffixes of the metadata file name.
        tail = f"{Path(md_in.stem).suffix}{md_in.suffix}"
    else:
        tail = md_in.suffix
    return Path(str(audio_out) + tail)


def _p800_pairs(exp, of, cat, testset, in_fol, collection_suffix, md_suffix):
    """Return (source, target) pairs for one category of a P800 output folder."""
    input_folder = of.joinpath(cat).joinpath(in_fol)

    # P800 tests all have one concatenated input file per category
    audio = _audio_files(input_folder, collection_suffix)
    assert len(audio) == 1, (
        f"expected exactly one '*{collection_suffix}' file in {input_folder}, "
        f"found {len(audio)}"
    )
    audio_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-input.wav")
    pairs = [(audio[0], audio_out)]

    # collect metadata files
    pairs.extend(
        (md_in, _metadata_out_path(audio_out, md_in, md_suffix))
        for md_in in _metadata_files(input_folder, md_suffix, collection_suffix)
    )

    # collect error pattern file
    if exp in IN_FOL_FOR_PLC:
        ep_file_in = of.joinpath(cat).joinpath(IN_FOL_FOR_PLC[exp]).joinpath(
            "error_pattern.192"
        )
        ep_file_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-ep.192")
        pairs.append((ep_file_in, ep_file_out))

    return pairs


def _bs1534_pairs(exp, of, cat, testset, in_fol, collection_suffix, md_suffix):
    """Return (source, target) pairs for one category of a BS1534 output folder."""
    # *cat* is either "" (no category folder) or a name with a trailing "-";
    # stripping the last character gives the folder name in both cases.
    input_folder = of.joinpath(cat[:-1]).joinpath(in_fol)

    audio_pairs = []
    for audio_in in _audio_files(input_folder, collection_suffix):
        # The item number is taken from the last two characters in front of
        # the first "." of the file name.
        item = int(audio_in.name.split(".")[0][-2:])
        audio_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}{testset}-input-{item}.wav")
        audio_pairs.append((audio_in, audio_out))

    all_md_files = _metadata_files(input_folder, md_suffix, collection_suffix)
    md_pairs = [
        (md_in, _metadata_out_path(audio_out, md_in, md_suffix))
        for audio_in, audio_out in audio_pairs
        for md_in in all_md_files
        if md_in.name.startswith(audio_in.name)
    ]
    return audio_pairs + md_pairs


def _copy_or_check(pairs):
    """Print each action, then copy it, or in dry-run mode only check and record it."""
    for f_in, f_out in pairs:
        print(f"{f_in} -> {f_out}")
        if TEST_MODE:
            assert f_in.exists(), f"missing input file: {f_in}"
            files_created.append(f_out)
            files_copied.append(f_in)
        else:
            shutil.copyfile(f_in, f_out)


def _assert_unique(paths, what):
    """Raise AssertionError naming every path that occurs more than once."""
    duplicates = [p for p, count in Counter(paths).items() if count > 1]
    assert not duplicates, f"duplicate {what} files: {duplicates}"


def main():
    """Collect and rename (or, in dry-run mode, just check) all test vectors."""
    for exp in EXPERIMENTS:
        base_path = HERE.joinpath(exp)
        output_folders = sorted(
            p for p in base_path.iterdir() if p.name.startswith("proc_output")
        )

        md_suffix = get_md_suffix_for_exp(exp)
        in_fol = IN_FOL_FOR_FMT_CHANGE.get(exp, "preprocessing_2")
        collection_suffix = ".cod_fmt.wav" if exp in IN_FOL_FOR_FMT_CHANGE else ".wav"

        if exp.startswith("P800"):
            categories = [f"cat{i}" for i in range(1, 7)]
            build_pairs = _p800_pairs
        elif exp.startswith("BS1534"):
            # no categories here, but 16 separate files, no concatenation
            # EXCEPT: for -7a and -7b, there are two categories (FOA and HOA2)
            if exp in ("BS1534-7a", "BS1534-7b"):
                categories = ["FOA-", "HOA2-"]
            else:
                categories = [""]
            build_pairs = _bs1534_pairs
        else:
            continue

        for of in output_folders:
            # The test set id is the last character of the folder name.
            testset = of.name[-1]
            for cat in categories:
                _copy_or_check(
                    build_pairs(
                        exp, of, cat, testset, in_fol, collection_suffix, md_suffix
                    )
                )

    if TEST_MODE:
        _assert_unique(files_created, "output")
        _assert_unique(files_copied, "input")


if __name__ == "__main__":
    main()
"""
Script for collecting and renaming the files from a processing script run that
are intended to use in testing bitexactness against selection test binaries.

Usage:
    python3 rename_and_collect_testvectors.py

There is a variable "TEST_MODE" below. Set it to True to do a dry-run to detect
problems/missing things before actually copying the files. The dry-run version
will not copy any files, but just check that all expected files are present and
print out the renaming/copying actions it will perform. Also, it will raise an
AssertionError if there are duplicates in either the collected or the renamed
files.

The script expects:
- an existing folder "testv_out" to which it will copy the collected files
- folders for each experiment that contain the proc_output_* folders from the
  stripped-down run of the processing scripts. If one just copies the folders
  from experiments/selection, that will work.

All work happens in main(), which only runs when the file is executed as a
script; importing the module has no filesystem side effects.
"""

import shutil
from collections import Counter
from pathlib import Path

HERE = Path(__file__).parent
OUTPUT_FOLDER = HERE.joinpath("testv_out")

EXPERIMENTS_P800 = [f"P800-{i}" for i in range(1, 10)]
EXPERIMENTS_BS1534 = [f"BS1534-{i}{x}" for i in range(1, 8) for x in ["a", "b"]]
EXPERIMENTS = EXPERIMENTS_P800 + EXPERIMENTS_BS1534
N_ITEMS_MUSHRA = 16

# Experiment -> folder (inside a category folder) holding "error_pattern.192".
IN_FOL_FOR_PLC = {
    "P800-1": "tmp_c25",
    "P800-3": "tmp_c24",
    "P800-4": "tmp_c24",
    "P800-6": "tmp_c24",
    "P800-7": "tmp_c24",
    "P800-8": "tmp_c25",
}

# Experiment -> folder to collect the ".cod_fmt.wav" files from. Experiments
# not listed here are collected from "preprocessing_2" with suffix ".wav".
IN_FOL_FOR_FMT_CHANGE = {
    "P800-8": "tmp_c25",
    "P800-9": "tmp_c24",
    "BS1534-4a": "tmp_c06",
    "BS1534-4b": "tmp_c06",
    "BS1534-7a": "tmp_c07",
    "BS1534-7b": "tmp_c07",
}

# Global switch for dry-run
TEST_MODE = False

# Bookkeeping for the dry-run: every source and target path that would be used.
files_created = []
files_copied = []

# NOTE(review): this replaces the full experiment list built above, so only
# these two experiments are processed. It looks like a leftover restriction;
# confirm before removing it.
EXPERIMENTS = ["BS1534-4a", "BS1534-4b"]


def get_md_suffix_for_exp(exp):
    """Return the metadata file suffix for experiment *exp*.

    Returns ".csv" or ".met" for the experiments that have metadata files and
    the placeholder "NONE" (which matches no file suffix) for all others.
    """
    if exp in ("P800-6", "P800-7", "BS1534-6a", "BS1534-6b"):
        return ".csv"
    if exp in ("P800-8", "P800-9", "BS1534-7a", "BS1534-7b"):
        return ".met"
    return "NONE"


def _audio_files(folder, collection_suffix):
    """Return the files in *folder* whose name ends with *collection_suffix*."""
    # Sorted because iterdir() yields entries in arbitrary order; this keeps
    # the printed log reproducible between runs.
    return sorted(f for f in folder.iterdir() if f.name.endswith(collection_suffix))


def _metadata_files(folder, md_suffix, collection_suffix):
    """Return the metadata files in *folder* belonging to collected audio files."""
    return sorted(
        f
        for f in folder.iterdir()
        if f.suffix == md_suffix and collection_suffix in f.name
    )


def _metadata_out_path(audio_out, md_in, md_suffix):
    """Build the target path of metadata file *md_in* from its renamed audio file."""
    if md_suffix == ".csv":
        # Keep the last two dot-separated suffixes of the metadata file name.
        tail = f"{Path(md_in.stem).suffix}{md_in.suffix}"
    else:
        tail = md_in.suffix
    return Path(str(audio_out) + tail)


def _p800_pairs(exp, of, cat, testset, in_fol, collection_suffix, md_suffix):
    """Return (source, target) pairs for one category of a P800 output folder."""
    input_folder = of.joinpath(cat).joinpath(in_fol)

    # P800 tests all have one concatenated input file per category
    audio = _audio_files(input_folder, collection_suffix)
    assert len(audio) == 1, (
        f"expected exactly one '*{collection_suffix}' file in {input_folder}, "
        f"found {len(audio)}"
    )
    audio_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-input.wav")
    pairs = [(audio[0], audio_out)]

    # collect metadata files
    pairs.extend(
        (md_in, _metadata_out_path(audio_out, md_in, md_suffix))
        for md_in in _metadata_files(input_folder, md_suffix, collection_suffix)
    )

    # collect error pattern file
    if exp in IN_FOL_FOR_PLC:
        ep_file_in = of.joinpath(cat).joinpath(IN_FOL_FOR_PLC[exp]).joinpath(
            "error_pattern.192"
        )
        ep_file_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-ep.192")
        pairs.append((ep_file_in, ep_file_out))

    return pairs


def _bs1534_pairs(exp, of, cat, testset, in_fol, collection_suffix, md_suffix):
    """Return (source, target) pairs for one category of a BS1534 output folder."""
    # *cat* is either "" (no category folder) or a name with a trailing "-";
    # stripping the last character gives the folder name in both cases.
    input_folder = of.joinpath(cat[:-1]).joinpath(in_fol)

    audio_pairs = []
    for audio_in in _audio_files(input_folder, collection_suffix):
        # The item number is taken from the last two characters in front of
        # the first "." of the file name.
        item = int(audio_in.name.split(".")[0][-2:])
        audio_out = OUTPUT_FOLDER.joinpath(f"{exp}-{cat}{testset}-input-{item}.wav")
        audio_pairs.append((audio_in, audio_out))

    all_md_files = _metadata_files(input_folder, md_suffix, collection_suffix)
    md_pairs = [
        (md_in, _metadata_out_path(audio_out, md_in, md_suffix))
        for audio_in, audio_out in audio_pairs
        for md_in in all_md_files
        if md_in.name.startswith(audio_in.name)
    ]
    return audio_pairs + md_pairs


def _copy_or_check(pairs):
    """Print each action, then copy it, or in dry-run mode only check and record it."""
    for f_in, f_out in pairs:
        print(f"{f_in} -> {f_out}")
        if TEST_MODE:
            assert f_in.exists(), f"missing input file: {f_in}"
            files_created.append(f_out)
            files_copied.append(f_in)
        else:
            shutil.copyfile(f_in, f_out)


def _assert_unique(paths, what):
    """Raise AssertionError naming every path that occurs more than once."""
    duplicates = [p for p, count in Counter(paths).items() if count > 1]
    assert not duplicates, f"duplicate {what} files: {duplicates}"


def main():
    """Collect and rename (or, in dry-run mode, just check) all test vectors."""
    for exp in EXPERIMENTS:
        base_path = HERE.joinpath(exp)
        output_folders = sorted(
            p for p in base_path.iterdir() if p.name.startswith("proc_output")
        )

        md_suffix = get_md_suffix_for_exp(exp)
        in_fol = IN_FOL_FOR_FMT_CHANGE.get(exp, "preprocessing_2")
        collection_suffix = ".cod_fmt.wav" if exp in IN_FOL_FOR_FMT_CHANGE else ".wav"

        if exp.startswith("P800"):
            categories = [f"cat{i}" for i in range(1, 7)]
            build_pairs = _p800_pairs
        elif exp.startswith("BS1534"):
            # no categories here, but 16 separate files, no concatenation
            # EXCEPT: for -7a and -7b, there are two categories (FOA and HOA2)
            if exp in ("BS1534-7a", "BS1534-7b"):
                categories = ["FOA-", "HOA2-"]
            else:
                categories = [""]
            build_pairs = _bs1534_pairs
        else:
            continue

        for of in output_folders:
            # The test set id is the last character of the folder name.
            testset = of.name[-1]
            for cat in categories:
                _copy_or_check(
                    build_pairs(
                        exp, of, cat, testset, in_fol, collection_suffix, md_suffix
                    )
                )

    if TEST_MODE:
        _assert_unique(files_created, "output")
        _assert_unique(files_copied, "input")


if __name__ == "__main__":
    main()