Commit 3f7e0c94 authored by Jan Kiene's avatar Jan Kiene
Browse files

add script for collecting and renaming the testvector files

parent 3ca8269b
Loading
Loading
Loading
Loading
+189 −0
Original line number Diff line number Diff line
"""
Script for collecting and renaming the files from a processing script run that
are intended for use in testing bitexactness against selection test binaries.

Usage: python3 rename_and_collect_testvectors.py

There is a variable "TEST_MODE" below. Set it to True to do a dry-run to detect
problems/missing things before actually copying the files. The dry-run version will
not copy any files, but just check that all expected files are present and print out
the renaming/copying actions it will perform. Also, it will raise an AssertionError
if there are duplicates among either the collected or the renamed files.

The script expects:
    - an existing folder "testv_out" to which it will copy the collected files
    - folders for each experiment that contain the proc_output_* folders from the stripped-down
      run of the processing scripts. If one just copies the folders from experiments/selection,
      that will work.
"""

import shutil
from pathlib import Path

# Directory containing this script; the experiment folders and the output
# folder are resolved relative to it.
HERE = Path(__file__).parent
# Folder that receives all collected and renamed files.
OUTPUT_FOLDER = HERE / "testv_out"

# Experiment names: P800-1 .. P800-9 and BS1534-1a, BS1534-1b .. BS1534-7b.
EXPERIMENTS_P800 = ["P800-" + str(num) for num in range(1, 10)]
EXPERIMENTS_BS1534 = [
    "BS1534-" + str(num) + variant for num in range(1, 8) for variant in ("a", "b")
]
EXPERIMENTS = [*EXPERIMENTS_P800, *EXPERIMENTS_BS1534]

# NOTE(review): presumably the number of items per MUSHRA (BS1534) test; it is
# not referenced by the code visible in this file.
N_ITEMS_MUSHRA = 16

# Experiment -> sub-folder from which the error pattern file is collected.
IN_FOL_FOR_PLC = dict(
    [
        ("P800-1", "tmp_c25"),
        ("P800-3", "tmp_c24"),
        ("P800-4", "tmp_c24"),
        ("P800-6", "tmp_c24"),
        ("P800-7", "tmp_c24"),
        ("P800-8", "tmp_c25"),
    ]
)

# Experiment -> sub-folder holding the input files. Experiments listed here are
# collected with the ".cod_fmt.wav" suffix instead of plain ".wav".
IN_FOL_FOR_FMT_CHANGE = dict(
    [
        ("P800-8", "tmp_c25"),
        ("P800-9", "tmp_c24"),
        ("BS1534-4a", "tmp_c06"),
        ("BS1534-4b", "tmp_c06"),
        ("BS1534-7a", "tmp_c07"),
        ("BS1534-7b", "tmp_c07"),
    ]
)

# Global dry-run switch: when True, nothing is copied; files are only checked
# for existence and the planned actions are recorded.
TEST_MODE = False


def get_md_suffix_for_exp(exp):
    """Return the metadata file suffix that belongs to experiment *exp*.

    Returns ".csv" for P800-6, P800-7, BS1534-6a and BS1534-6b, ".met" for
    P800-8, P800-9, BS1534-7a and BS1534-7b, and the placeholder string
    "NONE" for every other experiment.
    """
    if exp in ("P800-6", "P800-7", "BS1534-6a", "BS1534-6b"):
        return ".csv"
    if exp in ("P800-8", "P800-9", "BS1534-7a", "BS1534-7b"):
        return ".met"
    # The placeholder never equals a real file suffix.
    return "NONE"


# Dry-run bookkeeping: target paths that would be created and source paths that
# would be copied. Only filled when TEST_MODE is set.
files_created = list()
files_copied = list()


def _md_output_names(base_name, md_files_in, md_suffix):
    """Return the output paths for the metadata files in *md_files_in*.

    Each output name is *base_name* (the output audio file path as a string)
    followed by the metadata file's suffix. For ".csv" metadata the suffix in
    front of ".csv" is kept as well, e.g. an input named "a.wav.0.csv"
    contributes ".0.csv".
    """
    if md_suffix == ".csv":
        return [
            Path(base_name + f"{Path(f.stem).suffix}{f.suffix}") for f in md_files_in
        ]
    return [Path(base_name + f"{f.suffix}") for f in md_files_in]


def _copy_or_check(input_files, out_files):
    """Print and perform (or, in TEST_MODE, only verify and record) each copy.

    Raises AssertionError in TEST_MODE if a source file does not exist.
    """
    for f_in, f_out in zip(input_files, out_files):
        print(f"{f_in} -> {f_out}")
        if TEST_MODE:
            assert f_in.exists()
            files_created.append(f_out)
            files_copied.append(f_in)
        else:
            shutil.copyfile(f_in, f_out)


# Process every experiment listed in EXPERIMENTS.
for exp in EXPERIMENTS:
    base_path = HERE.joinpath(exp)
    # Only directories can be processing-script output folders; sorting keeps
    # the printed log reproducible between runs.
    output_folders = sorted(
        p
        for p in base_path.iterdir()
        if p.is_dir() and p.name.startswith("proc_output")
    )
    md_suffix = get_md_suffix_for_exp(exp)
    in_fol = IN_FOL_FOR_FMT_CHANGE.get(exp, "preprocessing_2")
    collection_suffix = ".cod_fmt.wav" if exp in IN_FOL_FOR_FMT_CHANGE else ".wav"

    for of in output_folders:
        # The last character of the folder name identifies the test set.
        testset = of.name[-1]
        if exp.startswith("P800"):
            # P800 tests all have one concatenated input file per category
            categories = [f"cat{i}" for i in range(1, 7)]
            for cat in categories:
                input_folder = of.joinpath(cat).joinpath(in_fol)
                input_files = sorted(
                    f
                    for f in input_folder.iterdir()
                    if f.name.endswith(collection_suffix)
                )
                assert (
                    len(input_files) == 1
                ), f"expected exactly one input file in {input_folder}"
                out_files = [OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-input.wav")]

                # collect metadata files
                md_files_in = sorted(
                    f
                    for f in input_folder.iterdir()
                    if f.suffix == md_suffix and collection_suffix in f.name
                )
                md_files_out = _md_output_names(
                    str(out_files[0]), md_files_in, md_suffix
                )

                input_files.extend(md_files_in)
                out_files.extend(md_files_out)

                # collect error pattern file
                if exp in IN_FOL_FOR_PLC:
                    input_folder_ep = of.joinpath(cat).joinpath(IN_FOL_FOR_PLC[exp])
                    input_files.append(input_folder_ep.joinpath("error_pattern.192"))
                    out_files.append(
                        OUTPUT_FOLDER.joinpath(f"{exp}-{cat}-{testset}-ep.192")
                    )

                _copy_or_check(input_files, out_files)
        elif exp.startswith("BS1534"):
            # no categories here, but 16 separate files, no concatenation
            # EXCEPT: for -7a and -7b, there are two categories (FOA and HOA2)
            categories = [""]
            if exp == "BS1534-7a" or exp == "BS1534-7b":
                categories = ["FOA-", "HOA2-"]

            for cat in categories:
                # Drop the trailing "-" to get the folder name; the empty
                # category maps to the output folder itself.
                cat_folder = cat[:-1]
                input_folder = of.joinpath(cat_folder).joinpath(in_fol)
                audio_files_in = sorted(
                    f
                    for f in input_folder.iterdir()
                    if f.name.endswith(collection_suffix)
                )
                # The item number is parsed from the last two characters in
                # front of the first "." of the input file name.
                audio_files_out = [
                    OUTPUT_FOLDER.joinpath(
                        f"{exp}-{cat}{testset}-input-{int(f.name.split('.')[0][-2:])}.wav"
                    )
                    for f in audio_files_in
                ]

                all_md_files = sorted(
                    f
                    for f in input_folder.iterdir()
                    if f.suffix == md_suffix and collection_suffix in f.name
                )

                # Audio files first, then the metadata files of each audio
                # file in turn.
                input_files = list(audio_files_in)
                out_files = list(audio_files_out)
                for f_in, f_out in zip(audio_files_in, audio_files_out):
                    md_files_in = [
                        f for f in all_md_files if f.name.startswith(f_in.name)
                    ]
                    input_files.extend(md_files_in)
                    out_files.extend(
                        _md_output_names(str(f_out), md_files_in, md_suffix)
                    )

                _copy_or_check(input_files, out_files)

if TEST_MODE:
    # Dry-run sanity check: no target file may be produced twice and no source
    # file may be collected twice. The offending paths are listed in the
    # AssertionError message.
    from collections import Counter

    duplicate_targets = [
        str(path) for path, count in Counter(files_created).items() if count > 1
    ]
    assert not duplicate_targets, f"duplicate output files: {duplicate_targets}"

    duplicate_sources = [
        str(path) for path, count in Counter(files_copied).items() if count > 1
    ]
    assert not duplicate_sources, f"duplicate input files: {duplicate_sources}"