diff --git a/other/get_md5.py b/other/get_md5.py index e482dd3b3b1f51d71150d3f28a10e0df45c259c7..a5e9c9ed462ab63b674d412dc17b29fe9a88be37 100644 --- a/other/get_md5.py +++ b/other/get_md5.py @@ -33,21 +33,21 @@ from hashlib import md5 from collections import Counter -def get_hash_line_for_file(file: Path, experiment_dir: Path): +def get_hash_line_for_file(file: Path, output_dir: Path): with open(file, "rb") as f: hash = md5(f.read()).hexdigest() - filepath = file.relative_to(experiment_dir) + filepath = file.relative_to(output_dir) # always print forward slashes even on windows to be able to diff the result files hashline = f"{str(filepath.as_posix())} {hash}\n" return hashline -def main(experiment_dir, out_file): - wav_files = sorted(experiment_dir.glob("proc_output*/**/*c[0-9][0-9].wav")) +def main(output_dir, out_file): + wav_files = sorted(output_dir.glob("*/**/*c[0-9][0-9].wav")) - hashlines = [get_hash_line_for_file(f, experiment_dir) for f in wav_files] + hashlines = [get_hash_line_for_file(f, output_dir) for f in wav_files] count = Counter([line.split()[-1] for line in hashlines]) duplicates = [line for line in hashlines if count[line.split()[-1]] != 1] @@ -65,9 +65,9 @@ if __name__ == "__main__": description="Cross-platform script for generating MD5 hashes of output files for the characterization testing experiments." ) parser.add_argument( - "experiment_dir", + "output_dir", type=Path, - help="Directory of the respective experiment (e.g. experiments/characterization/P800-12", + help="Path to the output directory of the respective experiment that you want to generate Hashes for. Can be proc_output or proc_final.", ) parser.add_argument( "out_file", type=Path, help="Output text file with filenames and Hashes" @@ -75,4 +75,4 @@ if __name__ == "__main__": args = parser.parse_args() - main(args.experiment_dir, args.out_file) + main(args.output_dir, args.out_file)