Loading other/get_md5.py +8 −8 Original line number Diff line number Diff line Loading @@ -33,21 +33,21 @@ from hashlib import md5 from collections import Counter def get_hash_line_for_file(file: Path, experiment_dir: Path): def get_hash_line_for_file(file: Path, output_dir: Path): with open(file, "rb") as f: hash = md5(f.read()).hexdigest() filepath = file.relative_to(experiment_dir) filepath = file.relative_to(output_dir) # always print forward slashes even on windows to be able to diff the result files hashline = f"{str(filepath.as_posix())} {hash}\n" return hashline def main(experiment_dir, out_file): wav_files = sorted(experiment_dir.glob("proc_output*/**/*c[0-9][0-9].wav")) def main(output_dir, out_file): wav_files = sorted(output_dir.glob("*/**/*c[0-9][0-9].wav")) hashlines = [get_hash_line_for_file(f, experiment_dir) for f in wav_files] hashlines = [get_hash_line_for_file(f, output_dir) for f in wav_files] count = Counter([line.split()[-1] for line in hashlines]) duplicates = [line for line in hashlines if count[line.split()[-1]] != 1] Loading @@ -65,9 +65,9 @@ if __name__ == "__main__": description="Cross-platform script for generating MD5 hashes of output files for the characterization testing experiments." ) parser.add_argument( "experiment_dir", "output_dir", type=Path, help="Directory of the respective experiment (e.g. experiments/characterization/P800-12", help="Path to the output directory of the respective experiment that you want to generate Hashes for. Can be proc_output or proc_final.", ) parser.add_argument( "out_file", type=Path, help="Output text file with filenames and Hashes" Loading @@ -75,4 +75,4 @@ if __name__ == "__main__": args = parser.parse_args() main(args.experiment_dir, args.out_file) main(args.output_dir, args.out_file) Loading
other/get_md5.py +8 −8 Original line number Diff line number Diff line Loading @@ -33,21 +33,21 @@ from hashlib import md5 from collections import Counter def get_hash_line_for_file(file: Path, experiment_dir: Path): def get_hash_line_for_file(file: Path, output_dir: Path): with open(file, "rb") as f: hash = md5(f.read()).hexdigest() filepath = file.relative_to(experiment_dir) filepath = file.relative_to(output_dir) # always print forward slashes even on windows to be able to diff the result files hashline = f"{str(filepath.as_posix())} {hash}\n" return hashline def main(experiment_dir, out_file): wav_files = sorted(experiment_dir.glob("proc_output*/**/*c[0-9][0-9].wav")) def main(output_dir, out_file): wav_files = sorted(output_dir.glob("*/**/*c[0-9][0-9].wav")) hashlines = [get_hash_line_for_file(f, experiment_dir) for f in wav_files] hashlines = [get_hash_line_for_file(f, output_dir) for f in wav_files] count = Counter([line.split()[-1] for line in hashlines]) duplicates = [line for line in hashlines if count[line.split()[-1]] != 1] Loading @@ -65,9 +65,9 @@ if __name__ == "__main__": description="Cross-platform script for generating MD5 hashes of output files for the characterization testing experiments." ) parser.add_argument( "experiment_dir", "output_dir", type=Path, help="Directory of the respective experiment (e.g. experiments/characterization/P800-12", help="Path to the output directory of the respective experiment that you want to generate Hashes for. Can be proc_output or proc_final.", ) parser.add_argument( "out_file", type=Path, help="Output text file with filenames and Hashes" Loading @@ -75,4 +75,4 @@ if __name__ == "__main__": args = parser.parse_args() main(args.experiment_dir, args.out_file) main(args.output_dir, args.out_file)