def get_duplicates(hashlines: list) -> dict:
    """Find hashes that occur in more than one hash line.

    Each element of *hashlines* is a line whose last whitespace-separated
    token is the file's hash digest (the preceding text is the file path).

    Args:
        hashlines: Lines of the form "<file path> <digest>".

    Returns:
        A dict mapping each digest that appears in two or more lines to the
        list of file paths (digest stripped off) that share it. Empty dict
        when there are no duplicates.
    """
    # Count how many lines end in each digest.  Use distinct names so the
    # loop does not rebind the mapping it iterates (the original
    # `for hash, count in count.items()` shadowed both `count` and the
    # builtin `hash`).
    digest_counts = Counter(line.split()[-1] for line in hashlines)
    duplicates: dict = {}
    for digest, occurrences in digest_counts.items():
        if occurrences == 1:
            continue
        # Match on the final token only: a digest that happens to appear
        # inside a file name must not produce a false positive, and the
        # path is recovered by cutting at the digest's last occurrence
        # rather than replacing every occurrence of it.
        duplicates[digest] = [
            line[: line.rfind(digest)].strip()
            for line in hashlines
            if line.split()[-1] == digest
        ]
    return duplicates
def main(output_dir, out_file):
    """Hash channel wav files under *output_dir* and write the hash lines.

    Globs for files matching ``*/**/*c[0-9][0-9].wav``, builds one hash line
    per file via ``get_hash_line_for_file``, prints a report of any digests
    shared by multiple files, and writes all hash lines to *out_file*.

    Args:
        output_dir: Directory (a ``pathlib.Path``) to search for wav files.
        out_file: Path of the text file the hash lines are written to.
    """
    wav_files = sorted(output_dir.glob("*/**/*c[0-9][0-9].wav"))
    hashlines = [get_hash_line_for_file(f, output_dir) for f in wav_files]
    duplicates = get_duplicates(hashlines)
    # Truthiness check replaces `len(duplicates) != 0`.
    if duplicates:
        print(
            # Typo fixed: "multipe" -> "multiple".
            "Found duplicate hashes! The following hashes were found in multiple files:"
        )
        # `digest` instead of `hash` so the builtin is not shadowed.
        for digest, files in duplicates.items():
            print(f"{digest} - {', '.join(files)}")
    with open(out_file, "w") as f:
        f.writelines(hashlines)