import os
from collections import defaultdict


def get_files_info(folder_paths):
    """Walk every folder in *folder_paths* and collect file metadata.

    Returns a dict with two keys:
      'names': list of (file_name, full_path, size_in_bytes)
      'sizes': list of (size_in_bytes, full_path)
    """
    files_info = defaultdict(list)
    for folder_path in folder_paths:
        for root, _, files in os.walk(folder_path):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Broken symlink, permission error, or file removed
                    # mid-scan: skip it instead of aborting the whole walk.
                    continue
                files_info['names'].append((file_name, file_path, file_size))
                files_info['sizes'].append((file_size, file_path))
    return files_info


def find_duplicates(files_info):
    """Group files that share a (normalized) name or an exact byte size.

    Names are normalized by cutting off everything from the first '[' and
    stripping whitespace, so "movie [1080p].mkv" groups under "movie".

    Returns (duplicates_by_name, duplicates_by_size):
      duplicates_by_name: {normalized_name: [(path, size), ...]} with >1 entry
      duplicates_by_size: {size: [path, ...]} with >1 entry
    """
    duplicates_by_name = defaultdict(list)
    duplicates_by_size = defaultdict(list)

    # Group by normalized name.
    name_to_paths = defaultdict(list)
    for name, path, size in files_info['names']:
        short_name = name.split('[')[0].strip()
        name_to_paths[short_name].append((path, size))
    for name, paths in name_to_paths.items():
        if len(paths) > 1:
            duplicates_by_name[name] = paths

    # Group by exact size (potential duplicates only; contents not compared).
    size_to_paths = defaultdict(list)
    for size, path in files_info['sizes']:
        size_to_paths[size].append(path)
    for size, paths in size_to_paths.items():
        if len(paths) > 1:
            duplicates_by_size[size] = paths

    return duplicates_by_name, duplicates_by_size


def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name):
    """Write both duplicate reports to *file_name* (UTF-8 text).

    Each group of duplicates is followed by a blank separator line; sizes
    are printed in KB with two decimals.
    """
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write("Duplicates by Name:\n")
        for name, paths in duplicates_by_name.items():
            for path, size in paths:
                size_kb = size / 1024
                f.write(f"{size_kb:.2f} KB {path}\n")
            f.write("\n")  # blank line between groups of duplicates

        f.write("\nDuplicates by Size:\n")
        for size, paths in duplicates_by_size.items():
            for path in paths:
                size_kb = size / 1024
                f.write(f"{size_kb:.2f} KB {path}\n")
            f.write("\n")  # blank line between groups of duplicates


def main(folder_paths):
    """Scan *folder_paths* and write the duplicate report to 'dup.txt'."""
    files_info = get_files_info(folder_paths)
    duplicates_by_name, duplicates_by_size = find_duplicates(files_info)
    write_duplicates_to_file(duplicates_by_name, duplicates_by_size, 'dup.txt')


if __name__ == "__main__":
    folder_paths = [r'Z:\X\X', r'Z:\X\X', r'Z:\X\X', r'Z:\X\X']
    main(folder_paths)