xym a révisé ce gist. Aller à la révision
1 file changed, 61 insertions
dup.py(fichier créé)
| @@ -0,0 +1,61 @@ | |||
| 1 | + | import os | |
| 2 | + | from collections import defaultdict | |
| 3 | + | ||
def get_files_info(folder_paths):
    """Collect the name, path and size of every file under the given folders.

    Each folder is walked recursively with ``os.walk``. A file that is
    reached more than once (the same folder listed twice, or one listed
    folder nested inside another) is recorded only once, so it is never
    reported later as a duplicate of itself. Files whose size cannot be
    read are skipped instead of aborting the scan.

    Args:
        folder_paths: Iterable of directory paths to scan.

    Returns:
        A ``defaultdict(list)`` with two keys:
        ``'names'`` -> list of ``(file_name, file_path, file_size)`` tuples,
        ``'sizes'`` -> list of ``(file_size, file_path)`` tuples.
    """
    files_info = defaultdict(list)
    seen_paths = set()
    for folder_path in folder_paths:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Normalised absolute path identifies a file regardless of
                # which of the (possibly overlapping) roots led to it.
                identity = os.path.normcase(os.path.abspath(file_path))
                if identity in seen_paths:
                    continue
                seen_paths.add(identity)
                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Broken symlink, file removed mid-scan, or no permission.
                    continue
                files_info['names'].append((file, file_path, file_size))
                files_info['sizes'].append((file_size, file_path))
    return files_info
| 14 | + | ||
def find_duplicates(files_info):
    """Group files that share a (shortened) name or an identical size.

    The name comparison uses the part of the file name that precedes the
    first ``'['``, stripped of surrounding whitespace. When nothing precedes
    the ``'['`` (the name starts with it), the full name is used instead, so
    unrelated files do not all collapse into one empty-named group.

    Args:
        files_info: Mapping with a ``'names'`` list of
            ``(file_name, file_path, file_size)`` tuples and a ``'sizes'``
            list of ``(file_size, file_path)`` tuples.

    Returns:
        A ``(duplicates_by_name, duplicates_by_size)`` tuple of
        ``defaultdict(list)`` objects. The first maps a short name to a list
        of ``(file_path, file_size)`` tuples; the second maps a size to a
        list of file paths. Only groups with more than one entry are kept.
    """
    duplicates_by_name = defaultdict(list)
    duplicates_by_size = defaultdict(list)

    # Look up by name.
    name_to_paths = defaultdict(list)
    for name, path, size in files_info['names']:
        # An empty prefix would lump every "[...]..." file together.
        short_name = name.split('[')[0].strip() or name
        name_to_paths[short_name].append((path, size))
    for name, paths in name_to_paths.items():
        if len(paths) > 1:
            duplicates_by_name[name] = paths

    # Look up by size.
    size_to_paths = defaultdict(list)
    for size, path in files_info['sizes']:
        size_to_paths[size].append(path)
    for size, paths in size_to_paths.items():
        if len(paths) > 1:
            duplicates_by_size[size] = paths

    return duplicates_by_name, duplicates_by_size
| 37 | + | ||
def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name):
    """Write both duplicate reports to a UTF-8 text file.

    The file starts with a "Duplicates by Name:" section followed by a
    "Duplicates by Size:" section. Every entry is one line holding the size
    in KB (two decimals) and the file path; each group ends with a blank line.

    Args:
        duplicates_by_name: Mapping of name -> list of ``(path, size)`` tuples.
        duplicates_by_size: Mapping of size -> list of paths.
        file_name: Path of the report file; it is overwritten if it exists.
    """
    def entry(size_bytes, location):
        # One report line: size converted from bytes to KB, then the path.
        return f"{size_bytes / 1024:.2f} KB {location}\n"

    with open(file_name, 'w', encoding='utf-8') as out:
        out.write("Duplicates by Name:\n")
        for group in duplicates_by_name.values():
            for location, size_bytes in group:
                out.write(entry(size_bytes, location))
            out.write("\n")  # blank line between groups of duplicates

        out.write("\nDuplicates by Size:\n")
        for size_bytes, locations in duplicates_by_size.items():
            for location in locations:
                out.write(entry(size_bytes, location))
            out.write("\n")  # blank line between groups of duplicates
| 53 | + | ||
def main(folder_paths, output_file='dup.txt'):
    """Scan the folders, find duplicates and write the report.

    Args:
        folder_paths: Iterable of directory paths passed to
            ``get_files_info``.
        output_file: Path of the report written by
            ``write_duplicates_to_file``. Defaults to ``'dup.txt'``.
    """
    files_info = get_files_info(folder_paths)
    duplicates_by_name, duplicates_by_size = find_duplicates(files_info)
    write_duplicates_to_file(duplicates_by_name, duplicates_by_size, output_file)
| 58 | + | ||
if __name__ == "__main__":
    # Folders to scan when run as a script. The four entries are identical
    # as written; presumably placeholders to be replaced with real folders.
    folder_paths = [
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
    ]
    main(folder_paths)
Plus récent
Plus ancien