# Last active 1727888928
#
# dup.py
import os
from collections import defaultdict
3
def get_files_info(folder_paths):
    """Walk the given folders and record each file's name, path and size.

    Every file is recorded at most once, even when the same folder is listed
    several times or one listed folder lies inside another. Files whose size
    cannot be read are skipped.

    Args:
        folder_paths: Iterable of directory paths to scan recursively.

    Returns:
        A ``defaultdict(list)`` with two keys:
        ``'names'`` holds ``(file_name, file_path, file_size)`` tuples and
        ``'sizes'`` holds ``(file_size, file_path)`` tuples. Sizes are the
        byte counts reported by ``os.path.getsize``. Both keys are absent
        when no file was found.
    """
    files_info = defaultdict(list)
    seen_paths = set()
    for folder_path in folder_paths:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Recording the same file twice would make it show up later
                # as a duplicate of itself, so key on a normalized path.
                path_key = os.path.normcase(os.path.abspath(file_path))
                if path_key in seen_paths:
                    continue
                seen_paths.add(path_key)
                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Broken symlink, permission problem, or the file vanished
                    # mid-scan: skip it instead of aborting the whole walk.
                    continue
                files_info['names'].append((file, file_path, file_size))
                files_info['sizes'].append((file_size, file_path))
    return files_info
14
def find_duplicates(files_info):
    """Group files that share a short name or have an identical size.

    Args:
        files_info: Mapping with a ``'names'`` list of
            ``(file_name, file_path, file_size)`` tuples and a ``'sizes'``
            list of ``(file_size, file_path)`` tuples. A missing key is
            treated as an empty list.

    Returns:
        A tuple ``(duplicates_by_name, duplicates_by_size)`` of
        ``defaultdict(list)`` objects. The first maps a short name to a list
        of ``(path, size)`` tuples, the second maps a size to a list of
        paths. Only groups with more than one entry are included.
    """
    duplicates_by_name = defaultdict(list)
    duplicates_by_size = defaultdict(list)

    # Look up duplicates by name.
    name_to_paths = defaultdict(list)
    for name, path, size in files_info.get('names', ()):
        # The text before the first '[' is the comparison key. A name that
        # starts with '[' would yield an empty key and lump unrelated files
        # together, so fall back to the full name in that case.
        short_name = name.split('[')[0].strip() or name
        name_to_paths[short_name].append((path, size))
    for name, paths in name_to_paths.items():
        if len(paths) > 1:
            duplicates_by_name[name] = paths

    # Look up duplicates by size.
    size_to_paths = defaultdict(list)
    for size, path in files_info.get('sizes', ()):
        size_to_paths[size].append(path)
    for size, paths in size_to_paths.items():
        if len(paths) > 1:
            duplicates_by_size[size] = paths

    return duplicates_by_name, duplicates_by_size
37
def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name):
    """Write both duplicate reports to a UTF-8 text file.

    The file starts with a "Duplicates by Name:" section followed by a
    "Duplicates by Size:" section. Each group is written as one
    ``"<size in KB, two decimals> KB <path>"`` line per file and is followed
    by a blank line.

    Args:
        duplicates_by_name: Mapping of short name to a list of
            ``(path, size)`` tuples; the key itself is not written.
        duplicates_by_size: Mapping of size to a list of paths.
        file_name: Path of the report file; it is opened in ``'w'`` mode, so
            existing content is replaced.
    """
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write("Duplicates by Name:\n")
        # Only the grouped entries are printed, so iterate the values.
        for paths in duplicates_by_name.values():
            f.writelines(f"{size / 1024:.2f} KB {path}\n" for path, size in paths)
            f.write("\n")  # blank line between groups of duplicates

        f.write("\nDuplicates by Size:\n")
        for size, paths in duplicates_by_size.items():
            # Every file in the group has this size; convert it once.
            size_kb = size / 1024
            f.writelines(f"{size_kb:.2f} KB {path}\n" for path in paths)
            f.write("\n")  # blank line between groups of duplicates
53
def main(folder_paths, output_file='dup.txt'):
    """Scan folders for duplicate files and write the report to a file.

    Args:
        folder_paths: Iterable of directory paths handed to
            ``get_files_info``.
        output_file: Path of the report file. Defaults to ``'dup.txt'``,
            the name that was previously hard-coded, so existing callers
            are unaffected.
    """
    files_info = get_files_info(folder_paths)
    duplicates_by_name, duplicates_by_size = find_duplicates(files_info)
    write_duplicates_to_file(duplicates_by_name, duplicates_by_size, output_file)
58
if __name__ == "__main__":
    # Directories to scan when the file is run as a script.
    # NOTE(review): all four entries are the same path; they look like
    # placeholders to be replaced with the real folders -- confirm.
    folder_paths = [
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
    ]
    main(folder_paths)
62