Naposledy aktivní 1727888928

xym's Avatar xym revidoval tento gist 1727888927. Přejít na revizi

1 file changed, 61 insertions

dup.py(vytvořil soubor)

@@ -0,0 +1,61 @@
1 + import os
2 + from collections import defaultdict
3 +
def get_files_info(folder_paths):
    """Walk the given folders and record the name, path and size of each file.

    Each file is recorded at most once, even when ``folder_paths`` contains
    the same folder several times or folders nested inside one another;
    otherwise a file would later be reported as a duplicate of itself.
    Entries whose size cannot be read (for example broken symlinks or files
    removed during the scan) are skipped instead of aborting the whole scan.

    Args:
        folder_paths: Iterable of directory paths to scan recursively.

    Returns:
        A ``defaultdict(list)`` with two keys:
        ``'names'`` -> list of ``(file_name, file_path, file_size)`` tuples,
        ``'sizes'`` -> list of ``(file_size, file_path)`` tuples.
        Sizes are the byte counts returned by ``os.path.getsize``.
    """
    files_info = defaultdict(list)
    seen_paths = set()
    for folder_path in folder_paths:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)

                # Normalize so the same file reached through repeated or
                # overlapping roots is only recorded once.
                path_key = os.path.normcase(os.path.abspath(file_path))
                if path_key in seen_paths:
                    continue
                seen_paths.add(path_key)

                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Unreadable entry (broken symlink, permissions, race
                    # with deletion): leave it out of the report.
                    continue

                files_info['names'].append((file, file_path, file_size))
                files_info['sizes'].append((file_size, file_path))
    return files_info
14 +
def find_duplicates(files_info):
    """Group the scanned files into candidate duplicates by name and by size.

    Args:
        files_info: Mapping with a ``'names'`` list of
            ``(file_name, file_path, file_size)`` tuples and a ``'sizes'``
            list of ``(file_size, file_path)`` tuples.

    Returns:
        A ``(duplicates_by_name, duplicates_by_size)`` pair of
        ``defaultdict(list)`` objects. The first maps a shortened file name
        to a list of ``(file_path, file_size)`` tuples; the second maps a
        file size to a list of file paths. Only groups with more than one
        member are included.
    """
    # Look up by name: the key is the text before the first '[' with
    # surrounding whitespace removed.
    grouped_by_name = {}
    for file_name, file_path, file_size in files_info['names']:
        base_name = file_name.partition('[')[0].strip()
        grouped_by_name.setdefault(base_name, []).append((file_path, file_size))

    # Look up by size.
    grouped_by_size = {}
    for file_size, file_path in files_info['sizes']:
        grouped_by_size.setdefault(file_size, []).append(file_path)

    # Keep only the groups that actually contain more than one file.
    duplicates_by_name = defaultdict(list)
    duplicates_by_name.update(
        (key, members) for key, members in grouped_by_name.items()
        if len(members) > 1
    )

    duplicates_by_size = defaultdict(list)
    duplicates_by_size.update(
        (key, members) for key, members in grouped_by_size.items()
        if len(members) > 1
    )

    return duplicates_by_name, duplicates_by_size
37 +
def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name):
    """Write both duplicate groupings to a UTF-8 text report.

    Args:
        duplicates_by_name: Mapping of name -> list of
            ``(file_path, file_size)`` tuples.
        duplicates_by_size: Mapping of file size -> list of file paths.
        file_name: Path of the report file; it is opened in ``'w'`` mode, so
            existing content is overwritten.

    Each entry is written as ``"<size in KB, two decimals> KB <path>"`` and
    every group is followed by an empty line.
    """
    with open(file_name, 'w', encoding='utf-8') as report:
        report.write("Duplicates by Name:\n")
        for entries in duplicates_by_name.values():
            report.writelines(
                f"{byte_count / 1024:.2f} KB {location}\n"
                for location, byte_count in entries
            )
            report.write("\n")  # blank line between groups of duplicates

        report.write("\nDuplicates by Size:\n")
        for byte_count, locations in duplicates_by_size.items():
            report.writelines(
                f"{byte_count / 1024:.2f} KB {location}\n"
                for location in locations
            )
            report.write("\n")  # blank line between groups of duplicates
53 +
def main(folder_paths, output_file='dup.txt'):
    """Scan the folders, find candidate duplicates and write the report.

    Args:
        folder_paths: Iterable of directory paths passed to
            ``get_files_info``.
        output_file: Path of the report passed to
            ``write_duplicates_to_file``. Defaults to ``'dup.txt'``, the
            previously hard-coded location, so existing callers are
            unaffected.
    """
    files_info = get_files_info(folder_paths)
    duplicates_by_name, duplicates_by_size = find_duplicates(files_info)
    write_duplicates_to_file(duplicates_by_name, duplicates_by_size, output_file)
58 +
if __name__ == "__main__":
    # Folders to scan when the file is run as a script.
    folder_paths = [
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
        r'Z:\X\X',
    ]
    main(folder_paths)
Novější Starší