dup.py
· 2.3 KiB · Python
Bruto
import os
from collections import defaultdict
def get_files_info(folder_paths):
    """Walk the given folders and record name, path and size of every file.

    Args:
        folder_paths: Iterable of directory paths that are scanned recursively
            with ``os.walk``.

    Returns:
        A ``defaultdict(list)`` with two keys:
        ``'names'`` -> list of ``(file_name, file_path, file_size)`` tuples,
        ``'sizes'`` -> list of ``(file_size, file_path)`` tuples.
        Each physical path is recorded at most once, even when a root is
        listed several times or one root is nested inside another.
    """
    files_info = defaultdict(list)
    seen_paths = set()
    for folder_path in folder_paths:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)

                # A repeated or overlapping root reaches the same file again;
                # recording it twice would make it a "duplicate" of itself.
                path_key = os.path.normcase(os.path.abspath(file_path))
                if path_key in seen_paths:
                    continue
                seen_paths.add(path_key)

                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # Broken symlink, permission problem, or the file vanished
                    # during the walk: skip it instead of aborting the scan.
                    continue

                files_info['names'].append((file, file_path, file_size))
                files_info['sizes'].append((file_size, file_path))
    return files_info
def find_duplicates(files_info):
    """Group scanned files by trimmed name and by size, keeping shared groups.

    Args:
        files_info: Mapping with a ``'names'`` list of
            ``(file_name, file_path, file_size)`` tuples and a ``'sizes'``
            list of ``(file_size, file_path)`` tuples.

    Returns:
        A ``(duplicates_by_name, duplicates_by_size)`` pair of
        ``defaultdict(list)`` objects. The first maps the part of a file name
        before the first ``'['`` (whitespace-stripped) to a list of
        ``(path, size)`` tuples; the second maps a size to a list of paths.
        Only groups holding more than one entry are included.
    """
    # Look up by name: the key is the text that precedes the first '['.
    by_short_name = defaultdict(list)
    for file_name, location, byte_count in files_info['names']:
        key = file_name.partition('[')[0].strip()
        by_short_name[key].append((location, byte_count))
    duplicates_by_name = defaultdict(
        list,
        {key: group for key, group in by_short_name.items() if len(group) > 1},
    )

    # Look up by size.
    by_size = defaultdict(list)
    for byte_count, location in files_info['sizes']:
        by_size[byte_count].append(location)
    duplicates_by_size = defaultdict(
        list,
        {count: group for count, group in by_size.items() if len(group) > 1},
    )

    return duplicates_by_name, duplicates_by_size
def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name):
    """Write both duplicate groupings to a UTF-8 text report.

    Args:
        duplicates_by_name: Mapping of name -> list of ``(path, size)`` tuples.
        duplicates_by_size: Mapping of size -> list of paths.
        file_name: Path of the report file; it is opened in ``'w'`` mode.

    Each entry is written as ``"<size in KB, 2 decimals> KB <path>"`` and
    every group is followed by an empty line.
    """
    with open(file_name, 'w', encoding='utf-8') as report:
        report.write("Duplicates by Name:\n")
        for entries in duplicates_by_name.values():
            report.writelines(
                f"{byte_count / 1024:.2f} KB {location}\n"
                for location, byte_count in entries
            )
            report.write("\n")  # blank line between groups of duplicates

        report.write("\nDuplicates by Size:\n")
        for byte_count, locations in duplicates_by_size.items():
            kilobytes = byte_count / 1024
            report.writelines(
                f"{kilobytes:.2f} KB {location}\n" for location in locations
            )
            report.write("\n")  # blank line between groups of duplicates
def main(folder_paths, output_file='dup.txt'):
    """Scan the folders, find duplicate candidates and write the report.

    Args:
        folder_paths: Iterable of directory paths to scan.
        output_file: Path of the report to write. Defaults to ``'dup.txt'``,
            the name that was previously hard-coded, so existing callers
            behave exactly as before.
    """
    files_info = get_files_info(folder_paths)
    duplicates_by_name, duplicates_by_size = find_duplicates(files_info)
    write_duplicates_to_file(duplicates_by_name, duplicates_by_size, output_file)
if __name__ == "__main__":
    # Folders to scan. dict.fromkeys drops repeated entries while keeping
    # their order, so a root listed more than once is walked only once and
    # its files are not reported as duplicates of themselves.
    folder_paths = list(dict.fromkeys(
        [r'Z:\X\X', r'Z:\X\X', r'Z:\X\X', r'Z:\X\X']
    ))
    main(folder_paths)
| 1 | import os |
| 2 | from collections import defaultdict |
| 3 | |
| 4 | def get_files_info(folder_paths): |
| 5 | files_info = defaultdict(list) |
| 6 | for folder_path in folder_paths: |
| 7 | for root, _, files in os.walk(folder_path): |
| 8 | for file in files: |
| 9 | file_path = os.path.join(root, file) |
| 10 | file_size = os.path.getsize(file_path) |
| 11 | files_info['names'].append((file, file_path, file_size)) |
| 12 | files_info['sizes'].append((file_size, file_path)) |
| 13 | return files_info |
| 14 | |
| 15 | def find_duplicates(files_info): |
| 16 | duplicates_by_name = defaultdict(list) |
| 17 | duplicates_by_size = defaultdict(list) |
| 18 | |
| 19 | # 按名称查找 |
| 20 | name_to_paths = defaultdict(list) |
| 21 | for name, path, size in files_info['names']: |
| 22 | short_name = name.split('[')[0].strip() |
| 23 | name_to_paths[short_name].append((path, size)) |
| 24 | for name, paths in name_to_paths.items(): |
| 25 | if len(paths) > 1: |
| 26 | duplicates_by_name[name] = paths |
| 27 | |
| 28 | # 按大小查找 |
| 29 | size_to_paths = defaultdict(list) |
| 30 | for size, path in files_info['sizes']: |
| 31 | size_to_paths[size].append(path) |
| 32 | for size, paths in size_to_paths.items(): |
| 33 | if len(paths) > 1: |
| 34 | duplicates_by_size[size] = paths |
| 35 | |
| 36 | return duplicates_by_name, duplicates_by_size |
| 37 | |
| 38 | def write_duplicates_to_file(duplicates_by_name, duplicates_by_size, file_name): |
| 39 | with open(file_name, 'w', encoding='utf-8') as f: |
| 40 | f.write("Duplicates by Name:\n") |
| 41 | for name, paths in duplicates_by_name.items(): |
| 42 | for path, size in paths: |
| 43 | size_kb = size / 1024 |
| 44 | f.write(f"{size_kb:.2f} KB {path}\n") |
| 45 | f.write("\n") # 在每组重复文件之间插入空行 |
| 46 | |
| 47 | f.write("\nDuplicates by Size:\n") |
| 48 | for size, paths in duplicates_by_size.items(): |
| 49 | for path in paths: |
| 50 | size_kb = size / 1024 |
| 51 | f.write(f"{size_kb:.2f} KB {path}\n") |
| 52 | f.write("\n") # 在每组重复文件之间插入空行 |
| 53 | |
| 54 | def main(folder_paths): |
| 55 | files_info = get_files_info(folder_paths) |
| 56 | duplicates_by_name, duplicates_by_size = find_duplicates(files_info) |
| 57 | write_duplicates_to_file(duplicates_by_name, duplicates_by_size, 'dup.txt') |
| 58 | |
| 59 | if __name__ == "__main__": |
| 60 | folder_paths = [r'Z:\X\X', r'Z:\X\X', r'Z:\X\X', r'Z:\X\X'] |
| 61 | main(folder_paths) |
| 62 |