mirror of
https://github.com/kevinveenbirkenbach/duplicate-file-handler.git
synced 2024-11-22 05:51:03 +01:00
optimized structure and created test script
This commit is contained in:
parent
73f2bbe409
commit
a54274b052
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
test_dir1
|
||||
test_dir2
|
46
create_test_structure.py
Normal file
46
create_test_structure.py
Normal file
@ -0,0 +1,46 @@
|
||||
import os
|
||||
import shutil
|
||||
import hashlib
|
||||
import random
|
||||
import string
|
||||
|
||||
def create_test_directory(base_dir, num_files=5, duplicate_files=2):
|
||||
if not os.path.exists(base_dir):
|
||||
os.makedirs(base_dir)
|
||||
|
||||
# Erstelle eine Liste von eindeutigen Dateinamen
|
||||
file_names = [f"file_{i}.txt" for i in range(num_files)]
|
||||
|
||||
# Erstelle einige Dateien mit zufälligem Inhalt
|
||||
for file_name in file_names:
|
||||
with open(os.path.join(base_dir, file_name), 'w') as f:
|
||||
content = ''.join(random.choices(string.ascii_lowercase, k=20))
|
||||
f.write(content)
|
||||
|
||||
# Erstelle Duplikate
|
||||
for i in range(duplicate_files):
|
||||
original = os.path.join(base_dir, file_names[i])
|
||||
duplicate = os.path.join(base_dir, f"dup_{file_names[i]}")
|
||||
shutil.copyfile(original, duplicate)
|
||||
|
||||
def create_file_structure():
|
||||
# Basisverzeichnisse erstellen
|
||||
base_dirs = ['test_dir1', 'test_dir2']
|
||||
for base_dir in base_dirs:
|
||||
create_test_directory(base_dir)
|
||||
|
||||
# Erstelle eine Datei im ersten Verzeichnis und dupliziere sie im zweiten
|
||||
with open(os.path.join('test_dir1', 'unique_file.txt'), 'w') as f:
|
||||
f.write("This is a unique file.")
|
||||
|
||||
shutil.copyfile(os.path.join('test_dir1', 'unique_file.txt'),
|
||||
os.path.join('test_dir2', 'unique_file.txt'))
|
||||
|
||||
# Erstelle eine zusätzliche einzigartige Datei im zweiten Verzeichnis
|
||||
with open(os.path.join('test_dir2', 'another_unique_file.txt'), 'w') as f:
|
||||
f.write("This is another unique file.")
|
||||
|
||||
print("Test file structure created.")
|
||||
|
||||
if __name__ == "__main__":
|
||||
create_file_structure()
|
104
main.py
104
main.py
@ -1,8 +1,7 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import hashlib
|
||||
from collections import defaultdict
|
||||
from filecmp import dircmp
|
||||
|
||||
def md5sum(filename):
|
||||
hash_md5 = hashlib.md5()
|
||||
@ -11,58 +10,71 @@ def md5sum(filename):
|
||||
hash_md5.update(chunk)
|
||||
return hash_md5.hexdigest()
|
||||
|
||||
def find_duplicates(directory):
|
||||
def find_duplicates(directories):
|
||||
hashes = defaultdict(list)
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for filename in files:
|
||||
path = os.path.join(root, filename)
|
||||
file_hash = md5sum(path)
|
||||
hashes[file_hash].append(path)
|
||||
for directory in directories:
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for filename in files:
|
||||
path = os.path.join(root, filename)
|
||||
file_hash = md5sum(path)
|
||||
hashes[file_hash].append(path)
|
||||
return {file_hash: paths for file_hash, paths in hashes.items() if len(paths) > 1}
|
||||
|
||||
def print_diff(files):
|
||||
print("Text file differences:")
|
||||
for i in range(len(files) - 1):
|
||||
os.system(f"diff {files[i]} {files[i+1]}")
|
||||
|
||||
def delete_file(file_path):
|
||||
confirm = input(f"Do you want to delete this file? {file_path} [y/N] ")
|
||||
if confirm.lower() in ["y", "yes"]:
|
||||
os.remove(file_path)
|
||||
print(f"Deleted {file_path}")
|
||||
|
||||
def handle_duplicates(duplicates):
|
||||
for file_hash, files in duplicates.items():
|
||||
print(f"Duplicate files for hash {file_hash}:")
|
||||
for file in files:
|
||||
print(file)
|
||||
if "text" in os.popen(f"file -b --mime-type '{files[0]}'").read():
|
||||
print_diff(files)
|
||||
else:
|
||||
def handle_modification(files, modification, mode, apply_to):
|
||||
if mode == 'preview':
|
||||
if modification == 'show':
|
||||
print("Would show the following duplicate files:")
|
||||
for file in files:
|
||||
print(f"File: {file}")
|
||||
print("Duplicate(s) of this file:")
|
||||
[print(duplicate) for duplicate in files if duplicate != file]
|
||||
delete_file(file)
|
||||
if file.startswith(tuple(apply_to)):
|
||||
print(file)
|
||||
elif mode == 'act':
|
||||
if modification == 'delete':
|
||||
for file in files:
|
||||
if file.startswith(tuple(apply_to)):
|
||||
print(f"Deleting {file}")
|
||||
os.remove(file)
|
||||
elif modification == 'hardlink':
|
||||
# Implement hardlink logic here
|
||||
pass
|
||||
elif modification == 'symlink':
|
||||
# Implement symlink logic here
|
||||
pass
|
||||
elif mode == 'interactive':
|
||||
for file in files:
|
||||
if file.startswith(tuple(apply_to)):
|
||||
answer = input(f"Do you want to {modification} this file? {file} [y/N] ")
|
||||
if answer.lower() in ['y', 'yes']:
|
||||
# Implement deletion, hardlink or symlink logic here
|
||||
pass
|
||||
|
||||
def main(directories):
|
||||
all_duplicates = defaultdict(list)
|
||||
for directory in directories:
|
||||
if not os.path.isdir(directory):
|
||||
print(f"Directory not found: {directory}")
|
||||
continue
|
||||
duplicates = find_duplicates(directory)
|
||||
for hash, files in duplicates.items():
|
||||
all_duplicates[hash].extend(files)
|
||||
def main(args):
|
||||
directories = args.directories
|
||||
apply_to = args.apply_to or directories
|
||||
duplicates = find_duplicates(directories)
|
||||
|
||||
if not all_duplicates:
|
||||
if not duplicates:
|
||||
print("No duplicates found.")
|
||||
return
|
||||
|
||||
handle_duplicates(all_duplicates)
|
||||
for file_hash, files in duplicates.items():
|
||||
if args.mode == 'preview' or (args.mode == 'interactive' and args.modification == 'show'):
|
||||
print(f"Duplicate files for hash {file_hash}:")
|
||||
[print(file) for file in files if file.startswith(tuple(apply_to))]
|
||||
else:
|
||||
handle_modification(files, args.modification, args.mode, apply_to)
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) > 1:
|
||||
main(sys.argv[1:])
|
||||
else:
|
||||
print("Usage: python3 script.py <directory1> <directory2> ...")
|
||||
parser = argparse.ArgumentParser(description="Find and handle duplicate files.")
|
||||
parser.add_argument('directories', nargs='*', default=['./'], help="Directories to scan for duplicates.")
|
||||
parser.add_argument('--apply-to', nargs='*', help="Directories to apply modifications to.")
|
||||
parser.add_argument('--modification', choices=['delete', 'hardlink', 'symlink', 'show'], default='show', help="Modification to perform on duplicates.")
|
||||
parser.add_argument('--mode', choices=['act', 'preview', 'interactive'], default='preview', help="How to apply the modifications.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.apply_to and args.modification not in ['delete', 'hardlink', 'symlink']:
|
||||
parser.error("--apply-to requires --modification to be 'delete', 'hardlink', or 'symlink'.")
|
||||
if not args.apply_to and args.modification != 'show':
|
||||
parser.error("Without --apply-to only 'show' modification is allowed.")
|
||||
|
||||
main(args)
|
||||
|
Loading…
Reference in New Issue
Block a user