From 47425f1ad6b133cbf037c03adf06ed52bd70dc1d Mon Sep 17 00:00:00 2001
From: Kevin Veen-Birkenbach
Date: Thu, 14 Sep 2023 08:41:20 +0200
Subject: [PATCH] added no-comments filter

---
Review notes (this region is ignored when the patch is applied):

What the patch does
  * Adds remove_comments(content, file_type), a regex-based comment
    stripper for ".py" and for ".js", ".c", ".cpp", ".h".
  * Adds a --no-comments command-line flag and threads it through
    handle_directory / handle_file into print_file_content.

Changes made during review
  * handle_file: the signature now accepts no_comments. The submitted
    hunk used no_comments inside handle_file and passed a fifth argument
    from main(), but left the four-parameter "def" line untouched.
  * print_file_content: dropped the leftover debug print(file_type),
    which wrote the bare extension to stdout ahead of every file banner.
  * no_comments defaults to False in print_file_content,
    handle_directory and handle_file, so older call sites keep working.
  * The extension is lower-cased before it is compared.
  * The C-style branch uses a tuple membership test instead of an
    "or" chain.

Known limits of the stripping (behaviour unchanged)
  * Python: only lines that start with "#" are removed; trailing
    comments such as "x = 1  # note" stay. Every triple-quoted string is
    removed, not just docstrings.
  * C-style: "//" is matched anywhere on a line, including inside string
    literals such as URLs.
  * With --no-comments the content is always strip()ped, even for file
    types that have no stripping rule.

NOTE(review): the line structure of this patch was restored by hand and
the hunks were edited, so the blob id on the "index" line and the commit
id in the first line no longer describe this content. Regenerate with
git format-patch before merging.

 scan.py | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/scan.py b/scan.py
index 1fa07c7..64c2571 100644
--- a/scan.py
+++ b/scan.py
@@ -1,5 +1,21 @@
 import os
 import argparse
+import re
+
+def remove_comments(content, file_type):
+    """Remove comments from the content based on file type."""
+    if file_type == ".py":
+        # Remove Python single line comments
+        content = re.sub(r'^\s*#.*\n?', '', content, flags=re.MULTILINE)
+        # Remove triple double-quote and triple single-quote docstrings
+        content = re.sub(r'\"\"\"(.*?)\"\"\"', '', content, flags=re.DOTALL)
+        content = re.sub(r"\'\'\'(.*?)\'\'\'", '', content, flags=re.DOTALL)
+    elif file_type in (".js", ".c", ".cpp", ".h"):
+        # Remove C-style comments (// and /* */)
+        content = re.sub(r'\s*//.*', '', content)
+        content = re.sub(r'/\*.*?\*/', '', content, flags=re.DOTALL)
+    # Add more file types and their comment styles as needed
+    return content.strip()
 
 def filter_directories(dirs, ignore_strings, ignore_hidden):
     """Filter out directories based on ignore criteria."""
@@ -17,11 +33,14 @@ def should_print_file(file, file_filters, ignore_strings, ignore_hidden):
         return False
     return True
 
-def print_file_content(file_path):
+def print_file_content(file_path, no_comments=False):
     """Print the content of a file."""
     try:
         with open(file_path, 'r') as f:
             content = f.read()
+            if no_comments:
+                file_type = os.path.splitext(file_path)[1].lower()
+                content = remove_comments(content, file_type)
             print(f"======== File Path: {file_path} ========")
             print(content)
             print("==================================\n")
@@ -29,20 +48,20 @@ def print_file_content(file_path):
         print(f"Warning: Could not read file due to encoding issues: {file_path}")
         exit(1)
 
-def handle_directory(directory, file_filters, ignore_strings, ignore_hidden, verbose):
+def handle_directory(directory, file_filters, ignore_strings, ignore_hidden, verbose, no_comments=False):
     """Handle scanning and printing for directories."""
     for root, dirs, files in os.walk(directory):
         filter_directories(dirs, ignore_strings, ignore_hidden)
         for file in files:
             if should_print_file(file, file_filters, ignore_strings, ignore_hidden):
-                print_file_content(os.path.join(root, file))
+                print_file_content(os.path.join(root, file), no_comments)
             elif verbose:
                 print(f"Skipped file: {file}")
 
-def handle_file(file_path, file_filters, ignore_strings, ignore_hidden):
+def handle_file(file_path, file_filters, ignore_strings, ignore_hidden, no_comments=False):
     """Handle scanning and printing for individual files."""
     if should_print_file(os.path.basename(file_path), file_filters, ignore_strings, ignore_hidden):
-        print_file_content(file_path)
+        print_file_content(file_path, no_comments)
 
 def main():
     parser = argparse.ArgumentParser(description="Scan directories and print file contents.")
@@ -51,13 +70,14 @@ def main():
     parser.add_argument("-i", "--ignore", nargs='+', default=[], help="Ignore files and folders containing these strings.")
     parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.")
     parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.")
+    parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.")
     args = parser.parse_args()
 
     for path in args.paths:
         if os.path.isdir(path):
-            handle_directory(path, args.filetype, args.ignore, args.ignore_hidden, args.verbose)
+            handle_directory(path, args.filetype, args.ignore, args.ignore_hidden, args.verbose, args.no_comments)
         elif os.path.isfile(path):
-            handle_file(path, args.filetype, args.ignore, args.ignore_hidden)
+            handle_file(path, args.filetype, args.ignore, args.ignore_hidden, args.no_comments)
         else:
             print(f"Error: {path} is neither a valid file nor a directory.")
             exit(1)