diff --git a/__pycache__/cli.cpython-313.pyc b/__pycache__/cli.cpython-313.pyc new file mode 100644 index 0000000..a72ab86 Binary files /dev/null and b/__pycache__/cli.cpython-313.pyc differ diff --git a/__pycache__/code_processor.cpython-313.pyc b/__pycache__/code_processor.cpython-313.pyc new file mode 100644 index 0000000..7642b5f Binary files /dev/null and b/__pycache__/code_processor.cpython-313.pyc differ diff --git a/__pycache__/directory_handler.cpython-313.pyc b/__pycache__/directory_handler.cpython-313.pyc new file mode 100644 index 0000000..556431d Binary files /dev/null and b/__pycache__/directory_handler.cpython-313.pyc differ diff --git a/cli.py b/cli.py new file mode 100644 index 0000000..219bf3c --- /dev/null +++ b/cli.py @@ -0,0 +1,18 @@ +import argparse + +def parse_arguments(): + parser = argparse.ArgumentParser( + description="Scan directories and print/compile file contents." + ) + parser.add_argument("paths", nargs='+', help="List of files or directories to scan.") + parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).") + parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.") + parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.") + parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.") + parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.") + parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).") + parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.") + parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.") + parser.add_argument("--no-gitignore", action='store_true', 
help="Do not respect .gitignore files during scan.") + + return parser.parse_args() \ No newline at end of file diff --git a/code_processor.py b/code_processor.py new file mode 100644 index 0000000..ff81cd3 --- /dev/null +++ b/code_processor.py @@ -0,0 +1,54 @@ +import re +import zlib + +class CodeProcessor: + PYTHON = ".py" + JS = ".js" + C = ".c" + CPP = ".cpp" + H = ".h" + BASH = ".sh" + SHELL = ".bash" + + @staticmethod + def remove_comments(content, file_type): + """Remove comments based on file type.""" + comment_patterns = { + CodeProcessor.PYTHON: [ + (r'\s*#.*', '', 0), + (r'\"\"\"(.*?)\"\"\"', '', re.DOTALL), + (r"\'\'\'(.*?)\'\'\'", '', re.DOTALL) + ], + CodeProcessor.JS: [ + (r'\s*//.*', '', 0), + (r'/\*.*?\*/', '', 0) + ], + CodeProcessor.C: [ + (r'\s*//.*', '', 0), + (r'/\*.*?\*/', '', 0) + ], + CodeProcessor.CPP: [ + (r'\s*//.*', '', 0), + (r'/\*.*?\*/', '', 0) + ], + CodeProcessor.H: [ + (r'\s*//.*', '', 0), + (r'/\*.*?\*/', '', 0) + ], + CodeProcessor.BASH: [ + (r'\s*#.*', '', 0) + ], + CodeProcessor.SHELL: [ + (r'\s*#.*', '', 0) + ] + } + + patterns = comment_patterns.get(file_type, []) + for pattern, repl, flags in patterns: + content = re.sub(pattern, repl, content, flags=flags) + return content.strip() + + @staticmethod + def compress(content): + """Compress code using zlib.""" + return zlib.compress(content.encode()) \ No newline at end of file diff --git a/directory_handler.py b/directory_handler.py new file mode 100644 index 0000000..32647c5 --- /dev/null +++ b/directory_handler.py @@ -0,0 +1,126 @@ +import os +import fnmatch +from code_processor import CodeProcessor + +class DirectoryHandler: + @staticmethod + def load_gitignore_patterns(root_path): + """Collect .gitignore patterns from root_path and all subdirectories.""" + gitignore_patterns = [] + for dirpath, dirnames, filenames in os.walk(root_path): + if '.gitignore' in filenames: + gitignore_path = os.path.join(dirpath, '.gitignore') + try: + with open(gitignore_path, 'r') as f: + 
for line in f: + line = line.strip() + if line and not line.startswith('#'): + # Build an absolute pattern path based on the location of the .gitignore + gitignore_patterns.append(os.path.join(dirpath, line)) + except Exception as e: + print(f"Error reading {gitignore_path}: {e}") + return gitignore_patterns + + @staticmethod + def is_gitignored(file_path, gitignore_patterns): + """Check if file_path matches any .gitignore pattern.""" + for pattern in gitignore_patterns: + if fnmatch.fnmatch(file_path, pattern): + return True + return False + + @staticmethod + def filter_directories(dirs, ignore_file_strings, ignore_hidden): + """Filter out directories based on ignore criteria.""" + if ignore_hidden: + dirs[:] = [d for d in dirs if not d.startswith('.')] + dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)] + + @staticmethod + def path_or_content_contains(file_path, path_contains, content_contains): + # Check if the file path contains specific strings (whitelist) + if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains): + return True + + # Check file content for specific strings (if specified) + if content_contains: + try: + with open(file_path, 'r') as f: + content = f.read() + if any(whitelist_str in content for whitelist_str in content_contains): + return True + except UnicodeDecodeError: + return False + return False + + @staticmethod + def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains): + """ + Determine if a file should be printed based on various criteria.
+ """ + if ignore_hidden and os.path.basename(file_path).startswith('.'): + return False + + if file_types and not any(file_path.endswith(file_type) for file_type in file_types): + return False + + if any(ignore_str in file_path for ignore_str in ignore_file_strings): + return False + + if path_contains or content_contains: + return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains) + return True + + @staticmethod + def print_file_content(file_path, no_comments, compress): + """Print the content of a file.""" + try: + with open(file_path, 'r') as f: + content = f.read() + if no_comments: + file_type = os.path.splitext(file_path)[1] + content = CodeProcessor.remove_comments(content, file_type) + print(f"<< START: {file_path} >>") + if compress: + compressed_content = CodeProcessor.compress(content) + print("COMPRESSED CODE: ") + print(compressed_content) + else: + print(content) + print("<< END >>\n") + except UnicodeDecodeError: + print(f"Warning: Could not read file due to encoding issues: {file_path}") + exit(1) + + @staticmethod + def handle_directory(directory, **kwargs): + """Handle scanning and printing for directories.""" + gitignore_patterns = [] + if not kwargs.get('no_gitignore'): + gitignore_patterns = DirectoryHandler.load_gitignore_patterns(directory) + + for root, dirs, files in os.walk(directory): + DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden']) + for file in files: + file_path = os.path.join(root, file) + if gitignore_patterns and DirectoryHandler.is_gitignored(file_path, gitignore_patterns): + if kwargs.get('verbose'): + print(f"Skipped (gitignored): {file_path}") + continue + + if DirectoryHandler.should_print_file( + file_path, + kwargs['file_types'], + kwargs['ignore_file_strings'], + kwargs['ignore_hidden'], + kwargs['path_contains'], + kwargs['content_contains'] + ): + DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) + 
elif kwargs.get('verbose'): + print(f"Skipped file: {file_path}") + + @staticmethod + def handle_file(file_path, **kwargs): + """Handle scanning and printing for individual files.""" + DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) diff --git a/main.py b/main.py index 591e375..641338b 100755 --- a/main.py +++ b/main.py @@ -1,222 +1,46 @@ #!/usr/bin/env python3 - import os -import argparse -import re -import zlib -import fnmatch - -class CodeProcessor: - PYTHON = ".py" - JS = ".js" - C = ".c" - CPP = ".cpp" - H = ".h" - BASH = ".sh" - SHELL = ".bash" - - @staticmethod - def remove_comments(content, file_type): - """Remove comments based on file type.""" - comment_patterns = { - CodeProcessor.PYTHON: [ - (r'\s*#.*', '',0), - (r'\"\"\"(.*?)\"\"\"', '', re.DOTALL), - (r"\'\'\'(.*?)\'\'\'", '', re.DOTALL) - ], - CodeProcessor.JS: [ - (r'\s*//.*', '',0), - (r'/\*.*?\*/', '',0) - ], - CodeProcessor.C: [ - (r'\s*//.*', '',0), - (r'/\*.*?\*/', '',0) - ], - CodeProcessor.CPP: [ - (r'\s*//.*', '',0), - (r'/\*.*?\*/', '',0) - ], - CodeProcessor.H: [ - (r'\s*//.*', '',0), - (r'/\*.*?\*/', '',0) - ], - CodeProcessor.BASH: [ - (r'\s*#.*', '', 0) - ], - CodeProcessor.SHELL: [ - (r'\s*#.*', '', 0) - ] - } - - patterns = comment_patterns.get(file_type, []) - for pattern, repl, flags in patterns: - content = re.sub(pattern, repl, content, flags=flags) - return content.strip() - - @staticmethod - def compress(content): - """Compress code using zlib.""" - return zlib.compress(content.encode()) - - -class DirectoryHandler: - - @staticmethod - def load_gitignore_patterns(root_path): - """Collect .gitignore patterns from root_path and all subdirectories.""" - gitignore_patterns = [] - - for dirpath, dirnames, filenames in os.walk(root_path): - if '.gitignore' in filenames: - gitignore_path = os.path.join(dirpath, '.gitignore') - with open(gitignore_path, 'r') as f: - for line in f: - line = line.strip() - if line and not line.startswith('#'): - 
gitignore_patterns.append(os.path.join(dirpath, line)) - return gitignore_patterns - - @staticmethod - def is_gitignored(file_path, gitignore_patterns): - """Check if file_path matches any .gitignore pattern.""" - for pattern in gitignore_patterns: - if fnmatch.fnmatch(file_path, pattern): - return True - return False - - @staticmethod - def filter_directories(dirs, ignore_file_strings, ignore_hidden): - """Filter out directories based on ignore criteria.""" - if ignore_hidden: - dirs[:] = [d for d in dirs if not d.startswith('.')] - dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)] - - @staticmethod - def path_or_content_contains(file_path, path_contains, content_contains): - # Check if the file name contains specific strings (whitelist) - if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains): - return True - - # Check file content for specific strings (if specified) - if content_contains: - try: - with open(file_path, 'r') as f: - content = f.read() - # Return True if any of the content_contains strings are found in the content - if any(whitelist_str in content for whitelist_str in content_contains): - return True - except UnicodeDecodeError: - # Return False if there's a Unicode decode error (file can't be read) - return False - return False - - @staticmethod - def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains): - """ - Determine if a file should be printed based on various criteria. - - Args: - file_path (str): The path of the file to be checked. - file_types (list): List of allowed file extensions. - ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored. - ignore_hidden (bool): If True, hidden files (starting with '.') are ignored. - path_contains (list): List of strings; the file is processed only if its path contains one of these strings. 
- content_contains (list): List of strings; the file is processed only if its content contains one of these strings. - - Returns: - bool: True if the file should be printed, False otherwise. - """ - - # Check for hidden files if ignore_hidden is enabled - if ignore_hidden and os.path.basename(file_path).startswith('.'): - return False - - # Check if the file type is in the allowed list (if specified) - if file_types and not any(file_path.endswith(file_type) for file_type in file_types): - return False - - # Check if the file should be ignored based on the presence of specific strings in its path - if any(ignore_str in file_path for ignore_str in ignore_file_strings): - return False - - if path_contains or content_contains: - return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains) - return True - - @staticmethod - def print_file_content(file_path, no_comments, compress): - """Print the content of a file.""" - try: - with open(file_path, 'r') as f: - content = f.read() - if no_comments: - file_type = os.path.splitext(file_path)[1] - content = CodeProcessor.remove_comments(content, file_type) - print(f"<< START: {file_path} >>") - if compress: - compressed_content = CodeProcessor.compress(content) - print(f"COMPRESSED CODE: ") - print(compressed_content) - else: - print(content) - print("<< END >>\n") - except UnicodeDecodeError: - print(f"Warning: Could not read file due to encoding issues: {file_path}") - exit(1) - - @staticmethod - def handle_directory(directory, **kwargs): - """Handle scanning and printing for directories.""" - gitignore_patterns = [] - if not kwargs.get('no_gitignore'): - gitignore_patterns = DirectoryHandler.load_gitignore_patterns(directory) - - for root, dirs, files in os.walk(directory): - DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden']) - for file in files: - file_path = os.path.join(root, file) - - if gitignore_patterns and 
DirectoryHandler.is_gitignored(file_path, gitignore_patterns): - if kwargs['verbose']: - print(f"Skipped (gitignored): {file_path}") - continue - - if DirectoryHandler.should_print_file(file_path, kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['path_contains'], kwargs['content_contains']): - DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) - elif kwargs['verbose']: - print(f"Skipped file: {file_path}") - - - @staticmethod - def handle_file(file_path, **kwargs): - """Handle scanning and printing for individual files.""" - DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) - +import sys +from cli import parse_arguments +from directory_handler import DirectoryHandler def main(): - parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.") - parser.add_argument("paths", nargs='+', help="List of files or directories to scan.") - parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).") - parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.") - parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.") - parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.") - parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.") - parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).") - parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.") - parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.") - parser.add_argument("--no-gitignore", action='store_true', help="Do not 
respect .gitignore files during scan.") - - args = parser.parse_args() + args = parse_arguments() for path in args.paths: if os.path.isdir(path): - DirectoryHandler.handle_directory(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, path_contains=args.path_contains, content_contains=args.content_contains) + DirectoryHandler.handle_directory( + path, + file_types=args.file_types, + ignore_file_strings=args.ignore_file_strings, + ignore_hidden=args.ignore_hidden, + verbose=args.verbose, + no_comments=args.no_comments, + compress=args.compress, + path_contains=args.path_contains, + content_contains=args.content_contains, + no_gitignore=args.no_gitignore + ) elif os.path.isfile(path): - if DirectoryHandler.should_print_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, path_contains=args.path_contains, content_contains=args.content_contains): - DirectoryHandler.handle_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress) + if DirectoryHandler.should_print_file( + path, + file_types=args.file_types, + ignore_file_strings=args.ignore_file_strings, + ignore_hidden=args.ignore_hidden, + path_contains=args.path_contains, + content_contains=args.content_contains + ): + DirectoryHandler.handle_file( + path, + file_types=args.file_types, + ignore_file_strings=args.ignore_file_strings, + ignore_hidden=args.ignore_hidden, + no_comments=args.no_comments, + compress=args.compress + ) else: print(f"Error: {path} is neither a valid file nor a directory.") - exit(1) + sys.exit(1) if __name__ == "__main__": main() \ No newline at end of file