From 435df907095f2e0c5c07fcccbddeda501f24431e Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Sat, 16 Dec 2023 20:11:23 +0100 Subject: [PATCH] implemented or for content-contains and file-name-contains --- scan.py | 97 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 55 insertions(+), 42 deletions(-) diff --git a/scan.py b/scan.py index f44043a..6225e03 100644 --- a/scan.py +++ b/scan.py @@ -51,31 +51,61 @@ class CodeProcessor: class DirectoryHandler: @staticmethod - def filter_directories(dirs, ignore_strings, ignore_hidden): + def filter_directories(dirs, ignore_file_strings, ignore_hidden): """Filter out directories based on ignore criteria.""" if ignore_hidden: dirs[:] = [d for d in dirs if not d.startswith('.')] - dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_strings)] + dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)] - def should_print_file(file_path, file_filters, ignore_strings, ignore_hidden, whitelist_file, whitelist_content): - """Determine if a file should be printed based on new criteria.""" - if ignore_hidden and os.path.basename(file_path).startswith('.'): - return False - if file_filters and not any(file_path.endswith(file_type) for file_type in file_filters): - return False - if any(ignore_str in file_path for ignore_str in ignore_strings): - return False - if whitelist_file and not any(whitelist_str in file_path for whitelist_str in whitelist_file): - return False - if whitelist_content: + @staticmethod + def file_name_or_content_contains(file_path, file_name_contains, content_contains): + # Check if the file name contains specific strings (whitelist) + if file_name_contains and any(whitelist_str in file_path for whitelist_str in file_name_contains): + return True + + # Check file content for specific strings (if specified) + if content_contains: try: with open(file_path, 'r') as f: content = f.read() - if not any(whitelist_str in content for whitelist_str in whitelist_content): - return False + # Return True if any of the content_contains strings are found in the content + if any(whitelist_str in content for whitelist_str in content_contains): + return True except UnicodeDecodeError: + # Return False if there's a Unicode decode error (file can't be read) return False - return True + return False + + @staticmethod + def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, file_name_contains, content_contains): + """ + Determine if a file should be printed based on various criteria. + + Args: + file_path (str): The path of the file to be checked. + file_types (list): List of allowed file extensions. + ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored. + ignore_hidden (bool): If True, hidden files (starting with '.') are ignored. + file_name_contains (list): List of strings; the file is processed only if its path contains one of these strings. + content_contains (list): List of strings; the file is processed only if its content contains one of these strings. + + Returns: + bool: True if the file should be printed, False otherwise. + """ + + # Check for hidden files if ignore_hidden is enabled + if ignore_hidden and os.path.basename(file_path).startswith('.'): + return False + + # Check if the file type is in the allowed list (if specified) + if file_types and not any(file_path.endswith(file_type) for file_type in file_types): + return False + + # Check if the file should be ignored based on the presence of specific strings in its path + if any(ignore_str in file_path for ignore_str in ignore_file_strings): + return False + + return DirectoryHandler.file_name_or_content_contains(file_path, file_name_contains, content_contains) @staticmethod def print_file_content(file_path, no_comments, compress): @@ -102,9 +132,9 @@ class DirectoryHandler: def handle_directory(directory, **kwargs): """Handle scanning and printing for directories.""" for root, dirs, files in os.walk(directory): - DirectoryHandler.filter_directories(dirs, kwargs['ignore_strings'], kwargs['ignore_hidden']) + DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden']) for file in files: - if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_filters'], kwargs['ignore_strings'], kwargs['ignore_hidden'], kwargs['whitelist_file'], kwargs['whitelist_content']): + if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['file_name_contains'], kwargs['content_contains']): DirectoryHandler.print_file_content(os.path.join(root, file), kwargs['no_comments'], kwargs['compress']) elif kwargs['verbose']: print(f"Skipped file: {file}") @@ -118,40 +148,23 @@ class DirectoryHandler: def main(): parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.") parser.add_argument("paths", nargs='+', help="List of files or directories to scan.") - parser.add_argument("-f", "--filetype", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).") - parser.add_argument("-i", "--ignore", nargs='+', default=[], help="Ignore files and folders containing these strings.") + parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).") + parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.") parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.") parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.") parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.") parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).") - parser.add_argument("--whitelist-file", nargs='+', default=[], help="Only display files whose paths contain one of these strings.") - parser.add_argument("--whitelist-content", nargs='+', default=[], help="Only display files containing one of these strings in their content.") - - # Custom section for parameter priority - priority_section = """ -Parameter Priority Order for File Selection: -1. --ignore-hidden: Applied first to filter out hidden files and directories. -2. --ignore: Applied to ignore files and folders with specified strings. -3. --whitelist-file: Overrides ignore settings but subject to file type filter. -4. -f/--filetype: Determines which file types are processed. -5. --whitelist-content: Filters files based on content after type and ignore checks. - -Post-Selection Processing: -- --no-comments: Removes comments from the content of selected files. -- --compress: Compresses content of selected files. - -Note: Verbose mode (-v/--verbose) provides detailed processing information but does not affect file selection or processing.""" - parser.epilog = priority_section - + parser.add_argument("--file-name-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.") + parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.") args = parser.parse_args() for path in args.paths: if os.path.isdir(path): - DirectoryHandler.handle_directory(path, file_filters=args.filetype, ignore_strings=args.ignore, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, whitelist_file=args.whitelist_file, whitelist_content=args.whitelist_content) + DirectoryHandler.handle_directory(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, file_name_contains=args.file_name_contains, content_contains=args.content_contains) elif os.path.isfile(path): - if DirectoryHandler.should_print_file(path, file_filters=args.filetype, ignore_strings=args.ignore, ignore_hidden=args.ignore_hidden, whitelist_file=args.whitelist_file, whitelist_content=args.whitelist_content): - DirectoryHandler.handle_file(path, file_filters=args.filetype, ignore_strings=args.ignore, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress) + if DirectoryHandler.should_print_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, file_name_contains=args.file_name_contains, content_contains=args.content_contains): + DirectoryHandler.handle_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress) else: print(f"Error: {path} is neither a valid file nor a directory.") exit(1)