renamed main.py

2025-11-18 02:36:25 +00:00 · 2025-03-06 10:13:19 +01:00
12 changed files with 173 additions and 803 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -1,7 +0,0 @@
-github: kevinveenbirkenbach
-
-patreon: kevinveenbirkenbach
-
-buy_me_a_coffee: kevinveenbirkenbach
-
-custom: https://s.veen.world/paypaldonate
--- a/.gitignore
+++ b/.gitignore
@@ -1 +0,0 @@
-*__pycache__*
--- a/Makefile
+++ b/Makefile
@@ -1,17 +0,0 @@
-# Makefile for ARC
-.PHONY: test install help
-
-help:
-	@echo "Targets:"
-	@echo "  make test     - Run unit tests"
-	@echo "  make install  - Show how to install via Kevin's Package Manager"
-
-test:
-	@python -m unittest discover -s tests -p "test_*.py" -t .
-
-install:
-	@echo "ARC is distributed via Kevin's Package Manager."
-	@echo "Install it with:"
-	@echo "    package-manager install arc"
-	@echo ""
-	@echo "(This 'make install' does not perform any other actions.)"
--- a/README.md
+++ b/README.md
@@ -1,6 +1,4 @@
 # 🤖👩‍🔬 Analysis-Ready Code (ARC)
-[![GitHub Sponsors](https://img.shields.io/badge/Sponsor-GitHub%20Sponsors-blue?logo=github)](https://github.com/sponsors/kevinveenbirkenbach) [![Patreon](https://img.shields.io/badge/Support-Patreon-orange?logo=patreon)](https://www.patreon.com/c/kevinveenbirkenbach) [![Buy Me a Coffee](https://img.shields.io/badge/Buy%20me%20a%20Coffee-Funding-yellow?logo=buymeacoffee)](https://buymeacoffee.com/kevinveenbirkenbach) [![PayPal](https://img.shields.io/badge/Donate-PayPal-blue?logo=paypal)](https://s.veen.world/paypaldonate)
-

 Analysis-Ready Code (ARC) is a Python-based utility designed to recursively scan directories and transform source code into a format optimized for AI and computer analysis. By stripping comments, filtering specific file types, and optionally compressing content, ARC ensures that your code is clean and ready for automated processing.

--- a/__init__.py
+++ b/__init__.py
--- a/cli.py
+++ b/cli.py
@@ -1,71 +0,0 @@
-import argparse
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(
-        description="Scan directories and print/compile file contents."
-    )
-    parser.add_argument(
-        "paths",
-        nargs='+',
-        help="List of files or directories to scan."
-    )
-    parser.add_argument(
-        "-t", "--file-types",
-        nargs='+',
-        default=[],
-        help="Filter by file types (e.g., .txt, .log)."
-    )
-    parser.add_argument(
-        "-x", "--ignore-file-strings",
-        nargs='+',
-        default=[],
-        help="Ignore files and folders containing these strings."
-    )
-    parser.add_argument(
-        "-S", "--show-hidden",
-        action='store_true',
-        dest='show_hidden',
-        default=False,
-        help="Include hidden directories and files in the scan."
-    )
-    parser.add_argument(
-        "-v", "--verbose",
-        action='store_true',
-        help="Enable verbose mode."
-    )
-    parser.add_argument(
-        "-N", "--no-comments",
-        action='store_true',
-        help="Remove comments from the displayed content based on file type."
-    )
-    parser.add_argument(
-        "-z", "--compress",
-        action='store_true',
-        help="Compress code (for supported file types)."
-    )
-    parser.add_argument(
-        "-p", "--path-contains",
-        nargs='+',
-        default=[],
-        help="Display files whose paths contain one of these strings."
-    )
-    parser.add_argument(
-        "-C", "--content-contains",
-        nargs='+',
-        default=[],
-        help="Display files containing one of these strings in their content."
-    )
-    parser.add_argument(
-        "-G", "--no-gitignore",
-        action='store_true',
-        help="Do not respect .gitignore files during scan."
-    )
-    parser.add_argument(
-        "-b", "--scan-binary-files",
-        action='store_true',
-        help="Scan binary files as well (by default these are ignored)."
-    )
-    # Convert show_hidden to ignore_hidden for downstream use
-    args = parser.parse_args()
-    args.ignore_hidden = not args.show_hidden
-    return args
--- a/code_processor.py
+++ b/code_processor.py
@@ -1,285 +0,0 @@
-import re
-import zlib
-from dataclasses import dataclass
-from typing import Dict, Tuple, Pattern, Optional
-import io
-import tokenize
-
-
-@dataclass(frozen=True)
-class LanguageSpec:
-    """Holds compiled comment patterns for a language."""
-    patterns: Tuple[Pattern, ...]
-
-
-class CodeProcessor:
-    """
-    Utilities to strip comments and (de)compress code.
-    - Python: tokenize-based (safe) with precise docstring removal.
-    - C/CPP/JS: state-machine comment stripper that respects string/char literals.
-    - Shell/YAML: remove full-line hash comments only.
-    - Jinja: remove {# ... #} blocks.
-    """
-    # File extensions (normalized to lowercase)
-    EXT_TO_LANG: Dict[str, str] = {
-        ".py": "python",
-        ".js": "cstyle",
-        ".c": "cstyle",
-        ".cpp": "cstyle",
-        ".h": "cstyle",
-        ".sh": "hash",
-        ".bash": "hash",
-        ".yml": "hash",
-        ".yaml": "hash",
-        ".j2": "jinja",
-        ".jinja": "jinja",
-        ".jinja2": "jinja",
-        ".tpl": "jinja",
-    }
-
-    # Regex-based specs for hash and jinja
-    _HASH = LanguageSpec(patterns=(
-        re.compile(r"^\s*#.*$", flags=re.MULTILINE),   # only full-line comments
-    ))
-    _JINJA = LanguageSpec(patterns=(
-        re.compile(r"\{#.*?#\}", flags=re.DOTALL),     # {# ... #} across lines
-    ))
-
-    LANG_SPECS: Dict[str, LanguageSpec] = {
-        "hash": _HASH,
-        "jinja": _JINJA,
-        # "cstyle" handled by a state machine, not regex
-        # "python" handled by tokenize, not regex
-    }
-
-    @classmethod
-    def _lang_from_ext(cls, file_type: str) -> Optional[str]:
-        """Map an extension like '.py' to an internal language key."""
-        ext = file_type.lower().strip()
-        return cls.EXT_TO_LANG.get(ext)
-
-    # -----------------------------
-    # Python stripping via tokenize
-    # -----------------------------
-    @staticmethod
-    def _strip_python_comments_tokenize(content: str) -> str:
-        """
-        Remove comments and docstrings safely using tokenize.
-        Rules:
-          - Drop COMMENT tokens.
-          - Drop module docstring only if it's the very first statement at col 0.
-          - Drop the first STRING statement in a suite immediately after 'def'/'class'
-            header (':' NEWLINE INDENT).
-        """
-        tokens = tokenize.generate_tokens(io.StringIO(content).readline)
-        out_tokens = []
-
-        indent_level = 0
-        module_docstring_candidate = True  # until we see first real stmt at module level
-        expect_suite_docstring = False     # just entered a suite after def/class
-        last_was_colon = False
-        seen_nontrivial_in_line = False    # guards module docstring (start of logical line)
-
-        for tok_type, tok_str, start, end, line in tokens:
-            # Track indentation
-            if tok_type == tokenize.INDENT:
-                indent_level += 1
-            elif tok_type == tokenize.DEDENT:
-                indent_level = max(0, indent_level - 1)
-
-            # New logical line: reset guard
-            if tok_type in (tokenize.NEWLINE, tokenize.NL):
-                seen_nontrivial_in_line = False
-                out_tokens.append((tok_type, tok_str))
-                continue
-
-            # Comments are dropped
-            if tok_type == tokenize.COMMENT:
-                continue
-
-            # Detect ':' ending a def/class header
-            if tok_type == tokenize.OP and tok_str == ":":
-                last_was_colon = True
-                out_tokens.append((tok_type, tok_str))
-                continue
-
-            # After ':' + NEWLINE + INDENT comes a suite start -> allow docstring removal
-            if tok_type == tokenize.INDENT and last_was_colon:
-                expect_suite_docstring = True
-                last_was_colon = False
-                out_tokens.append((tok_type, tok_str))
-                continue
-            # Any non-INDENT token clears the last_was_colon flag
-            if tok_type != tokenize.NL:
-                last_was_colon = False
-
-            # STRING handling
-            if tok_type == tokenize.STRING:
-                at_line_start = (start[1] == 0) and not seen_nontrivial_in_line
-                if indent_level == 0:
-                    # Potential module docstring only if first statement at col 0
-                    if module_docstring_candidate and at_line_start:
-                        module_docstring_candidate = False
-                        # drop it
-                        continue
-                    # Any other top-level string is normal
-                    module_docstring_candidate = False
-                    out_tokens.append((tok_type, tok_str))
-                    seen_nontrivial_in_line = True
-                    continue
-                else:
-                    # In a suite: if it's the first statement after def/class, drop regardless of column
-                    if expect_suite_docstring:
-                        expect_suite_docstring = False
-                        # drop it
-                        continue
-                    expect_suite_docstring = False
-                    out_tokens.append((tok_type, tok_str))
-                    seen_nontrivial_in_line = True
-                    continue
-
-            # Any other significant token disables module-docstring candidacy
-            if tok_type not in (tokenize.INDENT, tokenize.DEDENT):
-                if indent_level == 0:
-                    module_docstring_candidate = False
-                # Mark we've seen something on this line
-                if tok_type not in (tokenize.NL, tokenize.NEWLINE):
-                    seen_nontrivial_in_line = True
-
-            out_tokens.append((tok_type, tok_str))
-
-        return tokenize.untokenize(out_tokens)
-
-    # ---------------------------------
-    # C-style stripping via state machine
-    # ---------------------------------
-    @staticmethod
-    def _strip_cstyle_comments(content: str) -> str:
-        """
-        Remove // line comments and /* ... */ block comments while preserving
-        string ("...") and char ('...') literals and their escape sequences.
-        """
-        i = 0
-        n = len(content)
-        out = []
-        in_line_comment = False
-        in_block_comment = False
-        in_string = False
-        in_char = False
-        escape = False
-
-        while i < n:
-            c = content[i]
-            nxt = content[i + 1] if i + 1 < n else ""
-
-            # If inside line comment: consume until newline
-            if in_line_comment:
-                if c == "\n":
-                    in_line_comment = False
-                    out.append(c)
-                i += 1
-                continue
-
-            # If inside block comment: consume until '*/'
-            if in_block_comment:
-                if c == "*" and nxt == "/":
-                    in_block_comment = False
-                    i += 2
-                else:
-                    i += 1
-                continue
-
-            # If inside string literal
-            if in_string:
-                out.append(c)
-                if escape:
-                    escape = False
-                else:
-                    if c == "\\":
-                        escape = True
-                    elif c == '"':
-                        in_string = False
-                i += 1
-                continue
-
-            # If inside char literal
-            if in_char:
-                out.append(c)
-                if escape:
-                    escape = False
-                else:
-                    if c == "\\":
-                        escape = True
-                    elif c == "'":
-                        in_char = False
-                i += 1
-                continue
-
-            # Not in any special state:
-            # Check for start of comments
-            if c == "/" and nxt == "/":
-                in_line_comment = True
-                i += 2
-                continue
-            if c == "/" and nxt == "*":
-                in_block_comment = True
-                i += 2
-                continue
-
-            # Check for start of string/char literals
-            if c == '"':
-                in_string = True
-                out.append(c)
-                i += 1
-                continue
-            if c == "'":
-                in_char = True
-                out.append(c)
-                i += 1
-                continue
-
-            # Normal character
-            out.append(c)
-            i += 1
-
-        return "".join(out)
-
-    # -------------------
-    # Public API
-    # -------------------
-    @classmethod
-    def remove_comments(cls, content: str, file_type: str) -> str:
-        """
-        Remove comments based on file type/extension.
-          - Python: tokenize-based
-          - C/CPP/JS: state-machine
-          - Hash (sh/yaml): regex full-line
-          - Jinja: regex {# ... #}
-        """
-        lang = cls._lang_from_ext(file_type)
-        if lang is None:
-            return content.strip()
-
-        if lang == "python":
-            return cls._strip_python_comments_tokenize(content).strip()
-        if lang == "cstyle":
-            return cls._strip_cstyle_comments(content).strip()
-
-        spec = cls.LANG_SPECS.get(lang)
-        if not spec:
-            return content.strip()
-
-        cleaned = content
-        for pat in spec.patterns:
-            cleaned = pat.sub("", cleaned)
-        return cleaned.strip()
-
-    @staticmethod
-    def compress(content: str, level: int = 9) -> bytes:
-        """Compress code using zlib. Returns bytes."""
-        return zlib.compress(content.encode("utf-8"), level)
-
-    @staticmethod
-    def decompress(blob: bytes) -> str:
-        """Decompress zlib-compressed code back to text."""
-        return zlib.decompress(blob).decode("utf-8")
--- a/directory_handler.py
+++ b/directory_handler.py
@@ -1,193 +0,0 @@
-import os
-import fnmatch
-from code_processor import CodeProcessor
-
-class DirectoryHandler:
-    @staticmethod
-    def load_gitignore_patterns(root_path):
-        """
-        Recursively scans for .gitignore files in the given root_path.
-        Returns a list of tuples (base_dir, patterns) where:
-          - base_dir: the directory in which the .gitignore was found.
-          - patterns: a list of pattern strings from that .gitignore.
-        """
-        gitignore_data = []
-        for dirpath, _, filenames in os.walk(root_path):
-            if '.gitignore' in filenames:
-                gitignore_path = os.path.join(dirpath, '.gitignore')
-                try:
-                    with open(gitignore_path, 'r') as f:
-                        lines = f.readlines()
-                    # Filter out empty lines and comments.
-                    patterns = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')]
-                    # Save the base directory and its patterns.
-                    gitignore_data.append((dirpath, patterns))
-                except Exception as e:
-                    print(f"Error reading {gitignore_path}: {e}")
-        return gitignore_data
-    
-    @staticmethod
-    def is_binary_file(file_path):
-        """
-        Reads the first 1024 bytes of file_path and heuristically determines
-        if the file appears to be binary. This method returns True if a null byte
-        is found or if more than 30% of the bytes in the sample are non-text.
-        """
-        try:
-            with open(file_path, 'rb') as f:
-                chunk = f.read(1024)
-            # If there's a null byte, it's almost certainly binary.
-            if b'\x00' in chunk:
-                return True
-            
-            # Define a set of text characters (ASCII printable + common control characters)
-            text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F)))
-            # Count non-text characters in the chunk.
-            non_text = sum(byte not in text_chars for byte in chunk)
-            if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
-                return True
-        except Exception:
-            # If the file cannot be read in binary mode, assume it's not binary.
-            return False
-        return False
-
-    @staticmethod
-    def is_gitignored(file_path, gitignore_data):
-        """
-        Checks if file_path should be ignored according to the .gitignore entries.
-        For each tuple (base_dir, patterns), if file_path is under base_dir,
-        computes the relative path and matches it against the patterns.
-        """
-        for base_dir, patterns in gitignore_data:
-            try:
-                rel_path = os.path.relpath(file_path, base_dir)
-            except ValueError:
-                # file_path and base_dir are on different drives.
-                continue
-            # If the file is not under the current .gitignore base_dir, skip it.
-            if rel_path.startswith('..'):
-                continue
-            # Check all patterns.
-            for pattern in patterns:
-                if pattern.endswith('/'):
-                    # Directory pattern: check if any folder in the relative path matches.
-                    parts = rel_path.split(os.sep)
-                    for part in parts[:-1]:
-                        if fnmatch.fnmatch(part + '/', pattern):
-                            return True
-                else:
-                    # Check if the relative path matches the pattern.
-                    if fnmatch.fnmatch(rel_path, pattern):
-                        return True
-        return False
-
-    @staticmethod
-    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
-        """
-        Filter out directories based on ignore_file_strings and hidden status.
-        """
-        if ignore_hidden:
-            dirs[:] = [d for d in dirs if not d.startswith('.')]
-        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
-
-    @staticmethod
-    def path_or_content_contains(file_path, path_contains, content_contains):
-        """
-        Check if the file path contains specific strings or if the file content does.
-        """
-        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
-            return True
-
-        if content_contains:
-            try:
-                with open(file_path, 'r') as f:
-                    content = f.read()
-                if any(whitelist_str in content for whitelist_str in content_contains):
-                    return True
-            except UnicodeDecodeError:
-                return False
-        return False
-
-    @staticmethod
-    def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains, scan_binary_files=False):
-        """
-        Determines if a file should be printed based on various criteria.
-        By default, binary files are skipped unless scan_binary_files is True.
-        """
-        # Check binary file status using our heuristic.
-        if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
-            return False
-
-        if ignore_hidden and os.path.basename(file_path).startswith('.'):
-            return False
-
-        if file_types and not any(file_path.endswith(ft) for ft in file_types):
-            return False
-
-        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
-            return False
-
-        if path_contains or content_contains:
-            return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
-        return True
-
-    @staticmethod
-    def print_file_content(file_path, no_comments, compress):
-        """
-        Prints the content of a file, optionally removing comments or compressing the output.
-        """
-        try:
-            with open(file_path, 'r') as f:
-                content = f.read()
-            if no_comments:
-                file_type = os.path.splitext(file_path)[1]
-                content = CodeProcessor.remove_comments(content, file_type)
-            print(f"<< START: {file_path} >>")
-            if compress:
-                compressed_content = CodeProcessor.compress(content)
-                print("COMPRESSED CODE:")
-                print(compressed_content)
-            else:
-                print(content)
-            print("<< END >>\n")
-        except UnicodeDecodeError:
-            print(f"Warning: Could not read file due to encoding issues: {file_path}")
-            exit(1)
-
-    @staticmethod
-    def handle_directory(directory, **kwargs):
-        """
-        Scans the directory and processes each file while respecting .gitignore rules.
-        """
-        gitignore_data = []
-        if not kwargs.get('no_gitignore'):
-            gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)
-
-        for root, dirs, files in os.walk(directory):
-            DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
-            for file in files:
-                file_path = os.path.join(root, file)
-                if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
-                    if kwargs.get('verbose'):
-                        print(f"Skipped (gitignored): {file_path}")
-                    continue
-
-                if DirectoryHandler.should_print_file(
-                    file_path,
-                    kwargs['file_types'],
-                    kwargs['ignore_file_strings'],
-                    kwargs['ignore_hidden'],
-                    kwargs['path_contains'],
-                    kwargs['content_contains'],
-                    scan_binary_files=kwargs.get('scan_binary_files', False)
-                ):
-                    DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
-                elif kwargs.get('verbose'):
-                    print(f"Skipped file: {file_path}")
-
-    @staticmethod
-    def handle_file(file_path, **kwargs):
-        """
-        Processes an individual file.
-        """
-        DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
--- a/main.py
+++ b/main.py
@@ -1,49 +1,185 @@
 #!/usr/bin/env python3
+
 import os
-import sys
-from cli import parse_arguments
-from directory_handler import DirectoryHandler
+import argparse
+import re
+import zlib
+
+class CodeProcessor:
+    PYTHON = ".py"
+    JS = ".js"
+    C = ".c"
+    CPP = ".cpp"
+    H = ".h"
+    BASH = ".sh"
+    SHELL = ".bash"
+
+    @staticmethod
+    def remove_comments(content, file_type):
+        """Remove comments based on file type."""
+        comment_patterns = {
+            CodeProcessor.PYTHON: [
+                (r'\s*#.*', '',0),
+                (r'\"\"\"(.*?)\"\"\"', '', re.DOTALL),
+                (r"\'\'\'(.*?)\'\'\'", '', re.DOTALL)
+            ],
+            CodeProcessor.JS: [
+                (r'\s*//.*', '',0),
+                (r'/\*.*?\*/', '',0)
+            ],
+            CodeProcessor.C: [
+                (r'\s*//.*', '',0),
+                (r'/\*.*?\*/', '',0)
+            ],
+            CodeProcessor.CPP: [
+                (r'\s*//.*', '',0),
+                (r'/\*.*?\*/', '',0)
+            ],
+            CodeProcessor.H: [
+                (r'\s*//.*', '',0),
+                (r'/\*.*?\*/', '',0)
+            ],
+            CodeProcessor.BASH: [
+                (r'\s*#.*', '', 0)
+            ],
+            CodeProcessor.SHELL: [
+                (r'\s*#.*', '', 0)
+            ]
+        }
+
+        patterns = comment_patterns.get(file_type, [])
+        for pattern, repl, flags in patterns:
+            content = re.sub(pattern, repl, content, flags=flags)
+        return content.strip()
+
+    @staticmethod
+    def compress(content):
+        """Compress code using zlib."""
+        return zlib.compress(content.encode())
+
+
+class DirectoryHandler:
+    
+    @staticmethod
+    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
+        """Filter out directories based on ignore criteria."""
+        if ignore_hidden:
+            dirs[:] = [d for d in dirs if not d.startswith('.')]
+        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
+
+    @staticmethod
+    def path_or_content_contains(file_path, path_contains, content_contains):
+        # Check if the file name contains specific strings (whitelist)
+        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
+            return True
+
+        # Check file content for specific strings (if specified)
+        if content_contains:
+            try:
+                with open(file_path, 'r') as f:
+                    content = f.read()
+                # Return True if any of the content_contains strings are found in the content
+                if any(whitelist_str in content for whitelist_str in content_contains):
+                    return True
+            except UnicodeDecodeError:
+                # Return False if there's a Unicode decode error (file can't be read)
+                return False
+        return False
+
+    @staticmethod
+    def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains):
+        """
+        Determine if a file should be printed based on various criteria.
+
+        Args:
+        file_path (str): The path of the file to be checked.
+        file_types (list): List of allowed file extensions.
+        ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored.
+        ignore_hidden (bool): If True, hidden files (starting with '.') are ignored.
+        path_contains (list): List of strings; the file is processed only if its path contains one of these strings.
+        content_contains (list): List of strings; the file is processed only if its content contains one of these strings.
+
+        Returns:
+        bool: True if the file should be printed, False otherwise.
+        """
+
+        # Check for hidden files if ignore_hidden is enabled
+        if ignore_hidden and os.path.basename(file_path).startswith('.'):
+            return False
+
+        # Check if the file type is in the allowed list (if specified)
+        if file_types and not any(file_path.endswith(file_type) for file_type in file_types):
+            return False
+
+        # Check if the file should be ignored based on the presence of specific strings in its path
+        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
+            return False
+
+        if path_contains or content_contains:
+            return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
+        return True
+    
+    @staticmethod
+    def print_file_content(file_path, no_comments, compress):
+        """Print the content of a file."""
+        try:
+            with open(file_path, 'r') as f:
+                content = f.read()
+            if no_comments:
+                file_type = os.path.splitext(file_path)[1]
+                content = CodeProcessor.remove_comments(content, file_type)
+            print(f"<< START: {file_path} >>")
+            if compress:
+                compressed_content = CodeProcessor.compress(content)
+                print(f"COMPRESSED CODE: ")
+                print(compressed_content)
+            else:
+                print(content)
+            print("<< END >>\n")
+        except UnicodeDecodeError:
+            print(f"Warning: Could not read file due to encoding issues: {file_path}")
+            exit(1)
+
+    @staticmethod
+    def handle_directory(directory, **kwargs):
+        """Handle scanning and printing for directories."""
+        for root, dirs, files in os.walk(directory):
+            DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
+            for file in files:
+                if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['path_contains'], kwargs['content_contains']):
+                    DirectoryHandler.print_file_content(os.path.join(root, file), kwargs['no_comments'], kwargs['compress'])
+                elif kwargs['verbose']:
+                    print(f"Skipped file: {file}")
+
+    @staticmethod
+    def handle_file(file_path, **kwargs):
+        """Handle scanning and printing for individual files."""
+        DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
+

 def main():
-    args = parse_arguments()
+    parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.")
+    parser.add_argument("paths", nargs='+', help="List of files or directories to scan.")
+    parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).")
+    parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.")
+    parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.")
+    parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.")
+    parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.")
+    parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).")
+    parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.")
+    parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.")
+    
+    args = parser.parse_args()
    
    for path in args.paths:
        if os.path.isdir(path):
-            DirectoryHandler.handle_directory(
-                path,
-                file_types=args.file_types,
-                ignore_file_strings=args.ignore_file_strings,
-                ignore_hidden=args.ignore_hidden,
-                verbose=args.verbose,
-                no_comments=args.no_comments,
-                compress=args.compress,
-                path_contains=args.path_contains,
-                content_contains=args.content_contains,
-                no_gitignore=args.no_gitignore,
-                scan_binary_files=args.scan_binary_files
-            )
+            DirectoryHandler.handle_directory(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, path_contains=args.path_contains, content_contains=args.content_contains)
        elif os.path.isfile(path):
-            if DirectoryHandler.should_print_file(
-                path,
-                file_types=args.file_types,
-                ignore_file_strings=args.ignore_file_strings,
-                ignore_hidden=args.ignore_hidden,
-                path_contains=args.path_contains,
-                content_contains=args.content_contains,
-                scan_binary_files=args.scan_binary_files
-            ):
-                DirectoryHandler.handle_file(
-                    path,
-                    file_types=args.file_types,
-                    ignore_file_strings=args.ignore_file_strings,
-                    ignore_hidden=args.ignore_hidden,
-                    no_comments=args.no_comments,
-                    compress=args.compress,
-                    scan_binary_files=args.scan_binary_files
-                )
+            if DirectoryHandler.should_print_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, path_contains=args.path_contains, content_contains=args.content_contains):
+                DirectoryHandler.handle_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress)
        else:
            print(f"Error: {path} is neither a valid file nor a directory.")
-            sys.exit(1)
+            exit(1)

 if __name__ == "__main__":
-    main()
+    main()
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/unit/__init__.py
+++ b/tests/unit/__init__.py
--- a/tests/unit/test_arc.py
+++ b/tests/unit/test_arc.py
@@ -1,190 +0,0 @@
-# tests/unit/test_arc.py
-import io
-import os
-import sys
-import tempfile
-import unittest
-from contextlib import redirect_stdout
-
-# Ensure project root is on sys.path when running via discover
-PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
-if PROJECT_ROOT not in sys.path:
-    sys.path.insert(0, PROJECT_ROOT)
-
-from code_processor import CodeProcessor
-from directory_handler import DirectoryHandler
-
-
-class TestCodeProcessor(unittest.TestCase):
-    def test_python_comment_and_docstring_stripping(self):
-        src = '''\
-"""module docstring should go away"""
-
-# a comment
-x = 1  # inline comment
-y = "string with # not a comment"
-
-def f():
-    """function docstring should go away"""
-    s = """triple quoted but not a docstring"""
-    return x
-'''
-        out = CodeProcessor.remove_comments(src, ".py")
-        self.assertNotIn("module docstring", out)
-        self.assertNotIn("function docstring", out)
-        self.assertNotIn("# a comment", out)
-        # tolerate whitespace normalization from tokenize.untokenize
-        self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
-        self.assertIn('triple quoted but not a docstring', out)
-
-    def test_cstyle_comment_stripping(self):
-        src = '''\
-// line comment
-int main() {
-  /* block
-     comment */
-  int x = 42; // end comment
-  const char* s = "/* not a comment here */";
-  return x;
-}
-'''
-        out = CodeProcessor.remove_comments(src, ".c")
-        # line comment and block comment gone
-        self.assertNotIn("// line comment", out)
-        self.assertNotIn("block\n     comment", out)
-        # string content with /* */ inside should remain
-        self.assertIn('const char* s = "/* not a comment here */";', out)
-
-    def test_hash_comment_stripping(self):
-        src = """\
-# top comment
-KEY=value  # trailing comment should be kept by default
-plain: value
-"""
-        out = CodeProcessor.remove_comments(src, ".yml")
-        # Our regex removes full lines starting with optional spaces then '#'
-        self.assertNotIn("top comment", out)
-        # It does not remove trailing fragments after content for hash style
-        self.assertIn("KEY=value", out)
-        self.assertIn("plain: value", out)
-
-    def test_jinja_comment_stripping(self):
-        src = """\
-{# top jinja comment #}
-Hello {{ name }}!
-{#
-  multi-line
-  jinja comment
-#}
-Body text and {{ value }}.
-"""
-        out = CodeProcessor.remove_comments(src, ".j2")
-        self.assertNotIn("top jinja comment", out)
-        self.assertNotIn("multi-line", out)
-        # Regular content and expressions remain
-        self.assertIn("Hello {{ name }}!", out)
-        self.assertIn("Body text and {{ value }}.", out)
-
-    def test_unknown_extension_returns_stripped(self):
-        src = "  x = 1  # not removed for unknown  "
-        out = CodeProcessor.remove_comments(src, ".unknown")
-        self.assertEqual(out, "x = 1  # not removed for unknown")
-
-    def test_compress_decompress_roundtrip(self):
-        src = "def x():\n    return 42\n"
-        blob = CodeProcessor.compress(src)
-        self.assertIsInstance(blob, (bytes, bytearray))
-        back = CodeProcessor.decompress(blob)
-        self.assertEqual(src, back)
-
-
-class TestDirectoryHandler(unittest.TestCase):
-    def test_is_binary_file(self):
-        with tempfile.NamedTemporaryFile(delete=False) as tf:
-            tf.write(b"\x00\x01\x02BINARY")
-            path = tf.name
-        try:
-            self.assertTrue(DirectoryHandler.is_binary_file(path))
-        finally:
-            os.remove(path)
-
-    def test_gitignore_matching(self):
-        with tempfile.TemporaryDirectory() as root:
-            # Create .gitignore ignoring build/ and *.log
-            gi_dir = os.path.join(root, "a")
-            os.makedirs(gi_dir, exist_ok=True)
-            with open(os.path.join(gi_dir, ".gitignore"), "w") as f:
-                f.write("build/\n*.log\n")
-
-            # Files
-            os.makedirs(os.path.join(gi_dir, "build"), exist_ok=True)
-            ignored_dir_file = os.path.join(gi_dir, "build", "x.txt")
-            with open(ignored_dir_file, "w") as f:
-                f.write("ignored")
-            ignored_log = os.path.join(gi_dir, "debug.log")
-            with open(ignored_log, "w") as f:
-                f.write("ignored log")
-            kept_file = os.path.join(gi_dir, "src.txt")
-            with open(kept_file, "w") as f:
-                f.write("keep me")
-
-            gi_data = DirectoryHandler.load_gitignore_patterns(root)
-
-            self.assertTrue(DirectoryHandler.is_gitignored(ignored_dir_file, gi_data))
-            self.assertTrue(DirectoryHandler.is_gitignored(ignored_log, gi_data))
-            self.assertFalse(DirectoryHandler.is_gitignored(kept_file, gi_data))
-
-    def test_should_print_file_filters_hidden_and_types(self):
-        with tempfile.TemporaryDirectory() as root:
-            hidden = os.path.join(root, ".hidden.txt")
-            plain = os.path.join(root, "keep.py")
-            with open(hidden, "w") as f:
-                f.write("data")
-            with open(plain, "w") as f:
-                f.write("print('hi')")
-
-            self.assertFalse(
-                DirectoryHandler.should_print_file(
-                    hidden,
-                    file_types=[".py"],
-                    ignore_file_strings=[],
-                    ignore_hidden=True,
-                    path_contains=[],
-                    content_contains=[],
-                )
-            )
-            self.assertTrue(
-                DirectoryHandler.should_print_file(
-                    plain,
-                    file_types=[".py"],
-                    ignore_file_strings=[],
-                    ignore_hidden=True,
-                    path_contains=[],
-                    content_contains=[],
-                )
-            )
-
-    def test_print_file_content_no_comments_and_compress(self):
-        with tempfile.TemporaryDirectory() as root:
-            p = os.path.join(root, "t.py")
-            with open(p, "w") as f:
-                f.write("# comment only\nx=1\n")
-            buf = io.StringIO()
-            with redirect_stdout(buf):
-                DirectoryHandler.print_file_content(p, no_comments=True, compress=False)
-            out = buf.getvalue()
-            self.assertIn("<< START:", out)
-            # be whitespace-tolerant (tokenize may insert spaces)
-            self.assertRegex(out, r"x\s*=\s*1")
-            self.assertNotIn("# comment only", out)
-
-            buf = io.StringIO()
-            with redirect_stdout(buf):
-                DirectoryHandler.print_file_content(p, no_comments=True, compress=True)
-            out = buf.getvalue()
-            self.assertIn("COMPRESSED CODE:", out)
-            self.assertIn("<< END >>", out)
-
-
-if __name__ == "__main__":
-    unittest.main()