mirror of
https://github.com/kevinveenbirkenbach/directory-content-scanner.git
synced 2025-12-27 02:46:36 +00:00
Compare commits
20 Commits
0a315ad6ea
...
v0.1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 6a28c7940d | |||
| 039481d3a9 | |||
| b55576beb2 | |||
| c5938cf482 | |||
| 847b40e9e6 | |||
| 69477fa29e | |||
| ab62b4d1b9 | |||
| 485f068fa5 | |||
| bf2f548a1f | |||
| 11b325ee25 | |||
| 4953993321 | |||
| 49601176e0 | |||
| b822435762 | |||
| 843b16563e | |||
| 9de33b67db | |||
| bef5f392d7 | |||
| efcfc585d7 | |||
| 095701e304 | |||
| c950a42ca9 | |||
| 68aa8628e1 |
7
.github/FUNDING.yml
vendored
Normal file
7
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
github: kevinveenbirkenbach
|
||||
|
||||
patreon: kevinveenbirkenbach
|
||||
|
||||
buy_me_a_coffee: kevinveenbirkenbach
|
||||
|
||||
custom: https://s.veen.world/paypaldonate
|
||||
38
.github/workflows/ci-nix.yml
vendored
Normal file
38
.github/workflows/ci-nix.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: Nix CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
nix-tests:
|
||||
name: Nix Build & Tests
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Nix
|
||||
uses: cachix/install-nix-action@v27
|
||||
with:
|
||||
extra_nix_config: |
|
||||
experimental-features = nix-command flakes
|
||||
|
||||
- name: Build ARC with Nix
|
||||
run: nix build .#arc
|
||||
|
||||
- name: Run flake checks
|
||||
run: nix flake check --show-trace
|
||||
|
||||
- name: Run Makefile tests inside Nix environment
|
||||
run: |
|
||||
nix develop -c bash -c "make test"
|
||||
|
||||
# Changed step:
|
||||
- name: Run arc --help via Nix app
|
||||
run: |
|
||||
nix run .#arc -- --help
|
||||
40
.github/workflows/ci-python.yml
vendored
Normal file
40
.github/workflows/ci-python.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
python-tests:
|
||||
name: Run make test (Python)
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install --upgrade pip
|
||||
|
||||
- name: Install project (normal install)
|
||||
run: |
|
||||
python -m pip install .
|
||||
|
||||
- name: Run tests via Makefile
|
||||
run: make test
|
||||
|
||||
- name: Run arc --help
|
||||
run: arc --help
|
||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*__pycache__*
|
||||
build/
|
||||
src/analysis_ready_code.egg-info/
|
||||
4
CHANGELOG.md
Normal file
4
CHANGELOG.md
Normal file
@@ -0,0 +1,4 @@
|
||||
## [0.1.0] - 2025-12-10
|
||||
|
||||
* Add unified Nix/Python install, new ARC entrypoints, xclip-based -x clipboard flag, Nix flake app, improved CI, and extended test coverage.
|
||||
|
||||
128
Makefile
Normal file
128
Makefile
Normal file
@@ -0,0 +1,128 @@
|
||||
# Makefile for ARC
|
||||
SHELL := /usr/bin/env bash
|
||||
|
||||
APP_NAME := arc
|
||||
BIN_DIR ?= $(HOME)/.local/bin
|
||||
# Flake attribute for the ARC app
|
||||
NIX_ATTR := .#arc
|
||||
|
||||
.PHONY: help test install uninstall detect-nix \
|
||||
install-with-nix install-with-python install-nix install-python \
|
||||
uninstall-nix-wrapper uninstall-python
|
||||
|
||||
help:
|
||||
@echo "Targets:"
|
||||
@echo " make test - Run unit tests"
|
||||
@echo " make install - Install ARC using Nix if available (and usable),"
|
||||
@echo " otherwise fall back to Python."
|
||||
@echo " make uninstall - Uninstall ARC (Nix wrapper + Python package)"
|
||||
@echo " make install-nix - Force Nix installation (no fallback)"
|
||||
@echo " make install-python - Force Python installation"
|
||||
@echo " make uninstall-nix-wrapper - Remove only the arc binary/symlink from BIN_DIR"
|
||||
@echo " make uninstall-python - Remove the Python package 'arc'"
|
||||
|
||||
test:
|
||||
@python -m unittest discover -s tests -p "test_*.py" -t .
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Smart installation selector
|
||||
# -------------------------------------------------------------------
|
||||
install: detect-nix
|
||||
|
||||
detect-nix:
|
||||
@if command -v nix >/dev/null 2>&1; then \
|
||||
echo "Nix detected → trying Nix-based installation…"; \
|
||||
if $(MAKE) install-with-nix; then \
|
||||
echo "Nix installation succeeded."; \
|
||||
else \
|
||||
echo "Nix installation failed → falling back to Python…"; \
|
||||
$(MAKE) install-with-python; \
|
||||
fi; \
|
||||
else \
|
||||
echo "Nix NOT found → installing via Python…"; \
|
||||
$(MAKE) install-with-python; \
|
||||
fi
|
||||
|
||||
# Convenience aliases, if you want to force one path:
|
||||
install-nix:
|
||||
$(MAKE) install-with-nix
|
||||
|
||||
install-python:
|
||||
$(MAKE) install-with-python
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Nix installation (flakes + nix-command enabled via flags)
|
||||
# -------------------------------------------------------------------
|
||||
install-with-nix:
|
||||
@echo "Building ARC using Nix ($(NIX_ATTR))..."
|
||||
nix --extra-experimental-features 'nix-command flakes' build $(NIX_ATTR)
|
||||
@echo "Installing into $(BIN_DIR)..."
|
||||
mkdir -p "$(BIN_DIR)"
|
||||
ln -sf "$(PWD)/result/bin/$(APP_NAME)" "$(BIN_DIR)/$(APP_NAME)"
|
||||
@echo "Done (Nix). Run: $(APP_NAME) --help"
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Python installation (fallback if Nix is unavailable or unusable)
|
||||
# - In a virtualenv: install into the venv (no --user).
|
||||
# - Outside a virtualenv: install with --user.
|
||||
# -------------------------------------------------------------------
|
||||
install-with-python:
|
||||
@echo "Installing ARC via Python…"
|
||||
@if [ -n "$$VIRTUAL_ENV" ]; then \
|
||||
echo "Virtualenv detected at $$VIRTUAL_ENV → installing into venv (no --user)…"; \
|
||||
python -m pip install --upgrade .; \
|
||||
else \
|
||||
echo "No virtualenv detected → installing with --user…"; \
|
||||
python -m pip install --user --upgrade .; \
|
||||
fi
|
||||
@echo "Ensuring $(BIN_DIR) exists..."
|
||||
mkdir -p "$(BIN_DIR)"
|
||||
@echo "Checking for arc binary in $(BIN_DIR)…"
|
||||
@if [ ! -f "$(BIN_DIR)/$(APP_NAME)" ] && [ ! -L "$(BIN_DIR)/$(APP_NAME)" ]; then \
|
||||
echo "arc executable not found in $(BIN_DIR), creating wrapper…"; \
|
||||
echo '#!/usr/bin/env bash' > "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
echo 'python -m arc "$$@"' >> "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
chmod +x "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
else \
|
||||
echo "arc already present in $(BIN_DIR), not touching it."; \
|
||||
fi
|
||||
@echo "Done (Python). Make sure $(BIN_DIR) is in your PATH."
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# High-level uninstall target (calls Nix + Python uninstall helpers)
|
||||
# -------------------------------------------------------------------
|
||||
uninstall: uninstall-nix-wrapper uninstall-python
|
||||
@echo "=== Uninstall finished ==="
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Nix side: remove wrapper/binary from BIN_DIR
|
||||
# -------------------------------------------------------------------
|
||||
uninstall-nix-wrapper:
|
||||
@echo "Removing '$(APP_NAME)' from $(BIN_DIR)..."
|
||||
@if [ -L "$(BIN_DIR)/$(APP_NAME)" ] || [ -f "$(BIN_DIR)/$(APP_NAME)" ]; then \
|
||||
rm -f "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
echo "✔ Removed $(BIN_DIR)/$(APP_NAME)"; \
|
||||
else \
|
||||
echo "⚠ No '$(APP_NAME)' binary found in $(BIN_DIR)."; \
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Python side: uninstall the arc package
|
||||
# - In a virtualenv: uninstall from venv.
|
||||
# - Outside a virtualenv: uninstall from user/system environment.
|
||||
# -------------------------------------------------------------------
|
||||
uninstall-python:
|
||||
@echo "Checking for Python installation of 'arc'…"
|
||||
@if python -c "import arc" >/dev/null 2>&1; then \
|
||||
echo "Python package 'arc' detected → uninstalling…"; \
|
||||
if [ -n "$$VIRTUAL_ENV" ]; then \
|
||||
echo "Virtualenv detected ($$VIRTUAL_ENV) → uninstalling inside venv…"; \
|
||||
python -m pip uninstall -y arc; \
|
||||
else \
|
||||
echo "No virtualenv detected → uninstalling from user/system environment…"; \
|
||||
python -m pip uninstall -y arc; \
|
||||
fi; \
|
||||
echo "✔ Python uninstall complete."; \
|
||||
else \
|
||||
echo "⚠ Python module 'arc' not installed. Skipping Python uninstall."; \
|
||||
fi
|
||||
52
README.md
52
README.md
@@ -1,44 +1,60 @@
|
||||
# Directory Content Scanner
|
||||
# 🤖👩🔬 Analysis-Ready Code (ARC)
|
||||
[](https://github.com/sponsors/kevinveenbirkenbach) [](https://www.patreon.com/c/kevinveenbirkenbach) [](https://buymeacoffee.com/kevinveenbirkenbach) [](https://s.veen.world/paypaldonate)
|
||||
|
||||
The Directory Content Scanner is a Python-based utility designed to recursively scan directories, presenting both file paths and their content. It's an invaluable tool for in-depth content exploration and debugging within nested file structures.
|
||||
|
||||
Analysis-Ready Code (ARC) is a Python-based utility designed to recursively scan directories and transform source code into a format optimized for AI and computer analysis. By stripping comments, filtering specific file types, and optionally compressing content, ARC ensures that your code is clean and ready for automated processing.
|
||||
|
||||
## 🚀 Getting Started
|
||||
|
||||
### 📋 Prerequisites
|
||||
|
||||
- Python 3.x
|
||||
- [Kevin's Package Manager](https://github.com/kevinveenbirkenbach/package-manager) installed
|
||||
|
||||
### 🛠️ Installation
|
||||
### 🛠️ Installation via Package Manager
|
||||
|
||||
1. Clone the repository:
|
||||
ARC is available through the package manager under the alias `arc`. To install it, follow these steps:
|
||||
|
||||
1. Ensure that you have Kevin's Package Manager set up on your system. Follow the instructions on [the package manager repository](https://github.com/kevinveenbirkenbach/package-manager) if you haven't done so already.
|
||||
2. Install ARC using the package manager:
|
||||
```bash
|
||||
git clone https://github.com/kevinveenbirkenbach/directory-content-scanner.git
|
||||
package-manager install arc
|
||||
```
|
||||
This command will download and install ARC and make it available globally via the alias `arc`.
|
||||
|
||||
2. Navigate to the cloned directory:
|
||||
```bash
|
||||
cd directory-content-scanner
|
||||
```
|
||||
### ⚡ Running ARC
|
||||
|
||||
Once installed, you can run ARC directly from the terminal using its alias:
|
||||
|
||||
```bash
|
||||
arc /path/to/directory [options]
|
||||
```
|
||||
|
||||
## 📖 Usage
|
||||
|
||||
Run the Python script, specifying the target directory and any desired filters:
|
||||
ARC scans the specified directories and processes source code to prepare it for automated analysis. It can filter by file type, remove comments, and compress the content for efficient processing.
|
||||
|
||||
To view the help page and all available options, execute:
|
||||
|
||||
```bash
|
||||
python3 scan.py /path/to/directory [options]
|
||||
arc --help
|
||||
```
|
||||
|
||||
### Options:
|
||||
|
||||
To show the help page execute:
|
||||
|
||||
```bash
|
||||
python3 scan.py /path/to/directory --help
|
||||
```
|
||||
- **paths**: List of files or directories to scan. This is a positional argument.
|
||||
- **--file-types**: Filter by file types (e.g., `.py`, `.js`, `.c`). Only files with these extensions will be processed.
|
||||
- **--ignore-file-strings**: Ignore files and folders containing these strings. Useful for excluding specific files or directories.
|
||||
- **--ignore-hidden**: Ignore hidden directories and files (those starting with a dot).
|
||||
- **-v, --verbose**: Enable verbose mode to print additional information about skipped files.
|
||||
- **--no-comments**: Remove comments from the displayed content based on file type. This is especially useful for preparing code for automated analysis.
|
||||
- **--compress**: Compress code (for supported file types) to further optimize it for analysis.
|
||||
- **--path-contains**: Display files whose paths contain one of these strings. Useful for narrowing down the scan to specific parts of the directory structure.
|
||||
- **--content-contains**: Display files containing one of these strings in their content.
|
||||
|
||||
## ⚠️ Caution
|
||||
|
||||
Exercise caution when scanning directories with large files. The script may produce extensive output. Always assess the data volume before executing the script.
|
||||
ARC is designed to prepare code for analysis by removing extraneous elements like comments. Exercise caution when scanning directories with large files, as the script may produce extensive output. Always assess the data volume before executing the script.
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
@@ -56,4 +72,4 @@ This project is licensed under the GNU Affero General Public License v3.0. The c
|
||||
|
||||
## 🙏 Acknowledgements
|
||||
|
||||
Special thanks to OpenAI's ChatGPT model for assistance. Dive deeper into the creation process [here](https://chat.openai.com/share/71e9bc9e-d34a-4b03-bf68-4f9e994d156a) and [here](https://chat.openai.com/share/3fc66009-ff4e-425e-a4a3-fc703534885d).
|
||||
Special thanks to the community and tools like OpenAI's ChatGPT for assisting in the development process.
|
||||
|
||||
0
__init__.py
Normal file
0
__init__.py
Normal file
81
flake.nix
Normal file
81
flake.nix
Normal file
@@ -0,0 +1,81 @@
|
||||
{
|
||||
description = "Analysis-Ready Code (ARC) - recursively scan directories and prepare code for automated analysis.";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, flake-utils }:
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
};
|
||||
|
||||
python = pkgs.python3;
|
||||
|
||||
# Main ARC package built from pyproject.toml
|
||||
arcPkg = pkgs.python3Packages.buildPythonApplication {
|
||||
pname = "analysis-ready-code";
|
||||
version = "0.1.0";
|
||||
|
||||
src = ./.;
|
||||
|
||||
# We are using pyproject.toml with a PEP 517 backend.
|
||||
format = "pyproject";
|
||||
|
||||
nativeBuildInputs = with pkgs.python3Packages; [
|
||||
setuptools
|
||||
wheel
|
||||
];
|
||||
|
||||
# xclip is not a Python lib, but we can still add it as a runtime
|
||||
# dependency so that `xclip` is available in PATH when running ARC
|
||||
# inside a Nix environment.
|
||||
propagatedBuildInputs = with pkgs; [
|
||||
xclip
|
||||
];
|
||||
|
||||
meta = {
|
||||
description = "Utility that scans directories and prepares code for AI/computer analysis by stripping comments, filtering files, and optionally compressing content.";
|
||||
homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code";
|
||||
license = pkgs.lib.licenses.agpl3Plus;
|
||||
platforms = pkgs.lib.platforms.unix;
|
||||
};
|
||||
};
|
||||
in {
|
||||
# Default package for `nix build .` and `nix build .#arc`
|
||||
packages.arc = arcPkg;
|
||||
packages.default = arcPkg;
|
||||
|
||||
# App for `nix run .#arc`
|
||||
apps.arc = {
|
||||
type = "app";
|
||||
program = "${arcPkg}/bin/arc";
|
||||
};
|
||||
|
||||
# Default app for `nix run .`
|
||||
apps.default = self.apps.${system}.arc;
|
||||
|
||||
# Dev shell for local development
|
||||
devShells.default = pkgs.mkShell {
|
||||
name = "arc-dev-shell";
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
python3
|
||||
python3Packages.pip
|
||||
python3Packages.setuptools
|
||||
python3Packages.wheel
|
||||
xclip
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
echo "ARC dev shell ready. Typical usage:"
|
||||
echo " make test"
|
||||
echo " arc . -x"
|
||||
'';
|
||||
};
|
||||
}
|
||||
);
|
||||
}
|
||||
52
pyproject.toml
Normal file
52
pyproject.toml
Normal file
@@ -0,0 +1,52 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=61", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "analysis-ready-code"
|
||||
version = "0.1.0"
|
||||
description = "A utility that recursively scans directories and transforms source code into an analysis-ready format, removing comments and optionally compressing content."
|
||||
readme = "README.md"
|
||||
license = { text = "AGPL-3.0" }
|
||||
authors = [
|
||||
{ name = "Kevin Veen-Birkenbach", email = "kevin@veen.world" }
|
||||
]
|
||||
|
||||
requires-python = ">=3.8"
|
||||
|
||||
dependencies = [
|
||||
# No dependencies needed for ARC
|
||||
]
|
||||
|
||||
# Optional: define console script (if you ever want pip-installable CLI)
|
||||
# ARC normally uses a symlink, but this keeps it compatible with pip.
|
||||
[project.scripts]
|
||||
arc = "arc.__main__:main"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
|
||||
Source = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
|
||||
Issues = "https://github.com/kevinveenbirkenbach/analysis-ready-code/issues"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
include = ["arc*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
# if you need non-Python files inside packages
|
||||
arc = ["py.typed"]
|
||||
|
||||
[tool.coverage.run]
|
||||
branch = true
|
||||
source = ["src/arc"]
|
||||
|
||||
[tool.coverage.report]
|
||||
show_missing = true
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.10"
|
||||
warn_unused_configs = true
|
||||
ignore_missing_imports = true
|
||||
173
scan.py
173
scan.py
@@ -1,173 +0,0 @@
|
||||
import os
|
||||
import argparse
|
||||
import re
|
||||
import zlib
|
||||
|
||||
class CodeProcessor:
|
||||
PYTHON = ".py"
|
||||
JS = ".js"
|
||||
C = ".c"
|
||||
CPP = ".cpp"
|
||||
H = ".h"
|
||||
|
||||
@staticmethod
|
||||
def remove_comments(content, file_type):
|
||||
"""Remove comments based on file type."""
|
||||
comment_patterns = {
|
||||
CodeProcessor.PYTHON: [
|
||||
(r'\s*#.*', '',0),
|
||||
(r'\"\"\"(.*?)\"\"\"', '', re.DOTALL),
|
||||
(r"\'\'\'(.*?)\'\'\'", '', re.DOTALL)
|
||||
],
|
||||
CodeProcessor.JS: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.C: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.CPP: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.H: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
]
|
||||
}
|
||||
|
||||
patterns = comment_patterns.get(file_type, [])
|
||||
for pattern, repl, flags in patterns:
|
||||
content = re.sub(pattern, repl, content, flags=flags)
|
||||
return content.strip()
|
||||
|
||||
@staticmethod
|
||||
def compress(content):
|
||||
"""Compress code using zlib."""
|
||||
return zlib.compress(content.encode())
|
||||
|
||||
|
||||
class DirectoryHandler:
|
||||
|
||||
@staticmethod
|
||||
def filter_directories(dirs, ignore_file_strings, ignore_hidden):
|
||||
"""Filter out directories based on ignore criteria."""
|
||||
if ignore_hidden:
|
||||
dirs[:] = [d for d in dirs if not d.startswith('.')]
|
||||
dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
|
||||
|
||||
@staticmethod
|
||||
def path_or_content_contains(file_path, path_contains, content_contains):
|
||||
# Check if the file name contains specific strings (whitelist)
|
||||
if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
|
||||
return True
|
||||
|
||||
# Check file content for specific strings (if specified)
|
||||
if content_contains:
|
||||
try:
|
||||
with open(file_path, 'r') as f:
|
||||
content = f.read()
|
||||
# Return True if any of the content_contains strings are found in the content
|
||||
if any(whitelist_str in content for whitelist_str in content_contains):
|
||||
return True
|
||||
except UnicodeDecodeError:
|
||||
# Return False if there's a Unicode decode error (file can't be read)
|
||||
return False
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains):
|
||||
"""
|
||||
Determine if a file should be printed based on various criteria.
|
||||
|
||||
Args:
|
||||
file_path (str): The path of the file to be checked.
|
||||
file_types (list): List of allowed file extensions.
|
||||
ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored.
|
||||
ignore_hidden (bool): If True, hidden files (starting with '.') are ignored.
|
||||
path_contains (list): List of strings; the file is processed only if its path contains one of these strings.
|
||||
content_contains (list): List of strings; the file is processed only if its content contains one of these strings.
|
||||
|
||||
Returns:
|
||||
bool: True if the file should be printed, False otherwise.
|
||||
"""
|
||||
|
||||
# Check for hidden files if ignore_hidden is enabled
|
||||
if ignore_hidden and os.path.basename(file_path).startswith('.'):
|
||||
return False
|
||||
|
||||
# Check if the file type is in the allowed list (if specified)
|
||||
if file_types and not any(file_path.endswith(file_type) for file_type in file_types):
|
||||
return False
|
||||
|
||||
# Check if the file should be ignored based on the presence of specific strings in its path
|
||||
if any(ignore_str in file_path for ignore_str in ignore_file_strings):
|
||||
return False
|
||||
|
||||
return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
|
||||
|
||||
@staticmethod
|
||||
def print_file_content(file_path, no_comments, compress):
|
||||
"""Print the content of a file."""
|
||||
try:
|
||||
with open(file_path, 'r') as f:
|
||||
content = f.read()
|
||||
if no_comments:
|
||||
file_type = os.path.splitext(file_path)[1]
|
||||
content = CodeProcessor.remove_comments(content, file_type)
|
||||
print(f"<< START: {file_path} >>")
|
||||
if compress:
|
||||
compressed_content = CodeProcessor.compress(content)
|
||||
print(f"COMPRESSED CODE: ")
|
||||
print(compressed_content)
|
||||
else:
|
||||
print(content)
|
||||
print("<< END >>\n")
|
||||
except UnicodeDecodeError:
|
||||
print(f"Warning: Could not read file due to encoding issues: {file_path}")
|
||||
exit(1)
|
||||
|
||||
@staticmethod
|
||||
def handle_directory(directory, **kwargs):
|
||||
"""Handle scanning and printing for directories."""
|
||||
for root, dirs, files in os.walk(directory):
|
||||
DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
|
||||
for file in files:
|
||||
if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['path_contains'], kwargs['content_contains']):
|
||||
DirectoryHandler.print_file_content(os.path.join(root, file), kwargs['no_comments'], kwargs['compress'])
|
||||
elif kwargs['verbose']:
|
||||
print(f"Skipped file: {file}")
|
||||
|
||||
@staticmethod
|
||||
def handle_file(file_path, **kwargs):
|
||||
"""Handle scanning and printing for individual files."""
|
||||
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.")
|
||||
parser.add_argument("paths", nargs='+', help="List of files or directories to scan.")
|
||||
parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).")
|
||||
parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.")
|
||||
parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.")
|
||||
parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.")
|
||||
parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.")
|
||||
parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).")
|
||||
parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.")
|
||||
parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
for path in args.paths:
|
||||
if os.path.isdir(path):
|
||||
DirectoryHandler.handle_directory(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, path_contains=args.path_contains, content_contains=args.content_contains)
|
||||
elif os.path.isfile(path):
|
||||
if DirectoryHandler.should_print_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, path_contains=args.path_contains, content_contains=args.content_contains):
|
||||
DirectoryHandler.handle_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress)
|
||||
else:
|
||||
print(f"Error: {path} is neither a valid file nor a directory.")
|
||||
exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
111
src/arc/__init__.py
Normal file
111
src/arc/__init__.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .cli import parse_arguments
|
||||
from .directory_handler import DirectoryHandler
|
||||
from .tee import Tee
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
def copy_to_clipboard(text: str, quiet: bool = False):
|
||||
if shutil.which("xclip"):
|
||||
subprocess.run(["xclip", "-selection", "clipboard"], input=text, text=True)
|
||||
return
|
||||
|
||||
if shutil.which("wl-copy"):
|
||||
subprocess.run(["wl-copy"], input=text, text=True)
|
||||
return
|
||||
|
||||
if shutil.which("pbcopy"):
|
||||
subprocess.run(["pbcopy"], input=text, text=True)
|
||||
return
|
||||
|
||||
if not quiet:
|
||||
print("Warning: No clipboard tool found (xclip, wl-copy, pbcopy)", file=sys.stderr)
|
||||
|
||||
def main() -> None:
|
||||
args = parse_arguments()
|
||||
|
||||
# QUIET MODE:
|
||||
# - no terminal output
|
||||
# - but clipboard buffer still active
|
||||
#
|
||||
# Normal:
|
||||
# - output goes to stdout
|
||||
# - optionally tee into buffer
|
||||
|
||||
buffer = None
|
||||
|
||||
if args.clipboard:
|
||||
buffer = io.StringIO()
|
||||
|
||||
if args.quiet:
|
||||
# quiet + clipboard → only buffer, no stdout
|
||||
output_stream = buffer
|
||||
else:
|
||||
# normal + clipboard → stdout + buffer
|
||||
output_stream = Tee(sys.stdout, buffer)
|
||||
else:
|
||||
# no clipboard
|
||||
if args.quiet:
|
||||
# quiet without clipboard → suppress ALL output
|
||||
class NullWriter:
|
||||
def write(self, *_): pass
|
||||
def flush(self): pass
|
||||
output_stream = NullWriter()
|
||||
else:
|
||||
output_stream = sys.stdout
|
||||
|
||||
# Process all paths
|
||||
for path in args.paths:
|
||||
if os.path.isdir(path):
|
||||
DirectoryHandler.handle_directory(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
verbose=args.verbose and not args.quiet,
|
||||
no_comments=args.no_comments,
|
||||
compress=args.compress,
|
||||
path_contains=args.path_contains,
|
||||
content_contains=args.content_contains,
|
||||
no_gitignore=args.no_gitignore,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
output_stream=output_stream,
|
||||
)
|
||||
elif os.path.isfile(path):
|
||||
if DirectoryHandler.should_print_file(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
path_contains=args.path_contains,
|
||||
content_contains=args.content_contains,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
):
|
||||
DirectoryHandler.handle_file(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
no_comments=args.no_comments,
|
||||
compress=args.compress,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
output_stream=output_stream,
|
||||
)
|
||||
else:
|
||||
if not args.quiet:
|
||||
print(f"Error: {path} is neither file nor directory.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Copy to clipboard if enabled
|
||||
if buffer is not None:
|
||||
text = buffer.getvalue()
|
||||
try:
|
||||
subprocess.run(["xclip", "-selection", "clipboard"], input=text, text=True, check=False)
|
||||
except FileNotFoundError:
|
||||
if not args.quiet:
|
||||
print("Warning: xclip not found.", file=sys.stderr)
|
||||
18
src/arc/__main__.py
Normal file
18
src/arc/__main__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# src/arc/__main__.py
|
||||
|
||||
from . import main as _arc_main
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""
|
||||
Entry point for the `arc` console script and `python -m arc`.
|
||||
|
||||
This keeps all CLI logic in `arc.__init__.py` (main()) and simply
|
||||
delegates to it, so both setuptools/entry_points and Nix wrappers
|
||||
can reliably import `arc.__main__:main`.
|
||||
"""
|
||||
_arc_main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
120
src/arc/cli.py
Normal file
120
src/arc/cli.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_arguments(argv=None):
    """
    Parse command-line arguments for the arc scanner.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default,
            which is what every existing caller passes implicitly),
            ``sys.argv[1:]`` is used — so behavior is unchanged. Supplying an
            explicit list makes the parser testable without patching
            ``sys.argv``.

    Returns:
        ``argparse.Namespace`` with all options, plus a derived
        ``ignore_hidden`` attribute (the inverse of ``show_hidden``).
    """
    parser = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )

    # Positional: paths
    parser.add_argument(
        "paths",
        nargs="+",
        help="List of files or directories to scan.",
    )

    # File type filter
    parser.add_argument(
        "-t",
        "--file-types",
        nargs="+",
        default=[],
        help="Filter by file types (e.g., .py, .js, .c).",
    )

    # Ignore file/path strings (was previously -x, now -I)
    parser.add_argument(
        "-I",
        "--ignore-file-strings",
        nargs="+",
        default=[],
        help="Ignore files and folders containing these strings.",
    )

    # Clipboard: alias -x
    parser.add_argument(
        "-x",
        "--clipboard",
        action="store_true",
        help="Copy the output to the X clipboard via xclip (alias: -x).",
    )

    # Quiet mode
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress terminal output (useful with --clipboard).",
    )

    # Show hidden files
    parser.add_argument(
        "-S",
        "--show-hidden",
        action="store_true",
        dest="show_hidden",
        default=False,
        help="Include hidden directories and files.",
    )

    # Verbose
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose mode.",
    )

    # Strip comments
    parser.add_argument(
        "-N",
        "--no-comments",
        action="store_true",
        help="Remove comments from files before printing.",
    )

    # Compress
    parser.add_argument(
        "-z",
        "--compress",
        action="store_true",
        help="Compress content instead of printing plain text.",
    )

    # Path filter
    parser.add_argument(
        "-p",
        "--path-contains",
        nargs="+",
        default=[],
        help="Only include files whose *path* contains one of these strings.",
    )

    # Content filter
    parser.add_argument(
        "-C",
        "--content-contains",
        nargs="+",
        default=[],
        help="Only include files whose *content* contains one of these strings.",
    )

    # Ignore .gitignore
    parser.add_argument(
        "-G",
        "--no-gitignore",
        action="store_true",
        help="Do not respect .gitignore files during scan.",
    )

    # Scan binary files
    parser.add_argument(
        "-b",
        "--scan-binary-files",
        action="store_true",
        help="Also scan binary files (ignored by default).",
    )

    args = parser.parse_args(argv)
    # Derived convenience flag: hidden entries are ignored unless -S was given.
    args.ignore_hidden = not args.show_hidden
    return args
|
||||
285
src/arc/code_processor.py
Normal file
285
src/arc/code_processor.py
Normal file
@@ -0,0 +1,285 @@
|
||||
import io
|
||||
import re
|
||||
import tokenize
|
||||
import zlib
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Tuple, Pattern, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LanguageSpec:
    """Holds compiled comment patterns for a language."""
    # Tuple of compiled regexes; each is applied with .sub("") to delete
    # comment spans (see CodeProcessor.remove_comments). Immutable so specs
    # can be shared safely as class-level constants.
    patterns: Tuple[Pattern, ...]
|
||||
|
||||
|
||||
class CodeProcessor:
    """
    Utilities to strip comments and (de)compress code.
    - Python: tokenize-based (safe) with precise docstring removal.
    - C/CPP/JS: state-machine comment stripper that respects string/char literals.
    - Shell/YAML: remove full-line hash comments only.
    - Jinja: remove {# ... #} blocks.
    """
    # File extensions (normalized to lowercase)
    EXT_TO_LANG: Dict[str, str] = {
        ".py": "python",
        ".js": "cstyle",
        ".c": "cstyle",
        ".cpp": "cstyle",
        ".h": "cstyle",
        ".sh": "hash",
        ".bash": "hash",
        ".yml": "hash",
        ".yaml": "hash",
        ".j2": "jinja",
        ".jinja": "jinja",
        ".jinja2": "jinja",
        ".tpl": "jinja",
    }

    # Regex-based specs for hash and jinja
    _HASH = LanguageSpec(patterns=(
        re.compile(r"^\s*#.*$", flags=re.MULTILINE),  # only full-line comments
    ))
    _JINJA = LanguageSpec(patterns=(
        re.compile(r"\{#.*?#\}", flags=re.DOTALL),  # {# ... #} across lines
    ))

    LANG_SPECS: Dict[str, LanguageSpec] = {
        "hash": _HASH,
        "jinja": _JINJA,
        # "cstyle" handled by a state machine, not regex
        # "python" handled by tokenize, not regex
    }

    @classmethod
    def _lang_from_ext(cls, file_type: str) -> Optional[str]:
        """Map an extension like '.py' to an internal language key."""
        ext = file_type.lower().strip()
        return cls.EXT_TO_LANG.get(ext)

    # -----------------------------
    # Python stripping via tokenize
    # -----------------------------
    @staticmethod
    def _strip_python_comments_tokenize(content: str) -> str:
        """
        Remove comments and docstrings safely using tokenize.
        Rules:
        - Drop COMMENT tokens.
        - Drop module docstring only if it's the very first statement at col 0.
        - Drop the first STRING statement in a suite immediately after 'def'/'class'
          header (':' NEWLINE INDENT).

        NOTE(review): the ':' detection below matches ANY colon-introduced
        suite (if/for/while/try too, not just def/class), so a bare string
        expression that opens such a suite is also dropped — confirm this is
        acceptable before relying on it.
        """
        tokens = tokenize.generate_tokens(io.StringIO(content).readline)
        out_tokens = []

        indent_level = 0
        module_docstring_candidate = True  # until we see first real stmt at module level
        expect_suite_docstring = False  # just entered a suite after def/class
        last_was_colon = False
        seen_nontrivial_in_line = False  # guards module docstring (start of logical line)

        for tok_type, tok_str, start, end, line in tokens:
            # Track indentation
            if tok_type == tokenize.INDENT:
                indent_level += 1
            elif tok_type == tokenize.DEDENT:
                indent_level = max(0, indent_level - 1)

            # New logical line: reset guard
            if tok_type in (tokenize.NEWLINE, tokenize.NL):
                seen_nontrivial_in_line = False
                out_tokens.append((tok_type, tok_str))
                continue

            # Comments are dropped
            if tok_type == tokenize.COMMENT:
                continue

            # Detect ':' ending a def/class header
            if tok_type == tokenize.OP and tok_str == ":":
                last_was_colon = True
                out_tokens.append((tok_type, tok_str))
                continue

            # After ':' + NEWLINE + INDENT comes a suite start -> allow docstring removal
            if tok_type == tokenize.INDENT and last_was_colon:
                expect_suite_docstring = True
                last_was_colon = False
                out_tokens.append((tok_type, tok_str))
                continue
            # Any non-INDENT token clears the last_was_colon flag
            if tok_type != tokenize.NL:
                last_was_colon = False

            # STRING handling
            if tok_type == tokenize.STRING:
                at_line_start = (start[1] == 0) and not seen_nontrivial_in_line
                if indent_level == 0:
                    # Potential module docstring only if first statement at col 0
                    if module_docstring_candidate and at_line_start:
                        module_docstring_candidate = False
                        # drop it
                        continue
                    # Any other top-level string is normal
                    module_docstring_candidate = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue
                else:
                    # In a suite: if it's the first statement after def/class, drop regardless of column
                    if expect_suite_docstring:
                        expect_suite_docstring = False
                        # drop it
                        continue
                    expect_suite_docstring = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue

            # Any other significant token disables module-docstring candidacy
            if tok_type not in (tokenize.INDENT, tokenize.DEDENT):
                if indent_level == 0:
                    module_docstring_candidate = False
                # Mark we've seen something on this line
                if tok_type not in (tokenize.NL, tokenize.NEWLINE):
                    seen_nontrivial_in_line = True

            out_tokens.append((tok_type, tok_str))

        return tokenize.untokenize(out_tokens)

    # ---------------------------------
    # C-style stripping via state machine
    # ---------------------------------
    @staticmethod
    def _strip_cstyle_comments(content: str) -> str:
        """
        Remove // line comments and /* ... */ block comments while preserving
        string ("...") and char ('...') literals and their escape sequences.

        NOTE(review): only double- and single-quoted literals are modeled;
        JavaScript template literals (`...`) and regex literals are not, so a
        '//' inside those would be treated as a comment — confirm inputs.
        """
        i = 0
        n = len(content)
        out = []
        in_line_comment = False
        in_block_comment = False
        in_string = False
        in_char = False
        escape = False

        while i < n:
            c = content[i]
            nxt = content[i + 1] if i + 1 < n else ""

            # If inside line comment: consume until newline
            if in_line_comment:
                if c == "\n":
                    in_line_comment = False
                    out.append(c)
                i += 1
                continue

            # If inside block comment: consume until '*/'
            if in_block_comment:
                if c == "*" and nxt == "/":
                    in_block_comment = False
                    i += 2
                else:
                    i += 1
                continue

            # If inside string literal
            if in_string:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == '"':
                        in_string = False
                i += 1
                continue

            # If inside char literal
            if in_char:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == "'":
                        in_char = False
                i += 1
                continue

            # Not in any special state:
            # Check for start of comments
            if c == "/" and nxt == "/":
                in_line_comment = True
                i += 2
                continue
            if c == "/" and nxt == "*":
                in_block_comment = True
                i += 2
                continue

            # Check for start of string/char literals
            if c == '"':
                in_string = True
                out.append(c)
                i += 1
                continue
            if c == "'":
                in_char = True
                out.append(c)
                i += 1
                continue

            # Normal character
            out.append(c)
            i += 1

        return "".join(out)

    # -------------------
    # Public API
    # -------------------
    @classmethod
    def remove_comments(cls, content: str, file_type: str) -> str:
        """
        Remove comments based on file type/extension.
        - Python: tokenize-based
        - C/CPP/JS: state-machine
        - Hash (sh/yaml): regex full-line
        - Jinja: regex {# ... #}

        Unknown extensions are returned unchanged apart from str.strip().
        """
        lang = cls._lang_from_ext(file_type)
        if lang is None:
            return content.strip()

        if lang == "python":
            return cls._strip_python_comments_tokenize(content).strip()
        if lang == "cstyle":
            return cls._strip_cstyle_comments(content).strip()

        spec = cls.LANG_SPECS.get(lang)
        if not spec:
            return content.strip()

        cleaned = content
        for pat in spec.patterns:
            cleaned = pat.sub("", cleaned)
        return cleaned.strip()

    @staticmethod
    def compress(content: str, level: int = 9) -> bytes:
        """Compress code using zlib (level 9 = best compression). Returns bytes."""
        return zlib.compress(content.encode("utf-8"), level)

    @staticmethod
    def decompress(blob: bytes) -> str:
        """Decompress zlib-compressed code back to text (UTF-8)."""
        return zlib.decompress(blob).decode("utf-8")
|
||||
228
src/arc/directory_handler.py
Normal file
228
src/arc/directory_handler.py
Normal file
@@ -0,0 +1,228 @@
|
||||
import fnmatch
|
||||
import os
|
||||
import sys
|
||||
|
||||
from .code_processor import CodeProcessor
|
||||
|
||||
|
||||
class DirectoryHandler:
    """
    Static helpers for scanning directories, filtering files
    (.gitignore / hidden / type / content filters) and printing file contents.
    """

    @staticmethod
    def load_gitignore_patterns(root_path):
        """
        Recursively scans for .gitignore files in the given root_path.
        Returns a list of tuples (base_dir, patterns) where:
        - base_dir: the directory in which the .gitignore was found.
        - patterns: a list of pattern strings from that .gitignore.
        """
        gitignore_data = []
        for dirpath, _, filenames in os.walk(root_path):
            if ".gitignore" in filenames:
                gitignore_path = os.path.join(dirpath, ".gitignore")
                try:
                    with open(gitignore_path, "r") as f:
                        lines = f.readlines()
                    # Filter out empty lines and comments.
                    patterns = [
                        line.strip()
                        for line in lines
                        if line.strip() and not line.strip().startswith("#")
                    ]
                    # Save the base directory and its patterns.
                    gitignore_data.append((dirpath, patterns))
                except Exception as e:  # pragma: no cover - defensive
                    print(f"Error reading {gitignore_path}: {e}", file=sys.stderr)
        return gitignore_data

    @staticmethod
    def is_binary_file(file_path):
        """
        Reads the first 1024 bytes of file_path and heuristically determines
        if the file appears to be binary. This method returns True if a null byte
        is found or if more than 30% of the bytes in the sample are non-text.
        """
        try:
            with open(file_path, "rb") as f:
                chunk = f.read(1024)
            # If there's a null byte, it's almost certainly binary.
            if b"\x00" in chunk:
                return True

            # Define a set of text characters (ASCII printable + common control characters)
            text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F)))
            # Count non-text characters in the chunk.
            non_text = sum(byte not in text_chars for byte in chunk)
            if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
                return True
        except Exception:  # pragma: no cover - defensive
            # If the file cannot be read in binary mode, assume it's not binary.
            return False
        return False

    @staticmethod
    def is_gitignored(file_path, gitignore_data):
        """
        Checks if file_path should be ignored according to the .gitignore entries.
        For each tuple (base_dir, patterns), if file_path is under base_dir,
        computes the relative path and matches it against the patterns.

        NOTE(review): this is a simplified fnmatch-based approximation of
        gitignore semantics — negation patterns ('!...') and anchored paths
        are not handled; verify that is acceptable for your repos.
        """
        for base_dir, patterns in gitignore_data:
            try:
                rel_path = os.path.relpath(file_path, base_dir)
            except ValueError:
                # file_path and base_dir are on different drives.
                continue
            # If the file is not under the current .gitignore base_dir, skip it.
            if rel_path.startswith(".."):
                continue
            # Check all patterns.
            for pattern in patterns:
                if pattern.endswith("/"):
                    # Directory pattern: check if any folder in the relative path matches.
                    parts = rel_path.split(os.sep)
                    for part in parts[:-1]:
                        if fnmatch.fnmatch(part + "/", pattern):
                            return True
                else:
                    # Check if the relative path matches the pattern.
                    if fnmatch.fnmatch(rel_path, pattern):
                        return True
        return False

    @staticmethod
    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
        """
        Filter out directories based on ignore_file_strings and hidden status.
        Mutates `dirs` in place (so os.walk prunes the skipped subtrees).
        """
        if ignore_hidden:
            dirs[:] = [d for d in dirs if not d.startswith(".")]
        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]

    @staticmethod
    def path_or_content_contains(file_path, path_contains, content_contains):
        """
        Check if the file path contains specific strings or if the file content does.
        Files that cannot be decoded as text are treated as non-matching.
        """
        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
            return True

        if content_contains:
            try:
                with open(file_path, "r") as f:
                    content = f.read()
                if any(whitelist_str in content for whitelist_str in content_contains):
                    return True
            except UnicodeDecodeError:
                return False
        return False

    @staticmethod
    def should_print_file(
        file_path,
        file_types,
        ignore_file_strings,
        ignore_hidden,
        path_contains,
        content_contains,
        scan_binary_files=False,
    ):
        """
        Determines if a file should be printed based on various criteria.
        By default, binary files are skipped unless scan_binary_files is True.
        """
        # Check binary file status using our heuristic.
        if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
            return False

        if ignore_hidden and os.path.basename(file_path).startswith("."):
            return False

        if file_types and not any(file_path.endswith(ft) for ft in file_types):
            return False

        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
            return False

        if path_contains or content_contains:
            return DirectoryHandler.path_or_content_contains(
                file_path, path_contains, content_contains
            )
        return True

    @staticmethod
    def print_file_content(file_path, no_comments, compress, output_stream):
        """
        Prints the content of a file, optionally removing comments or compressing the output.

        NOTE(review): a UnicodeDecodeError terminates the whole process via
        sys.exit(1), not just this file — confirm that hard-exit is intended.
        """
        try:
            with open(file_path, "r") as f:
                content = f.read()
            if no_comments:
                file_type = os.path.splitext(file_path)[1]
                content = CodeProcessor.remove_comments(content, file_type)
            print(f"<< START: {file_path} >>", file=output_stream)
            if compress:
                compressed_content = CodeProcessor.compress(content)
                print("COMPRESSED CODE:", file=output_stream)
                print(compressed_content, file=output_stream)
            else:
                print(content, file=output_stream)
            print("<< END >>\n", file=output_stream)
        except UnicodeDecodeError:
            print(
                f"Warning: Could not read file due to encoding issues: {file_path}",
                file=sys.stderr,
            )
            sys.exit(1)

    @staticmethod
    def handle_directory(directory, **kwargs):
        """
        Scans the directory and processes each file while respecting .gitignore rules.

        Required kwargs: ignore_file_strings, ignore_hidden, file_types,
        path_contains, content_contains, no_comments, compress.
        Optional kwargs: no_gitignore, verbose, scan_binary_files,
        output_stream (defaults to sys.stdout).
        """
        gitignore_data = []
        if not kwargs.get("no_gitignore"):
            gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)

        output_stream = kwargs.get("output_stream", sys.stdout)

        for root, dirs, files in os.walk(directory):
            DirectoryHandler.filter_directories(
                dirs, kwargs["ignore_file_strings"], kwargs["ignore_hidden"]
            )
            for file in files:
                file_path = os.path.join(root, file)
                if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
                    if kwargs.get("verbose"):
                        print(f"Skipped (gitignored): {file_path}", file=output_stream)
                    continue

                if DirectoryHandler.should_print_file(
                    file_path,
                    kwargs["file_types"],
                    kwargs["ignore_file_strings"],
                    kwargs["ignore_hidden"],
                    kwargs["path_contains"],
                    kwargs["content_contains"],
                    scan_binary_files=kwargs.get("scan_binary_files", False),
                ):
                    DirectoryHandler.print_file_content(
                        file_path,
                        kwargs["no_comments"],
                        kwargs["compress"],
                        output_stream=output_stream,
                    )
                elif kwargs.get("verbose"):
                    print(f"Skipped file: {file_path}", file=output_stream)

    @staticmethod
    def handle_file(file_path, **kwargs):
        """
        Processes an individual file (no filtering — the caller has already
        decided this file should be printed).
        """
        output_stream = kwargs.get("output_stream", sys.stdout)
        DirectoryHandler.print_file_content(
            file_path,
            kwargs["no_comments"],
            kwargs["compress"],
            output_stream=output_stream,
        )
|
||||
23
src/arc/tee.py
Normal file
23
src/arc/tee.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from typing import TextIO
|
||||
|
||||
|
||||
class Tee:
    """
    A minimal write-only stream fan-out.

    Every ``write``/``flush`` call is forwarded to each wrapped stream, so a
    single ``print(..., file=tee)`` can feed stdout and a capture buffer at
    the same time.
    """

    def __init__(self, *streams: TextIO) -> None:
        # Streams are kept exactly as given; this class neither owns nor
        # closes them.
        self.streams = streams

    def write(self, data: str) -> None:
        """Forward *data* to every underlying stream."""
        for target in self.streams:
            target.write(data)

    def flush(self) -> None:
        """Flush each underlying stream that exposes a ``flush`` attribute."""
        for target in self.streams:
            if hasattr(target, "flush"):
                target.flush()
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
199
tests/unit/test_arc.py
Normal file
199
tests/unit/test_arc.py
Normal file
@@ -0,0 +1,199 @@
|
||||
# tests/unit/test_arc.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
|
||||
if SRC_ROOT not in sys.path:
|
||||
sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.code_processor import CodeProcessor
|
||||
from arc.directory_handler import DirectoryHandler
|
||||
|
||||
|
||||
class TestCodeProcessor(unittest.TestCase):
    """Unit tests for CodeProcessor comment stripping and (de)compression."""

    def test_python_comment_and_docstring_stripping(self):
        # Module/function docstrings and comments must vanish; ordinary
        # strings (even triple-quoted assignments) must survive.
        src = '''\
"""module docstring should go away"""

# a comment
x = 1 # inline comment
y = "string with # not a comment"

def f():
    """function docstring should go away"""
    s = """triple quoted but not a docstring"""
    return x
'''
        out = CodeProcessor.remove_comments(src, ".py")
        self.assertNotIn("module docstring", out)
        self.assertNotIn("function docstring", out)
        self.assertNotIn("# a comment", out)
        # tolerate whitespace normalization from tokenize.untokenize
        self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
        self.assertIn("triple quoted but not a docstring", out)

    def test_cstyle_comment_stripping(self):
        # // and /* */ comments are removed; comment markers inside string
        # literals must be left untouched.
        src = '''\
// line comment
int main() {
    /* block
 comment */
    int x = 42; // end comment
    const char* s = "/* not a comment here */";
    return x;
}
'''
        out = CodeProcessor.remove_comments(src, ".c")
        # line comment and block comment gone
        self.assertNotIn("// line comment", out)
        self.assertNotIn("block\n comment", out)
        # string content with /* */ inside should remain
        self.assertIn('const char* s = "/* not a comment here */";', out)

    def test_hash_comment_stripping(self):
        # Hash style only strips FULL comment lines, never trailing fragments.
        src = """\
# top comment
KEY=value # trailing comment should be kept by default
plain: value
"""
        out = CodeProcessor.remove_comments(src, ".yml")
        # Our regex removes full lines starting with optional spaces then '#'
        self.assertNotIn("top comment", out)
        # It does not remove trailing fragments after content for hash style
        self.assertIn("KEY=value", out)
        self.assertIn("plain: value", out)

    def test_jinja_comment_stripping(self):
        # {# ... #} blocks (single- and multi-line) are removed; expressions stay.
        src = """\
{# top jinja comment #}
Hello {{ name }}!
{#
multi-line
jinja comment
#}
Body text and {{ value }}.
"""
        out = CodeProcessor.remove_comments(src, ".j2")
        self.assertNotIn("top jinja comment", out)
        self.assertNotIn("multi-line", out)
        # Regular content and expressions remain
        self.assertIn("Hello {{ name }}!", out)
        self.assertIn("Body text and {{ value }}.", out)

    def test_unknown_extension_returns_stripped(self):
        # Unrecognized extensions: content is only stripped, never edited.
        src = " x = 1 # not removed for unknown "
        out = CodeProcessor.remove_comments(src, ".unknown")
        self.assertEqual(out, "x = 1 # not removed for unknown")

    def test_compress_decompress_roundtrip(self):
        # compress() -> bytes; decompress() restores the exact original text.
        src = "def x():\n return 42\n"
        blob = CodeProcessor.compress(src)
        self.assertIsInstance(blob, (bytes, bytearray))
        back = CodeProcessor.decompress(blob)
        self.assertEqual(src, back)
|
||||
|
||||
|
||||
class TestDirectoryHandler(unittest.TestCase):
    """Unit tests for DirectoryHandler filtering and printing helpers."""

    def test_is_binary_file(self):
        # A file starting with null bytes must be classified as binary.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b"\x00\x01\x02BINARY")
            path = tf.name
        try:
            self.assertTrue(DirectoryHandler.is_binary_file(path))
        finally:
            os.remove(path)

    def test_gitignore_matching(self):
        # Directory patterns ('build/') and glob patterns ('*.log') are
        # honored relative to the .gitignore's own directory.
        with tempfile.TemporaryDirectory() as root:
            # Create .gitignore ignoring build/ and *.log
            gi_dir = os.path.join(root, "a")
            os.makedirs(gi_dir, exist_ok=True)
            with open(os.path.join(gi_dir, ".gitignore"), "w") as f:
                f.write("build/\n*.log\n")

            # Files
            os.makedirs(os.path.join(gi_dir, "build"), exist_ok=True)
            ignored_dir_file = os.path.join(gi_dir, "build", "x.txt")
            with open(ignored_dir_file, "w") as f:
                f.write("ignored")
            ignored_log = os.path.join(gi_dir, "debug.log")
            with open(ignored_log, "w") as f:
                f.write("ignored log")
            kept_file = os.path.join(gi_dir, "src.txt")
            with open(kept_file, "w") as f:
                f.write("keep me")

            gi_data = DirectoryHandler.load_gitignore_patterns(root)

            self.assertTrue(DirectoryHandler.is_gitignored(ignored_dir_file, gi_data))
            self.assertTrue(DirectoryHandler.is_gitignored(ignored_log, gi_data))
            self.assertFalse(DirectoryHandler.is_gitignored(kept_file, gi_data))

    def test_should_print_file_filters_hidden_and_types(self):
        # Hidden files are rejected when ignore_hidden=True; matching
        # extensions are accepted.
        with tempfile.TemporaryDirectory() as root:
            hidden = os.path.join(root, ".hidden.txt")
            plain = os.path.join(root, "keep.py")
            with open(hidden, "w") as f:
                f.write("data")
            with open(plain, "w") as f:
                f.write("print('hi')")

            self.assertFalse(
                DirectoryHandler.should_print_file(
                    hidden,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )
            self.assertTrue(
                DirectoryHandler.should_print_file(
                    plain,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )

    def test_print_file_content_no_comments_and_compress(self):
        # no_comments strips the comment line; compress emits the
        # COMPRESSED CODE marker instead of plain text.
        with tempfile.TemporaryDirectory() as root:
            p = os.path.join(root, "t.py")
            with open(p, "w") as f:
                f.write("# comment only\nx=1\n")
            buf = io.StringIO()
            DirectoryHandler.print_file_content(
                p,
                no_comments=True,
                compress=False,
                output_stream=buf,
            )
            out = buf.getvalue()
            self.assertIn("<< START:", out)
            # be whitespace-tolerant (tokenize may insert spaces)
            self.assertRegex(out, r"x\s*=\s*1")
            self.assertNotIn("# comment only", out)

            buf = io.StringIO()
            DirectoryHandler.print_file_content(
                p,
                no_comments=True,
                compress=True,
                output_stream=buf,
            )
            out = buf.getvalue()
            self.assertIn("COMPRESSED CODE:", out)
            self.assertIn("<< END >>", out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
60
tests/unit/test_cli.py
Normal file
60
tests/unit/test_cli.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# tests/unit/test_cli.py
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
|
||||
if SRC_ROOT not in sys.path:
|
||||
sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.cli import parse_arguments # noqa: E402
|
||||
|
||||
|
||||
class TestCliParseArguments(unittest.TestCase):
    """Unit tests for arc.cli.parse_arguments (sys.argv is patched per test)."""

    def test_basic_paths_and_defaults(self):
        # Only positional paths given: every option keeps its default.
        with patch.object(sys, "argv", ["arc", "foo", "bar"]):
            args = parse_arguments()

        self.assertEqual(args.paths, ["foo", "bar"])
        self.assertEqual(args.file_types, [])
        self.assertEqual(args.ignore_file_strings, [])
        self.assertFalse(args.clipboard)
        self.assertFalse(args.quiet)
        # show_hidden default is False → ignore_hidden should be True
        self.assertFalse(args.show_hidden)
        self.assertTrue(args.ignore_hidden)

    def test_clipboard_and_quiet_short_flags(self):
        # -x and -q short flags both toggle their store_true options.
        with patch.object(sys, "argv", ["arc", ".", "-x", "-q"]):
            args = parse_arguments()

        self.assertTrue(args.clipboard)
        self.assertTrue(args.quiet)

    def test_ignore_file_strings_short_and_long(self):
        # Test only the short form -I collecting multiple values
        with patch.object(
            sys,
            "argv",
            ["arc", ".", "-I", "build", "dist", "node_modules"],
        ):
            args = parse_arguments()

        self.assertEqual(
            args.ignore_file_strings,
            ["build", "dist", "node_modules"],
        )

    def test_show_hidden_switches_ignore_hidden_off(self):
        # --show-hidden flips the derived ignore_hidden flag off.
        with patch.object(sys, "argv", ["arc", ".", "--show-hidden"]):
            args = parse_arguments()

        self.assertTrue(args.show_hidden)
        self.assertFalse(args.ignore_hidden)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
145
tests/unit/test_main.py
Normal file
145
tests/unit/test_main.py
Normal file
@@ -0,0 +1,145 @@
|
||||
# tests/unit/test_main.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from unittest.mock import patch
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
|
||||
if SRC_ROOT not in sys.path:
|
||||
sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
import arc # noqa: E402
|
||||
|
||||
|
||||
class TestArcMain(unittest.TestCase):
    """Exercise arc.main()'s output-stream selection for clipboard/quiet modes."""

    def _make_args(
        self,
        path,
        clipboard=False,
        quiet=False,
        file_types=None,
        ignore_file_strings=None,
        ignore_hidden=True,
        verbose=False,
        no_comments=False,
        compress=False,
        path_contains=None,
        content_contains=None,
        no_gitignore=False,
        scan_binary_files=False,
    ):
        """Return a SimpleNamespace shaped like the parsed CLI arguments."""
        namespace = types.SimpleNamespace()
        namespace.paths = [path]
        namespace.clipboard = clipboard
        namespace.quiet = quiet
        namespace.file_types = file_types or []
        namespace.ignore_file_strings = ignore_file_strings or []
        namespace.ignore_hidden = ignore_hidden
        # show_hidden mirrors the CLI: it is always the inverse of ignore_hidden.
        namespace.show_hidden = not ignore_hidden
        namespace.verbose = verbose
        namespace.no_comments = no_comments
        namespace.compress = compress
        namespace.path_contains = path_contains or []
        namespace.content_contains = content_contains or []
        namespace.no_gitignore = no_gitignore
        namespace.scan_binary_files = scan_binary_files
        return namespace

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_calls_xclip_and_uses_tee(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        """clipboard without quiet: output flows through a Tee to stdout AND xclip."""
        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=True, quiet=False
            )

            def record_stream(path, **kwargs):
                stream = kwargs["output_stream"]
                # main() should hand the handler a Tee (stdout + capture buffer).
                self.assertEqual(stream.__class__.__name__, "Tee")
                stream.write("FROM ARC\n")

            mock_handle_directory.side_effect = record_stream

            captured = io.StringIO()
            with redirect_stdout(captured):
                arc.main()

            # The Tee must have mirrored the text to real stdout...
            self.assertIn("FROM ARC", captured.getvalue())

            # ...and the same text must have been piped into xclip exactly once.
            mock_run.assert_called_once()
            positional, keywords = mock_run.call_args
            self.assertEqual(positional[0], ["xclip", "-selection", "clipboard"])
            self.assertIn("FROM ARC", keywords.get("input", ""))

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_quiet_only_clipboard_no_stdout(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        """clipboard with quiet: content reaches xclip only; stdout stays silent."""
        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=True, quiet=True
            )

            def record_stream(path, **kwargs):
                stream = kwargs["output_stream"]
                # quiet + clipboard → plain in-memory buffer, no stdout tee.
                self.assertIsInstance(stream, io.StringIO)
                stream.write("SILENT CONTENT\n")

            mock_handle_directory.side_effect = record_stream

            captured = io.StringIO()
            # stdout must remain empty in quiet mode.
            with redirect_stdout(captured):
                arc.main()

            self.assertEqual(captured.getvalue(), "")

            mock_run.assert_called_once()
            positional, keywords = mock_run.call_args
            self.assertEqual(positional[0], ["xclip", "-selection", "clipboard"])
            self.assertIn("SILENT CONTENT", keywords.get("input", ""))

    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_quiet_without_clipboard_uses_nullwriter(
        self, mock_parse_arguments, mock_handle_directory
    ):
        """quiet without clipboard: all writes are swallowed by a NullWriter."""
        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=False, quiet=True
            )

            def record_stream(path, **kwargs):
                stream = kwargs["output_stream"]
                # quiet without clipboard → arc's internal NullWriter sink.
                self.assertEqual(stream.__class__.__name__, "NullWriter")
                # Writing to the sink must be a harmless no-op.
                stream.write("SHOULD NOT APPEAR ANYWHERE\n")

            mock_handle_directory.side_effect = record_stream

            captured = io.StringIO()
            with redirect_stdout(captured):
                arc.main()

            # Nothing may reach stdout.
            self.assertEqual(captured.getvalue(), "")
|
||||
|
||||
|
||||
# Allow running this test module directly: `python tests/unit/test_main.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
54
tests/unit/test_tee.py
Normal file
54
tests/unit/test_tee.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# tests/unit/test_tee.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover
|
||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
|
||||
if SRC_ROOT not in sys.path:
|
||||
sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.tee import Tee # noqa: E402
|
||||
|
||||
|
||||
class TestTee(unittest.TestCase):
    """Unit tests for the Tee stream multiplexer."""

    def test_write_writes_to_all_streams(self):
        """Every write() must be duplicated verbatim to each wrapped stream."""
        first = io.StringIO()
        second = io.StringIO()

        fanout = Tee(first, second)
        for piece in ("hello", " world"):
            fanout.write(piece)

        # Both sinks must have received the full concatenated text.
        for sink in (first, second):
            self.assertEqual(sink.getvalue(), "hello world")

    def test_flush_flushes_all_streams(self):
        """flush() must be forwarded to every wrapped stream."""

        class RecordingStream:
            # Minimal file-like object that records writes and flush calls.
            def __init__(self):
                self.flushed = False
                self.data = ""

            def write(self, text):
                self.data += text

            def flush(self):
                self.flushed = True

        sinks = [RecordingStream(), RecordingStream()]

        fanout = Tee(*sinks)
        fanout.write("x")
        fanout.flush()

        # Each sink saw the write and received a flush.
        for sink in sinks:
            self.assertTrue(sink.flushed)
            self.assertEqual(sink.data, "x")
|
||||
|
||||
|
||||
# Allow running this test module directly: `python tests/unit/test_tee.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user