This commit introduces a complete structural and architectural refactor of
Analysis-Ready Code (ARC). The project is now fully migrated to a modern
src/-based Python package layout, with proper packaging via pyproject.toml,
a clean Nix flake, and improved CLI entry points.

Major changes:

• Add `src/arc/` package with clean module structure:
  - arc/__init__.py now contains the main() dispatcher and clipboard helpers
  - arc/__main__.py provides a proper `python -m arc` entry point
  - arc/cli.py rewritten with full argparse-based interface
  - arc/code_processor.py modernized and relocated
  - arc/directory_handler.py rewritten with output_stream support
  - arc/tee.py added for multi-stream output (stdout + buffer)

• Remove legacy top-level modules:
  - cli.py
  - directory_handler.py
  - main.py

• Introduce fully PEP-517 compliant pyproject.toml with console script:
  - arc = arc.__main__:main

• Add Nix flake (`flake.nix`) providing:
  - buildPythonApplication package `arc`
  - `nix run .#arc` app
  - development shell with Python + xclip

• Overhaul the Makefile:
  - automatic detection of Nix vs Python installation
  - unified install/uninstall targets
  - Nix wrapper installation into ~/.local/bin
  - improved help text and shell safety

• Add GitHub CI pipelines:
  - ci-python.yml for Python builds + Makefile tests + arc --help
  - ci-nix.yml for Nix builds, flake checks, dev-shell tests, and `nix run .#arc`

• Refactor and extend unit tests:
  - test_arc.py updated for src/ imports
  - new tests: test_cli.py, test_main.py, test_tee.py
  - improved CodeProcessor and DirectoryHandler tests

• Add egg-info metadata for local builds

• Add build/lib/ tree for compatibility with setuptools (generated)

Overall, this commit modernizes ARC into a clean, robust, and fully packaged
Python/Nix hybrid tool, enabling reproducible builds, solid CLI behavior,
testable architecture, and CI automation.

https://chatgpt.com/share/693933a0-e280-800f-9cf0-26036d15be04
This commit is contained in:
2025-12-10 09:47:19 +01:00
parent b55576beb2
commit 039481d3a9
19 changed files with 965 additions and 186 deletions

38
.github/workflows/ci-nix.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
name: Nix CI
on:
push:
branches:
- main
- master
pull_request:
jobs:
nix-tests:
name: Nix Build & Tests
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install Nix
uses: cachix/install-nix-action@v27
with:
extra_nix_config: |
experimental-features = nix-command flakes
- name: Build ARC with Nix
run: nix build .#arc
- name: Run flake checks
run: nix flake check --show-trace
- name: Run Makefile tests inside Nix environment
run: |
nix develop -c bash -c "make test"
# Smoke-test the packaged CLI through the flake's app output:
- name: Run arc --help via Nix app
run: |
nix run .#arc -- --help

40
.github/workflows/ci-python.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
name: CI
on:
push:
branches:
- main
- master
pull_request:
jobs:
python-tests:
name: Run make test (Python)
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Install project (normal install)
run: |
python -m pip install .
- name: Run tests via Makefile
run: make test
- name: Run arc --help
run: arc --help

4
.gitignore vendored
View File

@@ -1 +1,3 @@
*__pycache__* *__pycache__*
build/
src/analysis_ready_code.egg-info/

129
Makefile
View File

@@ -1,17 +1,128 @@
# Makefile for ARC # Makefile for ARC
.PHONY: test install help SHELL := /usr/bin/env bash
APP_NAME := arc
BIN_DIR ?= $(HOME)/.local/bin
# Flake attribute for the ARC app
NIX_ATTR := .#arc
.PHONY: help test install uninstall detect-nix \
install-with-nix install-with-python install-nix install-python \
uninstall-nix-wrapper uninstall-python
help: help:
@echo "Targets:" @echo "Targets:"
@echo " make test - Run unit tests" @echo " make test - Run unit tests"
@echo " make install - Show how to install via Kevin's Package Manager" @echo " make install - Install ARC using Nix if available (and usable),"
@echo " otherwise fall back to Python."
@echo " make uninstall - Uninstall ARC (Nix wrapper + Python package)"
@echo " make install-nix - Force Nix installation (no fallback)"
@echo " make install-python - Force Python installation"
@echo " make uninstall-nix-wrapper - Remove only the arc binary/symlink from BIN_DIR"
@echo " make uninstall-python - Remove the Python package 'arc'"
test: test:
@python -m unittest discover -s tests -p "test_*.py" -t . @python -m unittest discover -s tests -p "test_*.py" -t .
install: # -------------------------------------------------------------------
@echo "ARC is distributed via Kevin's Package Manager." # Smart installation selector
@echo "Install it with:" # -------------------------------------------------------------------
@echo " package-manager install arc" install: detect-nix
@echo ""
@echo "(This 'make install' does not perform any other actions.)" detect-nix:
@if command -v nix >/dev/null 2>&1; then \
echo "Nix detected → trying Nix-based installation…"; \
if $(MAKE) install-with-nix; then \
echo "Nix installation succeeded."; \
else \
echo "Nix installation failed → falling back to Python…"; \
$(MAKE) install-with-python; \
fi; \
else \
echo "Nix NOT found → installing via Python…"; \
$(MAKE) install-with-python; \
fi
# Convenience aliases, if you want to force one path:
install-nix:
$(MAKE) install-with-nix
install-python:
$(MAKE) install-with-python
# -------------------------------------------------------------------
# Nix installation (flakes + nix-command enabled via flags)
# -------------------------------------------------------------------
# Build the flake attribute and symlink the resulting binary into BIN_DIR.
# $(CURDIR) is maintained by make itself (and follows `make -C <dir>`),
# whereas $(PWD) is only an inherited environment variable and may point at
# the caller's directory, producing a dangling symlink.
install-with-nix:
	@echo "Building ARC using Nix ($(NIX_ATTR))..."
	nix --extra-experimental-features 'nix-command flakes' build $(NIX_ATTR)
	@echo "Installing into $(BIN_DIR)..."
	mkdir -p "$(BIN_DIR)"
	ln -sf "$(CURDIR)/result/bin/$(APP_NAME)" "$(BIN_DIR)/$(APP_NAME)"
	@echo "Done (Nix). Run: $(APP_NAME) --help"
# -------------------------------------------------------------------
# Python installation (fallback if Nix is unavailable or unusable)
# - In a virtualenv: install into the venv (no --user).
# - Outside a virtualenv: install with --user.
# -------------------------------------------------------------------
install-with-python:
@echo "Installing ARC via Python…"
@if [ -n "$$VIRTUAL_ENV" ]; then \
echo "Virtualenv detected at $$VIRTUAL_ENV → installing into venv (no --user)…"; \
python -m pip install --upgrade .; \
else \
echo "No virtualenv detected → installing with --user…"; \
python -m pip install --user --upgrade .; \
fi
@echo "Ensuring $(BIN_DIR) exists..."
mkdir -p "$(BIN_DIR)"
@echo "Checking for arc binary in $(BIN_DIR)"
@if [ ! -f "$(BIN_DIR)/$(APP_NAME)" ] && [ ! -L "$(BIN_DIR)/$(APP_NAME)" ]; then \
echo "arc executable not found in $(BIN_DIR), creating wrapper…"; \
echo '#!/usr/bin/env bash' > "$(BIN_DIR)/$(APP_NAME)"; \
echo 'python -m arc "$$@"' >> "$(BIN_DIR)/$(APP_NAME)"; \
chmod +x "$(BIN_DIR)/$(APP_NAME)"; \
else \
echo "arc already present in $(BIN_DIR), not touching it."; \
fi
@echo "Done (Python). Make sure $(BIN_DIR) is in your PATH."
# -------------------------------------------------------------------
# High-level uninstall target (calls Nix + Python uninstall helpers)
# -------------------------------------------------------------------
uninstall: uninstall-nix-wrapper uninstall-python
@echo "=== Uninstall finished ==="
# -------------------------------------------------------------------
# Nix side: remove wrapper/binary from BIN_DIR
# -------------------------------------------------------------------
uninstall-nix-wrapper:
@echo "Removing '$(APP_NAME)' from $(BIN_DIR)..."
@if [ -L "$(BIN_DIR)/$(APP_NAME)" ] || [ -f "$(BIN_DIR)/$(APP_NAME)" ]; then \
rm -f "$(BIN_DIR)/$(APP_NAME)"; \
echo "✔ Removed $(BIN_DIR)/$(APP_NAME)"; \
else \
echo "⚠ No '$(APP_NAME)' binary found in $(BIN_DIR)."; \
fi
# -------------------------------------------------------------------
# Python side: uninstall the arc package
# - In a virtualenv: uninstall from venv.
# - Outside a virtualenv: uninstall from user/system environment.
# -------------------------------------------------------------------
# pip works on *distribution* names, not import names: the project is
# published as "analysis-ready-code" (see pyproject.toml) and merely provides
# the importable module "arc". Both the presence check and the uninstall
# therefore have to use the distribution name.
uninstall-python:
	@echo "Checking for Python installation of 'arc'…"
	@if python -m pip show analysis-ready-code >/dev/null 2>&1; then \
		echo "Python package 'arc' detected → uninstalling…"; \
		if [ -n "$$VIRTUAL_ENV" ]; then \
			echo "Virtualenv detected ($$VIRTUAL_ENV) → uninstalling inside venv…"; \
		else \
			echo "No virtualenv detected → uninstalling from user/system environment…"; \
		fi; \
		python -m pip uninstall -y analysis-ready-code; \
		echo "✔ Python uninstall complete."; \
	else \
		echo "⚠ Python module 'arc' not installed. Skipping Python uninstall."; \
	fi

71
cli.py
View File

@@ -1,71 +0,0 @@
import argparse
def parse_arguments():
    """Parse the command line from ``sys.argv`` and return the namespace.

    Besides the attributes created by argparse, the returned namespace
    carries ``ignore_hidden``, the negation of ``--show-hidden``, for
    downstream use.
    """
    # Each entry is (flags, add_argument keyword arguments); the order here
    # is the order in which the options are registered.
    option_table = (
        (("paths",), {
            "nargs": '+',
            "help": "List of files or directories to scan.",
        }),
        (("-t", "--file-types"), {
            "nargs": '+',
            "default": [],
            "help": "Filter by file types (e.g., .txt, .log).",
        }),
        (("-x", "--ignore-file-strings"), {
            "nargs": '+',
            "default": [],
            "help": "Ignore files and folders containing these strings.",
        }),
        (("-S", "--show-hidden"), {
            "action": 'store_true',
            "dest": 'show_hidden',
            "default": False,
            "help": "Include hidden directories and files in the scan.",
        }),
        (("-v", "--verbose"), {
            "action": 'store_true',
            "help": "Enable verbose mode.",
        }),
        (("-N", "--no-comments"), {
            "action": 'store_true',
            "help": "Remove comments from the displayed content based on file type.",
        }),
        (("-z", "--compress"), {
            "action": 'store_true',
            "help": "Compress code (for supported file types).",
        }),
        (("-p", "--path-contains"), {
            "nargs": '+',
            "default": [],
            "help": "Display files whose paths contain one of these strings.",
        }),
        (("-C", "--content-contains"), {
            "nargs": '+',
            "default": [],
            "help": "Display files containing one of these strings in their content.",
        }),
        (("-G", "--no-gitignore"), {
            "action": 'store_true',
            "help": "Do not respect .gitignore files during scan.",
        }),
        (("-b", "--scan-binary-files"), {
            "action": 'store_true',
            "help": "Scan binary files as well (by default these are ignored).",
        }),
    )

    cli = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    namespace = cli.parse_args()
    # Downstream code expects the inverted flag.
    namespace.ignore_hidden = not namespace.show_hidden
    return namespace

81
flake.nix Normal file
View File

@@ -0,0 +1,81 @@
{
description = "Analysis-Ready Code (ARC) - recursively scan directories and prepare code for automated analysis.";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs {
inherit system;
};
python = pkgs.python3;
# Main ARC package built from pyproject.toml
arcPkg = pkgs.python3Packages.buildPythonApplication {
pname = "analysis-ready-code";
version = "0.1.0";
src = ./.;
# We are using pyproject.toml with a PEP 517 backend.
format = "pyproject";
nativeBuildInputs = with pkgs.python3Packages; [
setuptools
wheel
];
# xclip is not a Python lib, but we can still add it as a runtime
# dependency so that `xclip` is available in PATH when running ARC
# inside a Nix environment.
propagatedBuildInputs = with pkgs; [
xclip
];
meta = {
description = "Utility that scans directories and prepares code for AI/computer analysis by stripping comments, filtering files, and optionally compressing content.";
homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code";
license = pkgs.lib.licenses.agpl3Plus;
platforms = pkgs.lib.platforms.unix;
};
};
in {
# Default package for `nix build .` and `nix build .#arc`
packages.arc = arcPkg;
packages.default = arcPkg;
# App for `nix run .#arc`
apps.arc = {
type = "app";
program = "${arcPkg}/bin/arc";
};
# Default app for `nix run .`
apps.default = self.apps.${system}.arc;
# Dev shell for local development
devShells.default = pkgs.mkShell {
name = "arc-dev-shell";
buildInputs = with pkgs; [
python3
python3Packages.pip
python3Packages.setuptools
python3Packages.wheel
xclip
];
shellHook = ''
echo "ARC dev shell ready. Typical usage:"
echo " make test"
echo " arc . -x"
'';
};
}
);
}

49
main.py
View File

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
import os
import sys
from cli import parse_arguments
from directory_handler import DirectoryHandler
def main():
    """Process every path given on the command line.

    Directories are handed to ``DirectoryHandler.handle_directory``; regular
    files are printed via ``DirectoryHandler.handle_file`` when
    ``DirectoryHandler.should_print_file`` accepts them. A path that is
    neither a file nor a directory prints an error and exits with status 1.
    """
    opts = parse_arguments()

    # Keyword groups shared between the DirectoryHandler calls below.
    selection = {
        "file_types": opts.file_types,
        "ignore_file_strings": opts.ignore_file_strings,
        "ignore_hidden": opts.ignore_hidden,
        "scan_binary_files": opts.scan_binary_files,
    }
    matching = {
        "path_contains": opts.path_contains,
        "content_contains": opts.content_contains,
    }
    rendering = {
        "no_comments": opts.no_comments,
        "compress": opts.compress,
    }

    for target in opts.paths:
        if os.path.isdir(target):
            DirectoryHandler.handle_directory(
                target,
                verbose=opts.verbose,
                no_gitignore=opts.no_gitignore,
                **selection,
                **matching,
                **rendering,
            )
            continue

        if not os.path.isfile(target):
            print(f"Error: {target} is neither a valid file nor a directory.")
            sys.exit(1)

        if DirectoryHandler.should_print_file(target, **selection, **matching):
            DirectoryHandler.handle_file(target, **selection, **rendering)
if __name__ == "__main__":
main()

52
pyproject.toml Normal file
View File

@@ -0,0 +1,52 @@
[build-system]
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "analysis-ready-code"
version = "0.1.0"
description = "A utility that recursively scans directories and transforms source code into an analysis-ready format, removing comments and optionally compressing content."
readme = "README.md"
license = { text = "AGPL-3.0" }
authors = [
{ name = "Kevin Veen-Birkenbach", email = "kevin@veen.world" }
]
requires-python = ">=3.8"
dependencies = [
# No dependencies needed for ARC
]
# Optional: define console script (if you ever want pip-installable CLI)
# ARC normally uses a symlink, but this keeps it compatible with pip.
[project.scripts]
arc = "arc.__main__:main"
[project.urls]
Homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
Source = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
Issues = "https://github.com/kevinveenbirkenbach/analysis-ready-code/issues"
[tool.setuptools]
package-dir = {"" = "src"}
[tool.setuptools.packages.find]
where = ["src"]
include = ["arc*"]
[tool.setuptools.package-data]
# if you need non-Python files inside packages
arc = ["py.typed"]
[tool.coverage.run]
branch = true
source = ["src/arc"]
[tool.coverage.report]
show_missing = true
[tool.mypy]
python_version = "3.10"
warn_unused_configs = true
ignore_missing_imports = true

0
src/__init__.py Normal file
View File

111
src/arc/__init__.py Normal file
View File

@@ -0,0 +1,111 @@
import io
import os
import subprocess
import sys
from .cli import parse_arguments
from .directory_handler import DirectoryHandler
from .tee import Tee
import shutil
import subprocess
def copy_to_clipboard(text: str, quiet: bool = False):
    """Send *text* to the system clipboard using the first available tool.

    The tools are probed on ``PATH`` in this order: ``xclip``, ``wl-copy``,
    ``pbcopy``. The first one found receives *text* on its standard input.
    If none is found, a warning is written to stderr unless *quiet* is true.
    """
    candidates = (
        ("xclip", ["xclip", "-selection", "clipboard"]),
        ("wl-copy", ["wl-copy"]),
        ("pbcopy", ["pbcopy"]),
    )
    for tool, command in candidates:
        if shutil.which(tool):
            subprocess.run(command, input=text, text=True)
            return

    if quiet:
        return
    print("Warning: No clipboard tool found (xclip, wl-copy, pbcopy)", file=sys.stderr)
class _NullWriter:
    """Stream stand-in that silently discards everything written to it."""

    def write(self, *_):
        pass

    def flush(self):
        pass


def _select_output(clipboard: bool, quiet: bool):
    """Return ``(output_stream, buffer)`` for the requested output mode.

    ``buffer`` is an ``io.StringIO`` collecting the clipboard text, or
    ``None`` when clipboard copying is disabled.
    """
    if clipboard:
        buffer = io.StringIO()
        if quiet:
            # quiet + clipboard: only the buffer, nothing on stdout
            return buffer, buffer
        # normal + clipboard: stdout and buffer at the same time
        return Tee(sys.stdout, buffer), buffer
    if quiet:
        # quiet without clipboard: suppress all output
        return _NullWriter(), None
    return sys.stdout, None


def main() -> None:
    """Run ARC: parse arguments, process every path, optionally copy output.

    Directories are handed to ``DirectoryHandler.handle_directory``; regular
    files are printed via ``DirectoryHandler.handle_file`` when
    ``DirectoryHandler.should_print_file`` accepts them. A path that is
    neither a file nor a directory terminates the program with exit status 1
    (the error message is suppressed in quiet mode).

    With ``--clipboard`` the collected output is passed to
    ``copy_to_clipboard`` after all paths were processed, so every clipboard
    tool that helper supports is used instead of a hard-coded ``xclip`` call.
    """
    args = parse_arguments()
    output_stream, buffer = _select_output(args.clipboard, args.quiet)

    # Process all paths
    for path in args.paths:
        if os.path.isdir(path):
            DirectoryHandler.handle_directory(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                # Verbose skip notices are pointless when output is silenced.
                verbose=args.verbose and not args.quiet,
                no_comments=args.no_comments,
                compress=args.compress,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                no_gitignore=args.no_gitignore,
                scan_binary_files=args.scan_binary_files,
                output_stream=output_stream,
            )
        elif os.path.isfile(path):
            if DirectoryHandler.should_print_file(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                scan_binary_files=args.scan_binary_files,
            ):
                DirectoryHandler.handle_file(
                    path,
                    file_types=args.file_types,
                    ignore_file_strings=args.ignore_file_strings,
                    ignore_hidden=args.ignore_hidden,
                    no_comments=args.no_comments,
                    compress=args.compress,
                    scan_binary_files=args.scan_binary_files,
                    output_stream=output_stream,
                )
        else:
            if not args.quiet:
                print(f"Error: {path} is neither file nor directory.", file=sys.stderr)
            sys.exit(1)

    # Copy to clipboard if enabled
    if buffer is not None:
        copy_to_clipboard(buffer.getvalue(), quiet=args.quiet)

18
src/arc/__main__.py Normal file
View File

@@ -0,0 +1,18 @@
# src/arc/__main__.py
from . import main as _arc_main
def main() -> None:
    """Run the ARC command-line interface.

    This is the target of the ``arc`` console script and of
    ``python -m arc``. The CLI logic itself is the ``main()`` function of
    the package's ``__init__`` module (imported here as ``_arc_main``);
    this function only forwards to it, giving setuptools entry points and
    Nix wrappers a stable ``arc.__main__:main`` to import.
    """
    _arc_main()
if __name__ == "__main__":
main()

120
src/arc/cli.py Normal file
View File

@@ -0,0 +1,120 @@
import argparse
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse ARC's command-line options.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None`` (the default), argparse reads ``sys.argv[1:]``,
            which is the behavior existing callers rely on. Passing an
            explicit list lets the parser be driven programmatically.

    Returns:
        The argparse namespace, extended with ``ignore_hidden``, the
        negation of ``--show-hidden``.
    """
    parser = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )

    # Positional: paths
    parser.add_argument(
        "paths",
        nargs="+",
        help="List of files or directories to scan.",
    )

    # File type filter
    parser.add_argument(
        "-t",
        "--file-types",
        nargs="+",
        default=[],
        help="Filter by file types (e.g., .py, .js, .c).",
    )

    # Ignore file/path strings (previously -x, now -I)
    parser.add_argument(
        "-I",
        "--ignore-file-strings",
        nargs="+",
        default=[],
        help="Ignore files and folders containing these strings.",
    )

    # Clipboard: alias -x
    parser.add_argument(
        "-x",
        "--clipboard",
        action="store_true",
        help="Copy the output to the X clipboard via xclip (alias: -x).",
    )

    # Quiet mode
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress terminal output (useful with --clipboard).",
    )

    # Show hidden files
    parser.add_argument(
        "-S",
        "--show-hidden",
        action="store_true",
        dest="show_hidden",
        default=False,
        help="Include hidden directories and files.",
    )

    # Verbose
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose mode.",
    )

    # Strip comments
    parser.add_argument(
        "-N",
        "--no-comments",
        action="store_true",
        help="Remove comments from files before printing.",
    )

    # Compress
    parser.add_argument(
        "-z",
        "--compress",
        action="store_true",
        help="Compress content instead of printing plain text.",
    )

    # Path filter
    parser.add_argument(
        "-p",
        "--path-contains",
        nargs="+",
        default=[],
        help="Only include files whose *path* contains one of these strings.",
    )

    # Content filter
    parser.add_argument(
        "-C",
        "--content-contains",
        nargs="+",
        default=[],
        help="Only include files whose *content* contains one of these strings.",
    )

    # Ignore .gitignore
    parser.add_argument(
        "-G",
        "--no-gitignore",
        action="store_true",
        help="Do not respect .gitignore files during scan.",
    )

    # Scan binary files
    parser.add_argument(
        "-b",
        "--scan-binary-files",
        action="store_true",
        help="Also scan binary files (ignored by default).",
    )

    # parse_args(None) falls back to sys.argv[1:].
    args = parser.parse_args(argv)
    # Downstream code works with the inverted flag.
    args.ignore_hidden = not args.show_hidden
    return args

View File

@@ -1,9 +1,9 @@
import io
import re import re
import tokenize
import zlib import zlib
from dataclasses import dataclass from dataclasses import dataclass
from typing import Dict, Tuple, Pattern, Optional from typing import Dict, Tuple, Pattern, Optional
import io
import tokenize
@dataclass(frozen=True) @dataclass(frozen=True)

View File

@@ -1,6 +1,9 @@
import os
import fnmatch import fnmatch
from code_processor import CodeProcessor import os
import sys
from .code_processor import CodeProcessor
class DirectoryHandler: class DirectoryHandler:
@staticmethod @staticmethod
@@ -13,19 +16,23 @@ class DirectoryHandler:
""" """
gitignore_data = [] gitignore_data = []
for dirpath, _, filenames in os.walk(root_path): for dirpath, _, filenames in os.walk(root_path):
if '.gitignore' in filenames: if ".gitignore" in filenames:
gitignore_path = os.path.join(dirpath, '.gitignore') gitignore_path = os.path.join(dirpath, ".gitignore")
try: try:
with open(gitignore_path, 'r') as f: with open(gitignore_path, "r") as f:
lines = f.readlines() lines = f.readlines()
# Filter out empty lines and comments. # Filter out empty lines and comments.
patterns = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')] patterns = [
line.strip()
for line in lines
if line.strip() and not line.strip().startswith("#")
]
# Save the base directory and its patterns. # Save the base directory and its patterns.
gitignore_data.append((dirpath, patterns)) gitignore_data.append((dirpath, patterns))
except Exception as e: except Exception as e: # pragma: no cover - defensive
print(f"Error reading {gitignore_path}: {e}") print(f"Error reading {gitignore_path}: {e}", file=sys.stderr)
return gitignore_data return gitignore_data
@staticmethod @staticmethod
def is_binary_file(file_path): def is_binary_file(file_path):
""" """
@@ -34,19 +41,19 @@ class DirectoryHandler:
is found or if more than 30% of the bytes in the sample are non-text. is found or if more than 30% of the bytes in the sample are non-text.
""" """
try: try:
with open(file_path, 'rb') as f: with open(file_path, "rb") as f:
chunk = f.read(1024) chunk = f.read(1024)
# If there's a null byte, it's almost certainly binary. # If there's a null byte, it's almost certainly binary.
if b'\x00' in chunk: if b"\x00" in chunk:
return True return True
# Define a set of text characters (ASCII printable + common control characters) # Define a set of text characters (ASCII printable + common control characters)
text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F))) text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F)))
# Count non-text characters in the chunk. # Count non-text characters in the chunk.
non_text = sum(byte not in text_chars for byte in chunk) non_text = sum(byte not in text_chars for byte in chunk)
if len(chunk) > 0 and (non_text / len(chunk)) > 0.30: if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
return True return True
except Exception: except Exception: # pragma: no cover - defensive
# If the file cannot be read in binary mode, assume it's not binary. # If the file cannot be read in binary mode, assume it's not binary.
return False return False
return False return False
@@ -65,15 +72,15 @@ class DirectoryHandler:
# file_path and base_dir are on different drives. # file_path and base_dir are on different drives.
continue continue
# If the file is not under the current .gitignore base_dir, skip it. # If the file is not under the current .gitignore base_dir, skip it.
if rel_path.startswith('..'): if rel_path.startswith(".."):
continue continue
# Check all patterns. # Check all patterns.
for pattern in patterns: for pattern in patterns:
if pattern.endswith('/'): if pattern.endswith("/"):
# Directory pattern: check if any folder in the relative path matches. # Directory pattern: check if any folder in the relative path matches.
parts = rel_path.split(os.sep) parts = rel_path.split(os.sep)
for part in parts[:-1]: for part in parts[:-1]:
if fnmatch.fnmatch(part + '/', pattern): if fnmatch.fnmatch(part + "/", pattern):
return True return True
else: else:
# Check if the relative path matches the pattern. # Check if the relative path matches the pattern.
@@ -87,7 +94,7 @@ class DirectoryHandler:
Filter out directories based on ignore_file_strings and hidden status. Filter out directories based on ignore_file_strings and hidden status.
""" """
if ignore_hidden: if ignore_hidden:
dirs[:] = [d for d in dirs if not d.startswith('.')] dirs[:] = [d for d in dirs if not d.startswith(".")]
dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)] dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
@staticmethod @staticmethod
@@ -100,7 +107,7 @@ class DirectoryHandler:
if content_contains: if content_contains:
try: try:
with open(file_path, 'r') as f: with open(file_path, "r") as f:
content = f.read() content = f.read()
if any(whitelist_str in content for whitelist_str in content_contains): if any(whitelist_str in content for whitelist_str in content_contains):
return True return True
@@ -109,7 +116,15 @@ class DirectoryHandler:
return False return False
@staticmethod @staticmethod
def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains, scan_binary_files=False): def should_print_file(
file_path,
file_types,
ignore_file_strings,
ignore_hidden,
path_contains,
content_contains,
scan_binary_files=False,
):
""" """
Determines if a file should be printed based on various criteria. Determines if a file should be printed based on various criteria.
By default, binary files are skipped unless scan_binary_files is True. By default, binary files are skipped unless scan_binary_files is True.
@@ -118,7 +133,7 @@ class DirectoryHandler:
if not scan_binary_files and DirectoryHandler.is_binary_file(file_path): if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
return False return False
if ignore_hidden and os.path.basename(file_path).startswith('.'): if ignore_hidden and os.path.basename(file_path).startswith("."):
return False return False
if file_types and not any(file_path.endswith(ft) for ft in file_types): if file_types and not any(file_path.endswith(ft) for ft in file_types):
@@ -128,31 +143,36 @@ class DirectoryHandler:
return False return False
if path_contains or content_contains: if path_contains or content_contains:
return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains) return DirectoryHandler.path_or_content_contains(
file_path, path_contains, content_contains
)
return True return True
@staticmethod @staticmethod
def print_file_content(file_path, no_comments, compress): def print_file_content(file_path, no_comments, compress, output_stream):
""" """
Prints the content of a file, optionally removing comments or compressing the output. Prints the content of a file, optionally removing comments or compressing the output.
""" """
try: try:
with open(file_path, 'r') as f: with open(file_path, "r") as f:
content = f.read() content = f.read()
if no_comments: if no_comments:
file_type = os.path.splitext(file_path)[1] file_type = os.path.splitext(file_path)[1]
content = CodeProcessor.remove_comments(content, file_type) content = CodeProcessor.remove_comments(content, file_type)
print(f"<< START: {file_path} >>") print(f"<< START: {file_path} >>", file=output_stream)
if compress: if compress:
compressed_content = CodeProcessor.compress(content) compressed_content = CodeProcessor.compress(content)
print("COMPRESSED CODE:") print("COMPRESSED CODE:", file=output_stream)
print(compressed_content) print(compressed_content, file=output_stream)
else: else:
print(content) print(content, file=output_stream)
print("<< END >>\n") print("<< END >>\n", file=output_stream)
except UnicodeDecodeError: except UnicodeDecodeError:
print(f"Warning: Could not read file due to encoding issues: {file_path}") print(
exit(1) f"Warning: Could not read file due to encoding issues: {file_path}",
file=sys.stderr,
)
sys.exit(1)
@staticmethod @staticmethod
def handle_directory(directory, **kwargs): def handle_directory(directory, **kwargs):
@@ -160,34 +180,49 @@ class DirectoryHandler:
Scans the directory and processes each file while respecting .gitignore rules. Scans the directory and processes each file while respecting .gitignore rules.
""" """
gitignore_data = [] gitignore_data = []
if not kwargs.get('no_gitignore'): if not kwargs.get("no_gitignore"):
gitignore_data = DirectoryHandler.load_gitignore_patterns(directory) gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)
output_stream = kwargs.get("output_stream", sys.stdout)
for root, dirs, files in os.walk(directory): for root, dirs, files in os.walk(directory):
DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden']) DirectoryHandler.filter_directories(
dirs, kwargs["ignore_file_strings"], kwargs["ignore_hidden"]
)
for file in files: for file in files:
file_path = os.path.join(root, file) file_path = os.path.join(root, file)
if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data): if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
if kwargs.get('verbose'): if kwargs.get("verbose"):
print(f"Skipped (gitignored): {file_path}") print(f"Skipped (gitignored): {file_path}", file=output_stream)
continue continue
if DirectoryHandler.should_print_file( if DirectoryHandler.should_print_file(
file_path, file_path,
kwargs['file_types'], kwargs["file_types"],
kwargs['ignore_file_strings'], kwargs["ignore_file_strings"],
kwargs['ignore_hidden'], kwargs["ignore_hidden"],
kwargs['path_contains'], kwargs["path_contains"],
kwargs['content_contains'], kwargs["content_contains"],
scan_binary_files=kwargs.get('scan_binary_files', False) scan_binary_files=kwargs.get("scan_binary_files", False),
): ):
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) DirectoryHandler.print_file_content(
elif kwargs.get('verbose'): file_path,
print(f"Skipped file: {file_path}") kwargs["no_comments"],
kwargs["compress"],
output_stream=output_stream,
)
elif kwargs.get("verbose"):
print(f"Skipped file: {file_path}", file=output_stream)
@staticmethod @staticmethod
def handle_file(file_path, **kwargs): def handle_file(file_path, **kwargs):
""" """
Processes an individual file. Processes an individual file.
""" """
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) output_stream = kwargs.get("output_stream", sys.stdout)
DirectoryHandler.print_file_content(
file_path,
kwargs["no_comments"],
kwargs["compress"],
output_stream=output_stream,
)

23
src/arc/tee.py Normal file
View File

@@ -0,0 +1,23 @@
from typing import TextIO
class Tee:
    """
    Simple tee-like stream that writes everything to multiple underlying streams.

    Typical usage:
        tee = Tee(sys.stdout, buffer)
        print("hello", file=tee)
    """

    def __init__(self, *streams: TextIO) -> None:
        # Streams are written to in the order given.
        self.streams = streams

    def write(self, data: str) -> int:
        """Write *data* to every underlying stream.

        Returns the number of characters in *data*, mirroring the return
        value of text-stream ``write`` so callers that inspect it keep
        working.
        """
        for stream in self.streams:
            stream.write(data)
        return len(data)

    def flush(self) -> None:
        """Flush every underlying stream that provides ``flush``."""
        for stream in self.streams:
            if hasattr(stream, "flush"):
                stream.flush()

View File

@@ -6,13 +6,14 @@ import tempfile
import unittest import unittest
from contextlib import redirect_stdout from contextlib import redirect_stdout
# Ensure project root is on sys.path when running via discover # Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if PROJECT_ROOT not in sys.path: SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
sys.path.insert(0, PROJECT_ROOT) if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
from code_processor import CodeProcessor from arc.code_processor import CodeProcessor
from directory_handler import DirectoryHandler from arc.directory_handler import DirectoryHandler
class TestCodeProcessor(unittest.TestCase): class TestCodeProcessor(unittest.TestCase):
@@ -35,7 +36,7 @@ def f():
self.assertNotIn("# a comment", out) self.assertNotIn("# a comment", out)
# tolerate whitespace normalization from tokenize.untokenize # tolerate whitespace normalization from tokenize.untokenize
self.assertRegex(out, r'y\s*=\s*"string with # not a comment"') self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
self.assertIn('triple quoted but not a docstring', out) self.assertIn("triple quoted but not a docstring", out)
def test_cstyle_comment_stripping(self): def test_cstyle_comment_stripping(self):
src = '''\ src = '''\
@@ -170,8 +171,12 @@ class TestDirectoryHandler(unittest.TestCase):
with open(p, "w") as f: with open(p, "w") as f:
f.write("# comment only\nx=1\n") f.write("# comment only\nx=1\n")
buf = io.StringIO() buf = io.StringIO()
with redirect_stdout(buf): DirectoryHandler.print_file_content(
DirectoryHandler.print_file_content(p, no_comments=True, compress=False) p,
no_comments=True,
compress=False,
output_stream=buf,
)
out = buf.getvalue() out = buf.getvalue()
self.assertIn("<< START:", out) self.assertIn("<< START:", out)
# be whitespace-tolerant (tokenize may insert spaces) # be whitespace-tolerant (tokenize may insert spaces)
@@ -179,8 +184,12 @@ class TestDirectoryHandler(unittest.TestCase):
self.assertNotIn("# comment only", out) self.assertNotIn("# comment only", out)
buf = io.StringIO() buf = io.StringIO()
with redirect_stdout(buf): DirectoryHandler.print_file_content(
DirectoryHandler.print_file_content(p, no_comments=True, compress=True) p,
no_comments=True,
compress=True,
output_stream=buf,
)
out = buf.getvalue() out = buf.getvalue()
self.assertIn("COMPRESSED CODE:", out) self.assertIn("COMPRESSED CODE:", out)
self.assertIn("<< END >>", out) self.assertIn("<< END >>", out)

60
tests/unit/test_cli.py Normal file
View File

@@ -0,0 +1,60 @@
# tests/unit/test_cli.py
import os
import sys
import unittest
from unittest.mock import patch
# Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
from arc.cli import parse_arguments # noqa: E402
class TestCliParseArguments(unittest.TestCase):
    """Checks the namespace produced by ``parse_arguments`` for several command lines."""

    @staticmethod
    def _parse(*cli_args):
        """Call ``parse_arguments`` while ``sys.argv`` is patched to ``["arc", *cli_args]``."""
        with patch.object(sys, "argv", ["arc", *cli_args]):
            return parse_arguments()

    def test_basic_paths_and_defaults(self):
        parsed = self._parse("foo", "bar")

        self.assertEqual(parsed.paths, ["foo", "bar"])
        self.assertEqual(parsed.file_types, [])
        self.assertEqual(parsed.ignore_file_strings, [])
        self.assertFalse(parsed.clipboard)
        self.assertFalse(parsed.quiet)

        # Without --show-hidden, show_hidden is False and ignore_hidden is True.
        self.assertFalse(parsed.show_hidden)
        self.assertTrue(parsed.ignore_hidden)

    def test_clipboard_and_quiet_short_flags(self):
        parsed = self._parse(".", "-x", "-q")

        self.assertTrue(parsed.clipboard)
        self.assertTrue(parsed.quiet)

    def test_ignore_file_strings_short_and_long(self):
        # Despite the test name, only the short form -I (collecting
        # several values) is exercised here.
        expected = ["build", "dist", "node_modules"]
        parsed = self._parse(".", "-I", "build", "dist", "node_modules")

        self.assertEqual(parsed.ignore_file_strings, expected)

    def test_show_hidden_switches_ignore_hidden_off(self):
        parsed = self._parse(".", "--show-hidden")

        self.assertTrue(parsed.show_hidden)
        self.assertFalse(parsed.ignore_hidden)
if __name__ == "__main__":
unittest.main()

145
tests/unit/test_main.py Normal file
View File

@@ -0,0 +1,145 @@
# tests/unit/test_main.py
import io
import os
import sys
import tempfile
import types
import unittest
from contextlib import redirect_stdout
from unittest.mock import patch
# Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
import arc # noqa: E402
class TestArcMain(unittest.TestCase):
    """
    Tests for ``arc.main()`` with argument parsing and directory handling mocked.

    Each test replaces ``arc.parse_arguments`` with a prepared namespace and
    ``arc.DirectoryHandler.handle_directory`` with a fake that inspects the
    ``output_stream`` keyword it receives and writes a marker line to it.
    """

    def _make_args(
        self,
        path,
        clipboard=False,
        quiet=False,
        file_types=None,
        ignore_file_strings=None,
        ignore_hidden=True,
        verbose=False,
        no_comments=False,
        compress=False,
        path_contains=None,
        content_contains=None,
        no_gitignore=False,
        scan_binary_files=False,
    ):
        """Build a namespace standing in for parsed CLI arguments with one scan path."""
        options = {
            "paths": [path],
            "clipboard": clipboard,
            "quiet": quiet,
            # None (or any falsy value) is normalised to an empty list.
            "file_types": file_types or [],
            "ignore_file_strings": ignore_file_strings or [],
            "ignore_hidden": ignore_hidden,
            # show_hidden is always the inverse of ignore_hidden.
            "show_hidden": not ignore_hidden,
            "verbose": verbose,
            "no_comments": no_comments,
            "compress": compress,
            "path_contains": path_contains or [],
            "content_contains": content_contains or [],
            "no_gitignore": no_gitignore,
            "scan_binary_files": scan_binary_files,
        }
        return types.SimpleNamespace(**options)

    @staticmethod
    def _run_main_capturing_stdout():
        """Run ``arc.main()`` with stdout redirected and return what was printed."""
        captured = io.StringIO()
        with redirect_stdout(captured):
            arc.main()
        return captured.getvalue()

    def _assert_xclip_received(self, mock_run, expected_text):
        """Assert one xclip invocation whose ``input`` keyword contains ``expected_text``."""
        mock_run.assert_called_once()
        positional, keyword = mock_run.call_args
        self.assertEqual(positional[0], ["xclip", "-selection", "clipboard"])
        self.assertIn(expected_text, keyword.get("input", ""))

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_calls_xclip_and_uses_tee(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        def write_through_tee(path, **kwargs):
            stream = kwargs["output_stream"]
            # clipboard without quiet: the stream is expected to be a Tee
            self.assertEqual(stream.__class__.__name__, "Tee")
            stream.write("FROM ARC\n")

        mock_handle_directory.side_effect = write_through_tee

        # A temporary directory serves as the scan target.
        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=True, quiet=False
            )
            stdout_text = self._run_main_capturing_stdout()

        # The marker must show up on stdout ...
        self.assertIn("FROM ARC", stdout_text)
        # ... and in the input handed to xclip.
        self._assert_xclip_received(mock_run, "FROM ARC")

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_quiet_only_clipboard_no_stdout(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        def write_to_buffer(path, **kwargs):
            stream = kwargs["output_stream"]
            # quiet + clipboard: the stream is expected to be a StringIO buffer
            self.assertIsInstance(stream, io.StringIO)
            stream.write("SILENT CONTENT\n")

        mock_handle_directory.side_effect = write_to_buffer

        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=True, quiet=True
            )
            stdout_text = self._run_main_capturing_stdout()

        # stdout must stay empty while xclip still receives the content.
        self.assertEqual(stdout_text, "")
        self._assert_xclip_received(mock_run, "SILENT CONTENT")

    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_quiet_without_clipboard_uses_nullwriter(
        self, mock_parse_arguments, mock_handle_directory
    ):
        def write_to_null(path, **kwargs):
            stream = kwargs["output_stream"]
            # quiet without clipboard: expected to be the NullWriter class
            self.assertEqual(stream.__class__.__name__, "NullWriter")
            # Writing must not raise.
            stream.write("SHOULD NOT APPEAR ANYWHERE\n")

        mock_handle_directory.side_effect = write_to_null

        with tempfile.TemporaryDirectory() as scan_root:
            mock_parse_arguments.return_value = self._make_args(
                path=scan_root, clipboard=False, quiet=True
            )
            stdout_text = self._run_main_capturing_stdout()

        # Nothing may be printed to stdout.
        self.assertEqual(stdout_text, "")
if __name__ == "__main__":
unittest.main()

54
tests/unit/test_tee.py Normal file
View File

@@ -0,0 +1,54 @@
# tests/unit/test_tee.py
import io
import os
import sys
import unittest
# Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
from arc.tee import Tee # noqa: E402
class TestTee(unittest.TestCase):
    """Unit tests for the ``Tee`` multi-stream writer."""

    def test_write_writes_to_all_streams(self):
        sinks = (io.StringIO(), io.StringIO())
        tee = Tee(*sinks)

        for chunk in ("hello", " world"):
            tee.write(chunk)

        # Every sink must have received both chunks, in order.
        for sink in sinks:
            self.assertEqual(sink.getvalue(), "hello world")

    def test_flush_flushes_all_streams(self):
        class RecordingStream:
            """Minimal stream that records written text and whether flush ran."""

            def __init__(self):
                self.flushed = False
                self.data = ""

            def write(self, text):
                self.data += text

            def flush(self):
                self.flushed = True

        recorders = (RecordingStream(), RecordingStream())
        tee = Tee(*recorders)

        tee.write("x")
        tee.flush()

        for recorder in recorders:
            self.assertTrue(recorder.flushed)
        for recorder in recorders:
            self.assertEqual(recorder.data, "x")
if __name__ == "__main__":
unittest.main()