This commit introduces a complete structural and architectural refactor of
Analysis-Ready Code (ARC). The project is now fully migrated to a modern
src/-based Python package layout, with proper packaging via pyproject.toml,
a clean Nix flake, and improved CLI entry points.

Major changes:

• Add `src/arc/` package with clean module structure:
  - arc/__init__.py now contains the main() dispatcher and clipboard helpers
  - arc/__main__.py provides a proper `python -m arc` entry point
  - arc/cli.py rewritten with full argparse-based interface
  - arc/code_processor.py modernized and relocated
  - arc/directory_handler.py rewritten with output_stream support
  - arc/tee.py added for multi-stream output (stdout + buffer)

• Remove legacy top-level modules:
  - cli.py
  - directory_handler.py
  - main.py

• Introduce fully PEP-517 compliant pyproject.toml with console script:
  - arc = arc.__main__:main

• Add Nix flake (`flake.nix`) providing:
  - buildPythonApplication package `arc`
  - `nix run .#arc` app
  - development shell with Python + xclip

• Overhaul the Makefile:
  - automatic detection of Nix vs Python installation
  - unified install/uninstall targets
  - Nix wrapper installation into ~/.local/bin
  - improved help text and shell safety

• Add GitHub CI pipelines:
  - ci-python.yml for Python builds + Makefile tests + arc --help
  - ci-nix.yml for Nix builds, flake checks, dev-shell tests, and `nix run .#arc`

• Refactor and extend unit tests:
  - test_arc.py updated for src/ imports
  - new tests: test_cli.py, test_main.py, test_tee.py
  - improved CodeProcessor and DirectoryHandler tests

• Add egg-info metadata for local builds

• Add build/lib/ tree for compatibility with setuptools (generated)

Overall, this commit modernizes ARC into a clean, robust, and fully packaged
Python/Nix hybrid tool, enabling reproducible builds, solid CLI behavior,
testable architecture, and CI automation.

https://chatgpt.com/share/693933a0-e280-800f-9cf0-26036d15be04
This commit is contained in:
2025-12-10 09:47:19 +01:00
parent b55576beb2
commit 039481d3a9
19 changed files with 965 additions and 186 deletions

38
.github/workflows/ci-nix.yml vendored Normal file
View File

@@ -0,0 +1,38 @@
# Nix CI: builds the flake package, runs the flake checks, runs the Makefile
# test target inside the dev shell, and smoke-tests the `arc` flake app.
name: Nix CI

on:
  push:
    branches:
      - main
      - master
  pull_request:

jobs:
  nix-tests:
    name: Nix Build & Tests
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Nix
        uses: cachix/install-nix-action@v27
        with:
          # Flakes and the unified `nix` CLI are required by every step below.
          extra_nix_config: |
            experimental-features = nix-command flakes

      - name: Build ARC with Nix
        run: nix build .#arc

      - name: Run flake checks
        run: nix flake check --show-trace

      - name: Run Makefile tests inside Nix environment
        run: |
          nix develop -c bash -c "make test"

      # Everything after `--` is passed to arc itself, not to `nix run`.
      - name: Run arc --help via Nix app
        run: |
          nix run .#arc -- --help

40
.github/workflows/ci-python.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
# Python CI: installs the project with pip on each Python version in the
# matrix, runs the Makefile test target, and smoke-tests the `arc` command.
name: CI

on:
  push:
    branches:
      - main
      - master
  pull_request:

jobs:
  python-tests:
    name: Run make test (Python)
    runs-on: ubuntu-latest
    strategy:
      # Let every Python version finish even if one of them fails.
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Upgrade pip
        run: python -m pip install --upgrade pip

      - name: Install project (normal install)
        run: |
          python -m pip install .

      - name: Run tests via Makefile
        run: make test

      # Verifies that the console script declared in pyproject.toml is on PATH.
      - name: Run arc --help
        run: arc --help

2
.gitignore vendored
View File

@@ -1 +1,3 @@
*__pycache__*
build/
src/analysis_ready_code.egg-info/

129
Makefile
View File

@@ -1,17 +1,128 @@
# Makefile for ARC
.PHONY: test install help
SHELL := /usr/bin/env bash
APP_NAME := arc
BIN_DIR ?= $(HOME)/.local/bin
# Flake attribute for the ARC app.
# The '#' must be escaped: an unescaped '#' starts a Make comment, which would
# silently truncate the value to "." instead of ".#arc".
NIX_ATTR := .\#arc
.PHONY: help test install uninstall detect-nix \
install-with-nix install-with-python install-nix install-python \
uninstall-nix-wrapper uninstall-python
help:
@echo "Targets:"
@echo " make test - Run unit tests"
@echo " make install - Show how to install via Kevin's Package Manager"
@echo " make test - Run unit tests"
@echo " make install - Install ARC using Nix if available (and usable),"
@echo " otherwise fall back to Python."
@echo " make uninstall - Uninstall ARC (Nix wrapper + Python package)"
@echo " make install-nix - Force Nix installation (no fallback)"
@echo " make install-python - Force Python installation"
@echo " make uninstall-nix-wrapper - Remove only the arc binary/symlink from BIN_DIR"
@echo " make uninstall-python - Remove the Python package 'arc'"
test:
@python -m unittest discover -s tests -p "test_*.py" -t .
install:
@echo "ARC is distributed via Kevin's Package Manager."
@echo "Install it with:"
@echo " package-manager install arc"
@echo ""
@echo "(This 'make install' does not perform any other actions.)"
# -------------------------------------------------------------------
# Smart installation selector
# -------------------------------------------------------------------
# `install` is the public entry point; the selection logic lives in detect-nix.
install: detect-nix

# Use the Nix installer when a `nix` binary is on PATH, and fall back to the
# Python installer when Nix is missing or the Nix installation fails.
detect-nix:
	@if ! command -v nix >/dev/null 2>&1; then \
		echo "Nix NOT found → installing via Python…"; \
		$(MAKE) install-with-python; \
	else \
		echo "Nix detected → trying Nix-based installation…"; \
		if ! $(MAKE) install-with-nix; then \
			echo "Nix installation failed → falling back to Python…"; \
			$(MAKE) install-with-python; \
		else \
			echo "Nix installation succeeded."; \
		fi; \
	fi
# Convenience aliases for forcing one installation path. Each one re-invokes
# make on the matching install-with-* target directly, so there is no fallback
# to the other installer.
install-nix:
	$(MAKE) install-with-nix

install-python:
	$(MAKE) install-with-python
# -------------------------------------------------------------------
# Nix installation (flakes + nix-command enabled via flags)
# -------------------------------------------------------------------
# Builds $(NIX_ATTR) and symlinks the built executable into $(BIN_DIR).
# `nix build` leaves its `result` link in the directory make is running in, so
# the symlink target is anchored on $(CURDIR) (make's own working directory)
# rather than on the inherited PWD environment variable, which still points at
# the caller's directory when make is started with `-C <dir>`.
install-with-nix:
	@echo "Building ARC using Nix ($(NIX_ATTR))..."
	nix --extra-experimental-features 'nix-command flakes' build $(NIX_ATTR)
	@echo "Installing into $(BIN_DIR)..."
	mkdir -p "$(BIN_DIR)"
	ln -sf "$(CURDIR)/result/bin/$(APP_NAME)" "$(BIN_DIR)/$(APP_NAME)"
	@echo "Done (Nix). Run: $(APP_NAME) --help"
# -------------------------------------------------------------------
# Python installation (used when Nix is unavailable or its install fails)
# - Outside a virtualenv ($VIRTUAL_ENV empty): pip install --user.
# - Inside a virtualenv: plain pip install into that environment.
# Afterwards a small bash wrapper is written to $(BIN_DIR)/$(APP_NAME),
# but only when no regular file or symlink of that name exists there yet.
# -------------------------------------------------------------------
install-with-python:
	@echo "Installing ARC via Python…"
	@if [ -z "$$VIRTUAL_ENV" ]; then \
		echo "No virtualenv detected → installing with --user…"; \
		python -m pip install --user --upgrade .; \
	else \
		echo "Virtualenv detected at $$VIRTUAL_ENV → installing into venv (no --user)…"; \
		python -m pip install --upgrade .; \
	fi
	@echo "Ensuring $(BIN_DIR) exists..."
	mkdir -p "$(BIN_DIR)"
	@echo "Checking for arc binary in $(BIN_DIR)"
	@if [ -f "$(BIN_DIR)/$(APP_NAME)" ] || [ -L "$(BIN_DIR)/$(APP_NAME)" ]; then \
		echo "arc already present in $(BIN_DIR), not touching it."; \
	else \
		echo "arc executable not found in $(BIN_DIR), creating wrapper…"; \
		echo '#!/usr/bin/env bash' > "$(BIN_DIR)/$(APP_NAME)"; \
		echo 'python -m arc "$$@"' >> "$(BIN_DIR)/$(APP_NAME)"; \
		chmod +x "$(BIN_DIR)/$(APP_NAME)"; \
	fi
	@echo "Done (Python). Make sure $(BIN_DIR) is in your PATH."
# -------------------------------------------------------------------
# High-level uninstall target (calls Nix + Python uninstall helpers)
# -------------------------------------------------------------------
# All work happens in the two prerequisite targets; the recipe itself only
# prints the completion banner once both have run.
uninstall: uninstall-nix-wrapper uninstall-python
	@echo "=== Uninstall finished ==="
# -------------------------------------------------------------------
# BIN_DIR side: delete $(BIN_DIR)/$(APP_NAME) when it is a symlink or a
# regular file; otherwise just report that nothing was found.
# -------------------------------------------------------------------
uninstall-nix-wrapper:
	@echo "Removing '$(APP_NAME)' from $(BIN_DIR)..."
	@if [ ! -L "$(BIN_DIR)/$(APP_NAME)" ] && [ ! -f "$(BIN_DIR)/$(APP_NAME)" ]; then \
		echo "⚠ No '$(APP_NAME)' binary found in $(BIN_DIR)."; \
	else \
		rm -f "$(BIN_DIR)/$(APP_NAME)"; \
		echo "✔ Removed $(BIN_DIR)/$(APP_NAME)"; \
	fi
# -------------------------------------------------------------------
# Python side: uninstall the ARC distribution via pip.
# - pip works on the *distribution* name from pyproject.toml
#   (analysis-ready-code), not on the import name `arc`; asking pip to
#   uninstall "arc" would skip ARC and could hit an unrelated distribution.
# - Presence is detected with `pip show`, which exits non-zero when the
#   distribution is not installed in the active environment.
# - The virtualenv check only selects the progress message; the pip command
#   is the same in both cases.
# - The success line is chained with && so a failing pip makes the target fail
#   instead of reporting success.
# -------------------------------------------------------------------
PY_DIST_NAME ?= analysis-ready-code

uninstall-python:
	@echo "Checking for Python installation of '$(PY_DIST_NAME)'…"
	@if python -m pip show "$(PY_DIST_NAME)" >/dev/null 2>&1; then \
		echo "Python package '$(PY_DIST_NAME)' detected → uninstalling…"; \
		if [ -n "$$VIRTUAL_ENV" ]; then \
			echo "Virtualenv detected ($$VIRTUAL_ENV) → uninstalling inside venv…"; \
		else \
			echo "No virtualenv detected → uninstalling from user/system environment…"; \
		fi; \
		python -m pip uninstall -y "$(PY_DIST_NAME)" && \
			echo "✔ Python uninstall complete."; \
	else \
		echo "⚠ Python package '$(PY_DIST_NAME)' not installed. Skipping Python uninstall."; \
	fi

71
cli.py
View File

@@ -1,71 +0,0 @@
import argparse
def parse_arguments():
    """
    Parse the command line (``sys.argv``) for the directory scanner.

    Options are registered from an ordered table so that ``--help`` lists them
    in the same order as before. After parsing, ``ignore_hidden`` is added to
    the namespace as the negation of ``show_hidden``.

    :return: The populated ``argparse.Namespace``.
    """

    def multi(help_text):
        # Option taking one or more values; each option gets its own empty
        # list as default.
        return dict(nargs='+', default=[], help=help_text)

    def switch(help_text, **extra):
        # Boolean flag that is False unless present on the command line.
        return dict(action='store_true', help=help_text, **extra)

    option_table = (
        (("paths",),
         dict(nargs='+', help="List of files or directories to scan.")),
        (("-t", "--file-types"),
         multi("Filter by file types (e.g., .txt, .log).")),
        (("-x", "--ignore-file-strings"),
         multi("Ignore files and folders containing these strings.")),
        (("-S", "--show-hidden"),
         switch("Include hidden directories and files in the scan.",
                dest='show_hidden', default=False)),
        (("-v", "--verbose"),
         switch("Enable verbose mode.")),
        (("-N", "--no-comments"),
         switch("Remove comments from the displayed content based on file type.")),
        (("-z", "--compress"),
         switch("Compress code (for supported file types).")),
        (("-p", "--path-contains"),
         multi("Display files whose paths contain one of these strings.")),
        (("-C", "--content-contains"),
         multi("Display files containing one of these strings in their content.")),
        (("-G", "--no-gitignore"),
         switch("Do not respect .gitignore files during scan.")),
        (("-b", "--scan-binary-files"),
         switch("Scan binary files as well (by default these are ignored).")),
    )

    parser = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    namespace = parser.parse_args()
    # Downstream code works with ignore_hidden rather than show_hidden.
    namespace.ignore_hidden = not namespace.show_hidden
    return namespace

81
flake.nix Normal file
View File

@@ -0,0 +1,81 @@
{
  description = "Analysis-Ready Code (ARC) - recursively scan directories and prepare code for automated analysis.";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs {
          inherit system;
        };

        # Main ARC package built from pyproject.toml
        arcPkg = pkgs.python3Packages.buildPythonApplication {
          pname = "analysis-ready-code";
          version = "0.1.0";
          src = ./.;

          # We are using pyproject.toml with a PEP 517 backend.
          format = "pyproject";

          nativeBuildInputs = with pkgs.python3Packages; [
            setuptools
            wheel
          ];

          # xclip is not a Python lib, but we can still add it as a runtime
          # dependency so that `xclip` is available in PATH when running ARC
          # inside a Nix environment.
          propagatedBuildInputs = with pkgs; [
            xclip
          ];

          meta = {
            description = "Utility that scans directories and prepares code for AI/computer analysis by stripping comments, filtering files, and optionally compressing content.";
            homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code";
            license = pkgs.lib.licenses.agpl3Plus;
            platforms = pkgs.lib.platforms.unix;
          };
        };
      in {
        # Packages for `nix build .#arc` and, as the default, `nix build .`
        packages.arc = arcPkg;
        packages.default = arcPkg;

        # App for `nix run .#arc`
        apps.arc = {
          type = "app";
          program = "${arcPkg}/bin/arc";
        };

        # Default app for `nix run .`
        apps.default = self.apps.${system}.arc;

        # Dev shell for local development. It provides the Python toolchain and
        # xclip only; the `arc` command itself is not installed here, so the
        # usage hint runs the module straight from the working tree.
        devShells.default = pkgs.mkShell {
          name = "arc-dev-shell";
          buildInputs = with pkgs; [
            python3
            python3Packages.pip
            python3Packages.setuptools
            python3Packages.wheel
            xclip
          ];
          shellHook = ''
            echo "ARC dev shell ready. Typical usage:"
            echo " make test"
            echo " PYTHONPATH=src python -m arc . -x"
          '';
        };
      }
    );
}

49
main.py
View File

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
import os
import sys
from cli import parse_arguments
from directory_handler import DirectoryHandler
def main():
    """
    Process every path given on the command line.

    Directories are handed to ``DirectoryHandler.handle_directory``. Regular
    files are first checked with ``DirectoryHandler.should_print_file`` and,
    when accepted, passed to ``DirectoryHandler.handle_file``. A path that is
    neither prints an error and terminates the process with exit status 1.
    """
    args = parse_arguments()

    for target in args.paths:
        if os.path.isdir(target):
            DirectoryHandler.handle_directory(
                target,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                verbose=args.verbose,
                no_comments=args.no_comments,
                compress=args.compress,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                no_gitignore=args.no_gitignore,
                scan_binary_files=args.scan_binary_files
            )
        elif os.path.isfile(target):
            # Filters needed by both the selection check and the handler.
            shared = dict(
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                scan_binary_files=args.scan_binary_files,
            )
            accepted = DirectoryHandler.should_print_file(
                target,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                **shared
            )
            if accepted:
                DirectoryHandler.handle_file(
                    target,
                    no_comments=args.no_comments,
                    compress=args.compress,
                    **shared
                )
        else:
            print(f"Error: {target} is neither a valid file nor a directory.")
            sys.exit(1)
if __name__ == "__main__":
main()

52
pyproject.toml Normal file
View File

@@ -0,0 +1,52 @@
# Build configuration: setuptools is the PEP 517 backend.
[build-system]
requires = ["setuptools>=61", "wheel"]
build-backend = "setuptools.build_meta"

# Distribution metadata. Note that the distribution name differs from the
# import package name (`arc`).
[project]
name = "analysis-ready-code"
version = "0.1.0"
description = "A utility that recursively scans directories and transforms source code into an analysis-ready format, removing comments and optionally compressing content."
readme = "README.md"
license = { text = "AGPL-3.0" }
authors = [
{ name = "Kevin Veen-Birkenbach", email = "kevin@veen.world" }
]
requires-python = ">=3.8"
dependencies = [
# No Python package dependencies are declared for ARC.
]

# Console script: installs an `arc` command that calls main() in
# src/arc/__main__.py.
[project.scripts]
arc = "arc.__main__:main"

[project.urls]
Homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
Source = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
Issues = "https://github.com/kevinveenbirkenbach/analysis-ready-code/issues"

# src/ layout: packages are discovered under src/, restricted to arc*.
[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
include = ["arc*"]

[tool.setuptools.package-data]
# Non-Python files to ship inside the `arc` package.
arc = ["py.typed"]

# Coverage: measure branch coverage of the arc package sources.
[tool.coverage.run]
branch = true
source = ["src/arc"]

[tool.coverage.report]
show_missing = true

# Static type checking settings.
[tool.mypy]
python_version = "3.10"
warn_unused_configs = true
ignore_missing_imports = true
ignore_missing_imports = true

0
src/__init__.py Normal file
View File

111
src/arc/__init__.py Normal file
View File

@@ -0,0 +1,111 @@
import io
import os
import subprocess
import sys
from .cli import parse_arguments
from .directory_handler import DirectoryHandler
from .tee import Tee
import shutil
import subprocess
def copy_to_clipboard(text: str, quiet: bool = False):
    """
    Send ``text`` to the first clipboard tool found on PATH.

    The tools are probed in the order xclip, wl-copy, pbcopy. The first one
    that ``shutil.which`` locates receives ``text`` on its standard input and
    no further tool is tried; its exit status is not inspected.

    :param text: Content to place on the clipboard.
    :param quiet: When true, suppress the warning that is otherwise printed to
        stderr if none of the tools is available.
    """
    candidates = (
        ["xclip", "-selection", "clipboard"],
        ["wl-copy"],
        ["pbcopy"],
    )
    for command in candidates:
        if not shutil.which(command[0]):
            continue
        subprocess.run(command, input=text, text=True)
        return

    if quiet:
        return
    print("Warning: No clipboard tool found (xclip, wl-copy, pbcopy)", file=sys.stderr)
def main() -> None:
    """
    Run ARC: parse the command line, emit the selected files, and optionally
    copy everything that was emitted to the clipboard.

    Output routing depends on two flags:

    ==========  =======  ==========================================
    clipboard   quiet    output_stream
    ==========  =======  ==========================================
    yes         yes      in-memory buffer only
    yes         no       Tee writing to stdout and the buffer
    no          yes      sink that discards everything
    no          no       stdout
    ==========  =======  ==========================================

    A path that is neither a file nor a directory ends the process with exit
    status 1 (the error message is suppressed in quiet mode).
    """
    args = parse_arguments()

    class NullWriter:
        """Stream stand-in that drops all writes."""

        def write(self, *_):
            pass

        def flush(self):
            pass

    # The buffer exists only when clipboard copying was requested.
    buffer = io.StringIO() if args.clipboard else None

    if buffer is not None:
        output_stream = buffer if args.quiet else Tee(sys.stdout, buffer)
    elif args.quiet:
        output_stream = NullWriter()
    else:
        output_stream = sys.stdout

    for target in args.paths:
        if os.path.isdir(target):
            DirectoryHandler.handle_directory(
                target,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                # Quiet mode overrides verbose.
                verbose=args.verbose and not args.quiet,
                no_comments=args.no_comments,
                compress=args.compress,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                no_gitignore=args.no_gitignore,
                scan_binary_files=args.scan_binary_files,
                output_stream=output_stream,
            )
        elif os.path.isfile(target):
            # Filters needed by both the selection check and the handler.
            shared = dict(
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                scan_binary_files=args.scan_binary_files,
            )
            accepted = DirectoryHandler.should_print_file(
                target,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                **shared,
            )
            if accepted:
                DirectoryHandler.handle_file(
                    target,
                    no_comments=args.no_comments,
                    compress=args.compress,
                    output_stream=output_stream,
                    **shared,
                )
        else:
            if not args.quiet:
                print(f"Error: {target} is neither file nor directory.", file=sys.stderr)
            sys.exit(1)

    if buffer is None:
        return

    # Clipboard requested: pipe the collected output into xclip.
    captured = buffer.getvalue()
    try:
        subprocess.run(
            ["xclip", "-selection", "clipboard"],
            input=captured,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        if not args.quiet:
            print("Warning: xclip not found.", file=sys.stderr)

18
src/arc/__main__.py Normal file
View File

@@ -0,0 +1,18 @@
# src/arc/__main__.py
from . import main as _arc_main
def main() -> None:
    """
    Entry point for the `arc` console script and `python -m arc`.

    This keeps all CLI logic in `arc.__init__.py` (main()) and simply
    delegates to it, so both setuptools/entry_points and Nix wrappers
    can reliably import `arc.__main__:main`.

    The delegate's return value is deliberately not propagated; this
    function always returns None.
    """
    _arc_main()
if __name__ == "__main__":
main()

120
src/arc/cli.py Normal file
View File

@@ -0,0 +1,120 @@
import argparse
def parse_arguments(argv=None):
    """
    Build the ARC command-line parser and parse the given arguments.

    :param argv: Optional sequence of argument strings *without* the program
        name. When ``None`` (the default, and what existing callers use),
        argparse reads ``sys.argv[1:]``. Passing an explicit list lets callers
        and tests parse arguments without touching ``sys.argv``.
    :return: ``argparse.Namespace`` holding the parsed options plus the
        derived attribute ``ignore_hidden`` (``not show_hidden``).
    :raises SystemExit: Raised by argparse on invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )

    # Positional: paths
    parser.add_argument(
        "paths",
        nargs="+",
        help="List of files or directories to scan.",
    )

    # File type filter
    parser.add_argument(
        "-t",
        "--file-types",
        nargs="+",
        default=[],
        help="Filter by file types (e.g., .py, .js, .c).",
    )

    # Ignore file/path strings (previously -x, now -I)
    parser.add_argument(
        "-I",
        "--ignore-file-strings",
        nargs="+",
        default=[],
        help="Ignore files and folders containing these strings.",
    )

    # Clipboard: short flag -x
    parser.add_argument(
        "-x",
        "--clipboard",
        action="store_true",
        help="Copy the output to the X clipboard via xclip (alias: -x).",
    )

    # Quiet mode
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Suppress terminal output (useful with --clipboard).",
    )

    # Show hidden files
    parser.add_argument(
        "-S",
        "--show-hidden",
        action="store_true",
        dest="show_hidden",
        default=False,
        help="Include hidden directories and files.",
    )

    # Verbose
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="Enable verbose mode.",
    )

    # Strip comments
    parser.add_argument(
        "-N",
        "--no-comments",
        action="store_true",
        help="Remove comments from files before printing.",
    )

    # Compress
    parser.add_argument(
        "-z",
        "--compress",
        action="store_true",
        help="Compress content instead of printing plain text.",
    )

    # Path filter
    parser.add_argument(
        "-p",
        "--path-contains",
        nargs="+",
        default=[],
        help="Only include files whose *path* contains one of these strings.",
    )

    # Content filter
    parser.add_argument(
        "-C",
        "--content-contains",
        nargs="+",
        default=[],
        help="Only include files whose *content* contains one of these strings.",
    )

    # Ignore .gitignore
    parser.add_argument(
        "-G",
        "--no-gitignore",
        action="store_true",
        help="Do not respect .gitignore files during scan.",
    )

    # Scan binary files
    parser.add_argument(
        "-b",
        "--scan-binary-files",
        action="store_true",
        help="Also scan binary files (ignored by default).",
    )

    # argv=None makes argparse fall back to sys.argv[1:].
    args = parser.parse_args(argv)
    # Downstream code works with ignore_hidden rather than show_hidden.
    args.ignore_hidden = not args.show_hidden
    return args

View File

@@ -1,9 +1,9 @@
import io
import re
import tokenize
import zlib
from dataclasses import dataclass
from typing import Dict, Tuple, Pattern, Optional
import io
import tokenize
@dataclass(frozen=True)

View File

@@ -1,6 +1,9 @@
import os
import fnmatch
from code_processor import CodeProcessor
import os
import sys
from .code_processor import CodeProcessor
class DirectoryHandler:
@staticmethod
@@ -13,17 +16,21 @@ class DirectoryHandler:
"""
gitignore_data = []
for dirpath, _, filenames in os.walk(root_path):
if '.gitignore' in filenames:
gitignore_path = os.path.join(dirpath, '.gitignore')
if ".gitignore" in filenames:
gitignore_path = os.path.join(dirpath, ".gitignore")
try:
with open(gitignore_path, 'r') as f:
with open(gitignore_path, "r") as f:
lines = f.readlines()
# Filter out empty lines and comments.
patterns = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')]
patterns = [
line.strip()
for line in lines
if line.strip() and not line.strip().startswith("#")
]
# Save the base directory and its patterns.
gitignore_data.append((dirpath, patterns))
except Exception as e:
print(f"Error reading {gitignore_path}: {e}")
except Exception as e: # pragma: no cover - defensive
print(f"Error reading {gitignore_path}: {e}", file=sys.stderr)
return gitignore_data
@staticmethod
@@ -34,10 +41,10 @@ class DirectoryHandler:
is found or if more than 30% of the bytes in the sample are non-text.
"""
try:
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
chunk = f.read(1024)
# If there's a null byte, it's almost certainly binary.
if b'\x00' in chunk:
if b"\x00" in chunk:
return True
# Define a set of text characters (ASCII printable + common control characters)
@@ -46,7 +53,7 @@ class DirectoryHandler:
non_text = sum(byte not in text_chars for byte in chunk)
if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
return True
except Exception:
except Exception: # pragma: no cover - defensive
# If the file cannot be read in binary mode, assume it's not binary.
return False
return False
@@ -65,15 +72,15 @@ class DirectoryHandler:
# file_path and base_dir are on different drives.
continue
# If the file is not under the current .gitignore base_dir, skip it.
if rel_path.startswith('..'):
if rel_path.startswith(".."):
continue
# Check all patterns.
for pattern in patterns:
if pattern.endswith('/'):
if pattern.endswith("/"):
# Directory pattern: check if any folder in the relative path matches.
parts = rel_path.split(os.sep)
for part in parts[:-1]:
if fnmatch.fnmatch(part + '/', pattern):
if fnmatch.fnmatch(part + "/", pattern):
return True
else:
# Check if the relative path matches the pattern.
@@ -87,7 +94,7 @@ class DirectoryHandler:
Filter out directories based on ignore_file_strings and hidden status.
"""
if ignore_hidden:
dirs[:] = [d for d in dirs if not d.startswith('.')]
dirs[:] = [d for d in dirs if not d.startswith(".")]
dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
@staticmethod
@@ -100,7 +107,7 @@ class DirectoryHandler:
if content_contains:
try:
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
content = f.read()
if any(whitelist_str in content for whitelist_str in content_contains):
return True
@@ -109,7 +116,15 @@ class DirectoryHandler:
return False
@staticmethod
def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains, scan_binary_files=False):
def should_print_file(
file_path,
file_types,
ignore_file_strings,
ignore_hidden,
path_contains,
content_contains,
scan_binary_files=False,
):
"""
Determines if a file should be printed based on various criteria.
By default, binary files are skipped unless scan_binary_files is True.
@@ -118,7 +133,7 @@ class DirectoryHandler:
if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
return False
if ignore_hidden and os.path.basename(file_path).startswith('.'):
if ignore_hidden and os.path.basename(file_path).startswith("."):
return False
if file_types and not any(file_path.endswith(ft) for ft in file_types):
@@ -128,31 +143,36 @@ class DirectoryHandler:
return False
if path_contains or content_contains:
return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
return DirectoryHandler.path_or_content_contains(
file_path, path_contains, content_contains
)
return True
@staticmethod
def print_file_content(file_path, no_comments, compress):
def print_file_content(file_path, no_comments, compress, output_stream):
"""
Prints the content of a file, optionally removing comments or compressing the output.
"""
try:
with open(file_path, 'r') as f:
with open(file_path, "r") as f:
content = f.read()
if no_comments:
file_type = os.path.splitext(file_path)[1]
content = CodeProcessor.remove_comments(content, file_type)
print(f"<< START: {file_path} >>")
print(f"<< START: {file_path} >>", file=output_stream)
if compress:
compressed_content = CodeProcessor.compress(content)
print("COMPRESSED CODE:")
print(compressed_content)
print("COMPRESSED CODE:", file=output_stream)
print(compressed_content, file=output_stream)
else:
print(content)
print("<< END >>\n")
print(content, file=output_stream)
print("<< END >>\n", file=output_stream)
except UnicodeDecodeError:
print(f"Warning: Could not read file due to encoding issues: {file_path}")
exit(1)
print(
f"Warning: Could not read file due to encoding issues: {file_path}",
file=sys.stderr,
)
sys.exit(1)
@staticmethod
def handle_directory(directory, **kwargs):
@@ -160,34 +180,49 @@ class DirectoryHandler:
Scans the directory and processes each file while respecting .gitignore rules.
"""
gitignore_data = []
if not kwargs.get('no_gitignore'):
if not kwargs.get("no_gitignore"):
gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)
output_stream = kwargs.get("output_stream", sys.stdout)
for root, dirs, files in os.walk(directory):
DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
DirectoryHandler.filter_directories(
dirs, kwargs["ignore_file_strings"], kwargs["ignore_hidden"]
)
for file in files:
file_path = os.path.join(root, file)
if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
if kwargs.get('verbose'):
print(f"Skipped (gitignored): {file_path}")
if kwargs.get("verbose"):
print(f"Skipped (gitignored): {file_path}", file=output_stream)
continue
if DirectoryHandler.should_print_file(
file_path,
kwargs['file_types'],
kwargs['ignore_file_strings'],
kwargs['ignore_hidden'],
kwargs['path_contains'],
kwargs['content_contains'],
scan_binary_files=kwargs.get('scan_binary_files', False)
kwargs["file_types"],
kwargs["ignore_file_strings"],
kwargs["ignore_hidden"],
kwargs["path_contains"],
kwargs["content_contains"],
scan_binary_files=kwargs.get("scan_binary_files", False),
):
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
elif kwargs.get('verbose'):
print(f"Skipped file: {file_path}")
DirectoryHandler.print_file_content(
file_path,
kwargs["no_comments"],
kwargs["compress"],
output_stream=output_stream,
)
elif kwargs.get("verbose"):
print(f"Skipped file: {file_path}", file=output_stream)
@staticmethod
def handle_file(file_path, **kwargs):
"""
Processes an individual file.
"""
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
output_stream = kwargs.get("output_stream", sys.stdout)
DirectoryHandler.print_file_content(
file_path,
kwargs["no_comments"],
kwargs["compress"],
output_stream=output_stream,
)

23
src/arc/tee.py Normal file
View File

@@ -0,0 +1,23 @@
from typing import TextIO
class Tee:
    """
    Write-only stream that fans every write out to several target streams.

    Example:
        tee = Tee(sys.stdout, buffer)
        print("hello", file=tee)
    """

    def __init__(self, *streams: TextIO) -> None:
        # Kept as the tuple of positional arguments, in the order given.
        self.streams = streams

    def write(self, data: str) -> None:
        """Forward ``data`` unchanged to each target, in construction order."""
        for sink in self.streams:
            sink.write(data)

    def flush(self) -> None:
        """Flush every target that has a ``flush`` attribute; skip the rest."""
        flushable = (sink for sink in self.streams if hasattr(sink, "flush"))
        for sink in flushable:
            sink.flush()

View File

@@ -6,13 +6,14 @@ import tempfile
import unittest
from contextlib import redirect_stdout
# Ensure project root is on sys.path when running via discover
# Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if PROJECT_ROOT not in sys.path:
sys.path.insert(0, PROJECT_ROOT)
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
from code_processor import CodeProcessor
from directory_handler import DirectoryHandler
from arc.code_processor import CodeProcessor
from arc.directory_handler import DirectoryHandler
class TestCodeProcessor(unittest.TestCase):
@@ -35,7 +36,7 @@ def f():
self.assertNotIn("# a comment", out)
# tolerate whitespace normalization from tokenize.untokenize
self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
self.assertIn('triple quoted but not a docstring', out)
self.assertIn("triple quoted but not a docstring", out)
def test_cstyle_comment_stripping(self):
src = '''\
@@ -170,8 +171,12 @@ class TestDirectoryHandler(unittest.TestCase):
with open(p, "w") as f:
f.write("# comment only\nx=1\n")
buf = io.StringIO()
with redirect_stdout(buf):
DirectoryHandler.print_file_content(p, no_comments=True, compress=False)
DirectoryHandler.print_file_content(
p,
no_comments=True,
compress=False,
output_stream=buf,
)
out = buf.getvalue()
self.assertIn("<< START:", out)
# be whitespace-tolerant (tokenize may insert spaces)
@@ -179,8 +184,12 @@ class TestDirectoryHandler(unittest.TestCase):
self.assertNotIn("# comment only", out)
buf = io.StringIO()
with redirect_stdout(buf):
DirectoryHandler.print_file_content(p, no_comments=True, compress=True)
DirectoryHandler.print_file_content(
p,
no_comments=True,
compress=True,
output_stream=buf,
)
out = buf.getvalue()
self.assertIn("COMPRESSED CODE:", out)
self.assertIn("<< END >>", out)

60
tests/unit/test_cli.py Normal file
View File

@@ -0,0 +1,60 @@
# tests/unit/test_cli.py
import os
import sys
import unittest
from unittest.mock import patch
# Ensure src/ is on sys.path when running via discover
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
sys.path.insert(0, SRC_ROOT)
from arc.cli import parse_arguments # noqa: E402
class TestCliParseArguments(unittest.TestCase):
    """Unit tests for arc.cli.parse_arguments, driven through a patched sys.argv."""

    def test_basic_paths_and_defaults(self):
        with patch.object(sys, "argv", ["arc", "foo", "bar"]):
            args = parse_arguments()

        self.assertEqual(args.paths, ["foo", "bar"])
        self.assertEqual(args.file_types, [])
        self.assertEqual(args.ignore_file_strings, [])
        self.assertFalse(args.clipboard)
        self.assertFalse(args.quiet)
        # show_hidden default is False → ignore_hidden should be True
        self.assertFalse(args.show_hidden)
        self.assertTrue(args.ignore_hidden)

    def test_clipboard_and_quiet_short_flags(self):
        with patch.object(sys, "argv", ["arc", ".", "-x", "-q"]):
            args = parse_arguments()

        self.assertTrue(args.clipboard)
        self.assertTrue(args.quiet)

    def test_ignore_file_strings_short_and_long(self):
        # Both spellings of the option must collect multiple values into the
        # same destination, as the test name promises.
        expected = ["build", "dist", "node_modules"]
        for flag in ("-I", "--ignore-file-strings"):
            with self.subTest(flag=flag):
                with patch.object(sys, "argv", ["arc", ".", flag, *expected]):
                    args = parse_arguments()

                self.assertEqual(args.ignore_file_strings, expected)

    def test_show_hidden_switches_ignore_hidden_off(self):
        with patch.object(sys, "argv", ["arc", ".", "--show-hidden"]):
            args = parse_arguments()

        self.assertTrue(args.show_hidden)
        self.assertFalse(args.ignore_hidden)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

145
tests/unit/test_main.py Normal file
View File

@@ -0,0 +1,145 @@
# tests/unit/test_main.py
import io
import os
import sys
import tempfile
import types
import unittest
from contextlib import redirect_stdout
from unittest.mock import patch
# Put the repository's src/ directory (PROJECT_ROOT is two levels above this
# file's directory) at the front of sys.path, unless it is already listed,
# so the import below resolves when the tests run via discover.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
import arc # noqa: E402
class TestArcMain(unittest.TestCase):
def _make_args(
self,
path,
clipboard=False,
quiet=False,
file_types=None,
ignore_file_strings=None,
ignore_hidden=True,
verbose=False,
no_comments=False,
compress=False,
path_contains=None,
content_contains=None,
no_gitignore=False,
scan_binary_files=False,
):
return types.SimpleNamespace(
paths=[path],
clipboard=clipboard,
quiet=quiet,
file_types=file_types or [],
ignore_file_strings=ignore_file_strings or [],
ignore_hidden=ignore_hidden,
show_hidden=not ignore_hidden,
verbose=verbose,
no_comments=no_comments,
compress=compress,
path_contains=path_contains or [],
content_contains=content_contains or [],
no_gitignore=no_gitignore,
scan_binary_files=scan_binary_files,
)
@patch("arc.subprocess.run")
@patch("arc.DirectoryHandler.handle_directory")
@patch("arc.parse_arguments")
def test_main_clipboard_calls_xclip_and_uses_tee(
self, mock_parse_arguments, mock_handle_directory, mock_run
):
# create a temporary directory as scan target
with tempfile.TemporaryDirectory() as tmpdir:
args = self._make_args(path=tmpdir, clipboard=True, quiet=False)
mock_parse_arguments.return_value = args
def fake_handle_directory(path, **kwargs):
out = kwargs["output_stream"]
# should be a Tee instance
self.assertEqual(out.__class__.__name__, "Tee")
out.write("FROM ARC\n")
mock_handle_directory.side_effect = fake_handle_directory
buf = io.StringIO()
with redirect_stdout(buf):
arc.main()
# stdout should contain the text once (via Tee -> sys.stdout)
stdout_value = buf.getvalue()
self.assertIn("FROM ARC", stdout_value)
# xclip should have been called with the same text in input
mock_run.assert_called_once()
called_args, called_kwargs = mock_run.call_args
self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"])
self.assertIn("FROM ARC", called_kwargs.get("input", ""))
@patch("arc.subprocess.run")
@patch("arc.DirectoryHandler.handle_directory")
@patch("arc.parse_arguments")
def test_main_clipboard_quiet_only_clipboard_no_stdout(
self, mock_parse_arguments, mock_handle_directory, mock_run
):
with tempfile.TemporaryDirectory() as tmpdir:
args = self._make_args(path=tmpdir, clipboard=True, quiet=True)
mock_parse_arguments.return_value = args
def fake_handle_directory(path, **kwargs):
out = kwargs["output_stream"]
# quiet + clipboard → output_stream is a buffer (StringIO)
self.assertIsInstance(out, io.StringIO)
out.write("SILENT CONTENT\n")
mock_handle_directory.side_effect = fake_handle_directory
buf = io.StringIO()
# stdout should stay empty
with redirect_stdout(buf):
arc.main()
stdout_value = buf.getvalue()
self.assertEqual(stdout_value, "")
mock_run.assert_called_once()
called_args, called_kwargs = mock_run.call_args
self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"])
self.assertIn("SILENT CONTENT", called_kwargs.get("input", ""))
@patch("arc.DirectoryHandler.handle_directory")
@patch("arc.parse_arguments")
def test_main_quiet_without_clipboard_uses_nullwriter(
self, mock_parse_arguments, mock_handle_directory
):
with tempfile.TemporaryDirectory() as tmpdir:
args = self._make_args(path=tmpdir, clipboard=False, quiet=True)
mock_parse_arguments.return_value = args
def fake_handle_directory(path, **kwargs):
out = kwargs["output_stream"]
# quiet without clipboard → internal NullWriter class
self.assertEqual(out.__class__.__name__, "NullWriter")
# writing should not raise
out.write("SHOULD NOT APPEAR ANYWHERE\n")
mock_handle_directory.side_effect = fake_handle_directory
buf = io.StringIO()
with redirect_stdout(buf):
arc.main()
# Nothing should be printed to stdout
self.assertEqual(buf.getvalue(), "")
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

54
tests/unit/test_tee.py Normal file
View File

@@ -0,0 +1,54 @@
# tests/unit/test_tee.py
import io
import os
import sys
import unittest
# Put the repository's src/ directory (PROJECT_ROOT is two levels above this
# file's directory) at the front of sys.path, unless it is already listed,
# so the import below resolves when the tests run via discover.
PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
from arc.tee import Tee # noqa: E402
class TestTee(unittest.TestCase):
    """Checks that ``Tee`` forwards writes and flushes to every stream it was given."""

    def test_write_writes_to_all_streams(self):
        """Consecutive writes accumulate identically in both wrapped buffers."""
        first, second = io.StringIO(), io.StringIO()
        fan_out = Tee(first, second)

        for chunk in ("hello", " world"):
            fan_out.write(chunk)

        for sink in (first, second):
            self.assertEqual(sink.getvalue(), "hello world")

    def test_flush_flushes_all_streams(self):
        """``flush`` reaches each wrapped stream, and written data is kept."""

        class DummyStream:
            """Stream stub that records written text and whether flush ran."""

            def __init__(self):
                self.flushed = False
                self.data = ""

            def write(self, s):
                self.data += s

            def flush(self):
                self.flushed = True

        sinks = (DummyStream(), DummyStream())
        fan_out = Tee(*sinks)

        fan_out.write("x")
        fan_out.flush()

        # Same assertion order as before: both flush flags first, then the data.
        for sink in sinks:
            self.assertTrue(sink.flushed)
        for sink in sinks:
            self.assertEqual(sink.data, "x")
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()