diff --git a/.github/workflows/ci-nix.yml b/.github/workflows/ci-nix.yml new file mode 100644 index 0000000..8e92045 --- /dev/null +++ b/.github/workflows/ci-nix.yml @@ -0,0 +1,38 @@ +name: Nix CI + +on: + push: + branches: + - main + - master + pull_request: + +jobs: + nix-tests: + name: Nix Build & Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install Nix + uses: cachix/install-nix-action@v27 + with: + extra_nix_config: | + experimental-features = nix-command flakes + + - name: Build ARC with Nix + run: nix build .#arc + + - name: Run flake checks + run: nix flake check --show-trace + + - name: Run Makefile tests inside Nix environment + run: | + nix develop -c bash -c "make test" + + # Changed step: + - name: Run arc --help via Nix app + run: | + nix run .#arc -- --help diff --git a/.github/workflows/ci-python.yml b/.github/workflows/ci-python.yml new file mode 100644 index 0000000..076fdf4 --- /dev/null +++ b/.github/workflows/ci-python.yml @@ -0,0 +1,40 @@ +name: CI + +on: + push: + branches: + - main + - master + pull_request: + +jobs: + python-tests: + name: Run make test (Python) + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Upgrade pip + run: python -m pip install --upgrade pip + + - name: Install project (normal install) + run: | + python -m pip install . 
# Makefile for ARC
#
# Provides test, install and uninstall entry points. `make install` prefers a
# Nix (flake) build when `nix` is on PATH and falls back to a pip-based install
# otherwise.

SHELL := /usr/bin/env bash

# Name of the executable that ends up in BIN_DIR.
APP_NAME := arc
# Distribution name as declared in pyproject.toml ([project].name). pip
# installs and uninstalls by *distribution* name, not by import package name,
# so `pip uninstall arc` would never remove anything.
PY_DIST := analysis-ready-code
BIN_DIR ?= $(HOME)/.local/bin
# Flake attribute for the ARC app. The '#' must be escaped: an unescaped '#'
# starts a Make comment and would silently truncate the value to '.'.
NIX_ATTR := .\#arc

.PHONY: help test install uninstall detect-nix \
	install-with-nix install-with-python install-nix install-python \
	uninstall-nix-wrapper uninstall-python

help:
	@echo "Targets:"
	@echo "  make test                  - Run unit tests"
	@echo "  make install               - Install ARC using Nix if available (and usable),"
	@echo "                               otherwise fall back to Python."
	@echo "  make uninstall             - Uninstall ARC (Nix wrapper + Python package)"
	@echo "  make install-nix           - Force Nix installation (no fallback)"
	@echo "  make install-python        - Force Python installation"
	@echo "  make uninstall-nix-wrapper - Remove only the arc binary/symlink from BIN_DIR"
	@echo "  make uninstall-python      - Remove the Python package '$(PY_DIST)'"

test:
	@python -m unittest discover -s tests -p "test_*.py" -t .

# -------------------------------------------------------------------
# Smart installation selector
# -------------------------------------------------------------------
install: detect-nix

# Try Nix first when it is available; any failure of the Nix path falls
# back to the Python installation instead of aborting.
detect-nix:
	@if command -v nix >/dev/null 2>&1; then \
		echo "Nix detected → trying Nix-based installation…"; \
		if $(MAKE) install-with-nix; then \
			echo "Nix installation succeeded."; \
		else \
			echo "Nix installation failed → falling back to Python…"; \
			$(MAKE) install-with-python; \
		fi; \
	else \
		echo "Nix NOT found → installing via Python…"; \
		$(MAKE) install-with-python; \
	fi

# Convenience aliases, if you want to force one path:
install-nix:
	$(MAKE) install-with-nix

install-python:
	$(MAKE) install-with-python

# -------------------------------------------------------------------
# Nix installation (flakes + nix-command enabled via flags)
# -------------------------------------------------------------------
# $(CURDIR) is set by make itself; $(PWD) is only inherited from the
# environment and can be stale (e.g. with `make -C`).
install-with-nix:
	@echo "Building ARC using Nix ($(NIX_ATTR))..."
	nix --extra-experimental-features 'nix-command flakes' build "$(NIX_ATTR)"
	@echo "Installing into $(BIN_DIR)..."
	mkdir -p "$(BIN_DIR)"
	ln -sf "$(CURDIR)/result/bin/$(APP_NAME)" "$(BIN_DIR)/$(APP_NAME)"
	@echo "Done (Nix). Run: $(APP_NAME) --help"

# -------------------------------------------------------------------
# Python installation (fallback if Nix is unavailable or unusable)
#   - In a virtualenv: install into the venv (no --user).
#   - Outside a virtualenv: install with --user.
# A small wrapper script is created in BIN_DIR only when no `arc`
# file or symlink exists there yet.
# -------------------------------------------------------------------
install-with-python:
	@echo "Installing ARC via Python…"
	@if [ -n "$$VIRTUAL_ENV" ]; then \
		echo "Virtualenv detected at $$VIRTUAL_ENV → installing into venv (no --user)…"; \
		python -m pip install --upgrade .; \
	else \
		echo "No virtualenv detected → installing with --user…"; \
		python -m pip install --user --upgrade .; \
	fi
	@echo "Ensuring $(BIN_DIR) exists..."
	mkdir -p "$(BIN_DIR)"
	@echo "Checking for arc binary in $(BIN_DIR)…"
	@if [ ! -f "$(BIN_DIR)/$(APP_NAME)" ] && [ ! -L "$(BIN_DIR)/$(APP_NAME)" ]; then \
		echo "arc executable not found in $(BIN_DIR), creating wrapper…"; \
		echo '#!/usr/bin/env bash' > "$(BIN_DIR)/$(APP_NAME)"; \
		echo 'python -m arc "$$@"' >> "$(BIN_DIR)/$(APP_NAME)"; \
		chmod +x "$(BIN_DIR)/$(APP_NAME)"; \
	else \
		echo "arc already present in $(BIN_DIR), not touching it."; \
	fi
	@echo "Done (Python). Make sure $(BIN_DIR) is in your PATH."

# -------------------------------------------------------------------
# High-level uninstall target (calls Nix + Python uninstall helpers)
# -------------------------------------------------------------------
uninstall: uninstall-nix-wrapper uninstall-python
	@echo "=== Uninstall finished ==="

# -------------------------------------------------------------------
# Nix side: remove wrapper/binary from BIN_DIR
# -------------------------------------------------------------------
uninstall-nix-wrapper:
	@echo "Removing '$(APP_NAME)' from $(BIN_DIR)..."
	@if [ -L "$(BIN_DIR)/$(APP_NAME)" ] || [ -f "$(BIN_DIR)/$(APP_NAME)" ]; then \
		rm -f "$(BIN_DIR)/$(APP_NAME)"; \
		echo "✔ Removed $(BIN_DIR)/$(APP_NAME)"; \
	else \
		echo "⚠ No '$(APP_NAME)' binary found in $(BIN_DIR)."; \
	fi

# -------------------------------------------------------------------
# Python side: uninstall the ARC distribution
#   `python -m pip` always targets the active interpreter, so the same
#   command covers both the virtualenv and the user/system case; the
#   VIRTUAL_ENV check is informational only.
# -------------------------------------------------------------------
uninstall-python:
	@echo "Checking for Python installation of '$(PY_DIST)'…"
	@if python -m pip show "$(PY_DIST)" >/dev/null 2>&1; then \
		if [ -n "$$VIRTUAL_ENV" ]; then \
			echo "Virtualenv detected ($$VIRTUAL_ENV) → uninstalling inside venv…"; \
		else \
			echo "No virtualenv detected → uninstalling from user/system environment…"; \
		fi; \
		python -m pip uninstall -y "$(PY_DIST)" && echo "✔ Python uninstall complete."; \
	else \
		echo "⚠ Python package '$(PY_DIST)' not installed. Skipping Python uninstall."; \
	fi
- ) - parser.add_argument( - "-p", "--path-contains", - nargs='+', - default=[], - help="Display files whose paths contain one of these strings." - ) - parser.add_argument( - "-C", "--content-contains", - nargs='+', - default=[], - help="Display files containing one of these strings in their content." - ) - parser.add_argument( - "-G", "--no-gitignore", - action='store_true', - help="Do not respect .gitignore files during scan." - ) - parser.add_argument( - "-b", "--scan-binary-files", - action='store_true', - help="Scan binary files as well (by default these are ignored)." - ) - # Convert show_hidden to ignore_hidden for downstream use - args = parser.parse_args() - args.ignore_hidden = not args.show_hidden - return args diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..f10181d --- /dev/null +++ b/flake.nix @@ -0,0 +1,81 @@ +{ + description = "Analysis-Ready Code (ARC) - recursively scan directories and prepare code for automated analysis."; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + }; + + python = pkgs.python3; + + # Main ARC package built from pyproject.toml + arcPkg = pkgs.python3Packages.buildPythonApplication { + pname = "analysis-ready-code"; + version = "0.1.0"; + + src = ./.; + + # We are using pyproject.toml with a PEP 517 backend. + format = "pyproject"; + + nativeBuildInputs = with pkgs.python3Packages; [ + setuptools + wheel + ]; + + # xclip is not a Python lib, but we can still add it as a runtime + # dependency so that `xclip` is available in PATH when running ARC + # inside a Nix environment. 
+ propagatedBuildInputs = with pkgs; [ + xclip + ]; + + meta = { + description = "Utility that scans directories and prepares code for AI/computer analysis by stripping comments, filtering files, and optionally compressing content."; + homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code"; + license = pkgs.lib.licenses.agpl3Plus; + platforms = pkgs.lib.platforms.unix; + }; + }; + in { + # Default package for `nix build .` and `nix build .#arc` + packages.arc = arcPkg; + packages.default = arcPkg; + + # App for `nix run .#arc` + apps.arc = { + type = "app"; + program = "${arcPkg}/bin/arc"; + }; + + # Default app for `nix run .` + apps.default = self.apps.${system}.arc; + + # Dev shell for local development + devShells.default = pkgs.mkShell { + name = "arc-dev-shell"; + + buildInputs = with pkgs; [ + python3 + python3Packages.pip + python3Packages.setuptools + python3Packages.wheel + xclip + ]; + + shellHook = '' + echo "ARC dev shell ready. Typical usage:" + echo " make test" + echo " arc . 
-x" + ''; + }; + } + ); +} diff --git a/main.py b/main.py deleted file mode 100755 index 5852dd6..0000000 --- a/main.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys -from cli import parse_arguments -from directory_handler import DirectoryHandler - -def main(): - args = parse_arguments() - - for path in args.paths: - if os.path.isdir(path): - DirectoryHandler.handle_directory( - path, - file_types=args.file_types, - ignore_file_strings=args.ignore_file_strings, - ignore_hidden=args.ignore_hidden, - verbose=args.verbose, - no_comments=args.no_comments, - compress=args.compress, - path_contains=args.path_contains, - content_contains=args.content_contains, - no_gitignore=args.no_gitignore, - scan_binary_files=args.scan_binary_files - ) - elif os.path.isfile(path): - if DirectoryHandler.should_print_file( - path, - file_types=args.file_types, - ignore_file_strings=args.ignore_file_strings, - ignore_hidden=args.ignore_hidden, - path_contains=args.path_contains, - content_contains=args.content_contains, - scan_binary_files=args.scan_binary_files - ): - DirectoryHandler.handle_file( - path, - file_types=args.file_types, - ignore_file_strings=args.ignore_file_strings, - ignore_hidden=args.ignore_hidden, - no_comments=args.no_comments, - compress=args.compress, - scan_binary_files=args.scan_binary_files - ) - else: - print(f"Error: {path} is neither a valid file nor a directory.") - sys.exit(1) - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..0d080fa --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "analysis-ready-code" +version = "0.1.0" +description = "A utility that recursively scans directories and transforms source code into an analysis-ready format, removing comments and optionally compressing content." 
# Clipboard back-ends in order of preference: X11, Wayland, macOS.
_CLIPBOARD_COMMANDS = (
    ("xclip", "-selection", "clipboard"),
    ("wl-copy",),
    ("pbcopy",),
)


def copy_to_clipboard(text: str, quiet: bool = False) -> None:
    """Copy *text* to the system clipboard using the first working tool.

    The tools in ``_CLIPBOARD_COMMANDS`` are tried in order. A tool that is
    installed but exits with a non-zero status (or cannot be executed) does
    not end the search; the next candidate is tried instead.

    Args:
        text: The text to place on the clipboard.
        quiet: When true, no warning is printed if copying was not possible.
    """
    attempted = []
    for command in _CLIPBOARD_COMMANDS:
        if shutil.which(command[0]) is None:
            continue
        attempted.append(command[0])
        try:
            completed = subprocess.run(
                list(command), input=text, text=True, check=False
            )
        except OSError:
            # Found on PATH but could not be started; try the next tool.
            continue
        if completed.returncode == 0:
            return

    if quiet:
        return
    if attempted:
        print(
            f"Warning: Clipboard tool failed ({', '.join(attempted)})",
            file=sys.stderr,
        )
    else:
        print(
            "Warning: No clipboard tool found (xclip, wl-copy, pbcopy)",
            file=sys.stderr,
        )


class _NullWriter:
    """Write-only sink that discards everything written to it."""

    def write(self, *_):
        pass

    def flush(self):
        pass


def _select_output(clipboard: bool, quiet: bool):
    """Return ``(output_stream, buffer)`` for the given flag combination.

    * clipboard + quiet   -> everything goes only into the buffer
    * clipboard           -> stdout and buffer (via ``Tee``)
    * quiet               -> all output is discarded
    * neither             -> plain stdout

    ``buffer`` is ``None`` whenever the clipboard was not requested.
    """
    if not clipboard:
        return (_NullWriter() if quiet else sys.stdout), None

    buffer = io.StringIO()
    if quiet:
        return buffer, buffer
    return Tee(sys.stdout, buffer), buffer


def main() -> None:
    """Run the ARC command line: scan the given paths and emit their content.

    Exits with status 1 as soon as a path is neither a file nor a directory.
    When ``--clipboard`` is set, the collected output is handed to
    ``copy_to_clipboard`` after all paths were processed.
    """
    args = parse_arguments()
    output_stream, buffer = _select_output(args.clipboard, args.quiet)

    # Process all paths
    for path in args.paths:
        if os.path.isdir(path):
            DirectoryHandler.handle_directory(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                # Quiet mode wins over verbose diagnostics.
                verbose=args.verbose and not args.quiet,
                no_comments=args.no_comments,
                compress=args.compress,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                no_gitignore=args.no_gitignore,
                scan_binary_files=args.scan_binary_files,
                output_stream=output_stream,
            )
        elif os.path.isfile(path):
            if DirectoryHandler.should_print_file(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
                scan_binary_files=args.scan_binary_files,
            ):
                DirectoryHandler.handle_file(
                    path,
                    file_types=args.file_types,
                    ignore_file_strings=args.ignore_file_strings,
                    ignore_hidden=args.ignore_hidden,
                    no_comments=args.no_comments,
                    compress=args.compress,
                    scan_binary_files=args.scan_binary_files,
                    output_stream=output_stream,
                )
        else:
            if not args.quiet:
                print(f"Error: {path} is neither file nor directory.", file=sys.stderr)
            sys.exit(1)

    # Copy to clipboard if enabled. Delegating to the helper (instead of a
    # hard-coded xclip call) makes the wl-copy / pbcopy fallbacks effective.
    if buffer is not None:
        copy_to_clipboard(buffer.getvalue(), quiet=args.quiet)
+ ) + + # Positional: paths + parser.add_argument( + "paths", + nargs="+", + help="List of files or directories to scan.", + ) + + # File type filter + parser.add_argument( + "-t", + "--file-types", + nargs="+", + default=[], + help="Filter by file types (e.g., .py, .js, .c).", + ) + + # Ignore file/path strings (was previously -x, jetzt -I) + parser.add_argument( + "-I", + "--ignore-file-strings", + nargs="+", + default=[], + help="Ignore files and folders containing these strings.", + ) + + # Clipboard: alias -x + parser.add_argument( + "-x", + "--clipboard", + action="store_true", + help="Copy the output to the X clipboard via xclip (alias: -x).", + ) + + # Quiet mode + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Suppress terminal output (useful with --clipboard).", + ) + + # Show hidden files + parser.add_argument( + "-S", + "--show-hidden", + action="store_true", + dest="show_hidden", + default=False, + help="Include hidden directories and files.", + ) + + # Verbose + parser.add_argument( + "-v", + "--verbose", + action="store_true", + help="Enable verbose mode.", + ) + + # Strip comments + parser.add_argument( + "-N", + "--no-comments", + action="store_true", + help="Remove comments from files before printing.", + ) + + # Compress + parser.add_argument( + "-z", + "--compress", + action="store_true", + help="Compress content instead of printing plain text.", + ) + + # Path filter + parser.add_argument( + "-p", + "--path-contains", + nargs="+", + default=[], + help="Only include files whose *path* contains one of these strings.", + ) + + # Content filter + parser.add_argument( + "-C", + "--content-contains", + nargs="+", + default=[], + help="Only include files whose *content* contains one of these strings.", + ) + + # Ignore .gitignore + parser.add_argument( + "-G", + "--no-gitignore", + action="store_true", + help="Do not respect .gitignore files during scan.", + ) + + # Scan binary files + parser.add_argument( + "-b", + 
"--scan-binary-files", + action="store_true", + help="Also scan binary files (ignored by default).", + ) + + args = parser.parse_args() + args.ignore_hidden = not args.show_hidden + return args diff --git a/code_processor.py b/src/arc/code_processor.py similarity index 100% rename from code_processor.py rename to src/arc/code_processor.py index 7a89e24..1d0fa2f 100644 --- a/code_processor.py +++ b/src/arc/code_processor.py @@ -1,9 +1,9 @@ +import io import re +import tokenize import zlib from dataclasses import dataclass from typing import Dict, Tuple, Pattern, Optional -import io -import tokenize @dataclass(frozen=True) diff --git a/directory_handler.py b/src/arc/directory_handler.py similarity index 65% rename from directory_handler.py rename to src/arc/directory_handler.py index a661622..ef5a427 100644 --- a/directory_handler.py +++ b/src/arc/directory_handler.py @@ -1,6 +1,9 @@ -import os import fnmatch -from code_processor import CodeProcessor +import os +import sys + +from .code_processor import CodeProcessor + class DirectoryHandler: @staticmethod @@ -13,19 +16,23 @@ class DirectoryHandler: """ gitignore_data = [] for dirpath, _, filenames in os.walk(root_path): - if '.gitignore' in filenames: - gitignore_path = os.path.join(dirpath, '.gitignore') + if ".gitignore" in filenames: + gitignore_path = os.path.join(dirpath, ".gitignore") try: - with open(gitignore_path, 'r') as f: + with open(gitignore_path, "r") as f: lines = f.readlines() # Filter out empty lines and comments. - patterns = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')] + patterns = [ + line.strip() + for line in lines + if line.strip() and not line.strip().startswith("#") + ] # Save the base directory and its patterns. 
gitignore_data.append((dirpath, patterns)) - except Exception as e: - print(f"Error reading {gitignore_path}: {e}") + except Exception as e: # pragma: no cover - defensive + print(f"Error reading {gitignore_path}: {e}", file=sys.stderr) return gitignore_data - + @staticmethod def is_binary_file(file_path): """ @@ -34,19 +41,19 @@ class DirectoryHandler: is found or if more than 30% of the bytes in the sample are non-text. """ try: - with open(file_path, 'rb') as f: + with open(file_path, "rb") as f: chunk = f.read(1024) # If there's a null byte, it's almost certainly binary. - if b'\x00' in chunk: + if b"\x00" in chunk: return True - + # Define a set of text characters (ASCII printable + common control characters) text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F))) # Count non-text characters in the chunk. non_text = sum(byte not in text_chars for byte in chunk) if len(chunk) > 0 and (non_text / len(chunk)) > 0.30: return True - except Exception: + except Exception: # pragma: no cover - defensive # If the file cannot be read in binary mode, assume it's not binary. return False return False @@ -65,15 +72,15 @@ class DirectoryHandler: # file_path and base_dir are on different drives. continue # If the file is not under the current .gitignore base_dir, skip it. - if rel_path.startswith('..'): + if rel_path.startswith(".."): continue # Check all patterns. for pattern in patterns: - if pattern.endswith('/'): + if pattern.endswith("/"): # Directory pattern: check if any folder in the relative path matches. parts = rel_path.split(os.sep) for part in parts[:-1]: - if fnmatch.fnmatch(part + '/', pattern): + if fnmatch.fnmatch(part + "/", pattern): return True else: # Check if the relative path matches the pattern. @@ -87,7 +94,7 @@ class DirectoryHandler: Filter out directories based on ignore_file_strings and hidden status. 
""" if ignore_hidden: - dirs[:] = [d for d in dirs if not d.startswith('.')] + dirs[:] = [d for d in dirs if not d.startswith(".")] dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)] @staticmethod @@ -100,7 +107,7 @@ class DirectoryHandler: if content_contains: try: - with open(file_path, 'r') as f: + with open(file_path, "r") as f: content = f.read() if any(whitelist_str in content for whitelist_str in content_contains): return True @@ -109,7 +116,15 @@ class DirectoryHandler: return False @staticmethod - def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains, scan_binary_files=False): + def should_print_file( + file_path, + file_types, + ignore_file_strings, + ignore_hidden, + path_contains, + content_contains, + scan_binary_files=False, + ): """ Determines if a file should be printed based on various criteria. By default, binary files are skipped unless scan_binary_files is True. @@ -118,7 +133,7 @@ class DirectoryHandler: if not scan_binary_files and DirectoryHandler.is_binary_file(file_path): return False - if ignore_hidden and os.path.basename(file_path).startswith('.'): + if ignore_hidden and os.path.basename(file_path).startswith("."): return False if file_types and not any(file_path.endswith(ft) for ft in file_types): @@ -128,31 +143,36 @@ class DirectoryHandler: return False if path_contains or content_contains: - return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains) + return DirectoryHandler.path_or_content_contains( + file_path, path_contains, content_contains + ) return True @staticmethod - def print_file_content(file_path, no_comments, compress): + def print_file_content(file_path, no_comments, compress, output_stream): """ Prints the content of a file, optionally removing comments or compressing the output. 
""" try: - with open(file_path, 'r') as f: + with open(file_path, "r") as f: content = f.read() if no_comments: file_type = os.path.splitext(file_path)[1] content = CodeProcessor.remove_comments(content, file_type) - print(f"<< START: {file_path} >>") + print(f"<< START: {file_path} >>", file=output_stream) if compress: compressed_content = CodeProcessor.compress(content) - print("COMPRESSED CODE:") - print(compressed_content) + print("COMPRESSED CODE:", file=output_stream) + print(compressed_content, file=output_stream) else: - print(content) - print("<< END >>\n") + print(content, file=output_stream) + print("<< END >>\n", file=output_stream) except UnicodeDecodeError: - print(f"Warning: Could not read file due to encoding issues: {file_path}") - exit(1) + print( + f"Warning: Could not read file due to encoding issues: {file_path}", + file=sys.stderr, + ) + sys.exit(1) @staticmethod def handle_directory(directory, **kwargs): @@ -160,34 +180,49 @@ class DirectoryHandler: Scans the directory and processes each file while respecting .gitignore rules. 
""" gitignore_data = [] - if not kwargs.get('no_gitignore'): + if not kwargs.get("no_gitignore"): gitignore_data = DirectoryHandler.load_gitignore_patterns(directory) + output_stream = kwargs.get("output_stream", sys.stdout) + for root, dirs, files in os.walk(directory): - DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden']) + DirectoryHandler.filter_directories( + dirs, kwargs["ignore_file_strings"], kwargs["ignore_hidden"] + ) for file in files: file_path = os.path.join(root, file) if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data): - if kwargs.get('verbose'): - print(f"Skipped (gitignored): {file_path}") + if kwargs.get("verbose"): + print(f"Skipped (gitignored): {file_path}", file=output_stream) continue if DirectoryHandler.should_print_file( file_path, - kwargs['file_types'], - kwargs['ignore_file_strings'], - kwargs['ignore_hidden'], - kwargs['path_contains'], - kwargs['content_contains'], - scan_binary_files=kwargs.get('scan_binary_files', False) + kwargs["file_types"], + kwargs["ignore_file_strings"], + kwargs["ignore_hidden"], + kwargs["path_contains"], + kwargs["content_contains"], + scan_binary_files=kwargs.get("scan_binary_files", False), ): - DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress']) - elif kwargs.get('verbose'): - print(f"Skipped file: {file_path}") + DirectoryHandler.print_file_content( + file_path, + kwargs["no_comments"], + kwargs["compress"], + output_stream=output_stream, + ) + elif kwargs.get("verbose"): + print(f"Skipped file: {file_path}", file=output_stream) @staticmethod def handle_file(file_path, **kwargs): """ Processes an individual file. 
class Tee:
    """
    Simple tee-like stream that writes everything to multiple underlying streams.

    Only ``write`` and ``flush`` are provided, which is what ``print(...,
    file=tee)`` needs.

    Typical usage:
        tee = Tee(sys.stdout, buffer)
        print("hello", file=tee)
    """

    def __init__(self, *streams: TextIO) -> None:
        # Kept as the tuple of positional arguments, in the order given.
        self.streams = streams

    def write(self, data: str) -> int:
        """Write *data* to every underlying stream.

        Returns the number of characters in *data*, as text-stream ``write``
        methods are expected to, so callers that use the return value keep
        working when a ``Tee`` is substituted for a real stream.
        """
        for stream in self.streams:
            stream.write(data)
        return len(data)

    def flush(self) -> None:
        """Flush every underlying stream that offers a ``flush`` method."""
        for stream in self.streams:
            # Duck-typed sinks without flush() are tolerated.
            if hasattr(stream, "flush"):
                stream.flush()
docstring', out) + self.assertIn("triple quoted but not a docstring", out) def test_cstyle_comment_stripping(self): src = '''\ @@ -170,8 +171,12 @@ class TestDirectoryHandler(unittest.TestCase): with open(p, "w") as f: f.write("# comment only\nx=1\n") buf = io.StringIO() - with redirect_stdout(buf): - DirectoryHandler.print_file_content(p, no_comments=True, compress=False) + DirectoryHandler.print_file_content( + p, + no_comments=True, + compress=False, + output_stream=buf, + ) out = buf.getvalue() self.assertIn("<< START:", out) # be whitespace-tolerant (tokenize may insert spaces) @@ -179,8 +184,12 @@ class TestDirectoryHandler(unittest.TestCase): self.assertNotIn("# comment only", out) buf = io.StringIO() - with redirect_stdout(buf): - DirectoryHandler.print_file_content(p, no_comments=True, compress=True) + DirectoryHandler.print_file_content( + p, + no_comments=True, + compress=True, + output_stream=buf, + ) out = buf.getvalue() self.assertIn("COMPRESSED CODE:", out) self.assertIn("<< END >>", out) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py new file mode 100644 index 0000000..1015858 --- /dev/null +++ b/tests/unit/test_cli.py @@ -0,0 +1,60 @@ +# tests/unit/test_cli.py +import os +import sys +import unittest +from unittest.mock import patch + +# Ensure src/ is on sys.path when running via discover +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +SRC_ROOT = os.path.join(PROJECT_ROOT, "src") +if SRC_ROOT not in sys.path: + sys.path.insert(0, SRC_ROOT) + +from arc.cli import parse_arguments # noqa: E402 + + +class TestCliParseArguments(unittest.TestCase): + def test_basic_paths_and_defaults(self): + with patch.object(sys, "argv", ["arc", "foo", "bar"]): + args = parse_arguments() + + self.assertEqual(args.paths, ["foo", "bar"]) + self.assertEqual(args.file_types, []) + self.assertEqual(args.ignore_file_strings, []) + self.assertFalse(args.clipboard) + self.assertFalse(args.quiet) + # show_hidden default is 
False → ignore_hidden should be True + self.assertFalse(args.show_hidden) + self.assertTrue(args.ignore_hidden) + + def test_clipboard_and_quiet_short_flags(self): + with patch.object(sys, "argv", ["arc", ".", "-x", "-q"]): + args = parse_arguments() + + self.assertTrue(args.clipboard) + self.assertTrue(args.quiet) + + def test_ignore_file_strings_short_and_long(self): + # Test only the short form -I collecting multiple values + with patch.object( + sys, + "argv", + ["arc", ".", "-I", "build", "dist", "node_modules"], + ): + args = parse_arguments() + + self.assertEqual( + args.ignore_file_strings, + ["build", "dist", "node_modules"], + ) + + def test_show_hidden_switches_ignore_hidden_off(self): + with patch.object(sys, "argv", ["arc", ".", "--show-hidden"]): + args = parse_arguments() + + self.assertTrue(args.show_hidden) + self.assertFalse(args.ignore_hidden) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py new file mode 100644 index 0000000..58f6546 --- /dev/null +++ b/tests/unit/test_main.py @@ -0,0 +1,145 @@ +# tests/unit/test_main.py +import io +import os +import sys +import tempfile +import types +import unittest +from contextlib import redirect_stdout +from unittest.mock import patch + +# Ensure src/ is on sys.path when running via discover +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +SRC_ROOT = os.path.join(PROJECT_ROOT, "src") +if SRC_ROOT not in sys.path: + sys.path.insert(0, SRC_ROOT) + +import arc # noqa: E402 + + +class TestArcMain(unittest.TestCase): + def _make_args( + self, + path, + clipboard=False, + quiet=False, + file_types=None, + ignore_file_strings=None, + ignore_hidden=True, + verbose=False, + no_comments=False, + compress=False, + path_contains=None, + content_contains=None, + no_gitignore=False, + scan_binary_files=False, + ): + return types.SimpleNamespace( + paths=[path], + clipboard=clipboard, + quiet=quiet, + file_types=file_types 
or [], + ignore_file_strings=ignore_file_strings or [], + ignore_hidden=ignore_hidden, + show_hidden=not ignore_hidden, + verbose=verbose, + no_comments=no_comments, + compress=compress, + path_contains=path_contains or [], + content_contains=content_contains or [], + no_gitignore=no_gitignore, + scan_binary_files=scan_binary_files, + ) + + @patch("arc.subprocess.run") + @patch("arc.DirectoryHandler.handle_directory") + @patch("arc.parse_arguments") + def test_main_clipboard_calls_xclip_and_uses_tee( + self, mock_parse_arguments, mock_handle_directory, mock_run + ): + # create a temporary directory as scan target + with tempfile.TemporaryDirectory() as tmpdir: + args = self._make_args(path=tmpdir, clipboard=True, quiet=False) + mock_parse_arguments.return_value = args + + def fake_handle_directory(path, **kwargs): + out = kwargs["output_stream"] + # should be a Tee instance + self.assertEqual(out.__class__.__name__, "Tee") + out.write("FROM ARC\n") + + mock_handle_directory.side_effect = fake_handle_directory + + buf = io.StringIO() + with redirect_stdout(buf): + arc.main() + + # stdout should contain the text once (via Tee -> sys.stdout) + stdout_value = buf.getvalue() + self.assertIn("FROM ARC", stdout_value) + + # xclip should have been called with the same text in input + mock_run.assert_called_once() + called_args, called_kwargs = mock_run.call_args + self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"]) + self.assertIn("FROM ARC", called_kwargs.get("input", "")) + + @patch("arc.subprocess.run") + @patch("arc.DirectoryHandler.handle_directory") + @patch("arc.parse_arguments") + def test_main_clipboard_quiet_only_clipboard_no_stdout( + self, mock_parse_arguments, mock_handle_directory, mock_run + ): + with tempfile.TemporaryDirectory() as tmpdir: + args = self._make_args(path=tmpdir, clipboard=True, quiet=True) + mock_parse_arguments.return_value = args + + def fake_handle_directory(path, **kwargs): + out = kwargs["output_stream"] + # quiet + 
clipboard → output_stream is a buffer (StringIO) + self.assertIsInstance(out, io.StringIO) + out.write("SILENT CONTENT\n") + + mock_handle_directory.side_effect = fake_handle_directory + + buf = io.StringIO() + # stdout should stay empty + with redirect_stdout(buf): + arc.main() + + stdout_value = buf.getvalue() + self.assertEqual(stdout_value, "") + + mock_run.assert_called_once() + called_args, called_kwargs = mock_run.call_args + self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"]) + self.assertIn("SILENT CONTENT", called_kwargs.get("input", "")) + + @patch("arc.DirectoryHandler.handle_directory") + @patch("arc.parse_arguments") + def test_main_quiet_without_clipboard_uses_nullwriter( + self, mock_parse_arguments, mock_handle_directory + ): + with tempfile.TemporaryDirectory() as tmpdir: + args = self._make_args(path=tmpdir, clipboard=False, quiet=True) + mock_parse_arguments.return_value = args + + def fake_handle_directory(path, **kwargs): + out = kwargs["output_stream"] + # quiet without clipboard → internal NullWriter class + self.assertEqual(out.__class__.__name__, "NullWriter") + # writing should not raise + out.write("SHOULD NOT APPEAR ANYWHERE\n") + + mock_handle_directory.side_effect = fake_handle_directory + + buf = io.StringIO() + with redirect_stdout(buf): + arc.main() + + # Nothing should be printed to stdout + self.assertEqual(buf.getvalue(), "") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/test_tee.py b/tests/unit/test_tee.py new file mode 100644 index 0000000..e61a11a --- /dev/null +++ b/tests/unit/test_tee.py @@ -0,0 +1,54 @@ +# tests/unit/test_tee.py +import io +import os +import sys +import unittest + +# Ensure src/ is on sys.path when running via discover +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) +SRC_ROOT = os.path.join(PROJECT_ROOT, "src") +if SRC_ROOT not in sys.path: + sys.path.insert(0, SRC_ROOT) + +from arc.tee import Tee # noqa: E402 + + +class 
TestTee(unittest.TestCase): + def test_write_writes_to_all_streams(self): + buf1 = io.StringIO() + buf2 = io.StringIO() + + tee = Tee(buf1, buf2) + tee.write("hello") + tee.write(" world") + + self.assertEqual(buf1.getvalue(), "hello world") + self.assertEqual(buf2.getvalue(), "hello world") + + def test_flush_flushes_all_streams(self): + class DummyStream: + def __init__(self): + self.flushed = False + self.data = "" + + def write(self, s): + self.data += s + + def flush(self): + self.flushed = True + + s1 = DummyStream() + s2 = DummyStream() + + tee = Tee(s1, s2) + tee.write("x") + tee.flush() + + self.assertTrue(s1.flushed) + self.assertTrue(s2.flushed) + self.assertEqual(s1.data, "x") + self.assertEqual(s2.data, "x") + + +if __name__ == "__main__": + unittest.main()