mirror of
https://github.com/kevinveenbirkenbach/directory-content-scanner.git
synced 2025-12-27 02:46:36 +00:00
Compare commits
14 Commits
74651bb880
...
v0.1.0
| Author | SHA1 | Date | |
|---|---|---|---|
| 6a28c7940d | |||
| 039481d3a9 | |||
| b55576beb2 | |||
| c5938cf482 | |||
| 847b40e9e6 | |||
| 69477fa29e | |||
| ab62b4d1b9 | |||
| 485f068fa5 | |||
| bf2f548a1f | |||
| 11b325ee25 | |||
| 4953993321 | |||
| 49601176e0 | |||
| b822435762 | |||
| 843b16563e |
7
.github/FUNDING.yml
vendored
Normal file
7
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
github: kevinveenbirkenbach
|
||||
|
||||
patreon: kevinveenbirkenbach
|
||||
|
||||
buy_me_a_coffee: kevinveenbirkenbach
|
||||
|
||||
custom: https://s.veen.world/paypaldonate
|
||||
38
.github/workflows/ci-nix.yml
vendored
Normal file
38
.github/workflows/ci-nix.yml
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
name: Nix CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
nix-tests:
|
||||
name: Nix Build & Tests
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Nix
|
||||
uses: cachix/install-nix-action@v27
|
||||
with:
|
||||
extra_nix_config: |
|
||||
experimental-features = nix-command flakes
|
||||
|
||||
- name: Build ARC with Nix
|
||||
run: nix build .#arc
|
||||
|
||||
- name: Run flake checks
|
||||
run: nix flake check --show-trace
|
||||
|
||||
- name: Run Makefile tests inside Nix environment
|
||||
run: |
|
||||
nix develop -c bash -c "make test"
|
||||
|
||||
# Changed step:
|
||||
- name: Run arc --help via Nix app
|
||||
run: |
|
||||
nix run .#arc -- --help
|
||||
40
.github/workflows/ci-python.yml
vendored
Normal file
40
.github/workflows/ci-python.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- master
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
python-tests:
|
||||
name: Run make test (Python)
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install --upgrade pip
|
||||
|
||||
- name: Install project (normal install)
|
||||
run: |
|
||||
python -m pip install .
|
||||
|
||||
- name: Run tests via Makefile
|
||||
run: make test
|
||||
|
||||
- name: Run arc --help
|
||||
run: arc --help
|
||||
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
*__pycache__*
|
||||
build/
|
||||
src/analysis_ready_code.egg-info/
|
||||
4
CHANGELOG.md
Normal file
4
CHANGELOG.md
Normal file
@@ -0,0 +1,4 @@
|
||||
## [0.1.0] - 2025-12-10
|
||||
|
||||
* Add unified Nix/Python install, new ARC entrypoints, xclip-based -x clipboard flag, Nix flake app, improved CI, and extended test coverage.
|
||||
|
||||
128
Makefile
Normal file
128
Makefile
Normal file
@@ -0,0 +1,128 @@
|
||||
# Makefile for ARC
|
||||
SHELL := /usr/bin/env bash
|
||||
|
||||
APP_NAME := arc
|
||||
BIN_DIR ?= $(HOME)/.local/bin
|
||||
# Flake attribute for the ARC app
|
||||
NIX_ATTR := .#arc
|
||||
|
||||
.PHONY: help test install uninstall detect-nix \
|
||||
install-with-nix install-with-python install-nix install-python \
|
||||
uninstall-nix-wrapper uninstall-python
|
||||
|
||||
help:
|
||||
@echo "Targets:"
|
||||
@echo " make test - Run unit tests"
|
||||
@echo " make install - Install ARC using Nix if available (and usable),"
|
||||
@echo " otherwise fall back to Python."
|
||||
@echo " make uninstall - Uninstall ARC (Nix wrapper + Python package)"
|
||||
@echo " make install-nix - Force Nix installation (no fallback)"
|
||||
@echo " make install-python - Force Python installation"
|
||||
@echo " make uninstall-nix-wrapper - Remove only the arc binary/symlink from BIN_DIR"
|
||||
@echo " make uninstall-python - Remove the Python package 'arc'"
|
||||
|
||||
test:
|
||||
@python -m unittest discover -s tests -p "test_*.py" -t .
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Smart installation selector
|
||||
# -------------------------------------------------------------------
|
||||
install: detect-nix
|
||||
|
||||
detect-nix:
|
||||
@if command -v nix >/dev/null 2>&1; then \
|
||||
echo "Nix detected → trying Nix-based installation…"; \
|
||||
if $(MAKE) install-with-nix; then \
|
||||
echo "Nix installation succeeded."; \
|
||||
else \
|
||||
echo "Nix installation failed → falling back to Python…"; \
|
||||
$(MAKE) install-with-python; \
|
||||
fi; \
|
||||
else \
|
||||
echo "Nix NOT found → installing via Python…"; \
|
||||
$(MAKE) install-with-python; \
|
||||
fi
|
||||
|
||||
# Convenience aliases, if you want to force one path:
|
||||
install-nix:
|
||||
$(MAKE) install-with-nix
|
||||
|
||||
install-python:
|
||||
$(MAKE) install-with-python
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Nix installation (flakes + nix-command enabled via flags)
|
||||
# -------------------------------------------------------------------
|
||||
install-with-nix:
|
||||
@echo "Building ARC using Nix ($(NIX_ATTR))..."
|
||||
nix --extra-experimental-features 'nix-command flakes' build $(NIX_ATTR)
|
||||
@echo "Installing into $(BIN_DIR)..."
|
||||
mkdir -p "$(BIN_DIR)"
|
||||
ln -sf "$(PWD)/result/bin/$(APP_NAME)" "$(BIN_DIR)/$(APP_NAME)"
|
||||
@echo "Done (Nix). Run: $(APP_NAME) --help"
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Python installation (fallback if Nix is unavailable or unusable)
|
||||
# - In a virtualenv: install into the venv (no --user).
|
||||
# - Outside a virtualenv: install with --user.
|
||||
# -------------------------------------------------------------------
|
||||
install-with-python:
|
||||
@echo "Installing ARC via Python…"
|
||||
@if [ -n "$$VIRTUAL_ENV" ]; then \
|
||||
echo "Virtualenv detected at $$VIRTUAL_ENV → installing into venv (no --user)…"; \
|
||||
python -m pip install --upgrade .; \
|
||||
else \
|
||||
echo "No virtualenv detected → installing with --user…"; \
|
||||
python -m pip install --user --upgrade .; \
|
||||
fi
|
||||
@echo "Ensuring $(BIN_DIR) exists..."
|
||||
mkdir -p "$(BIN_DIR)"
|
||||
@echo "Checking for arc binary in $(BIN_DIR)…"
|
||||
@if [ ! -f "$(BIN_DIR)/$(APP_NAME)" ] && [ ! -L "$(BIN_DIR)/$(APP_NAME)" ]; then \
|
||||
echo "arc executable not found in $(BIN_DIR), creating wrapper…"; \
|
||||
echo '#!/usr/bin/env bash' > "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
echo 'python -m arc "$$@"' >> "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
chmod +x "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
else \
|
||||
echo "arc already present in $(BIN_DIR), not touching it."; \
|
||||
fi
|
||||
@echo "Done (Python). Make sure $(BIN_DIR) is in your PATH."
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# High-level uninstall target (calls Nix + Python uninstall helpers)
|
||||
# -------------------------------------------------------------------
|
||||
uninstall: uninstall-nix-wrapper uninstall-python
|
||||
@echo "=== Uninstall finished ==="
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Nix side: remove wrapper/binary from BIN_DIR
|
||||
# -------------------------------------------------------------------
|
||||
uninstall-nix-wrapper:
|
||||
@echo "Removing '$(APP_NAME)' from $(BIN_DIR)..."
|
||||
@if [ -L "$(BIN_DIR)/$(APP_NAME)" ] || [ -f "$(BIN_DIR)/$(APP_NAME)" ]; then \
|
||||
rm -f "$(BIN_DIR)/$(APP_NAME)"; \
|
||||
echo "✔ Removed $(BIN_DIR)/$(APP_NAME)"; \
|
||||
else \
|
||||
echo "⚠ No '$(APP_NAME)' binary found in $(BIN_DIR)."; \
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Python side: uninstall the arc package
|
||||
# - In a virtualenv: uninstall from venv.
|
||||
# - Outside a virtualenv: uninstall from user/system environment.
|
||||
# -------------------------------------------------------------------
|
||||
uninstall-python:
|
||||
@echo "Checking for Python installation of 'arc'…"
|
||||
@if python -c "import arc" >/dev/null 2>&1; then \
|
||||
echo "Python package 'arc' detected → uninstalling…"; \
|
||||
if [ -n "$$VIRTUAL_ENV" ]; then \
|
||||
echo "Virtualenv detected ($$VIRTUAL_ENV) → uninstalling inside venv…"; \
|
||||
python -m pip uninstall -y arc; \
|
||||
else \
|
||||
echo "No virtualenv detected → uninstalling from user/system environment…"; \
|
||||
python -m pip uninstall -y arc; \
|
||||
fi; \
|
||||
echo "✔ Python uninstall complete."; \
|
||||
else \
|
||||
echo "⚠ Python module 'arc' not installed. Skipping Python uninstall."; \
|
||||
fi
|
||||
@@ -1,4 +1,6 @@
|
||||
# 🤖👩🔬 Analysis-Ready Code (ARC)
|
||||
[](https://github.com/sponsors/kevinveenbirkenbach) [](https://www.patreon.com/c/kevinveenbirkenbach) [](https://buymeacoffee.com/kevinveenbirkenbach) [](https://s.veen.world/paypaldonate)
|
||||
|
||||
|
||||
Analysis-Ready Code (ARC) is a Python-based utility designed to recursively scan directories and transform source code into a format optimized for AI and computer analysis. By stripping comments, filtering specific file types, and optionally compressing content, ARC ensures that your code is clean and ready for automated processing.
|
||||
|
||||
|
||||
0
__init__.py
Normal file
0
__init__.py
Normal file
81
flake.nix
Normal file
81
flake.nix
Normal file
@@ -0,0 +1,81 @@
|
||||
{
|
||||
description = "Analysis-Ready Code (ARC) - recursively scan directories and prepare code for automated analysis.";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
};
|
||||
|
||||
outputs = { self, nixpkgs, flake-utils }:
|
||||
flake-utils.lib.eachDefaultSystem (system:
|
||||
let
|
||||
pkgs = import nixpkgs {
|
||||
inherit system;
|
||||
};
|
||||
|
||||
python = pkgs.python3;
|
||||
|
||||
# Main ARC package built from pyproject.toml
|
||||
arcPkg = pkgs.python3Packages.buildPythonApplication {
|
||||
pname = "analysis-ready-code";
|
||||
version = "0.1.0";
|
||||
|
||||
src = ./.;
|
||||
|
||||
# We are using pyproject.toml with a PEP 517 backend.
|
||||
format = "pyproject";
|
||||
|
||||
nativeBuildInputs = with pkgs.python3Packages; [
|
||||
setuptools
|
||||
wheel
|
||||
];
|
||||
|
||||
# xclip is not a Python lib, but we can still add it as a runtime
|
||||
# dependency so that `xclip` is available in PATH when running ARC
|
||||
# inside a Nix environment.
|
||||
propagatedBuildInputs = with pkgs; [
|
||||
xclip
|
||||
];
|
||||
|
||||
meta = {
|
||||
description = "Utility that scans directories and prepares code for AI/computer analysis by stripping comments, filtering files, and optionally compressing content.";
|
||||
homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code";
|
||||
license = pkgs.lib.licenses.agpl3Plus;
|
||||
platforms = pkgs.lib.platforms.unix;
|
||||
};
|
||||
};
|
||||
in {
|
||||
# Default package for `nix build .` and `nix build .#arc`
|
||||
packages.arc = arcPkg;
|
||||
packages.default = arcPkg;
|
||||
|
||||
# App for `nix run .#arc`
|
||||
apps.arc = {
|
||||
type = "app";
|
||||
program = "${arcPkg}/bin/arc";
|
||||
};
|
||||
|
||||
# Default app for `nix run .`
|
||||
apps.default = self.apps.${system}.arc;
|
||||
|
||||
# Dev shell for local development
|
||||
devShells.default = pkgs.mkShell {
|
||||
name = "arc-dev-shell";
|
||||
|
||||
buildInputs = with pkgs; [
|
||||
python3
|
||||
python3Packages.pip
|
||||
python3Packages.setuptools
|
||||
python3Packages.wheel
|
||||
xclip
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
echo "ARC dev shell ready. Typical usage:"
|
||||
echo " make test"
|
||||
echo " arc . -x"
|
||||
'';
|
||||
};
|
||||
}
|
||||
);
|
||||
}
|
||||
185
main.py
185
main.py
@@ -1,185 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import re
|
||||
import zlib
|
||||
|
||||
class CodeProcessor:
|
||||
PYTHON = ".py"
|
||||
JS = ".js"
|
||||
C = ".c"
|
||||
CPP = ".cpp"
|
||||
H = ".h"
|
||||
BASH = ".sh"
|
||||
SHELL = ".bash"
|
||||
|
||||
@staticmethod
|
||||
def remove_comments(content, file_type):
|
||||
"""Remove comments based on file type."""
|
||||
comment_patterns = {
|
||||
CodeProcessor.PYTHON: [
|
||||
(r'\s*#.*', '',0),
|
||||
(r'\"\"\"(.*?)\"\"\"', '', re.DOTALL),
|
||||
(r"\'\'\'(.*?)\'\'\'", '', re.DOTALL)
|
||||
],
|
||||
CodeProcessor.JS: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.C: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.CPP: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.H: [
|
||||
(r'\s*//.*', '',0),
|
||||
(r'/\*.*?\*/', '',0)
|
||||
],
|
||||
CodeProcessor.BASH: [
|
||||
(r'\s*#.*', '', 0)
|
||||
],
|
||||
CodeProcessor.SHELL: [
|
||||
(r'\s*#.*', '', 0)
|
||||
]
|
||||
}
|
||||
|
||||
patterns = comment_patterns.get(file_type, [])
|
||||
for pattern, repl, flags in patterns:
|
||||
content = re.sub(pattern, repl, content, flags=flags)
|
||||
return content.strip()
|
||||
|
||||
@staticmethod
|
||||
def compress(content):
|
||||
"""Compress code using zlib."""
|
||||
return zlib.compress(content.encode())
|
||||
|
||||
|
||||
class DirectoryHandler:
|
||||
|
||||
@staticmethod
|
||||
def filter_directories(dirs, ignore_file_strings, ignore_hidden):
|
||||
"""Filter out directories based on ignore criteria."""
|
||||
if ignore_hidden:
|
||||
dirs[:] = [d for d in dirs if not d.startswith('.')]
|
||||
dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]
|
||||
|
||||
@staticmethod
|
||||
def path_or_content_contains(file_path, path_contains, content_contains):
|
||||
# Check if the file name contains specific strings (whitelist)
|
||||
if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
|
||||
return True
|
||||
|
||||
# Check file content for specific strings (if specified)
|
||||
if content_contains:
|
||||
try:
|
||||
with open(file_path, 'r') as f:
|
||||
content = f.read()
|
||||
# Return True if any of the content_contains strings are found in the content
|
||||
if any(whitelist_str in content for whitelist_str in content_contains):
|
||||
return True
|
||||
except UnicodeDecodeError:
|
||||
# Return False if there's a Unicode decode error (file can't be read)
|
||||
return False
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains):
|
||||
"""
|
||||
Determine if a file should be printed based on various criteria.
|
||||
|
||||
Args:
|
||||
file_path (str): The path of the file to be checked.
|
||||
file_types (list): List of allowed file extensions.
|
||||
ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored.
|
||||
ignore_hidden (bool): If True, hidden files (starting with '.') are ignored.
|
||||
path_contains (list): List of strings; the file is processed only if its path contains one of these strings.
|
||||
content_contains (list): List of strings; the file is processed only if its content contains one of these strings.
|
||||
|
||||
Returns:
|
||||
bool: True if the file should be printed, False otherwise.
|
||||
"""
|
||||
|
||||
# Check for hidden files if ignore_hidden is enabled
|
||||
if ignore_hidden and os.path.basename(file_path).startswith('.'):
|
||||
return False
|
||||
|
||||
# Check if the file type is in the allowed list (if specified)
|
||||
if file_types and not any(file_path.endswith(file_type) for file_type in file_types):
|
||||
return False
|
||||
|
||||
# Check if the file should be ignored based on the presence of specific strings in its path
|
||||
if any(ignore_str in file_path for ignore_str in ignore_file_strings):
|
||||
return False
|
||||
|
||||
if path_contains or content_contains:
|
||||
return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def print_file_content(file_path, no_comments, compress):
|
||||
"""Print the content of a file."""
|
||||
try:
|
||||
with open(file_path, 'r') as f:
|
||||
content = f.read()
|
||||
if no_comments:
|
||||
file_type = os.path.splitext(file_path)[1]
|
||||
content = CodeProcessor.remove_comments(content, file_type)
|
||||
print(f"<< START: {file_path} >>")
|
||||
if compress:
|
||||
compressed_content = CodeProcessor.compress(content)
|
||||
print(f"COMPRESSED CODE: ")
|
||||
print(compressed_content)
|
||||
else:
|
||||
print(content)
|
||||
print("<< END >>\n")
|
||||
except UnicodeDecodeError:
|
||||
print(f"Warning: Could not read file due to encoding issues: {file_path}")
|
||||
exit(1)
|
||||
|
||||
@staticmethod
|
||||
def handle_directory(directory, **kwargs):
|
||||
"""Handle scanning and printing for directories."""
|
||||
for root, dirs, files in os.walk(directory):
|
||||
DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
|
||||
for file in files:
|
||||
if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['path_contains'], kwargs['content_contains']):
|
||||
DirectoryHandler.print_file_content(os.path.join(root, file), kwargs['no_comments'], kwargs['compress'])
|
||||
elif kwargs['verbose']:
|
||||
print(f"Skipped file: {file}")
|
||||
|
||||
@staticmethod
|
||||
def handle_file(file_path, **kwargs):
|
||||
"""Handle scanning and printing for individual files."""
|
||||
DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.")
|
||||
parser.add_argument("paths", nargs='+', help="List of files or directories to scan.")
|
||||
parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).")
|
||||
parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.")
|
||||
parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.")
|
||||
parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.")
|
||||
parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.")
|
||||
parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).")
|
||||
parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.")
|
||||
parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
for path in args.paths:
|
||||
if os.path.isdir(path):
|
||||
DirectoryHandler.handle_directory(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, verbose=args.verbose, no_comments=args.no_comments, compress=args.compress, path_contains=args.path_contains, content_contains=args.content_contains)
|
||||
elif os.path.isfile(path):
|
||||
if DirectoryHandler.should_print_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, path_contains=args.path_contains, content_contains=args.content_contains):
|
||||
DirectoryHandler.handle_file(path, file_types=args.file_types, ignore_file_strings=args.ignore_file_strings, ignore_hidden=args.ignore_hidden, no_comments=args.no_comments, compress=args.compress)
|
||||
else:
|
||||
print(f"Error: {path} is neither a valid file nor a directory.")
|
||||
exit(1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
52
pyproject.toml
Normal file
52
pyproject.toml
Normal file
@@ -0,0 +1,52 @@
|
||||
[build-system]
|
||||
requires = ["setuptools>=61", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "analysis-ready-code"
|
||||
version = "0.1.0"
|
||||
description = "A utility that recursively scans directories and transforms source code into an analysis-ready format, removing comments and optionally compressing content."
|
||||
readme = "README.md"
|
||||
license = { text = "AGPL-3.0" }
|
||||
authors = [
|
||||
{ name = "Kevin Veen-Birkenbach", email = "kevin@veen.world" }
|
||||
]
|
||||
|
||||
requires-python = ">=3.8"
|
||||
|
||||
dependencies = [
|
||||
# No dependencies needed for ARC
|
||||
]
|
||||
|
||||
# Optional: define console script (if you ever want pip-installable CLI)
|
||||
# ARC normally uses a symlink, but this keeps it compatible with pip.
|
||||
[project.scripts]
|
||||
arc = "arc.__main__:main"
|
||||
|
||||
[project.urls]
|
||||
Homepage = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
|
||||
Source = "https://github.com/kevinveenbirkenbach/analysis-ready-code"
|
||||
Issues = "https://github.com/kevinveenbirkenbach/analysis-ready-code/issues"
|
||||
|
||||
[tool.setuptools]
|
||||
package-dir = {"" = "src"}
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["src"]
|
||||
include = ["arc*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
# if you need non-Python files inside packages
|
||||
arc = ["py.typed"]
|
||||
|
||||
[tool.coverage.run]
|
||||
branch = true
|
||||
source = ["src/arc"]
|
||||
|
||||
[tool.coverage.report]
|
||||
show_missing = true
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.10"
|
||||
warn_unused_configs = true
|
||||
ignore_missing_imports = true
|
||||
0
src/__init__.py
Normal file
0
src/__init__.py
Normal file
111
src/arc/__init__.py
Normal file
111
src/arc/__init__.py
Normal file
@@ -0,0 +1,111 @@
|
||||
import io
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from .cli import parse_arguments
|
||||
from .directory_handler import DirectoryHandler
|
||||
from .tee import Tee
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
def copy_to_clipboard(text: str, quiet: bool = False):
|
||||
if shutil.which("xclip"):
|
||||
subprocess.run(["xclip", "-selection", "clipboard"], input=text, text=True)
|
||||
return
|
||||
|
||||
if shutil.which("wl-copy"):
|
||||
subprocess.run(["wl-copy"], input=text, text=True)
|
||||
return
|
||||
|
||||
if shutil.which("pbcopy"):
|
||||
subprocess.run(["pbcopy"], input=text, text=True)
|
||||
return
|
||||
|
||||
if not quiet:
|
||||
print("Warning: No clipboard tool found (xclip, wl-copy, pbcopy)", file=sys.stderr)
|
||||
|
||||
def main() -> None:
|
||||
args = parse_arguments()
|
||||
|
||||
# QUIET MODE:
|
||||
# - no terminal output
|
||||
# - but clipboard buffer still active
|
||||
#
|
||||
# Normal:
|
||||
# - output goes to stdout
|
||||
# - optionally tee into buffer
|
||||
|
||||
buffer = None
|
||||
|
||||
if args.clipboard:
|
||||
buffer = io.StringIO()
|
||||
|
||||
if args.quiet:
|
||||
# quiet + clipboard → only buffer, no stdout
|
||||
output_stream = buffer
|
||||
else:
|
||||
# normal + clipboard → stdout + buffer
|
||||
output_stream = Tee(sys.stdout, buffer)
|
||||
else:
|
||||
# no clipboard
|
||||
if args.quiet:
|
||||
# quiet without clipboard → suppress ALL output
|
||||
class NullWriter:
|
||||
def write(self, *_): pass
|
||||
def flush(self): pass
|
||||
output_stream = NullWriter()
|
||||
else:
|
||||
output_stream = sys.stdout
|
||||
|
||||
# Process all paths
|
||||
for path in args.paths:
|
||||
if os.path.isdir(path):
|
||||
DirectoryHandler.handle_directory(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
verbose=args.verbose and not args.quiet,
|
||||
no_comments=args.no_comments,
|
||||
compress=args.compress,
|
||||
path_contains=args.path_contains,
|
||||
content_contains=args.content_contains,
|
||||
no_gitignore=args.no_gitignore,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
output_stream=output_stream,
|
||||
)
|
||||
elif os.path.isfile(path):
|
||||
if DirectoryHandler.should_print_file(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
path_contains=args.path_contains,
|
||||
content_contains=args.content_contains,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
):
|
||||
DirectoryHandler.handle_file(
|
||||
path,
|
||||
file_types=args.file_types,
|
||||
ignore_file_strings=args.ignore_file_strings,
|
||||
ignore_hidden=args.ignore_hidden,
|
||||
no_comments=args.no_comments,
|
||||
compress=args.compress,
|
||||
scan_binary_files=args.scan_binary_files,
|
||||
output_stream=output_stream,
|
||||
)
|
||||
else:
|
||||
if not args.quiet:
|
||||
print(f"Error: {path} is neither file nor directory.", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Copy to clipboard if enabled
|
||||
if buffer is not None:
|
||||
text = buffer.getvalue()
|
||||
try:
|
||||
subprocess.run(["xclip", "-selection", "clipboard"], input=text, text=True, check=False)
|
||||
except FileNotFoundError:
|
||||
if not args.quiet:
|
||||
print("Warning: xclip not found.", file=sys.stderr)
|
||||
18
src/arc/__main__.py
Normal file
18
src/arc/__main__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# src/arc/__main__.py
|
||||
|
||||
from . import main as _arc_main
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""
|
||||
Entry point for the `arc` console script and `python -m arc`.
|
||||
|
||||
This keeps all CLI logic in `arc.__init__.py` (main()) and simply
|
||||
delegates to it, so both setuptools/entry_points and Nix wrappers
|
||||
can reliably import `arc.__main__:main`.
|
||||
"""
|
||||
_arc_main()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
120
src/arc/cli.py
Normal file
120
src/arc/cli.py
Normal file
@@ -0,0 +1,120 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Scan directories and print/compile file contents."
|
||||
)
|
||||
|
||||
# Positional: paths
|
||||
parser.add_argument(
|
||||
"paths",
|
||||
nargs="+",
|
||||
help="List of files or directories to scan.",
|
||||
)
|
||||
|
||||
# File type filter
|
||||
parser.add_argument(
|
||||
"-t",
|
||||
"--file-types",
|
||||
nargs="+",
|
||||
default=[],
|
||||
help="Filter by file types (e.g., .py, .js, .c).",
|
||||
)
|
||||
|
||||
# Ignore file/path strings (was previously -x, jetzt -I)
|
||||
parser.add_argument(
|
||||
"-I",
|
||||
"--ignore-file-strings",
|
||||
nargs="+",
|
||||
default=[],
|
||||
help="Ignore files and folders containing these strings.",
|
||||
)
|
||||
|
||||
# Clipboard: alias -x
|
||||
parser.add_argument(
|
||||
"-x",
|
||||
"--clipboard",
|
||||
action="store_true",
|
||||
help="Copy the output to the X clipboard via xclip (alias: -x).",
|
||||
)
|
||||
|
||||
# Quiet mode
|
||||
parser.add_argument(
|
||||
"-q",
|
||||
"--quiet",
|
||||
action="store_true",
|
||||
help="Suppress terminal output (useful with --clipboard).",
|
||||
)
|
||||
|
||||
# Show hidden files
|
||||
parser.add_argument(
|
||||
"-S",
|
||||
"--show-hidden",
|
||||
action="store_true",
|
||||
dest="show_hidden",
|
||||
default=False,
|
||||
help="Include hidden directories and files.",
|
||||
)
|
||||
|
||||
# Verbose
|
||||
parser.add_argument(
|
||||
"-v",
|
||||
"--verbose",
|
||||
action="store_true",
|
||||
help="Enable verbose mode.",
|
||||
)
|
||||
|
||||
# Strip comments
|
||||
parser.add_argument(
|
||||
"-N",
|
||||
"--no-comments",
|
||||
action="store_true",
|
||||
help="Remove comments from files before printing.",
|
||||
)
|
||||
|
||||
# Compress
|
||||
parser.add_argument(
|
||||
"-z",
|
||||
"--compress",
|
||||
action="store_true",
|
||||
help="Compress content instead of printing plain text.",
|
||||
)
|
||||
|
||||
# Path filter
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--path-contains",
|
||||
nargs="+",
|
||||
default=[],
|
||||
help="Only include files whose *path* contains one of these strings.",
|
||||
)
|
||||
|
||||
# Content filter
|
||||
parser.add_argument(
|
||||
"-C",
|
||||
"--content-contains",
|
||||
nargs="+",
|
||||
default=[],
|
||||
help="Only include files whose *content* contains one of these strings.",
|
||||
)
|
||||
|
||||
# Ignore .gitignore
|
||||
parser.add_argument(
|
||||
"-G",
|
||||
"--no-gitignore",
|
||||
action="store_true",
|
||||
help="Do not respect .gitignore files during scan.",
|
||||
)
|
||||
|
||||
# Scan binary files
|
||||
parser.add_argument(
|
||||
"-b",
|
||||
"--scan-binary-files",
|
||||
action="store_true",
|
||||
help="Also scan binary files (ignored by default).",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
args.ignore_hidden = not args.show_hidden
|
||||
return args
|
||||
285
src/arc/code_processor.py
Normal file
285
src/arc/code_processor.py
Normal file
@@ -0,0 +1,285 @@
|
||||
import io
|
||||
import re
|
||||
import tokenize
|
||||
import zlib
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Tuple, Pattern, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LanguageSpec:
    """Holds compiled comment patterns for a regex-driven language."""
    patterns: Tuple[Pattern, ...]


class CodeProcessor:
    """
    Utilities to strip comments and (de)compress code.

    - Python: tokenize-based (safe) with precise docstring removal.
    - C/CPP/JS: state-machine comment stripper that respects string/char literals.
    - Shell/YAML: remove full-line hash comments only (a leading shebang is kept).
    - Jinja: remove {# ... #} blocks.
    """

    # File extensions (normalized to lowercase) -> internal language key.
    EXT_TO_LANG: Dict[str, str] = {
        ".py": "python",
        ".js": "cstyle",
        ".c": "cstyle",
        ".cpp": "cstyle",
        ".h": "cstyle",
        ".sh": "hash",
        ".bash": "hash",
        ".yml": "hash",
        ".yaml": "hash",
        ".j2": "jinja",
        ".jinja": "jinja",
        ".jinja2": "jinja",
        ".tpl": "jinja",
    }

    # Regex-based specs for hash and jinja comment styles.
    _HASH = LanguageSpec(patterns=(
        re.compile(r"^\s*#.*$", flags=re.MULTILINE),  # only full-line comments
    ))
    _JINJA = LanguageSpec(patterns=(
        re.compile(r"\{#.*?#\}", flags=re.DOTALL),  # {# ... #} across lines
    ))

    LANG_SPECS: Dict[str, LanguageSpec] = {
        "hash": _HASH,
        "jinja": _JINJA,
        # "cstyle" is handled by a state machine, not regex.
        # "python" is handled by tokenize, not regex.
    }

    @classmethod
    def _lang_from_ext(cls, file_type: str) -> Optional[str]:
        """Map an extension like '.py' to an internal language key."""
        ext = file_type.lower().strip()
        return cls.EXT_TO_LANG.get(ext)

    # -----------------------------
    # Python stripping via tokenize
    # -----------------------------
    @staticmethod
    def _strip_python_comments_tokenize(content: str) -> str:
        """
        Remove comments and docstrings safely using tokenize.

        Rules:
          - Drop COMMENT tokens.
          - Drop the module docstring only if it's the very first statement at col 0.
          - Drop the first STRING statement in a suite immediately after a
            'def'/'class' header (':' NEWLINE INDENT).

        Robustness: if the source cannot be tokenized (syntactically broken
        file), return the content unchanged instead of crashing the scan.
        """
        try:
            return CodeProcessor._filter_python_tokens(content)
        except (tokenize.TokenError, IndentationError, SyntaxError):
            # Malformed Python input: better to emit it verbatim than abort.
            return content

    @staticmethod
    def _filter_python_tokens(content: str) -> str:
        """Token-level pass behind _strip_python_comments_tokenize (may raise)."""
        tokens = tokenize.generate_tokens(io.StringIO(content).readline)
        out_tokens = []

        indent_level = 0
        module_docstring_candidate = True  # until we see first real stmt at module level
        expect_suite_docstring = False     # just entered a suite after def/class
        last_was_colon = False
        seen_nontrivial_in_line = False    # guards module docstring (start of logical line)

        for tok_type, tok_str, start, end, line in tokens:
            # Track indentation so we can tell module level from suite level.
            if tok_type == tokenize.INDENT:
                indent_level += 1
            elif tok_type == tokenize.DEDENT:
                indent_level = max(0, indent_level - 1)

            # New logical line: reset the "seen something" guard.
            if tok_type in (tokenize.NEWLINE, tokenize.NL):
                seen_nontrivial_in_line = False
                out_tokens.append((tok_type, tok_str))
                continue

            # Comments are dropped entirely.
            if tok_type == tokenize.COMMENT:
                continue

            # Detect ':' ending a def/class header.
            if tok_type == tokenize.OP and tok_str == ":":
                last_was_colon = True
                out_tokens.append((tok_type, tok_str))
                continue

            # After ':' + NEWLINE + INDENT comes a suite start -> allow docstring removal.
            if tok_type == tokenize.INDENT and last_was_colon:
                expect_suite_docstring = True
                last_was_colon = False
                out_tokens.append((tok_type, tok_str))
                continue
            # Any non-INDENT token clears the last_was_colon flag.
            if tok_type != tokenize.NL:
                last_was_colon = False

            # STRING handling.
            if tok_type == tokenize.STRING:
                at_line_start = (start[1] == 0) and not seen_nontrivial_in_line
                if indent_level == 0:
                    # Potential module docstring only if first statement at col 0.
                    if module_docstring_candidate and at_line_start:
                        module_docstring_candidate = False
                        continue  # drop it
                    # Any other top-level string is normal code.
                    module_docstring_candidate = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue
                else:
                    # In a suite: only a *bare* string that is the first statement
                    # after def/class is a docstring; drop regardless of column.
                    if expect_suite_docstring and not seen_nontrivial_in_line:
                        expect_suite_docstring = False
                        continue  # drop it
                    expect_suite_docstring = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue

            # Any other significant token disables module-docstring candidacy
            # and means the suite's first statement is not a bare docstring.
            if tok_type not in (tokenize.INDENT, tokenize.DEDENT):
                if indent_level == 0:
                    module_docstring_candidate = False
                # Mark we've seen something on this line.
                if tok_type not in (tokenize.NL, tokenize.NEWLINE):
                    seen_nontrivial_in_line = True
                    # BUGFIX: previously this flag survived NAME/OP/NUMBER tokens,
                    # so the string in e.g. `def f(): s = "x"` was wrongly dropped.
                    expect_suite_docstring = False

            out_tokens.append((tok_type, tok_str))

        return tokenize.untokenize(out_tokens)

    # ---------------------------------
    # C-style stripping via state machine
    # ---------------------------------
    @staticmethod
    def _strip_cstyle_comments(content: str) -> str:
        """
        Remove // line comments and /* ... */ block comments while preserving
        string ("...") and char ('...') literals and their escape sequences.
        """
        i = 0
        n = len(content)
        out = []
        in_line_comment = False
        in_block_comment = False
        in_string = False
        in_char = False
        escape = False

        while i < n:
            c = content[i]
            nxt = content[i + 1] if i + 1 < n else ""

            # Inside a line comment: consume until newline (newline is kept).
            if in_line_comment:
                if c == "\n":
                    in_line_comment = False
                    out.append(c)
                i += 1
                continue

            # Inside a block comment: consume until '*/'.
            if in_block_comment:
                if c == "*" and nxt == "/":
                    in_block_comment = False
                    i += 2
                else:
                    i += 1
                continue

            # Inside a string literal: copy verbatim, honoring backslash escapes.
            if in_string:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == '"':
                        in_string = False
                i += 1
                continue

            # Inside a char literal: same escape rules as strings.
            if in_char:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == "'":
                        in_char = False
                i += 1
                continue

            # Not in any special state: check for start of comments.
            if c == "/" and nxt == "/":
                in_line_comment = True
                i += 2
                continue
            if c == "/" and nxt == "*":
                in_block_comment = True
                i += 2
                continue

            # Check for start of string/char literals.
            if c == '"':
                in_string = True
                out.append(c)
                i += 1
                continue
            if c == "'":
                in_char = True
                out.append(c)
                i += 1
                continue

            # Normal character.
            out.append(c)
            i += 1

        return "".join(out)

    # -------------------
    # Public API
    # -------------------
    @classmethod
    def remove_comments(cls, content: str, file_type: str) -> str:
        """
        Remove comments based on file type/extension.

        - Python: tokenize-based
        - C/CPP/JS: state-machine
        - Hash (sh/yaml): regex full-line (a leading shebang line is preserved)
        - Jinja: regex {# ... #}

        Unknown extensions are returned stripped but otherwise untouched.
        """
        lang = cls._lang_from_ext(file_type)
        if lang is None:
            return content.strip()

        if lang == "python":
            return cls._strip_python_comments_tokenize(content).strip()
        if lang == "cstyle":
            return cls._strip_cstyle_comments(content).strip()

        spec = cls.LANG_SPECS.get(lang)
        if not spec:
            return content.strip()

        # BUGFIX: keep a leading shebang — it is executable metadata, not a
        # comment, even though it matches the full-line hash pattern.
        shebang = ""
        if lang == "hash" and content.startswith("#!"):
            newline_at = content.find("\n")
            if newline_at == -1:
                return content.strip()
            shebang = content[: newline_at + 1]
            content = content[newline_at + 1:]

        cleaned = content
        for pat in spec.patterns:
            cleaned = pat.sub("", cleaned)
        return (shebang + cleaned).strip()

    @staticmethod
    def compress(content: str, level: int = 9) -> bytes:
        """Compress code using zlib (UTF-8 encoded). Returns bytes."""
        return zlib.compress(content.encode("utf-8"), level)

    @staticmethod
    def decompress(blob: bytes) -> str:
        """Decompress zlib-compressed code back to text (UTF-8 decoded)."""
        return zlib.decompress(blob).decode("utf-8")
|
||||
228
src/arc/directory_handler.py
Normal file
228
src/arc/directory_handler.py
Normal file
@@ -0,0 +1,228 @@
|
||||
import fnmatch
|
||||
import os
|
||||
import sys
|
||||
|
||||
from .code_processor import CodeProcessor
|
||||
|
||||
|
||||
class DirectoryHandler:
    """Static helpers to scan directories/files and print matching contents."""

    @staticmethod
    def load_gitignore_patterns(root_path):
        """
        Recursively scans for .gitignore files in the given root_path.
        Returns a list of tuples (base_dir, patterns) where:
          - base_dir: the directory in which the .gitignore was found.
          - patterns: a list of pattern strings from that .gitignore.
        """
        gitignore_data = []
        for dirpath, _, filenames in os.walk(root_path):
            if ".gitignore" in filenames:
                gitignore_path = os.path.join(dirpath, ".gitignore")
                try:
                    with open(gitignore_path, "r") as f:
                        lines = f.readlines()
                    # Filter out empty lines and comments.
                    patterns = [
                        line.strip()
                        for line in lines
                        if line.strip() and not line.strip().startswith("#")
                    ]
                    # Save the base directory and its patterns.
                    gitignore_data.append((dirpath, patterns))
                except Exception as e:  # pragma: no cover - defensive
                    print(f"Error reading {gitignore_path}: {e}", file=sys.stderr)
        return gitignore_data

    @staticmethod
    def is_binary_file(file_path):
        """
        Reads the first 1024 bytes of file_path and heuristically determines
        if the file appears to be binary. This method returns True if a null byte
        is found or if more than 30% of the bytes in the sample are non-text.
        """
        try:
            with open(file_path, "rb") as f:
                chunk = f.read(1024)
                # If there's a null byte, it's almost certainly binary.
                if b"\x00" in chunk:
                    return True

                # Define a set of text characters (ASCII printable + common control characters)
                text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F)))
                # Count non-text characters in the chunk.
                non_text = sum(byte not in text_chars for byte in chunk)
                if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
                    return True
        except Exception:  # pragma: no cover - defensive
            # If the file cannot be read in binary mode, assume it's not binary.
            return False
        return False

    @staticmethod
    def is_gitignored(file_path, gitignore_data):
        """
        Checks if file_path should be ignored according to the .gitignore entries.
        For each tuple (base_dir, patterns), if file_path is under base_dir,
        computes the relative path and matches it against the patterns.
        """
        for base_dir, patterns in gitignore_data:
            try:
                rel_path = os.path.relpath(file_path, base_dir)
            except ValueError:
                # file_path and base_dir are on different drives.
                continue
            # If the file is not under the current .gitignore base_dir, skip it.
            if rel_path.startswith(".."):
                continue
            # Check all patterns.
            for pattern in patterns:
                if pattern.endswith("/"):
                    # Directory pattern: check if any folder in the relative path matches.
                    parts = rel_path.split(os.sep)
                    for part in parts[:-1]:
                        if fnmatch.fnmatch(part + "/", pattern):
                            return True
                else:
                    # Check if the relative path matches the pattern.
                    if fnmatch.fnmatch(rel_path, pattern):
                        return True
        return False

    @staticmethod
    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
        """
        Filter out directories based on ignore_file_strings and hidden status.
        Mutates ``dirs`` in place so that os.walk skips the removed entries.
        """
        if ignore_hidden:
            dirs[:] = [d for d in dirs if not d.startswith(".")]
        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]

    @staticmethod
    def path_or_content_contains(file_path, path_contains, content_contains):
        """
        Check if the file path contains specific strings or if the file content does.
        """
        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
            return True

        if content_contains:
            try:
                with open(file_path, "r") as f:
                    content = f.read()
                    if any(whitelist_str in content for whitelist_str in content_contains):
                        return True
            except UnicodeDecodeError:
                # Undecodable file cannot match a textual whitelist.
                return False
        return False

    @staticmethod
    def should_print_file(
        file_path,
        file_types,
        ignore_file_strings,
        ignore_hidden,
        path_contains,
        content_contains,
        scan_binary_files=False,
    ):
        """
        Determines if a file should be printed based on various criteria.
        By default, binary files are skipped unless scan_binary_files is True.
        """
        # Check binary file status using our heuristic.
        if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
            return False

        if ignore_hidden and os.path.basename(file_path).startswith("."):
            return False

        if file_types and not any(file_path.endswith(ft) for ft in file_types):
            return False

        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
            return False

        if path_contains or content_contains:
            return DirectoryHandler.path_or_content_contains(
                file_path, path_contains, content_contains
            )
        return True

    @staticmethod
    def print_file_content(file_path, no_comments, compress, output_stream):
        """
        Prints the content of a file, optionally removing comments or compressing the output.
        """
        try:
            with open(file_path, "r") as f:
                content = f.read()
                if no_comments:
                    file_type = os.path.splitext(file_path)[1]
                    content = CodeProcessor.remove_comments(content, file_type)
                print(f"<< START: {file_path} >>", file=output_stream)
                if compress:
                    compressed_content = CodeProcessor.compress(content)
                    print("COMPRESSED CODE:", file=output_stream)
                    print(compressed_content, file=output_stream)
                else:
                    print(content, file=output_stream)
                print("<< END >>\n", file=output_stream)
        except UnicodeDecodeError:
            # NOTE(review): exiting here aborts the whole scan on one
            # unreadable file — confirm this is intended rather than
            # skip-and-continue.
            print(
                f"Warning: Could not read file due to encoding issues: {file_path}",
                file=sys.stderr,
            )
            sys.exit(1)

    @staticmethod
    def handle_directory(directory, **kwargs):
        """
        Scans the directory and processes each file while respecting .gitignore rules.
        """
        gitignore_data = []
        if not kwargs.get("no_gitignore"):
            gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)

        output_stream = kwargs.get("output_stream", sys.stdout)

        for root, dirs, files in os.walk(directory):
            # In-place pruning so os.walk does not descend into filtered dirs.
            DirectoryHandler.filter_directories(
                dirs, kwargs["ignore_file_strings"], kwargs["ignore_hidden"]
            )
            for file in files:
                file_path = os.path.join(root, file)
                if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
                    if kwargs.get("verbose"):
                        print(f"Skipped (gitignored): {file_path}", file=output_stream)
                    continue

                if DirectoryHandler.should_print_file(
                    file_path,
                    kwargs["file_types"],
                    kwargs["ignore_file_strings"],
                    kwargs["ignore_hidden"],
                    kwargs["path_contains"],
                    kwargs["content_contains"],
                    scan_binary_files=kwargs.get("scan_binary_files", False),
                ):
                    DirectoryHandler.print_file_content(
                        file_path,
                        kwargs["no_comments"],
                        kwargs["compress"],
                        output_stream=output_stream,
                    )
                elif kwargs.get("verbose"):
                    print(f"Skipped file: {file_path}", file=output_stream)

    @staticmethod
    def handle_file(file_path, **kwargs):
        """
        Processes an individual file.
        """
        output_stream = kwargs.get("output_stream", sys.stdout)
        DirectoryHandler.print_file_content(
            file_path,
            kwargs["no_comments"],
            kwargs["compress"],
            output_stream=output_stream,
        )
|
||||
23
src/arc/tee.py
Normal file
23
src/arc/tee.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from typing import TextIO
|
||||
|
||||
|
||||
class Tee:
    """
    Fan-out text stream: every write is forwarded to each wrapped stream.

    Typical usage:
        tee = Tee(sys.stdout, buffer)
        print("hello", file=tee)
    """

    def __init__(self, *streams: TextIO) -> None:
        # Keep the targets exactly as given; order determines write order.
        self.streams = streams

    def write(self, data: str) -> None:
        """Forward *data* to every underlying stream, in order."""
        for target in self.streams:
            target.write(data)

    def flush(self) -> None:
        """Flush each underlying stream that supports flushing."""
        for target in self.streams:
            if hasattr(target, "flush"):
                target.flush()
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
199
tests/unit/test_arc.py
Normal file
199
tests/unit/test_arc.py
Normal file
@@ -0,0 +1,199 @@
|
||||
# tests/unit/test_arc.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover, so "import arc"
# resolves to the in-repo package without requiring an install step.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.code_processor import CodeProcessor
|
||||
from arc.directory_handler import DirectoryHandler
|
||||
|
||||
|
||||
class TestCodeProcessor(unittest.TestCase):
    """Unit tests for CodeProcessor comment stripping and (de)compression."""

    def test_python_comment_and_docstring_stripping(self):
        # Module docstring, function docstring and comments must be removed;
        # ordinary strings (even triple-quoted) must survive.
        src = '''\
"""module docstring should go away"""

# a comment
x = 1 # inline comment
y = "string with # not a comment"

def f():
    """function docstring should go away"""
    s = """triple quoted but not a docstring"""
    return x
'''
        out = CodeProcessor.remove_comments(src, ".py")
        self.assertNotIn("module docstring", out)
        self.assertNotIn("function docstring", out)
        self.assertNotIn("# a comment", out)
        # tolerate whitespace normalization from tokenize.untokenize
        self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
        self.assertIn("triple quoted but not a docstring", out)

    def test_cstyle_comment_stripping(self):
        # Line and block comments go; comment-like text inside strings stays.
        src = '''\
// line comment
int main() {
    /* block
 comment */
    int x = 42; // end comment
    const char* s = "/* not a comment here */";
    return x;
}
'''
        out = CodeProcessor.remove_comments(src, ".c")
        # line comment and block comment gone
        self.assertNotIn("// line comment", out)
        self.assertNotIn("block\n comment", out)
        # string content with /* */ inside should remain
        self.assertIn('const char* s = "/* not a comment here */";', out)

    def test_hash_comment_stripping(self):
        # Only whole lines starting with '#' are removed for hash-style files.
        src = """\
# top comment
KEY=value # trailing comment should be kept by default
plain: value
"""
        out = CodeProcessor.remove_comments(src, ".yml")
        # Our regex removes full lines starting with optional spaces then '#'
        self.assertNotIn("top comment", out)
        # It does not remove trailing fragments after content for hash style
        self.assertIn("KEY=value", out)
        self.assertIn("plain: value", out)

    def test_jinja_comment_stripping(self):
        # {# ... #} blocks (including multi-line ones) are removed.
        src = """\
{# top jinja comment #}
Hello {{ name }}!
{#
multi-line
jinja comment
#}
Body text and {{ value }}.
"""
        out = CodeProcessor.remove_comments(src, ".j2")
        self.assertNotIn("top jinja comment", out)
        self.assertNotIn("multi-line", out)
        # Regular content and expressions remain
        self.assertIn("Hello {{ name }}!", out)
        self.assertIn("Body text and {{ value }}.", out)

    def test_unknown_extension_returns_stripped(self):
        # Unknown extensions: content is only stripped, never modified.
        src = "  x = 1 # not removed for unknown  "
        out = CodeProcessor.remove_comments(src, ".unknown")
        self.assertEqual(out, "x = 1 # not removed for unknown")

    def test_compress_decompress_roundtrip(self):
        # compress/decompress must be a lossless round trip.
        src = "def x():\n    return 42\n"
        blob = CodeProcessor.compress(src)
        self.assertIsInstance(blob, (bytes, bytearray))
        back = CodeProcessor.decompress(blob)
        self.assertEqual(src, back)
|
||||
|
||||
|
||||
class TestDirectoryHandler(unittest.TestCase):
    """Unit tests for DirectoryHandler filtering, gitignore and printing."""

    def test_is_binary_file(self):
        # A file containing a null byte must be classified as binary.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b"\x00\x01\x02BINARY")
            path = tf.name
        try:
            self.assertTrue(DirectoryHandler.is_binary_file(path))
        finally:
            os.remove(path)

    def test_gitignore_matching(self):
        # Files under an ignored directory or matching a glob are gitignored;
        # other files are kept.
        with tempfile.TemporaryDirectory() as root:
            # Create .gitignore ignoring build/ and *.log
            gi_dir = os.path.join(root, "a")
            os.makedirs(gi_dir, exist_ok=True)
            with open(os.path.join(gi_dir, ".gitignore"), "w") as f:
                f.write("build/\n*.log\n")

            # Files
            os.makedirs(os.path.join(gi_dir, "build"), exist_ok=True)
            ignored_dir_file = os.path.join(gi_dir, "build", "x.txt")
            with open(ignored_dir_file, "w") as f:
                f.write("ignored")
            ignored_log = os.path.join(gi_dir, "debug.log")
            with open(ignored_log, "w") as f:
                f.write("ignored log")
            kept_file = os.path.join(gi_dir, "src.txt")
            with open(kept_file, "w") as f:
                f.write("keep me")

            gi_data = DirectoryHandler.load_gitignore_patterns(root)

            self.assertTrue(DirectoryHandler.is_gitignored(ignored_dir_file, gi_data))
            self.assertTrue(DirectoryHandler.is_gitignored(ignored_log, gi_data))
            self.assertFalse(DirectoryHandler.is_gitignored(kept_file, gi_data))

    def test_should_print_file_filters_hidden_and_types(self):
        # Hidden files are rejected when ignore_hidden=True; matching
        # extensions are accepted.
        with tempfile.TemporaryDirectory() as root:
            hidden = os.path.join(root, ".hidden.txt")
            plain = os.path.join(root, "keep.py")
            with open(hidden, "w") as f:
                f.write("data")
            with open(plain, "w") as f:
                f.write("print('hi')")

            self.assertFalse(
                DirectoryHandler.should_print_file(
                    hidden,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )
            self.assertTrue(
                DirectoryHandler.should_print_file(
                    plain,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )

    def test_print_file_content_no_comments_and_compress(self):
        # Comment removal and compression both go through the START/END framing.
        with tempfile.TemporaryDirectory() as root:
            p = os.path.join(root, "t.py")
            with open(p, "w") as f:
                f.write("# comment only\nx=1\n")
            buf = io.StringIO()
            DirectoryHandler.print_file_content(
                p,
                no_comments=True,
                compress=False,
                output_stream=buf,
            )
            out = buf.getvalue()
            self.assertIn("<< START:", out)
            # be whitespace-tolerant (tokenize may insert spaces)
            self.assertRegex(out, r"x\s*=\s*1")
            self.assertNotIn("# comment only", out)

            buf = io.StringIO()
            DirectoryHandler.print_file_content(
                p,
                no_comments=True,
                compress=True,
                output_stream=buf,
            )
            out = buf.getvalue()
            self.assertIn("COMPRESSED CODE:", out)
            self.assertIn("<< END >>", out)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
60
tests/unit/test_cli.py
Normal file
60
tests/unit/test_cli.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# tests/unit/test_cli.py
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover, so "import arc"
# resolves to the in-repo package without requiring an install step.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.cli import parse_arguments # noqa: E402
|
||||
|
||||
|
||||
class TestCliParseArguments(unittest.TestCase):
    """Unit tests for the arc CLI argument parser."""

    def test_basic_paths_and_defaults(self):
        # Positional paths are collected; everything else takes its default.
        with patch.object(sys, "argv", ["arc", "foo", "bar"]):
            args = parse_arguments()

        self.assertEqual(args.paths, ["foo", "bar"])
        self.assertEqual(args.file_types, [])
        self.assertEqual(args.ignore_file_strings, [])
        self.assertFalse(args.clipboard)
        self.assertFalse(args.quiet)
        # show_hidden default is False → ignore_hidden should be True
        self.assertFalse(args.show_hidden)
        self.assertTrue(args.ignore_hidden)

    def test_clipboard_and_quiet_short_flags(self):
        # -x and -q short flags toggle clipboard and quiet modes.
        with patch.object(sys, "argv", ["arc", ".", "-x", "-q"]):
            args = parse_arguments()

        self.assertTrue(args.clipboard)
        self.assertTrue(args.quiet)

    def test_ignore_file_strings_short_and_long(self):
        # Test only the short form -I collecting multiple values
        with patch.object(
            sys,
            "argv",
            ["arc", ".", "-I", "build", "dist", "node_modules"],
        ):
            args = parse_arguments()

        self.assertEqual(
            args.ignore_file_strings,
            ["build", "dist", "node_modules"],
        )

    def test_show_hidden_switches_ignore_hidden_off(self):
        # --show-hidden flips the derived ignore_hidden flag off.
        with patch.object(sys, "argv", ["arc", ".", "--show-hidden"]):
            args = parse_arguments()

        self.assertTrue(args.show_hidden)
        self.assertFalse(args.ignore_hidden)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
145
tests/unit/test_main.py
Normal file
145
tests/unit/test_main.py
Normal file
@@ -0,0 +1,145 @@
|
||||
# tests/unit/test_main.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import types
|
||||
import unittest
|
||||
from contextlib import redirect_stdout
|
||||
from unittest.mock import patch
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover, so "import arc"
# resolves to the in-repo package without requiring an install step.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
import arc # noqa: E402
|
||||
|
||||
|
||||
class TestArcMain(unittest.TestCase):
    """Integration-ish tests for arc.main(): clipboard/quiet output routing."""

    def _make_args(
        self,
        path,
        clipboard=False,
        quiet=False,
        file_types=None,
        ignore_file_strings=None,
        ignore_hidden=True,
        verbose=False,
        no_comments=False,
        compress=False,
        path_contains=None,
        content_contains=None,
        no_gitignore=False,
        scan_binary_files=False,
    ):
        # Build a parse_arguments()-shaped namespace without running argparse.
        return types.SimpleNamespace(
            paths=[path],
            clipboard=clipboard,
            quiet=quiet,
            file_types=file_types or [],
            ignore_file_strings=ignore_file_strings or [],
            ignore_hidden=ignore_hidden,
            show_hidden=not ignore_hidden,
            verbose=verbose,
            no_comments=no_comments,
            compress=compress,
            path_contains=path_contains or [],
            content_contains=content_contains or [],
            no_gitignore=no_gitignore,
            scan_binary_files=scan_binary_files,
        )

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_calls_xclip_and_uses_tee(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        # clipboard + not quiet → output goes through a Tee (stdout AND buffer),
        # and the buffered text is piped to xclip.
        # create a temporary directory as scan target
        with tempfile.TemporaryDirectory() as tmpdir:
            args = self._make_args(path=tmpdir, clipboard=True, quiet=False)
            mock_parse_arguments.return_value = args

            def fake_handle_directory(path, **kwargs):
                out = kwargs["output_stream"]
                # should be a Tee instance
                self.assertEqual(out.__class__.__name__, "Tee")
                out.write("FROM ARC\n")

            mock_handle_directory.side_effect = fake_handle_directory

            buf = io.StringIO()
            with redirect_stdout(buf):
                arc.main()

            # stdout should contain the text once (via Tee -> sys.stdout)
            stdout_value = buf.getvalue()
            self.assertIn("FROM ARC", stdout_value)

            # xclip should have been called with the same text in input
            mock_run.assert_called_once()
            called_args, called_kwargs = mock_run.call_args
            self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"])
            self.assertIn("FROM ARC", called_kwargs.get("input", ""))

    @patch("arc.subprocess.run")
    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_clipboard_quiet_only_clipboard_no_stdout(
        self, mock_parse_arguments, mock_handle_directory, mock_run
    ):
        # clipboard + quiet → only the clipboard gets the text; stdout stays empty.
        with tempfile.TemporaryDirectory() as tmpdir:
            args = self._make_args(path=tmpdir, clipboard=True, quiet=True)
            mock_parse_arguments.return_value = args

            def fake_handle_directory(path, **kwargs):
                out = kwargs["output_stream"]
                # quiet + clipboard → output_stream is a buffer (StringIO)
                self.assertIsInstance(out, io.StringIO)
                out.write("SILENT CONTENT\n")

            mock_handle_directory.side_effect = fake_handle_directory

            buf = io.StringIO()
            # stdout should stay empty
            with redirect_stdout(buf):
                arc.main()

            stdout_value = buf.getvalue()
            self.assertEqual(stdout_value, "")

            mock_run.assert_called_once()
            called_args, called_kwargs = mock_run.call_args
            self.assertEqual(called_args[0], ["xclip", "-selection", "clipboard"])
            self.assertIn("SILENT CONTENT", called_kwargs.get("input", ""))

    @patch("arc.DirectoryHandler.handle_directory")
    @patch("arc.parse_arguments")
    def test_main_quiet_without_clipboard_uses_nullwriter(
        self, mock_parse_arguments, mock_handle_directory
    ):
        # quiet without clipboard → output is discarded via a NullWriter.
        with tempfile.TemporaryDirectory() as tmpdir:
            args = self._make_args(path=tmpdir, clipboard=False, quiet=True)
            mock_parse_arguments.return_value = args

            def fake_handle_directory(path, **kwargs):
                out = kwargs["output_stream"]
                # quiet without clipboard → internal NullWriter class
                self.assertEqual(out.__class__.__name__, "NullWriter")
                # writing should not raise
                out.write("SHOULD NOT APPEAR ANYWHERE\n")

            mock_handle_directory.side_effect = fake_handle_directory

            buf = io.StringIO()
            with redirect_stdout(buf):
                arc.main()

            # Nothing should be printed to stdout
            self.assertEqual(buf.getvalue(), "")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
54
tests/unit/test_tee.py
Normal file
54
tests/unit/test_tee.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# tests/unit/test_tee.py
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Ensure src/ is on sys.path when running via discover, so "import arc"
# resolves to the in-repo package without requiring an install step.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
SRC_ROOT = os.path.join(PROJECT_ROOT, "src")
if SRC_ROOT not in sys.path:
    sys.path.insert(0, SRC_ROOT)
|
||||
|
||||
from arc.tee import Tee # noqa: E402
|
||||
|
||||
|
||||
class TestTee(unittest.TestCase):
    """Unit tests for the Tee fan-out stream."""

    def test_write_writes_to_all_streams(self):
        # Every write must be mirrored to each wrapped stream, in order.
        buf1 = io.StringIO()
        buf2 = io.StringIO()

        tee = Tee(buf1, buf2)
        tee.write("hello")
        tee.write(" world")

        self.assertEqual(buf1.getvalue(), "hello world")
        self.assertEqual(buf2.getvalue(), "hello world")

    def test_flush_flushes_all_streams(self):
        # flush() must reach every stream that exposes a flush() method.
        class DummyStream:
            def __init__(self):
                self.flushed = False
                self.data = ""

            def write(self, s):
                self.data += s

            def flush(self):
                self.flushed = True

        s1 = DummyStream()
        s2 = DummyStream()

        tee = Tee(s1, s2)
        tee.write("x")
        tee.flush()

        self.assertTrue(s1.flushed)
        self.assertTrue(s2.flushed)
        self.assertEqual(s1.data, "x")
        self.assertEqual(s2.data, "x")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user