mirror of
https://github.com/kevinveenbirkenbach/directory-content-scanner.git
synced 2025-11-19 19:26:39 +00:00
Compare commits
1 Commits
main
...
74651bb880
| Author | SHA1 | Date | |
|---|---|---|---|
| 74651bb880 |
7
.github/FUNDING.yml
vendored
7
.github/FUNDING.yml
vendored
@@ -1,7 +0,0 @@
|
|||||||
github: kevinveenbirkenbach
|
|
||||||
|
|
||||||
patreon: kevinveenbirkenbach
|
|
||||||
|
|
||||||
buy_me_a_coffee: kevinveenbirkenbach
|
|
||||||
|
|
||||||
custom: https://s.veen.world/paypaldonate
|
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +0,0 @@
|
|||||||
*__pycache__*
|
|
||||||
17
Makefile
17
Makefile
@@ -1,17 +0,0 @@
|
|||||||
# Makefile for ARC
# Targets: help (default usage text), test (unit tests), install (info only).
.PHONY: test install help

# Print the available targets.
help:
	@echo "Targets:"
	@echo " make test - Run unit tests"
	@echo " make install - Show how to install via Kevin's Package Manager"

# Run the unit test suite discovered under tests/ (files named test_*.py).
test:
	@python -m unittest discover -s tests -p "test_*.py" -t .

# Installation is delegated to Kevin's Package Manager; this target only prints instructions.
install:
	@echo "ARC is distributed via Kevin's Package Manager."
	@echo "Install it with:"
	@echo " package-manager install arc"
	@echo ""
	@echo "(This 'make install' does not perform any other actions.)"
|
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
# 🤖👩🔬 Analysis-Ready Code (ARC)
|
# 🤖👩🔬 Analysis-Ready Code (ARC)
|
||||||
[](https://github.com/sponsors/kevinveenbirkenbach) [](https://www.patreon.com/c/kevinveenbirkenbach) [](https://buymeacoffee.com/kevinveenbirkenbach) [](https://s.veen.world/paypaldonate)
|
|
||||||
|
|
||||||
|
|
||||||
Analysis-Ready Code (ARC) is a Python-based utility designed to recursively scan directories and transform source code into a format optimized for AI and computer analysis. By stripping comments, filtering specific file types, and optionally compressing content, ARC ensures that your code is clean and ready for automated processing.
|
Analysis-Ready Code (ARC) is a Python-based utility designed to recursively scan directories and transform source code into a format optimized for AI and computer analysis. By stripping comments, filtering specific file types, and optionally compressing content, ARC ensures that your code is clean and ready for automated processing.
|
||||||
|
|
||||||
|
|||||||
71
cli.py
71
cli.py
@@ -1,71 +0,0 @@
|
|||||||
import argparse
|
|
||||||
|
|
||||||
def parse_arguments():
    """Build and parse the ARC command-line interface.

    Returns:
        argparse.Namespace: the parsed options, augmented with
        ``ignore_hidden`` — the inverse of ``--show-hidden`` — which is
        what the scanning code consumes downstream.
    """
    parser = argparse.ArgumentParser(
        description="Scan directories and print/compile file contents."
    )
    add = parser.add_argument

    # Positional inputs.
    add("paths", nargs='+',
        help="List of files or directories to scan.")

    # Filtering options.
    add("-t", "--file-types", nargs='+', default=[],
        help="Filter by file types (e.g., .txt, .log).")
    add("-x", "--ignore-file-strings", nargs='+', default=[],
        help="Ignore files and folders containing these strings.")
    add("-S", "--show-hidden", action='store_true', dest='show_hidden',
        default=False,
        help="Include hidden directories and files in the scan.")
    add("-p", "--path-contains", nargs='+', default=[],
        help="Display files whose paths contain one of these strings.")
    add("-C", "--content-contains", nargs='+', default=[],
        help="Display files containing one of these strings in their content.")
    add("-G", "--no-gitignore", action='store_true',
        help="Do not respect .gitignore files during scan.")
    add("-b", "--scan-binary-files", action='store_true',
        help="Scan binary files as well (by default these are ignored).")

    # Output options.
    add("-v", "--verbose", action='store_true',
        help="Enable verbose mode.")
    add("-N", "--no-comments", action='store_true',
        help="Remove comments from the displayed content based on file type.")
    add("-z", "--compress", action='store_true',
        help="Compress code (for supported file types).")

    parsed = parser.parse_args()
    # Downstream code works with ignore_hidden, the inverse of --show-hidden.
    parsed.ignore_hidden = not parsed.show_hidden
    return parsed
|
|
||||||
@@ -1,285 +0,0 @@
|
|||||||
import re
|
|
||||||
import zlib
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import Dict, Tuple, Pattern, Optional
|
|
||||||
import io
|
|
||||||
import tokenize
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
class LanguageSpec:
    """Immutable holder for the compiled comment-removal patterns of one language.

    Instances are frozen so they can be shared safely as class-level
    constants (see CodeProcessor.LANG_SPECS).
    """
    # Compiled regular expressions; every match is deleted from the source text.
    patterns: Tuple[Pattern, ...]
|
|
||||||
|
|
||||||
|
|
||||||
class CodeProcessor:
    """
    Utilities to strip comments and (de)compress code.
    - Python: tokenize-based (safe) with precise docstring removal.
    - C/CPP/JS: state-machine comment stripper that respects string/char literals.
    - Shell/YAML: remove full-line hash comments only.
    - Jinja: remove {# ... #} blocks.
    """
    # File extensions (normalized to lowercase) -> internal language key.
    EXT_TO_LANG: Dict[str, str] = {
        ".py": "python",
        ".js": "cstyle",
        ".c": "cstyle",
        ".cpp": "cstyle",
        ".h": "cstyle",
        ".sh": "hash",
        ".bash": "hash",
        ".yml": "hash",
        ".yaml": "hash",
        ".j2": "jinja",
        ".jinja": "jinja",
        ".jinja2": "jinja",
        ".tpl": "jinja",
    }

    # Regex-based specs for hash and jinja.
    # NOTE(review): a shebang line ('#!...') also matches the full-line hash
    # pattern and is stripped from shell scripts — confirm this is intended.
    _HASH = LanguageSpec(patterns=(
        re.compile(r"^\s*#.*$", flags=re.MULTILINE),  # only full-line comments
    ))
    _JINJA = LanguageSpec(patterns=(
        re.compile(r"\{#.*?#\}", flags=re.DOTALL),  # {# ... #} across lines
    ))

    LANG_SPECS: Dict[str, LanguageSpec] = {
        "hash": _HASH,
        "jinja": _JINJA,
        # "cstyle" handled by a state machine, not regex
        # "python" handled by tokenize, not regex
    }

    @classmethod
    def _lang_from_ext(cls, file_type: str) -> Optional[str]:
        """Map an extension like '.py' to an internal language key (None if unknown)."""
        ext = file_type.lower().strip()
        return cls.EXT_TO_LANG.get(ext)

    # -----------------------------
    # Python stripping via tokenize
    # -----------------------------
    @staticmethod
    def _strip_python_comments_tokenize(content: str) -> str:
        """
        Remove comments and docstrings safely using tokenize.
        Rules:
        - Drop COMMENT tokens.
        - Drop module docstring only if it's the very first statement at col 0.
        - Drop the first STRING statement in a suite immediately after 'def'/'class'
          header (':' NEWLINE INDENT).

        Raises whatever tokenize raises on syntactically invalid input
        (e.g. tokenize.TokenError).
        """
        tokens = tokenize.generate_tokens(io.StringIO(content).readline)
        out_tokens = []  # (type, string) pairs handed back to untokenize

        indent_level = 0
        module_docstring_candidate = True  # until we see first real stmt at module level
        expect_suite_docstring = False  # just entered a suite after def/class
        last_was_colon = False
        seen_nontrivial_in_line = False  # guards module docstring (start of logical line)

        for tok_type, tok_str, start, end, line in tokens:
            # Track indentation
            if tok_type == tokenize.INDENT:
                indent_level += 1
            elif tok_type == tokenize.DEDENT:
                indent_level = max(0, indent_level - 1)

            # New logical line: reset guard
            if tok_type in (tokenize.NEWLINE, tokenize.NL):
                seen_nontrivial_in_line = False
                out_tokens.append((tok_type, tok_str))
                continue

            # Comments are dropped
            if tok_type == tokenize.COMMENT:
                continue

            # Detect ':' ending a def/class header
            # NOTE(review): any ':' OP sets this flag (dict literals, slices,
            # annotations included); only the ':' immediately followed by
            # NEWLINE+INDENT actually triggers docstring removal below.
            if tok_type == tokenize.OP and tok_str == ":":
                last_was_colon = True
                out_tokens.append((tok_type, tok_str))
                continue

            # After ':' + NEWLINE + INDENT comes a suite start -> allow docstring removal
            if tok_type == tokenize.INDENT and last_was_colon:
                expect_suite_docstring = True
                last_was_colon = False
                out_tokens.append((tok_type, tok_str))
                continue
            # Any non-INDENT token clears the last_was_colon flag
            if tok_type != tokenize.NL:
                last_was_colon = False

            # STRING handling (tokenize emits one STRING token per literal)
            if tok_type == tokenize.STRING:
                at_line_start = (start[1] == 0) and not seen_nontrivial_in_line
                if indent_level == 0:
                    # Potential module docstring only if first statement at col 0
                    if module_docstring_candidate and at_line_start:
                        module_docstring_candidate = False
                        # drop it
                        continue
                    # Any other top-level string is normal
                    module_docstring_candidate = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue
                else:
                    # In a suite: if it's the first statement after def/class, drop regardless of column
                    if expect_suite_docstring:
                        expect_suite_docstring = False
                        # drop it
                        continue
                    expect_suite_docstring = False
                    out_tokens.append((tok_type, tok_str))
                    seen_nontrivial_in_line = True
                    continue

            # Any other significant token disables module-docstring candidacy
            if tok_type not in (tokenize.INDENT, tokenize.DEDENT):
                if indent_level == 0:
                    module_docstring_candidate = False
                # Mark we've seen something on this line
                if tok_type not in (tokenize.NL, tokenize.NEWLINE):
                    seen_nontrivial_in_line = True

            out_tokens.append((tok_type, tok_str))

        return tokenize.untokenize(out_tokens)

    # ---------------------------------
    # C-style stripping via state machine
    # ---------------------------------
    @staticmethod
    def _strip_cstyle_comments(content: str) -> str:
        """
        Remove // line comments and /* ... */ block comments while preserving
        string ("...") and char ('...') literals and their escape sequences.

        Single-pass character scan; exactly one of the in_* states is active
        at a time, so comment markers inside literals are never misread.
        """
        i = 0
        n = len(content)
        out = []
        in_line_comment = False
        in_block_comment = False
        in_string = False
        in_char = False
        escape = False

        while i < n:
            c = content[i]
            nxt = content[i + 1] if i + 1 < n else ""

            # If inside line comment: consume until newline (newline itself is kept)
            if in_line_comment:
                if c == "\n":
                    in_line_comment = False
                    out.append(c)
                i += 1
                continue

            # If inside block comment: consume until '*/'
            if in_block_comment:
                if c == "*" and nxt == "/":
                    in_block_comment = False
                    i += 2
                else:
                    i += 1
                continue

            # If inside string literal
            if in_string:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == '"':
                        in_string = False
                i += 1
                continue

            # If inside char literal
            if in_char:
                out.append(c)
                if escape:
                    escape = False
                else:
                    if c == "\\":
                        escape = True
                    elif c == "'":
                        in_char = False
                i += 1
                continue

            # Not in any special state:
            # Check for start of comments
            if c == "/" and nxt == "/":
                in_line_comment = True
                i += 2
                continue
            if c == "/" and nxt == "*":
                in_block_comment = True
                i += 2
                continue

            # Check for start of string/char literals
            if c == '"':
                in_string = True
                out.append(c)
                i += 1
                continue
            if c == "'":
                in_char = True
                out.append(c)
                i += 1
                continue

            # Normal character
            out.append(c)
            i += 1

        return "".join(out)

    # -------------------
    # Public API
    # -------------------
    @classmethod
    def remove_comments(cls, content: str, file_type: str) -> str:
        """
        Remove comments based on file type/extension.
        - Python: tokenize-based
        - C/CPP/JS: state-machine
        - Hash (sh/yaml): regex full-line
        - Jinja: regex {# ... #}

        Unknown extensions are returned unchanged (stripped of outer whitespace).
        """
        lang = cls._lang_from_ext(file_type)
        if lang is None:
            return content.strip()

        if lang == "python":
            return cls._strip_python_comments_tokenize(content).strip()
        if lang == "cstyle":
            return cls._strip_cstyle_comments(content).strip()

        spec = cls.LANG_SPECS.get(lang)
        if not spec:
            return content.strip()

        cleaned = content
        for pat in spec.patterns:
            cleaned = pat.sub("", cleaned)
        return cleaned.strip()

    @staticmethod
    def compress(content: str, level: int = 9) -> bytes:
        """Compress code using zlib (level 9 = maximum). Returns bytes."""
        return zlib.compress(content.encode("utf-8"), level)

    @staticmethod
    def decompress(blob: bytes) -> str:
        """Decompress zlib-compressed code back to text (inverse of compress)."""
        return zlib.decompress(blob).decode("utf-8")
|
|
||||||
@@ -1,193 +0,0 @@
|
|||||||
import os
|
|
||||||
import fnmatch
|
|
||||||
from code_processor import CodeProcessor
|
|
||||||
|
|
||||||
class DirectoryHandler:
    """Static helpers to scan directories/files and print matching file contents."""

    @staticmethod
    def load_gitignore_patterns(root_path):
        """
        Recursively scans for .gitignore files in the given root_path.
        Returns a list of tuples (base_dir, patterns) where:
        - base_dir: the directory in which the .gitignore was found.
        - patterns: a list of pattern strings from that .gitignore.
        """
        gitignore_data = []
        for dirpath, _, filenames in os.walk(root_path):
            if '.gitignore' in filenames:
                gitignore_path = os.path.join(dirpath, '.gitignore')
                try:
                    with open(gitignore_path, 'r') as f:
                        lines = f.readlines()
                    # Filter out empty lines and comments.
                    patterns = [line.strip() for line in lines if line.strip() and not line.strip().startswith('#')]
                    # Save the base directory and its patterns.
                    gitignore_data.append((dirpath, patterns))
                except Exception as e:
                    # Best-effort: report the unreadable file and keep scanning.
                    print(f"Error reading {gitignore_path}: {e}")
        return gitignore_data

    @staticmethod
    def is_binary_file(file_path):
        """
        Reads the first 1024 bytes of file_path and heuristically determines
        if the file appears to be binary. This method returns True if a null byte
        is found or if more than 30% of the bytes in the sample are non-text.
        """
        try:
            with open(file_path, 'rb') as f:
                chunk = f.read(1024)
                # If there's a null byte, it's almost certainly binary.
                if b'\x00' in chunk:
                    return True

                # Define a set of text characters (ASCII printable + common control characters)
                text_chars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x7F)))
                # Count non-text characters in the chunk.
                non_text = sum(byte not in text_chars for byte in chunk)
                if len(chunk) > 0 and (non_text / len(chunk)) > 0.30:
                    return True
        except Exception:
            # If the file cannot be read in binary mode, assume it's not binary.
            return False
        return False

    @staticmethod
    def is_gitignored(file_path, gitignore_data):
        """
        Checks if file_path should be ignored according to the .gitignore entries.
        For each tuple (base_dir, patterns), if file_path is under base_dir,
        computes the relative path and matches it against the patterns.

        NOTE(review): this is a simplified gitignore match (fnmatch only; no
        negation '!' rules, no '**' semantics, no anchoring) — confirm acceptable.
        """
        for base_dir, patterns in gitignore_data:
            try:
                rel_path = os.path.relpath(file_path, base_dir)
            except ValueError:
                # file_path and base_dir are on different drives.
                continue
            # If the file is not under the current .gitignore base_dir, skip it.
            if rel_path.startswith('..'):
                continue
            # Check all patterns.
            for pattern in patterns:
                if pattern.endswith('/'):
                    # Directory pattern: check if any folder in the relative path matches.
                    parts = rel_path.split(os.sep)
                    for part in parts[:-1]:
                        if fnmatch.fnmatch(part + '/', pattern):
                            return True
                else:
                    # Check if the relative path matches the pattern.
                    if fnmatch.fnmatch(rel_path, pattern):
                        return True
        return False

    @staticmethod
    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
        """
        Filter out directories based on ignore_file_strings and hidden status.

        Mutates `dirs` in place (slice assignment) so os.walk skips the
        pruned directories.
        """
        if ignore_hidden:
            dirs[:] = [d for d in dirs if not d.startswith('.')]
        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]

    @staticmethod
    def path_or_content_contains(file_path, path_contains, content_contains):
        """
        Check if the file path contains specific strings or if the file content does.
        Returns False when neither whitelist matches or the file cannot be decoded.
        """
        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
            return True

        if content_contains:
            try:
                with open(file_path, 'r') as f:
                    content = f.read()
                    if any(whitelist_str in content for whitelist_str in content_contains):
                        return True
            except UnicodeDecodeError:
                # Undecodable file: treat as "does not contain".
                return False
        return False

    @staticmethod
    def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains, scan_binary_files=False):
        """
        Determines if a file should be printed based on various criteria.
        By default, binary files are skipped unless scan_binary_files is True.
        """
        # Check binary file status using our heuristic.
        if not scan_binary_files and DirectoryHandler.is_binary_file(file_path):
            return False

        if ignore_hidden and os.path.basename(file_path).startswith('.'):
            return False

        if file_types and not any(file_path.endswith(ft) for ft in file_types):
            return False

        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
            return False

        # Whitelists are only consulted when at least one was given.
        if path_contains or content_contains:
            return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
        return True

    @staticmethod
    def print_file_content(file_path, no_comments, compress):
        """
        Prints the content of a file, optionally removing comments or compressing the output.
        """
        try:
            with open(file_path, 'r') as f:
                content = f.read()
                if no_comments:
                    file_type = os.path.splitext(file_path)[1]
                    content = CodeProcessor.remove_comments(content, file_type)
                print(f"<< START: {file_path} >>")
                if compress:
                    compressed_content = CodeProcessor.compress(content)
                    print("COMPRESSED CODE:")
                    print(compressed_content)
                else:
                    print(content)
                print("<< END >>\n")
        except UnicodeDecodeError:
            print(f"Warning: Could not read file due to encoding issues: {file_path}")
            # NOTE(review): aborts the entire scan on the first undecodable
            # file, despite the "Warning" wording — confirm intended.
            exit(1)

    @staticmethod
    def handle_directory(directory, **kwargs):
        """
        Scans the directory and processes each file while respecting .gitignore rules.
        """
        gitignore_data = []
        if not kwargs.get('no_gitignore'):
            gitignore_data = DirectoryHandler.load_gitignore_patterns(directory)

        for root, dirs, files in os.walk(directory):
            DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
            for file in files:
                file_path = os.path.join(root, file)
                if gitignore_data and DirectoryHandler.is_gitignored(file_path, gitignore_data):
                    if kwargs.get('verbose'):
                        print(f"Skipped (gitignored): {file_path}")
                    continue

                if DirectoryHandler.should_print_file(
                    file_path,
                    kwargs['file_types'],
                    kwargs['ignore_file_strings'],
                    kwargs['ignore_hidden'],
                    kwargs['path_contains'],
                    kwargs['content_contains'],
                    scan_binary_files=kwargs.get('scan_binary_files', False)
                ):
                    DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
                elif kwargs.get('verbose'):
                    print(f"Skipped file: {file_path}")

    @staticmethod
    def handle_file(file_path, **kwargs):
        """
        Processes an individual file.
        """
        DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
|
|
||||||
208
main.py
208
main.py
@@ -1,49 +1,185 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import argparse
|
||||||
from cli import parse_arguments
|
import re
|
||||||
from directory_handler import DirectoryHandler
|
import zlib
|
||||||
|
|
||||||
|
class CodeProcessor:
    """Strip comments from source text and compress it with zlib.

    NOTE: comment removal here is purely regex-based, so a '#' or '//'
    occurring inside a string literal is stripped as well, and the
    triple-quote patterns remove any triple-quoted string, not just
    docstrings. Acceptable for rough display cleanup; not a parser.
    """

    # Supported file extensions (public constants; part of the class API).
    PYTHON = ".py"
    JS = ".js"
    C = ".c"
    CPP = ".cpp"
    H = ".h"
    BASH = ".sh"
    SHELL = ".bash"

    # Shared pattern sets as (regex, replacement, re flags) triples.
    # JS/C/CPP/H previously repeated the identical list four times,
    # and BASH/SHELL twice; define each once and share it.
    _CSTYLE_PATTERNS = [
        (r'\s*//.*', '', 0),
        (r'/\*.*?\*/', '', 0),
    ]
    _HASH_PATTERNS = [
        (r'\s*#.*', '', 0),
    ]

    @staticmethod
    def remove_comments(content, file_type):
        """Remove comments based on file type.

        Args:
            content (str): raw source text.
            file_type (str): file extension including the dot (e.g. ".py").

        Returns:
            str: content with comments removed and outer whitespace stripped.
                 Unknown extensions are returned stripped but otherwise unchanged.
        """
        comment_patterns = {
            CodeProcessor.PYTHON: [
                (r'\s*#.*', '', 0),
                # DOTALL so triple-quoted strings spanning lines are matched.
                (r'\"\"\"(.*?)\"\"\"', '', re.DOTALL),
                (r"\'\'\'(.*?)\'\'\'", '', re.DOTALL),
            ],
            CodeProcessor.JS: CodeProcessor._CSTYLE_PATTERNS,
            CodeProcessor.C: CodeProcessor._CSTYLE_PATTERNS,
            CodeProcessor.CPP: CodeProcessor._CSTYLE_PATTERNS,
            CodeProcessor.H: CodeProcessor._CSTYLE_PATTERNS,
            CodeProcessor.BASH: CodeProcessor._HASH_PATTERNS,
            CodeProcessor.SHELL: CodeProcessor._HASH_PATTERNS,
        }

        patterns = comment_patterns.get(file_type, [])
        for pattern, repl, flags in patterns:
            content = re.sub(pattern, repl, content, flags=flags)
        return content.strip()

    @staticmethod
    def compress(content):
        """Compress code using zlib. Returns the compressed bytes."""
        return zlib.compress(content.encode())
|
||||||
|
|
||||||
|
|
||||||
|
class DirectoryHandler:
    """Static helpers to walk directories and print matching file contents."""

    @staticmethod
    def filter_directories(dirs, ignore_file_strings, ignore_hidden):
        """Filter out directories based on ignore criteria."""
        # os.walk honors in-place edits of `dirs`, hence slice assignment.
        if ignore_hidden:
            dirs[:] = [d for d in dirs if not d.startswith('.')]
        dirs[:] = [d for d in dirs if not any(ig in d for ig in ignore_file_strings)]

    @staticmethod
    def path_or_content_contains(file_path, path_contains, content_contains):
        """Return True if the path or the file content matches a whitelist entry."""
        # Check if the file name contains specific strings (whitelist)
        if path_contains and any(whitelist_str in file_path for whitelist_str in path_contains):
            return True

        # Check file content for specific strings (if specified)
        if content_contains:
            try:
                with open(file_path, 'r') as f:
                    content = f.read()
                    # Return True if any of the content_contains strings are found in the content
                    if any(whitelist_str in content for whitelist_str in content_contains):
                        return True
            except UnicodeDecodeError:
                # Return False if there's a Unicode decode error (file can't be read)
                return False
        return False

    @staticmethod
    def should_print_file(file_path, file_types, ignore_file_strings, ignore_hidden, path_contains, content_contains):
        """
        Determine if a file should be printed based on various criteria.

        Args:
            file_path (str): The path of the file to be checked.
            file_types (list): List of allowed file extensions.
            ignore_file_strings (list): List of strings; if any are found in the file path, the file is ignored.
            ignore_hidden (bool): If True, hidden files (starting with '.') are ignored.
            path_contains (list): List of strings; the file is processed only if its path contains one of these strings.
            content_contains (list): List of strings; the file is processed only if its content contains one of these strings.

        Returns:
            bool: True if the file should be printed, False otherwise.
        """
        # Check for hidden files if ignore_hidden is enabled
        if ignore_hidden and os.path.basename(file_path).startswith('.'):
            return False

        # Check if the file type is in the allowed list (if specified)
        if file_types and not any(file_path.endswith(file_type) for file_type in file_types):
            return False

        # Check if the file should be ignored based on the presence of specific strings in its path
        if any(ignore_str in file_path for ignore_str in ignore_file_strings):
            return False

        if path_contains or content_contains:
            return DirectoryHandler.path_or_content_contains(file_path, path_contains, content_contains)
        return True

    @staticmethod
    def print_file_content(file_path, no_comments, compress):
        """Print the content of a file."""
        try:
            with open(file_path, 'r') as f:
                content = f.read()
                if no_comments:
                    file_type = os.path.splitext(file_path)[1]
                    content = CodeProcessor.remove_comments(content, file_type)
                print(f"<< START: {file_path} >>")
                if compress:
                    compressed_content = CodeProcessor.compress(content)
                    print(f"COMPRESSED CODE: ")
                    print(compressed_content)
                else:
                    print(content)
                print("<< END >>\n")
        except UnicodeDecodeError:
            print(f"Warning: Could not read file due to encoding issues: {file_path}")
            # NOTE(review): aborts the whole program on the first undecodable
            # file, despite the "Warning" wording — confirm intended.
            exit(1)

    @staticmethod
    def handle_directory(directory, **kwargs):
        """Handle scanning and printing for directories."""
        for root, dirs, files in os.walk(directory):
            # Prune ignored/hidden directories in place before descending.
            DirectoryHandler.filter_directories(dirs, kwargs['ignore_file_strings'], kwargs['ignore_hidden'])
            for file in files:
                if DirectoryHandler.should_print_file(os.path.join(root, file), kwargs['file_types'], kwargs['ignore_file_strings'], kwargs['ignore_hidden'], kwargs['path_contains'], kwargs['content_contains']):
                    DirectoryHandler.print_file_content(os.path.join(root, file), kwargs['no_comments'], kwargs['compress'])
                elif kwargs['verbose']:
                    print(f"Skipped file: {file}")

    @staticmethod
    def handle_file(file_path, **kwargs):
        """Handle scanning and printing for individual files."""
        DirectoryHandler.print_file_content(file_path, kwargs['no_comments'], kwargs['compress'])
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Parse command-line arguments and scan the given files/directories.

    Exits with status 1 when a given path is neither a file nor a directory.
    """
    parser = argparse.ArgumentParser(description="Scan directories and print/compile file contents.")
    parser.add_argument("paths", nargs='+', help="List of files or directories to scan.")
    parser.add_argument("--file-types", nargs='+', default=[], help="Filter by file types (e.g., .txt .log).")
    parser.add_argument("--ignore-file-strings", nargs='+', default=[], help="Ignore files and folders containing these strings.")
    parser.add_argument("--ignore-hidden", action='store_true', help="Ignore hidden directories and files.")
    parser.add_argument("-v", "--verbose", action='store_true', help="Enable verbose mode.")
    parser.add_argument("--no-comments", action='store_true', help="Remove comments from the displayed content based on file type.")
    parser.add_argument("--compress", action='store_true', help="Compress code (for Python files).")
    parser.add_argument("--path-contains", nargs='+', default=[], help="Display files whose paths contain one of these strings.")
    parser.add_argument("--content-contains", nargs='+', default=[], help="Display files containing one of these strings in their content.")

    args = parser.parse_args()

    for path in args.paths:
        if os.path.isdir(path):
            DirectoryHandler.handle_directory(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                verbose=args.verbose,
                no_comments=args.no_comments,
                compress=args.compress,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
            )
        elif os.path.isfile(path):
            # Single files go through the same filter as directory scans.
            if DirectoryHandler.should_print_file(
                path,
                file_types=args.file_types,
                ignore_file_strings=args.ignore_file_strings,
                ignore_hidden=args.ignore_hidden,
                path_contains=args.path_contains,
                content_contains=args.content_contains,
            ):
                DirectoryHandler.handle_file(
                    path,
                    file_types=args.file_types,
                    ignore_file_strings=args.ignore_file_strings,
                    ignore_hidden=args.ignore_hidden,
                    no_comments=args.no_comments,
                    compress=args.compress,
                )
        else:
            print(f"Error: {path} is neither a valid file nor a directory.")
            # Equivalent to exit(1) without relying on the site builtin;
            # consistent with print_file_content's error path.
            raise SystemExit(1)


if __name__ == "__main__":
    main()
@@ -1,190 +0,0 @@
|
|||||||
# tests/unit/test_arc.py
|
|
||||||
import io
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
import unittest
|
|
||||||
from contextlib import redirect_stdout
|
|
||||||
|
|
||||||
# Ensure project root is on sys.path when running via discover
|
|
||||||
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
||||||
if PROJECT_ROOT not in sys.path:
|
|
||||||
sys.path.insert(0, PROJECT_ROOT)
|
|
||||||
|
|
||||||
from code_processor import CodeProcessor
|
|
||||||
from directory_handler import DirectoryHandler
|
|
||||||
|
|
||||||
|
|
||||||
class TestCodeProcessor(unittest.TestCase):
    """Unit tests for CodeProcessor's comment stripping and compression."""

    def test_python_comment_and_docstring_stripping(self):
        # Docstrings and '#' comments must disappear; '#' inside string
        # literals and non-docstring triple-quoted strings must survive.
        src = '''\
"""module docstring should go away"""

# a comment
x = 1 # inline comment
y = "string with # not a comment"

def f():
    """function docstring should go away"""
    s = """triple quoted but not a docstring"""
    return x
'''
        out = CodeProcessor.remove_comments(src, ".py")
        self.assertNotIn("module docstring", out)
        self.assertNotIn("function docstring", out)
        self.assertNotIn("# a comment", out)
        # tolerate whitespace normalization from tokenize.untokenize
        self.assertRegex(out, r'y\s*=\s*"string with # not a comment"')
        self.assertIn('triple quoted but not a docstring', out)

    def test_cstyle_comment_stripping(self):
        # Both '//' line comments and '/* */' block comments are removed,
        # but comment-like text inside a string literal is preserved.
        src = '''\
// line comment
int main() {
    /* block
 comment */
    int x = 42; // end comment
    const char* s = "/* not a comment here */";
    return x;
}
'''
        out = CodeProcessor.remove_comments(src, ".c")
        # line comment and block comment gone
        self.assertNotIn("// line comment", out)
        self.assertNotIn("block\n comment", out)
        # string content with /* */ inside should remain
        self.assertIn('const char* s = "/* not a comment here */";', out)

    def test_hash_comment_stripping(self):
        # Hash-style stripping removes whole comment lines only.
        src = """\
# top comment
KEY=value # trailing comment should be kept by default
plain: value
"""
        out = CodeProcessor.remove_comments(src, ".yml")
        # Our regex removes full lines starting with optional spaces then '#'
        self.assertNotIn("top comment", out)
        # It does not remove trailing fragments after content for hash style
        self.assertIn("KEY=value", out)
        self.assertIn("plain: value", out)

    def test_jinja_comment_stripping(self):
        # {# ... #} comments (single- and multi-line) are removed while
        # {{ ... }} expressions and plain text are kept.
        src = """\
{# top jinja comment #}
Hello {{ name }}!
{#
multi-line
jinja comment
#}
Body text and {{ value }}.
"""
        out = CodeProcessor.remove_comments(src, ".j2")
        self.assertNotIn("top jinja comment", out)
        self.assertNotIn("multi-line", out)
        # Regular content and expressions remain
        self.assertIn("Hello {{ name }}!", out)
        self.assertIn("Body text and {{ value }}.", out)

    def test_unknown_extension_returns_stripped(self):
        # Unknown file types get no comment removal, only surrounding
        # whitespace stripping.
        src = " x = 1 # not removed for unknown "
        out = CodeProcessor.remove_comments(src, ".unknown")
        self.assertEqual(out, "x = 1 # not removed for unknown")

    def test_compress_decompress_roundtrip(self):
        # compress() must yield bytes that decompress() restores exactly.
        src = "def x():\n return 42\n"
        blob = CodeProcessor.compress(src)
        self.assertIsInstance(blob, (bytes, bytearray))
        back = CodeProcessor.decompress(blob)
        self.assertEqual(src, back)
||||||
class TestDirectoryHandler(unittest.TestCase):
    """Unit tests for DirectoryHandler filtering, gitignore, and printing."""

    def test_is_binary_file(self):
        # File starts with control bytes; presumably detected via NUL/control
        # bytes in the first chunk -- confirm against the implementation.
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b"\x00\x01\x02BINARY")
            path = tf.name
        try:
            self.assertTrue(DirectoryHandler.is_binary_file(path))
        finally:
            os.remove(path)

    def test_gitignore_matching(self):
        with tempfile.TemporaryDirectory() as root:
            # Create .gitignore ignoring build/ and *.log
            gi_dir = os.path.join(root, "a")
            os.makedirs(gi_dir, exist_ok=True)
            with open(os.path.join(gi_dir, ".gitignore"), "w") as f:
                f.write("build/\n*.log\n")

            # Files
            os.makedirs(os.path.join(gi_dir, "build"), exist_ok=True)
            ignored_dir_file = os.path.join(gi_dir, "build", "x.txt")
            with open(ignored_dir_file, "w") as f:
                f.write("ignored")
            ignored_log = os.path.join(gi_dir, "debug.log")
            with open(ignored_log, "w") as f:
                f.write("ignored log")
            kept_file = os.path.join(gi_dir, "src.txt")
            with open(kept_file, "w") as f:
                f.write("keep me")

            gi_data = DirectoryHandler.load_gitignore_patterns(root)

            # Directory pattern and glob pattern both match; plain file kept.
            self.assertTrue(DirectoryHandler.is_gitignored(ignored_dir_file, gi_data))
            self.assertTrue(DirectoryHandler.is_gitignored(ignored_log, gi_data))
            self.assertFalse(DirectoryHandler.is_gitignored(kept_file, gi_data))

    def test_should_print_file_filters_hidden_and_types(self):
        # Hidden files are excluded with ignore_hidden=True; files matching
        # the requested extension pass.
        with tempfile.TemporaryDirectory() as root:
            hidden = os.path.join(root, ".hidden.txt")
            plain = os.path.join(root, "keep.py")
            with open(hidden, "w") as f:
                f.write("data")
            with open(plain, "w") as f:
                f.write("print('hi')")

            self.assertFalse(
                DirectoryHandler.should_print_file(
                    hidden,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )
            self.assertTrue(
                DirectoryHandler.should_print_file(
                    plain,
                    file_types=[".py"],
                    ignore_file_strings=[],
                    ignore_hidden=True,
                    path_contains=[],
                    content_contains=[],
                )
            )

    def test_print_file_content_no_comments_and_compress(self):
        # With no_comments=True the '#' line vanishes but code remains;
        # with compress=True the compressed banner and END marker appear.
        with tempfile.TemporaryDirectory() as root:
            p = os.path.join(root, "t.py")
            with open(p, "w") as f:
                f.write("# comment only\nx=1\n")
            buf = io.StringIO()
            with redirect_stdout(buf):
                DirectoryHandler.print_file_content(p, no_comments=True, compress=False)
            out = buf.getvalue()
            self.assertIn("<< START:", out)
            # be whitespace-tolerant (tokenize may insert spaces)
            self.assertRegex(out, r"x\s*=\s*1")
            self.assertNotIn("# comment only", out)

            buf = io.StringIO()
            with redirect_stdout(buf):
                DirectoryHandler.print_file_content(p, no_comments=True, compress=True)
            out = buf.getvalue()
            self.assertIn("COMPRESSED CODE:", out)
            self.assertIn("<< END >>", out)
|
||||||
# Allow running this test module directly (python tests/unit/test_arc.py).
if __name__ == "__main__":
    unittest.main()
||||||
Reference in New Issue
Block a user