From d36fc0c916b8e0a30f61cf7feb5bd04cc086410c Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Mon, 17 Mar 2025 01:29:10 +0100 Subject: [PATCH] Refactored Sphinx Code --- LICENSE.md | 4 +- sphinx/conf.py | 15 +- sphinx/extensions/__init__.py | 0 sphinx/extensions/local_file_headings.py | 49 ++++++ sphinx/extensions/local_subfolders.py | 71 ++++++++ sphinx/{ => extensions}/markdown_include.py | 0 sphinx/extensions/nav_utils.py | 84 ++++++++++ sphinx/{ => extensions}/roles_overview.py | 0 sphinx/local_md_files.py | 175 -------------------- 9 files changed, 216 insertions(+), 182 deletions(-) create mode 100644 sphinx/extensions/__init__.py create mode 100644 sphinx/extensions/local_file_headings.py create mode 100644 sphinx/extensions/local_subfolders.py rename sphinx/{ => extensions}/markdown_include.py (100%) create mode 100644 sphinx/extensions/nav_utils.py rename sphinx/{ => extensions}/roles_overview.py (100%) delete mode 100644 sphinx/local_md_files.py diff --git a/LICENSE.md b/LICENSE.md index 34679fa0..e158a1a4 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,6 @@ -# CyMaIS NonCommercial License (CNCL) +# License Agreement + +**CyMaIS NonCommercial License (CNCL)** ## Definitions - **"Software":** Refers to *"[CyMaIS - Cyber Master Infrastructure Solution](https://cymais.cloud/)"* and its associated source code. diff --git a/sphinx/conf.py b/sphinx/conf.py index 092cb4ae..2b1e8555 100644 --- a/sphinx/conf.py +++ b/sphinx/conf.py @@ -49,14 +49,17 @@ source_suffix = { '.md': 'markdown', } +sys.path.insert(0, os.path.abspath('./extensions')) extensions = [ - "sphinx.ext.autosummary", - "sphinx.ext.autodoc", - "myst_parser", - 'local_md_files', - 'roles_overview', - 'markdown_include', + 'sphinx.ext.autosummary', + 'sphinx.ext.autodoc', + 'myst_parser', + 'extensions.local_file_headings', + 'extensions.local_subfolders', + 'extensions.roles_overview', + 'extensions.markdown_include', ] + autosummary_generate = True myst_enable_extensions = [ diff --git a/sphinx/extensions/__init__.py b/sphinx/extensions/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sphinx/extensions/local_file_headings.py b/sphinx/extensions/local_file_headings.py new file mode 100644 index 00000000..eb97f199 --- /dev/null +++ b/sphinx/extensions/local_file_headings.py @@ -0,0 +1,49 @@ +import os +from sphinx.util import logging +from docutils.parsers.rst import Directive +from .nav_utils import natural_sort_key, extract_headings_from_file, group_headings, sort_tree, MAX_HEADING_LEVEL, DEFAULT_MAX_NAV_DEPTH + +logger = logging.getLogger(__name__) +DEFAULT_MAX_NAV_DEPTH = 4 + +def add_local_file_headings(app, pagename, templatename, context, doctree): + srcdir = app.srcdir + directory = os.path.dirname(pagename) + abs_dir = os.path.join(srcdir, directory) + if not os.path.isdir(abs_dir): + logger.warning(f"Directory {abs_dir} not found for page {pagename}.") + context['local_md_headings'] = [] + return + + # Get only files with .md or .rst extensions. + files = [f for f in os.listdir(abs_dir) if f.endswith('.md') or f.endswith('.rst')] + # If an index file is present, remove any readme files (case-insensitive). + files_lower = [f.lower() for f in files] + if 'index.md' in files_lower or 'index.rst' in files_lower: + files = [f for f in files if f.lower() not in ['readme.md', 'readme.rst']] + + file_items = [] + for file in files: + filepath = os.path.join(abs_dir, file) + headings = extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL) + basename, _ = os.path.splitext(file) + # Set priority: index gets priority 0, otherwise 1. + priority = 0 if basename.lower() == 'index' else 1 + for heading in headings: + file_link = os.path.join(directory, basename) + file_items.append({ + 'level': heading['level'], + 'text': heading['text'], + 'link': file_link, + 'anchor': heading['anchor'], + 'priority': priority, + 'filename': basename + }) + tree = group_headings(file_items) + sort_tree(tree) + context['local_md_headings'] = tree + +def setup(app): + app.add_config_value('local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH, 'env') + app.connect('html-page-context', add_local_file_headings) + return {'version': '0.1', 'parallel_read_safe': True} diff --git a/sphinx/extensions/local_subfolders.py b/sphinx/extensions/local_subfolders.py new file mode 100644 index 00000000..390561c1 --- /dev/null +++ b/sphinx/extensions/local_subfolders.py @@ -0,0 +1,71 @@ +import os +import re +from sphinx.util import logging +from docutils.parsers.rst import Directive +from .nav_utils import natural_sort_key, extract_headings_from_file, group_headings, sort_tree, MAX_HEADING_LEVEL, DEFAULT_MAX_NAV_DEPTH + +logger = logging.getLogger(__name__) + +def collect_subfolder_tree(dir_path, base_url, current_depth, max_depth): + """ + Recursively collects navigation items from subdirectories. + For each subfolder, it looks for a candidate file (prefer "index.rst" then "README.md") + and extracts its first level‑1 heading as the title. If no candidate or heading is found, + the folder name is used. + Returns a list representing the subfolder tree. + """ + items = [] + for item in sorted(os.listdir(dir_path), key=lambda s: s.lower()): + full_path = os.path.join(dir_path, item) + if os.path.isdir(full_path): + candidate = None + for cand in ['index.rst', 'README.md']: + candidate_path = os.path.join(full_path, cand) + if os.path.isfile(candidate_path): + candidate = candidate_path + break + if candidate: + headings = extract_headings_from_file(candidate, max_level=MAX_HEADING_LEVEL) + title = headings[0]['text'] if headings else item + else: + title = item + link = os.path.join(base_url, item) + entry = { + 'level': 1, + 'text': title, + 'link': link, + 'anchor': '', + 'priority': 0, + 'filename': item + } + if current_depth < max_depth: + children = collect_subfolder_tree(full_path, os.path.join(base_url, item), current_depth + 1, max_depth) + if children: + entry['children'] = children + items.append(entry) + return items + +def add_local_subfolders(app, pagename, templatename, context, doctree): + """ + Collects a tree of subfolder navigation items from the current directory. + For each subfolder, the title is determined by scanning for a candidate file (prefer "index.rst" then "README.md") + and extracting its first level‑1 heading, or using the folder name if none is found. + The resulting tree is stored in context['local_subfolders']. + """ + srcdir = app.srcdir + directory = os.path.dirname(pagename) + abs_dir = os.path.join(srcdir, directory) + if not os.path.isdir(abs_dir): + logger.warning(f"Directory {abs_dir} not found for page {pagename}.") + context['local_subfolders'] = [] + return + + max_nav_depth = getattr(app.config, 'local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH) + subfolder_tree = collect_subfolder_tree(abs_dir, directory, current_depth=0, max_depth=max_nav_depth) + sort_tree(subfolder_tree) + context['local_subfolders'] = subfolder_tree + +def setup(app): + # Do not add the config value here to avoid conflicts. + app.connect('html-page-context', add_local_subfolders) + return {'version': '0.1', 'parallel_read_safe': True} diff --git a/sphinx/markdown_include.py b/sphinx/extensions/markdown_include.py similarity index 100% rename from sphinx/markdown_include.py rename to sphinx/extensions/markdown_include.py diff --git a/sphinx/extensions/nav_utils.py b/sphinx/extensions/nav_utils.py new file mode 100644 index 00000000..1a604d07 --- /dev/null +++ b/sphinx/extensions/nav_utils.py @@ -0,0 +1,84 @@ +import os +import re +import yaml + +DEFAULT_MAX_NAV_DEPTH = 4 +MAX_HEADING_LEVEL = 2 + +def natural_sort_key(text): + """ + Generate a key for natural (human-friendly) sorting, + taking numeric parts into account. + """ + return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)] + +def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): + """ + Extract headings from a file. + For Markdown (.md) files, looks for lines starting with '#' (up to max_level). + For reStructuredText (.rst) files, looks for a line immediately followed by an underline. + Returns a list of dictionaries with keys: 'level', 'text', and 'anchor' (if applicable). + """ + headings = [] + ext = os.path.splitext(filepath)[1].lower() + try: + with open(filepath, 'r', encoding='utf-8') as f: + if ext == '.md': + in_code_block = False + for line in f: + if line.strip().startswith("```"): + in_code_block = not in_code_block + continue + if in_code_block: + continue + match = re.match(r'^(#{1,})\s+(.*)$', line) + if match: + level = len(match.group(1)) + if level <= max_level: + heading_text = match.group(2).strip() + anchor = re.sub(r'\s+', '-', heading_text.lower()) + anchor = re.sub(r'[^a-z0-9\-]', '', anchor) + headings.append({'level': level, 'text': heading_text, 'anchor': anchor}) + elif ext == '.rst': + lines = f.readlines() + for i in range(len(lines)-1): + text_line = lines[i].rstrip("\n") + underline = lines[i+1].rstrip("\n") + if len(underline) >= 3 and re.fullmatch(r'[-=~\^\+"\'`]+', underline): + level = 1 + heading_text = text_line.strip() + # For reST, the anchor is left empty (can be generated later if needed) + headings.append({'level': level, 'text': heading_text, 'anchor': ''}) + except Exception as e: + print(f"Warning: Error reading {filepath}: {e}") + return headings + +def group_headings(headings): + """ + Convert a flat list of headings into a tree structure based on their level. + Each heading gets a 'children' list. + """ + tree = [] + stack = [] + for heading in headings: + heading['children'] = [] + while stack and stack[-1]['level'] >= heading['level']: + stack.pop() + if stack: + stack[-1]['children'].append(heading) + else: + tree.append(heading) + stack.append(heading) + return tree + +def sort_tree(tree): + """ + Sort a tree of navigation items, first by a 'priority' value (lower comes first) + and then by a natural sort key based on the 'filename' field (or the 'text' field if no filename is provided). + This ensures that 'index' and 'readme' (priority 0) always appear at the top. + """ + tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text'])))) + for item in tree: + if item.get('children'): + sort_tree(item['children']) + diff --git a/sphinx/roles_overview.py b/sphinx/extensions/roles_overview.py similarity index 100% rename from sphinx/roles_overview.py rename to sphinx/extensions/roles_overview.py diff --git a/sphinx/local_md_files.py b/sphinx/local_md_files.py deleted file mode 100644 index eb74cff9..00000000 --- a/sphinx/local_md_files.py +++ /dev/null @@ -1,175 +0,0 @@ -import os -import re -from sphinx.util import logging - -logger = logging.getLogger(__name__) - -# Set the maximum heading level for Markdown headings -MAX_HEADING_LEVEL = 3 -DEFAULT_MAX_NAV_DEPTH = 2 # Default maximum navigation depth; configurable via conf.py - -def natural_sort_key(text): - """ - Generate a key for natural (human-friendly) sorting, - where numbers in the text are taken into account by their numeric value. - """ - return [int(c) if c.isdigit() else c.lower() for c in re.split('(\d+)', text)] - -def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): - """ - Extract headings from a file. - For Markdown files, look for lines starting with '#' (up to max_level). - For reStructuredText files, look for a line immediately followed by an underline made of punctuation. - """ - headings = [] - ext = os.path.splitext(filepath)[1].lower() - try: - with open(filepath, 'r', encoding='utf-8') as f: - if ext == '.md': - in_code_block = False - for line in f: - if line.strip().startswith("```"): - in_code_block = not in_code_block - continue - if in_code_block: - continue - match = re.match(r'^(#{1,})\s+(.*)$', line) - if match: - level = len(match.group(1)) - if level <= max_level: - heading_text = match.group(2).strip() - anchor = re.sub(r'\s+', '-', heading_text.lower()) - anchor = re.sub(r'[^a-z0-9\-]', '', anchor) - headings.append({'level': level, 'text': heading_text, 'anchor': anchor}) - elif ext == '.rst': - lines = f.readlines() - for i in range(len(lines)-1): - text_line = lines[i].rstrip("\n") - underline = lines[i+1].rstrip("\n") - if len(underline) >= 3 and re.fullmatch(r'[-=~\^\+"\'`]+', underline): - level = 1 # default level; adjust if needed - heading_text = text_line.strip() - anchor = re.sub(r'\s+', '-', heading_text.lower()) - anchor = re.sub(r'[^a-z0-9\-]', '', anchor) - headings.append({'level': level, 'text': heading_text, 'anchor': anchor}) - except Exception as e: - logger.warning(f"Error reading {filepath}: {e}") - return headings - -def group_headings(headings): - """ - Converts a flat list of headings into a tree structure based on their level. - Each heading gets a 'children' list. - """ - tree = [] - stack = [] - for heading in headings: - heading['children'] = [] - while stack and stack[-1]['level'] >= heading['level']: - stack.pop() - if stack: - stack[-1]['children'].append(heading) - else: - tree.append(heading) - stack.append(heading) - return tree - -def sort_tree(tree): - tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text'])))) - -def collect_nav_items(dir_path, base_url, current_depth, max_depth): - """ - Recursively collects navigation items from subdirectories. - For each subdirectory, if an 'index.rst' exists (preferred) or a 'readme.md' exists, - the first heading from that file is used as the title. - """ - nav_items = [] - # Look for candidate file in this subdirectory (prefer index.rst, then readme.md) - candidate = None - for cand in ['index.rst', 'readme.md']: - candidate_path = os.path.join(dir_path, cand) - if os.path.isfile(candidate_path): - candidate = cand - break - if candidate: - candidate_path = os.path.join(dir_path, candidate) - headings = extract_headings_from_file(candidate_path) - if headings: - title = headings[0]['text'] - else: - title = os.path.splitext(candidate)[0].capitalize() - # Build link relative to base_url - link = os.path.join(base_url, os.path.splitext(candidate)[0]) - nav_items.append({ - 'level': 1, - 'text': title, - 'link': link, - 'anchor': '', - 'priority': 0 - }) - # Recurse into subdirectories if within max_depth - if current_depth < max_depth: - for item in os.listdir(dir_path): - full_path = os.path.join(dir_path, item) - if os.path.isdir(full_path): - sub_base_url = os.path.join(base_url, item) - nav_items.extend(collect_nav_items(full_path, sub_base_url, current_depth + 1, max_depth)) - return nav_items - -def add_local_md_headings(app, pagename, templatename, context, doctree): - srcdir = app.srcdir - directory = os.path.dirname(pagename) - abs_dir = os.path.join(srcdir, directory) - if not os.path.isdir(abs_dir): - logger.warning(f"Directory {abs_dir} not found for page {pagename}.") - context['local_md_headings'] = [] - return - - max_nav_depth = getattr(app.config, 'local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH) - - # Collect navigation items from subdirectories only - nav_items = [] - for item in os.listdir(abs_dir): - full_path = os.path.join(abs_dir, item) - if os.path.isdir(full_path): - nav_items.extend(collect_nav_items(full_path, os.path.join(directory, item), current_depth=1, max_depth=max_nav_depth)) - - # Process files in the current directory. - files = os.listdir(abs_dir) - files_lower = [f.lower() for f in files] - # If both index.rst and readme.md exist in the current directory, keep only index.rst. - if "index.rst" in files_lower: - files = [f for f in files if f.lower() != "readme.md"] - local_md_headings = [] - for file in files: - if file.endswith('.md') or file.endswith('.rst'): - filepath = os.path.join(abs_dir, file) - headings = extract_headings_from_file(filepath) - basename, _ = os.path.splitext(file) - # Setze Priorität: "index" und "readme" erhalten Priorität 0. - if basename.lower() in ['index', 'readme']: - priority = 0 - else: - priority = 1 - for heading in headings: - file_link = os.path.join(directory, basename) - local_md_headings.append({ - 'level': heading['level'], - 'text': heading['text'], - 'link': file_link, - 'anchor': heading['anchor'], - 'priority': priority, - 'filename': basename # Neues Feld zur Sortierung - }) - - # Combine current directory items with subdirectory nav items. - # If an index or readme from the current directory exists, it will be included only once. - all_items = local_md_headings + nav_items - tree = group_headings(all_items) - sort_tree(tree) - context['local_md_headings'] = tree - -def setup(app): - app.add_config_value('local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH, 'env') - app.connect('html-page-context', add_local_md_headings) - return {'version': '0.1', 'parallel_read_safe': True}