Refactored Sphinx Code

This commit is contained in:
Kevin Veen-Birkenbach 2025-03-17 01:29:10 +01:00
parent 48b32c2816
commit d36fc0c916
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
9 changed files with 216 additions and 182 deletions

View File

@ -1,4 +1,6 @@
# CyMaIS NonCommercial License (CNCL)
# License Agreement
**CyMaIS NonCommercial License (CNCL)**
## Definitions
- **"Software":** Refers to *"[CyMaIS - Cyber Master Infrastructure Solution](https://cymais.cloud/)"* and its associated source code.

View File

@ -49,14 +49,17 @@ source_suffix = {
'.md': 'markdown',
}
sys.path.insert(0, os.path.abspath('./extensions'))
extensions = [
"sphinx.ext.autosummary",
"sphinx.ext.autodoc",
"myst_parser",
'local_md_files',
'roles_overview',
'markdown_include',
'sphinx.ext.autosummary',
'sphinx.ext.autodoc',
'myst_parser',
'extensions.local_file_headings',
'extensions.local_subfolders',
'extensions.roles_overview',
'extensions.markdown_include',
]
autosummary_generate = True
myst_enable_extensions = [

View File

View File

@ -0,0 +1,49 @@
import os
from sphinx.util import logging
from docutils.parsers.rst import Directive
from .nav_utils import natural_sort_key, extract_headings_from_file, group_headings, sort_tree, MAX_HEADING_LEVEL, DEFAULT_MAX_NAV_DEPTH
# Module-level logger from sphinx.util.logging; used below to warn about a
# missing page directory.
logger = logging.getLogger(__name__)
# NOTE(review): this re-binds the DEFAULT_MAX_NAV_DEPTH name that is already
# imported from .nav_utils above; setup() registers it as the default of the
# 'local_nav_max_depth' config value. Confirm the duplicate is intentional.
DEFAULT_MAX_NAV_DEPTH = 4
def add_local_file_headings(app, pagename, templatename, context, doctree):
    """
    Build the heading tree for the files located next to the current page.

    Handler for Sphinx's ``html-page-context`` event (connected in ``setup``).
    Every ``.md`` / ``.rst`` file in the directory of ``pagename`` is scanned
    with ``extract_headings_from_file``; the headings of each file are grouped
    into a tree of their own and the per-file trees are concatenated, so a
    file never ends up nested below a heading of a different file.

    If the directory contains ``index.md`` or ``index.rst`` (compared
    case-insensitively), ``readme.md`` / ``readme.rst`` files are left out.
    Entries of the index file get priority 0, all others priority 1.

    The result is stored in ``context['local_md_headings']``; an empty list
    is stored (and a warning logged) when the directory does not exist.
    Returns ``None``.
    """
    # Links are URL paths, so always join with '/'; os.path.join would
    # produce backslashes on Windows.
    import posixpath

    directory = os.path.dirname(pagename)
    abs_dir = os.path.join(app.srcdir, directory)
    if not os.path.isdir(abs_dir):
        logger.warning(f"Directory {abs_dir} not found for page {pagename}.")
        context['local_md_headings'] = []
        return

    # Get only files with .md or .rst extensions.
    files = [f for f in os.listdir(abs_dir) if f.endswith(('.md', '.rst'))]

    # If an index file is present, remove any readme files (case-insensitive).
    lowered = {f.lower() for f in files}
    if 'index.md' in lowered or 'index.rst' in lowered:
        files = [f for f in files if f.lower() not in ('readme.md', 'readme.rst')]

    tree = []
    # sorted() makes the result independent of the arbitrary os.listdir
    # order; the final ordering is still decided by sort_tree below.
    for file in sorted(files):
        filepath = os.path.join(abs_dir, file)
        headings = extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL)
        basename, _ = os.path.splitext(file)
        # Set priority: index gets priority 0, otherwise 1.
        priority = 0 if basename.lower() == 'index' else 1
        file_link = posixpath.join(directory, basename)
        file_items = [
            {
                'level': heading['level'],
                'text': heading['text'],
                'link': file_link,
                'anchor': heading['anchor'],
                'priority': priority,
                'filename': basename,
            }
            for heading in headings
        ]
        # Group per file: a file that starts with a level-2 heading must not
        # become a child of the previous file's last level-1 heading.
        tree.extend(group_headings(file_items))

    sort_tree(tree)
    context['local_md_headings'] = tree
def setup(app):
    """
    Sphinx entry point for this extension.

    Registers the ``local_nav_max_depth`` config value (default
    ``DEFAULT_MAX_NAV_DEPTH``, rebuild scope ``'env'``), hooks
    ``add_local_file_headings`` into the ``html-page-context`` event and
    returns the extension metadata dictionary.
    """
    app.add_config_value('local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH, 'env')
    app.connect('html-page-context', add_local_file_headings)
    metadata = {
        'version': '0.1',
        'parallel_read_safe': True,
    }
    return metadata

View File

@ -0,0 +1,71 @@
import os
import re
from sphinx.util import logging
from docutils.parsers.rst import Directive
from .nav_utils import natural_sort_key, extract_headings_from_file, group_headings, sort_tree, MAX_HEADING_LEVEL, DEFAULT_MAX_NAV_DEPTH
# Module-level logger from sphinx.util.logging; add_local_subfolders uses it
# to warn when the directory of the current page does not exist.
logger = logging.getLogger(__name__)
def collect_subfolder_tree(dir_path, base_url, current_depth, max_depth):
    """
    Recursively collects navigation items from subdirectories.

    For each subfolder of ``dir_path`` (visited in case-insensitive name
    order) it looks for a candidate file (prefer "index.rst" then
    "README.md") and uses the first non-empty heading text found in it as the
    title. If there is no candidate, or it yields no usable heading, the
    folder name is used.

    ``base_url`` is the URL path of ``dir_path``; links are always joined
    with '/'. Children are collected while ``current_depth < max_depth`` and
    are attached under the 'children' key only when there are any.

    Returns a list of dictionaries with the keys 'level', 'text', 'link',
    'anchor', 'priority', 'filename' and optionally 'children'.
    """
    # Links are URL paths, not file system paths: os.path.join would emit
    # backslashes on Windows.
    import posixpath

    items = []
    for name in sorted(os.listdir(dir_path), key=str.lower):
        full_path = os.path.join(dir_path, name)
        if not os.path.isdir(full_path):
            continue

        title = name
        for cand in ('index.rst', 'README.md'):
            candidate_path = os.path.join(full_path, cand)
            if os.path.isfile(candidate_path):
                headings = extract_headings_from_file(candidate_path, max_level=MAX_HEADING_LEVEL)
                # A blank heading would render as an empty navigation entry,
                # so fall back to the folder name in that case as well.
                title = next((h['text'] for h in headings if h['text']), name)
                break

        link = posixpath.join(base_url, name)
        entry = {
            'level': 1,
            'text': title,
            'link': link,
            'anchor': '',
            'priority': 0,
            'filename': name,
        }
        if current_depth < max_depth:
            children = collect_subfolder_tree(full_path, link, current_depth + 1, max_depth)
            if children:
                entry['children'] = children
        items.append(entry)
    return items
def add_local_subfolders(app, pagename, templatename, context, doctree):
    """
    Store the subfolder navigation tree of the current page's directory in
    ``context['local_subfolders']``.

    The directory is derived from ``pagename`` relative to ``app.srcdir``.
    When it exists, ``collect_subfolder_tree`` is run on it starting at depth
    0, limited by the ``local_nav_max_depth`` config value (falling back to
    ``DEFAULT_MAX_NAV_DEPTH``), and the result is ordered with ``sort_tree``.
    When it does not exist, a warning is logged and an empty list is stored.
    """
    page_dir = os.path.dirname(pagename)
    page_dir_abs = os.path.join(app.srcdir, page_dir)

    if os.path.isdir(page_dir_abs):
        depth_limit = getattr(app.config, 'local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH)
        folders = collect_subfolder_tree(
            page_dir_abs,
            page_dir,
            current_depth=0,
            max_depth=depth_limit,
        )
        sort_tree(folders)
    else:
        logger.warning(f"Directory {page_dir_abs} not found for page {pagename}.")
        folders = []

    context['local_subfolders'] = folders
def setup(app):
    """
    Sphinx entry point for this extension.

    Hooks ``add_local_subfolders`` into the ``html-page-context`` event and
    returns the extension metadata dictionary.
    """
    # No config value is registered here on purpose, to avoid conflicts.
    app.connect('html-page-context', add_local_subfolders)
    metadata = {
        'version': '0.1',
        'parallel_read_safe': True,
    }
    return metadata

View File

@ -0,0 +1,84 @@
import os
import re
import yaml
# Default navigation depth. Not referenced by the functions in this module;
# presumably consumed by the extensions importing it - TODO confirm.
DEFAULT_MAX_NAV_DEPTH = 4
# Default for the max_level parameter of extract_headings_from_file:
# Markdown headings with more '#' characters than this are ignored.
MAX_HEADING_LEVEL = 2
def natural_sort_key(text):
    """
    Generate a key for natural (human-friendly) sorting,
    taking numeric parts into account.

    The text is split into alternating non-digit and digit runs; digit runs
    become ``int`` values and everything else is lower-cased, so "a2" sorts
    before "a10" and case is ignored.

    Returns a list that starts with a string and alternates str / int.
    """
    # isdecimal() accepts exactly the characters that r'\d' splits on and that
    # int() can parse. isdigit() is also true for characters such as '²',
    # which int() rejects with a ValueError.
    return [
        int(part) if part.isdecimal() else part.lower()
        for part in re.split(r'(\d+)', text)
    ]
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
    """
    Extract headings from a file.

    For Markdown (.md) files, looks for lines starting with '#' (up to
    max_level) outside of ``` fenced code blocks. The anchor is the
    lower-cased heading text with whitespace runs replaced by '-' and every
    character other than a-z, 0-9 and '-' removed.

    For reStructuredText (.rst) files, looks for a non-blank line immediately
    followed by an underline of at least three identical characters out of
    - = ~ ^ + " ' `. Such headings are always reported as level 1 with an
    empty anchor.

    Files with any other extension yield no headings.

    Returns a list of dictionaries with keys: 'level', 'text', and 'anchor'.
    If the file cannot be read or decoded as UTF-8, a warning is printed and
    the headings collected so far are returned.
    """
    headings = []
    ext = os.path.splitext(filepath)[1].lower()
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            if ext == '.md':
                in_code_block = False
                for line in f:
                    if line.strip().startswith("```"):
                        in_code_block = not in_code_block
                        continue
                    if in_code_block:
                        continue
                    match = re.match(r'^(#{1,})\s+(.*)$', line)
                    if not match:
                        continue
                    level = len(match.group(1))
                    if level > max_level:
                        continue
                    heading_text = match.group(2).strip()
                    anchor = re.sub(r'\s+', '-', heading_text.lower())
                    anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
                    headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
            elif ext == '.rst':
                lines = [line.rstrip("\n") for line in f]
                for text_line, underline in zip(lines, lines[1:]):
                    heading_text = text_line.strip()
                    if not heading_text:
                        # A rule below a blank line is a transition, not a
                        # section title.
                        continue
                    # A title adornment repeats one single character.
                    if len(underline) < 3 or len(set(underline)) != 1:
                        continue
                    if not re.fullmatch(r'[-=~\^\+"\'`]+', underline):
                        continue
                    # For reST, the anchor is left empty (can be generated later if needed)
                    headings.append({'level': 1, 'text': heading_text, 'anchor': ''})
    except (OSError, UnicodeError) as e:
        # Best effort: an unreadable file must not abort the build.
        print(f"Warning: Error reading {filepath}: {e}")
    return headings
def group_headings(headings):
    """
    Turn a flat, ordered list of heading dictionaries into a tree.

    Every heading receives a fresh 'children' list (the input dictionaries
    are modified in place). A heading becomes a child of the closest
    preceding heading with a strictly smaller 'level'; headings without such
    a predecessor are the roots.

    Returns the list of root headings.
    """
    roots = []
    ancestors = []  # chain of currently open headings, outermost first
    for node in headings:
        node['children'] = []
        depth = node['level']
        # Close every open heading that is not an ancestor of this one.
        while ancestors and depth <= ancestors[-1]['level']:
            ancestors.pop()
        siblings = ancestors[-1]['children'] if ancestors else roots
        siblings.append(node)
        ancestors.append(node)
    return roots
def sort_tree(tree):
    """
    Sort a navigation tree in place, level by level.

    Items are ordered by their 'priority' value first (lower comes first,
    1 when missing) and then by the natural sort key of their 'filename'
    field (or of the 'text' field if no filename is provided). Non-empty
    'children' lists are sorted recursively by the same rule.
    """
    def _order(node):
        label = node.get('filename', node['text'])
        return (node.get('priority', 1), natural_sort_key(label))

    tree.sort(key=_order)
    for node in tree:
        kids = node.get('children')
        if kids:
            sort_tree(kids)

View File

@ -1,175 +0,0 @@
import os
import re
from sphinx.util import logging
# Module-level logger from sphinx.util.logging; used for read-error and
# missing-directory warnings below.
logger = logging.getLogger(__name__)
# Default for the max_level parameter of extract_headings_from_file:
# Markdown headings with more '#' characters than this are ignored.
MAX_HEADING_LEVEL = 3
# Fallback navigation depth; registered in setup() as the default of the
# 'local_nav_max_depth' config value, so it can be overridden in conf.py.
DEFAULT_MAX_NAV_DEPTH = 2
def natural_sort_key(text):
    """
    Generate a key for natural (human-friendly) sorting,
    where numbers in the text are taken into account by their numeric value.

    The text is split into alternating non-digit and digit runs; digit runs
    become ``int`` values and everything else is lower-cased.

    Returns a list that starts with a string and alternates str / int.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    # isdecimal() accepts exactly what r'\d' splits on and int() can parse;
    # isdigit() is also true for characters such as '²' that int() rejects.
    return [
        int(part) if part.isdecimal() else part.lower()
        for part in re.split(r'(\d+)', text)
    ]
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
    """
    Extract headings from a file.

    For Markdown files, look for lines starting with '#' (up to max_level)
    outside of ``` fenced code blocks.
    For reStructuredText files, look for a non-blank line immediately
    followed by an underline of at least three identical characters out of
    - = ~ ^ + " ' `; such headings are always reported as level 1.

    In both cases the anchor is the lower-cased heading text with whitespace
    runs replaced by '-' and every character other than a-z, 0-9 and '-'
    removed. Files with any other extension yield no headings.

    Returns a list of dictionaries with keys 'level', 'text' and 'anchor'.
    If the file cannot be read or decoded as UTF-8, a warning is logged and
    the headings collected so far are returned.
    """
    def _anchor_for(heading_text):
        slug = re.sub(r'\s+', '-', heading_text.lower())
        return re.sub(r'[^a-z0-9\-]', '', slug)

    headings = []
    ext = os.path.splitext(filepath)[1].lower()
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            if ext == '.md':
                in_code_block = False
                for line in f:
                    if line.strip().startswith("```"):
                        in_code_block = not in_code_block
                        continue
                    if in_code_block:
                        continue
                    match = re.match(r'^(#{1,})\s+(.*)$', line)
                    if not match:
                        continue
                    level = len(match.group(1))
                    if level > max_level:
                        continue
                    heading_text = match.group(2).strip()
                    headings.append({'level': level, 'text': heading_text, 'anchor': _anchor_for(heading_text)})
            elif ext == '.rst':
                lines = [line.rstrip("\n") for line in f]
                for text_line, underline in zip(lines, lines[1:]):
                    heading_text = text_line.strip()
                    if not heading_text:
                        # A rule below a blank line is a transition, not a
                        # section title.
                        continue
                    # A title adornment repeats one single character.
                    if len(underline) < 3 or len(set(underline)) != 1:
                        continue
                    if not re.fullmatch(r'[-=~\^\+"\'`]+', underline):
                        continue
                    # default level; adjust if needed
                    headings.append({'level': 1, 'text': heading_text, 'anchor': _anchor_for(heading_text)})
    except (OSError, UnicodeError) as e:
        # Best effort: an unreadable file must not abort the build.
        logger.warning(f"Error reading {filepath}: {e}")
    return headings
def group_headings(headings):
    """
    Converts a flat, ordered list of headings into a tree based on their level.

    Each heading gets a fresh 'children' list (the input dictionaries are
    modified in place) and is attached to the closest preceding heading with
    a strictly smaller 'level'; headings without such a predecessor are roots.

    Returns the list of root headings.
    """
    roots = []
    open_chain = []  # currently open headings, outermost first
    for current in headings:
        current['children'] = []
        current_level = current['level']
        # Close every open heading that cannot be a parent of this one.
        while open_chain and current_level <= open_chain[-1]['level']:
            open_chain.pop()
        target = open_chain[-1]['children'] if open_chain else roots
        target.append(current)
        open_chain.append(current)
    return roots
def sort_tree(tree):
    """
    Sort the given list of navigation items in place.

    Items are ordered by 'priority' (lower first, 1 when missing) and then by
    the natural sort key of their 'filename' field, or of 'text' when no
    filename is present. Only the list itself is sorted; children are left
    untouched.
    """
    def _rank(entry):
        label = entry.get('filename', entry['text'])
        return (entry.get('priority', 1), natural_sort_key(label))

    tree.sort(key=_rank)
def collect_nav_items(dir_path, base_url, current_depth, max_depth):
    """
    Recursively collect one navigation item per directory that has a
    candidate file.

    In ``dir_path`` the first existing file out of 'index.rst' and
    'readme.md' is the candidate. Its first heading is used as the title; if
    it has no headings, the capitalized file name without extension is used.
    The link is ``base_url`` joined with that extension-less file name.
    Subdirectories are visited while ``current_depth < max_depth``.

    Returns a flat list of dictionaries with the keys 'level', 'text',
    'link', 'anchor' and 'priority'.
    """
    collected = []

    # Look for the candidate file (prefer index.rst, then readme.md).
    found = next(
        (
            name
            for name in ('index.rst', 'readme.md')
            if os.path.isfile(os.path.join(dir_path, name))
        ),
        None,
    )
    if found is not None:
        stem = os.path.splitext(found)[0]
        parsed = extract_headings_from_file(os.path.join(dir_path, found))
        collected.append({
            'level': 1,
            'text': parsed[0]['text'] if parsed else stem.capitalize(),
            'link': os.path.join(base_url, stem),
            'anchor': '',
            'priority': 0,
        })

    # Stop descending once the depth limit is reached.
    if current_depth >= max_depth:
        return collected

    for child in os.listdir(dir_path):
        child_path = os.path.join(dir_path, child)
        if os.path.isdir(child_path):
            collected.extend(
                collect_nav_items(child_path, os.path.join(base_url, child), current_depth + 1, max_depth)
            )
    return collected
def add_local_md_headings(app, pagename, templatename, context, doctree):
    """
    Build the navigation tree for the directory of the current page and store
    it in ``context['local_md_headings']``.

    The tree combines the headings of the ``.md`` / ``.rst`` files in that
    directory with the items ``collect_nav_items`` finds in its
    subdirectories (depth limited by the ``local_nav_max_depth`` config
    value, falling back to ``DEFAULT_MAX_NAV_DEPTH``). If the directory does
    not exist, a warning is logged and an empty list is stored.
    """
    directory = os.path.dirname(pagename)
    abs_dir = os.path.join(app.srcdir, directory)
    if not os.path.isdir(abs_dir):
        logger.warning(f"Directory {abs_dir} not found for page {pagename}.")
        context['local_md_headings'] = []
        return

    depth_limit = getattr(app.config, 'local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH)

    # Navigation items contributed by the subdirectories.
    subdir_items = []
    for name in os.listdir(abs_dir):
        sub_path = os.path.join(abs_dir, name)
        if not os.path.isdir(sub_path):
            continue
        subdir_items += collect_nav_items(
            sub_path,
            os.path.join(directory, name),
            current_depth=1,
            max_depth=depth_limit,
        )

    # Headings contributed by the files of the current directory.
    names = os.listdir(abs_dir)
    # If an index.rst exists in the current directory, drop readme.md.
    if any(n.lower() == "index.rst" for n in names):
        names = [n for n in names if n.lower() != "readme.md"]

    file_items = []
    for name in names:
        if not name.endswith(('.md', '.rst')):
            continue
        found_headings = extract_headings_from_file(os.path.join(abs_dir, name))
        stem = os.path.splitext(name)[0]
        # Priority: "index" and "readme" get priority 0, everything else 1.
        rank = 0 if stem.lower() in ('index', 'readme') else 1
        for found in found_headings:
            file_items.append({
                'level': found['level'],
                'text': found['text'],
                'link': os.path.join(directory, stem),
                'anchor': found['anchor'],
                'priority': rank,
                'filename': stem,  # field used for sorting
            })

    # File headings come first, subdirectory items after them.
    tree = group_headings(file_items + subdir_items)
    sort_tree(tree)
    context['local_md_headings'] = tree
def setup(app):
    """
    Sphinx entry point for this extension.

    Registers the ``local_nav_max_depth`` config value (default
    ``DEFAULT_MAX_NAV_DEPTH``, rebuild scope ``'env'``), hooks
    ``add_local_md_headings`` into the ``html-page-context`` event and
    returns the extension metadata dictionary.
    """
    app.add_config_value('local_nav_max_depth', DEFAULT_MAX_NAV_DEPTH, 'env')
    app.connect('html-page-context', add_local_md_headings)
    metadata = {
        'version': '0.1',
        'parallel_read_safe': True,
    }
    return metadata