Refactored sphinx

This commit is contained in:
Kevin Veen-Birkenbach 2025-03-17 13:05:59 +01:00
parent 8b0308589d
commit 75a110fde9
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
2 changed files with 24 additions and 44 deletions

View File

@ -1,27 +1,27 @@
import os import os
from sphinx.util import logging from sphinx.util import logging
from .nav_utils import extract_headings_from_file from .nav_utils import extract_headings_from_file, MAX_HEADING_LEVEL
MAX_HEADING_LEVEL = 0
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def collect_folder_tree(dir_path, base_url): def collect_folder_tree(dir_path, base_url):
""" """
Recursively collects the folder tree starting from the given directory. Recursively collects the folder tree starting from the given directory.
For each folder: For each folder:
- It is ignored if it is hidden. - Hidden folders (names starting with a dot) are skipped.
- If a representative file (index.rst/index.md or readme.md/readme.rst) exists, - A folder is processed only if it contains one of the representative files:
its first heading is used as the folder title. index.rst, index.md, readme.md, or readme.rst.
- Folders without such a representative file are skipped. - The first heading of the representative file is used as the folder title.
- All Markdown and reStructuredText files (except the representative file) - The representative file is not listed as a file in the folder.
are listed without sub-headings, using the first heading as their title. - All other Markdown and reStructuredText files are listed without sub-headings,
using their first heading as the file title.
""" """
# Ignore hidden directories # Skip hidden directories
if os.path.basename(dir_path).startswith('.'): if os.path.basename(dir_path).startswith('.'):
return None return None
# List all files in current directory with .md or .rst extension # List all files in the current directory with .md or .rst extension
files = [f for f in os.listdir(dir_path) files = [f for f in os.listdir(dir_path)
if os.path.isfile(os.path.join(dir_path, f)) if os.path.isfile(os.path.join(dir_path, f))
and (f.endswith('.md') or f.endswith('.rst'))] and (f.endswith('.md') or f.endswith('.rst'))]
@ -36,24 +36,25 @@ def collect_folder_tree(dir_path, base_url):
if rep_file: if rep_file:
break break
# If no representative file, skip this folder # Skip this folder if no representative file exists
if not rep_file: if not rep_file:
return None return None
rep_path = os.path.join(dir_path, rep_file) rep_path = os.path.join(dir_path, rep_file)
# If MAX_HEADING_LEVEL is 0, use an effectively infinite level (e.g., 9999) headings = extract_headings_from_file(rep_path, max_level=MAX_HEADING_LEVEL)
effective_max = MAX_HEADING_LEVEL if MAX_HEADING_LEVEL != 0 else 9999
headings = extract_headings_from_file(rep_path, max_level=effective_max)
folder_title = headings[0]['text'] if headings else os.path.basename(dir_path) folder_title = headings[0]['text'] if headings else os.path.basename(dir_path)
folder_link = os.path.join(base_url, os.path.splitext(rep_file)[0]) folder_link = os.path.join(base_url, os.path.splitext(rep_file)[0])
# Remove the representative file from the list to avoid duplication # Remove the representative file from the list to avoid duplication
files.remove(rep_file) files.remove(rep_file)
# Also filter out any files that are explicitly "readme.md" or "index.rst"
files = [f for f in files if f.lower() not in ['readme.md', 'index.rst']]
# Process the remaining files in the current directory # Process the remaining files in the current directory
file_items = [] file_items = []
for file in sorted(files, key=lambda s: s.lower()): for file in sorted(files, key=lambda s: s.lower()):
file_path = os.path.join(dir_path, file) file_path = os.path.join(dir_path, file)
file_headings = extract_headings_from_file(file_path, max_level=effective_max) file_headings = extract_headings_from_file(file_path, max_level=MAX_HEADING_LEVEL)
file_title = file_headings[0]['text'] if file_headings else file file_title = file_headings[0]['text'] if file_headings else file
file_base = os.path.splitext(file)[0] file_base = os.path.splitext(file)[0]
file_link = os.path.join(base_url, file_base) file_link = os.path.join(base_url, file_base)

View File

@ -3,25 +3,16 @@ import re
import yaml import yaml
DEFAULT_MAX_NAV_DEPTH = 4 DEFAULT_MAX_NAV_DEPTH = 4
MAX_HEADING_LEVEL = 2 MAX_HEADING_LEVEL = 0 # This can be overridden in your configuration
def natural_sort_key(text): def natural_sort_key(text):
"""
Generate a key for natural (human-friendly) sorting,
taking numeric parts into account.
"""
return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)] return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)]
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
""" # If max_level is 0, set it to a very high value to effectively iterate infinitely
Extract headings from a file. if max_level == 0:
For Markdown (.md) files, looks for lines starting with '#' (up to max_level). max_level = 9999
For reStructuredText (.rst) files, looks for a line immediately followed by an underline.
If no headings are found and the file is an index file while a README.md exists in the same folder,
it will try to extract headings from the README.md instead.
Returns a list of dictionaries with keys: 'level', 'text', and 'anchor' (if applicable).
"""
import os, re
headings = [] headings = []
ext = os.path.splitext(filepath)[1].lower() ext = os.path.splitext(filepath)[1].lower()
try: try:
@ -34,7 +25,8 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
continue continue
if in_code_block: if in_code_block:
continue continue
match = re.match(r'^(#{1,})\s+(.*)$', line) # Assuming markdown headings are defined with '#' characters
match = re.match(r'^(#{1,})(.*?)$', line)
if match: if match:
level = len(match.group(1)) level = len(match.group(1))
if level <= max_level: if level <= max_level:
@ -53,9 +45,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
headings.append({'level': level, 'text': heading_text, 'anchor': ''}) headings.append({'level': level, 'text': heading_text, 'anchor': ''})
except Exception as e: except Exception as e:
print(f"Warning: Error reading {filepath}: {e}") print(f"Warning: Error reading {filepath}: {e}")
# If no headings were found and the file is an index file,
# then try to load headings from a README.md in the same folder.
if not headings: if not headings:
base = os.path.basename(filepath).lower() base = os.path.basename(filepath).lower()
if base == 'index.rst': if base == 'index.rst':
@ -69,10 +58,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
return headings return headings
def group_headings(headings): def group_headings(headings):
"""
Convert a flat list of headings into a tree structure based on their level.
Each heading gets a 'children' list.
"""
tree = [] tree = []
stack = [] stack = []
for heading in headings: for heading in headings:
@ -87,13 +72,7 @@ def group_headings(headings):
return tree return tree
def sort_tree(tree): def sort_tree(tree):
"""
Sort a tree of navigation items, first by a 'priority' value (lower comes first)
and then by a natural sort key based on the 'filename' field (or the 'text' field if no filename is provided).
This ensures that 'index' and 'readme' (priority 0) always appear at the top.
"""
tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text'])))) tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text']))))
for item in tree: for item in tree:
if item.get('children'): if item.get('children'):
sort_tree(item['children']) sort_tree(item['children'])