Refactored sphinx

This commit is contained in:
Kevin Veen-Birkenbach 2025-03-17 13:05:59 +01:00
parent 8b0308589d
commit 75a110fde9
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
2 changed files with 24 additions and 44 deletions

View File

@ -1,27 +1,27 @@
import os
from sphinx.util import logging
from .nav_utils import extract_headings_from_file
MAX_HEADING_LEVEL = 0
from .nav_utils import extract_headings_from_file, MAX_HEADING_LEVEL
logger = logging.getLogger(__name__)
def collect_folder_tree(dir_path, base_url):
"""
Recursively collects the folder tree starting from the given directory.
For each folder:
- It is ignored if it is hidden.
- If a representative file (index.rst/index.md or readme.md/readme.rst) exists,
its first heading is used as the folder title.
- Folders without such a representative file are skipped.
- All Markdown and reStructuredText files (except the representative file)
are listed without sub-headings, using the first heading as their title.
- Hidden folders (names starting with a dot) are skipped.
- A folder is processed only if it contains one of the representative files:
index.rst, index.md, readme.md, or readme.rst.
- The first heading of the representative file is used as the folder title.
- The representative file is not listed as a file in the folder.
- All other Markdown and reStructuredText files are listed without sub-headings,
using their first heading as the file title.
"""
# Ignore hidden directories
# Skip hidden directories
if os.path.basename(dir_path).startswith('.'):
return None
# List all files in current directory with .md or .rst extension
# List all files in the current directory with .md or .rst extension
files = [f for f in os.listdir(dir_path)
if os.path.isfile(os.path.join(dir_path, f))
and (f.endswith('.md') or f.endswith('.rst'))]
@ -36,24 +36,25 @@ def collect_folder_tree(dir_path, base_url):
if rep_file:
break
# If no representative file, skip this folder
# Skip this folder if no representative file exists
if not rep_file:
return None
rep_path = os.path.join(dir_path, rep_file)
# If MAX_HEADING_LEVEL is 0, use an effectively infinite level (e.g., 9999)
effective_max = MAX_HEADING_LEVEL if MAX_HEADING_LEVEL != 0 else 9999
headings = extract_headings_from_file(rep_path, max_level=effective_max)
headings = extract_headings_from_file(rep_path, max_level=MAX_HEADING_LEVEL)
folder_title = headings[0]['text'] if headings else os.path.basename(dir_path)
folder_link = os.path.join(base_url, os.path.splitext(rep_file)[0])
# Remove the representative file from the list to avoid duplication
files.remove(rep_file)
# Also filter out any files that are explicitly "readme.md" or "index.rst"
files = [f for f in files if f.lower() not in ['readme.md', 'index.rst']]
# Process the remaining files in the current directory
file_items = []
for file in sorted(files, key=lambda s: s.lower()):
file_path = os.path.join(dir_path, file)
file_headings = extract_headings_from_file(file_path, max_level=effective_max)
file_headings = extract_headings_from_file(file_path, max_level=MAX_HEADING_LEVEL)
file_title = file_headings[0]['text'] if file_headings else file
file_base = os.path.splitext(file)[0]
file_link = os.path.join(base_url, file_base)

View File

@ -3,25 +3,16 @@ import re
import yaml
DEFAULT_MAX_NAV_DEPTH = 4
MAX_HEADING_LEVEL = 2
MAX_HEADING_LEVEL = 0 # This can be overridden in your configuration
def natural_sort_key(text):
    """
    Generate a key for natural (human-friendly) sorting,
    taking numeric parts into account.

    The text is split on runs of decimal digits; each digit run becomes an
    int and every other piece is lower-cased, so "a2" sorts before "a10"
    and comparison is case-insensitive.

    Returns a list that alternates str, int, str, ... and always starts
    with a (possibly empty) str, so keys built from any two strings can be
    compared element by element without mixing types.
    """
    # isdecimal() accepts exactly the characters that \d matches. isdigit()
    # is also true for characters such as superscript digits ('²'), which
    # the split leaves inside the text pieces and which int() cannot parse.
    return [
        int(part) if part.isdecimal() else part.lower()
        for part in re.split(r'(\d+)', text)
    ]
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
"""
Extract headings from a file.
For Markdown (.md) files, looks for lines starting with '#' (up to max_level).
For reStructuredText (.rst) files, looks for a line immediately followed by an underline.
If no headings are found and the file is an index file while a README.md exists in the same folder,
it will try to extract headings from the README.md instead.
Returns a list of dictionaries with keys: 'level', 'text', and 'anchor' (if applicable).
"""
import os, re
# If max_level is 0, set it to a very high value to effectively iterate infinitely
if max_level == 0:
max_level = 9999
headings = []
ext = os.path.splitext(filepath)[1].lower()
try:
@ -34,7 +25,8 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
continue
if in_code_block:
continue
match = re.match(r'^(#{1,})\s+(.*)$', line)
# Assuming markdown headings are defined with '#' characters
match = re.match(r'^(#{1,})(.*?)$', line)
if match:
level = len(match.group(1))
if level <= max_level:
@ -53,9 +45,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
headings.append({'level': level, 'text': heading_text, 'anchor': ''})
except Exception as e:
print(f"Warning: Error reading {filepath}: {e}")
# If no headings were found and the file is an index file,
# then try to load headings from a README.md in the same folder.
if not headings:
base = os.path.basename(filepath).lower()
if base == 'index.rst':
@ -69,10 +58,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
return headings
def group_headings(headings):
"""
Convert a flat list of headings into a tree structure based on their level.
Each heading gets a 'children' list.
"""
tree = []
stack = []
for heading in headings:
@ -87,13 +72,7 @@ def group_headings(headings):
return tree
def sort_tree(tree):
"""
Sort a tree of navigation items, first by a 'priority' value (lower comes first)
and then by a natural sort key based on the 'filename' field (or the 'text' field if no filename is provided).
This ensures that 'index' and 'readme' (priority 0) always appear at the top.
"""
tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text']))))
for item in tree:
if item.get('children'):
sort_tree(item['children'])