From 75a110fde91bc5d58a07748e62c3b372d8658414 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Mon, 17 Mar 2025 13:05:59 +0100 Subject: [PATCH] Refactored sphinx --- sphinx/extensions/local_subfolders.py | 33 +++++++++++++------------ sphinx/extensions/nav_utils.py | 35 ++++++--------------------- 2 files changed, 24 insertions(+), 44 deletions(-) diff --git a/sphinx/extensions/local_subfolders.py b/sphinx/extensions/local_subfolders.py index f4ac2da9..805eb761 100644 --- a/sphinx/extensions/local_subfolders.py +++ b/sphinx/extensions/local_subfolders.py @@ -1,27 +1,27 @@ import os from sphinx.util import logging -from .nav_utils import extract_headings_from_file -MAX_HEADING_LEVEL = 0 +from .nav_utils import extract_headings_from_file, MAX_HEADING_LEVEL logger = logging.getLogger(__name__) def collect_folder_tree(dir_path, base_url): """ Recursively collects the folder tree starting from the given directory. - + For each folder: - - It is ignored if it is hidden. - - If a representative file (index.rst/index.md or readme.md/readme.rst) exists, - its first heading is used as the folder title. - - Folders without such a representative file are skipped. - - All Markdown and reStructuredText files (except the representative file) - are listed without sub-headings, using the first heading as their title. + - Hidden folders (names starting with a dot) are skipped. + - A folder is processed only if it contains one of the representative files: + index.rst, index.md, readme.md, or readme.rst. + - The first heading of the representative file is used as the folder title. + - The representative file is not listed as a file in the folder. + - All other Markdown and reStructuredText files are listed without sub-headings, + using their first heading as the file title. """ - # Ignore hidden directories + # Skip hidden directories if os.path.basename(dir_path).startswith('.'): return None - # List all files in current directory with .md or .rst extension + # List all files in the current directory with .md or .rst extension files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f)) and (f.endswith('.md') or f.endswith('.rst'))] @@ -36,24 +36,25 @@ def collect_folder_tree(dir_path, base_url): if rep_file: break - # If no representative file, skip this folder + # Skip this folder if no representative file exists if not rep_file: return None rep_path = os.path.join(dir_path, rep_file) - # If MAX_HEADING_LEVEL is 0, use an effectively infinite level (e.g., 9999) - effective_max = MAX_HEADING_LEVEL if MAX_HEADING_LEVEL != 0 else 9999 - headings = extract_headings_from_file(rep_path, max_level=effective_max) + headings = extract_headings_from_file(rep_path, max_level=MAX_HEADING_LEVEL) folder_title = headings[0]['text'] if headings else os.path.basename(dir_path) folder_link = os.path.join(base_url, os.path.splitext(rep_file)[0]) + # Remove the representative file from the list to avoid duplication files.remove(rep_file) + # Also filter out any files that are explicitly "readme.md" or "index.rst" + files = [f for f in files if f.lower() not in ['readme.md', 'index.rst']] # Process the remaining files in the current directory file_items = [] for file in sorted(files, key=lambda s: s.lower()): file_path = os.path.join(dir_path, file) - file_headings = extract_headings_from_file(file_path, max_level=effective_max) + file_headings = extract_headings_from_file(file_path, max_level=MAX_HEADING_LEVEL) file_title = file_headings[0]['text'] if file_headings else file file_base = os.path.splitext(file)[0] file_link = os.path.join(base_url, file_base) diff --git a/sphinx/extensions/nav_utils.py b/sphinx/extensions/nav_utils.py index c2187eca..41ab1ae4 100644 --- a/sphinx/extensions/nav_utils.py +++ b/sphinx/extensions/nav_utils.py @@ -3,25 +3,16 @@ import re import yaml DEFAULT_MAX_NAV_DEPTH = 4 -MAX_HEADING_LEVEL = 2 +MAX_HEADING_LEVEL = 0 # This can be overridden in your configuration def natural_sort_key(text): - """ - Generate a key for natural (human-friendly) sorting, - taking numeric parts into account. - """ return [int(c) if c.isdigit() else c.lower() for c in re.split(r'(\d+)', text)] def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): - """ - Extract headings from a file. - For Markdown (.md) files, looks for lines starting with '#' (up to max_level). - For reStructuredText (.rst) files, looks for a line immediately followed by an underline. - If no headings are found and the file is an index file while a README.md exists in the same folder, - it will try to extract headings from the README.md instead. - Returns a list of dictionaries with keys: 'level', 'text', and 'anchor' (if applicable). - """ - import os, re + # If max_level is 0, set it to a very high value to effectively iterate infinitely + if max_level == 0: + max_level = 9999 + headings = [] ext = os.path.splitext(filepath)[1].lower() try: @@ -34,7 +25,8 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): continue if in_code_block: continue - match = re.match(r'^(#{1,})\s+(.*)$', line) + # Assuming markdown headings are defined with '#' characters + match = re.match(r'^(#{1,})(.*?)$', line) if match: level = len(match.group(1)) if level <= max_level: @@ -53,9 +45,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): headings.append({'level': level, 'text': heading_text, 'anchor': ''}) except Exception as e: print(f"Warning: Error reading {filepath}: {e}") - - # If no headings were found and the file is an index file, - # then try to load headings from a README.md in the same folder. if not headings: base = os.path.basename(filepath).lower() if base == 'index.rst': @@ -69,10 +58,6 @@ def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): return headings def group_headings(headings): - """ - Convert a flat list of headings into a tree structure based on their level. - Each heading gets a 'children' list. - """ tree = [] stack = [] for heading in headings: @@ -87,13 +72,7 @@ def group_headings(headings): return tree def sort_tree(tree): - """ - Sort a tree of navigation items, first by a 'priority' value (lower comes first) - and then by a natural sort key based on the 'filename' field (or the 'text' field if no filename is provided). - This ensures that 'index' and 'readme' (priority 0) always appear at the top. - """ tree.sort(key=lambda x: (x.get('priority', 1), natural_sort_key(x.get('filename', x['text'])))) for item in tree: if item.get('children'): sort_tree(item['children']) -