Implemented rst file parsing

This commit is contained in:
Kevin Veen-Birkenbach 2025-03-16 21:17:55 +01:00
parent 6a1d6e40a7
commit c4943ef3b3
No known key found for this signature in database
GPG Key ID: 44D8F11FD62F878E
3 changed files with 44 additions and 41 deletions

View File

@ -1,3 +1,4 @@
# Applications by Category Applications by Category
=========================
.. roles-overview:: .. roles-overview::

View File

@ -1,4 +1,5 @@
# Application Glosar Application Glosar
===================
.. toctree:: .. toctree::
:maxdepth: 1 :maxdepth: 1

View File

@ -4,7 +4,7 @@ from sphinx.util import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Set the maximum heading level to include (e.g., include headings up to H3) # Set the maximum heading level to include (e.g., include headings up to H3 for Markdown)
MAX_HEADING_LEVEL = 3 MAX_HEADING_LEVEL = 3
def natural_sort_key(text): def natural_sort_key(text):
@ -16,30 +16,44 @@ def natural_sort_key(text):
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL): def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
""" """
Extract Markdown headings (up to max_level) from the file at filepath. Extract headings from a file. For Markdown files, look for lines starting with '#' (up to max_level).
Skips fenced code blocks. For reStructuredText files, look for a line that is immediately followed by an underline made of punctuation.
""" """
headings = [] headings = []
ext = os.path.splitext(filepath)[1].lower()
try: try:
with open(filepath, 'r', encoding='utf-8') as f: with open(filepath, 'r', encoding='utf-8') as f:
in_code_block = False if ext == '.md':
for line in f: in_code_block = False
# Toggle code block state if a line starts with ``` for line in f:
if line.strip().startswith("```"): # Toggle code block state if a line starts with ```
in_code_block = not in_code_block if line.strip().startswith("```"):
continue in_code_block = not in_code_block
if in_code_block: continue
continue if in_code_block:
# Match Markdown headings: one or more '#' followed by a space and the title. continue
match = re.match(r'^(#{1,})\s+(.*)$', line) # Match Markdown headings: one or more '#' followed by a space and the title.
if match: match = re.match(r'^(#{1,})\s+(.*)$', line)
level = len(match.group(1)) if match:
if level <= max_level: level = len(match.group(1))
heading_text = match.group(2).strip() if level <= max_level:
# Create a simple slug for the anchor: heading_text = match.group(2).strip()
# - convert to lowercase # Create a simple slug for the anchor:
# - replace spaces with hyphens anchor = re.sub(r'\s+', '-', heading_text.lower())
# - remove non-alphanumeric characters (except hyphens) anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
elif ext == '.rst':
lines = f.readlines()
# Look for headings in reST: a line followed by a line consisting only of punctuation (at least 3 characters)
for i in range(len(lines)-1):
text_line = lines[i].rstrip("\n")
underline = lines[i+1].rstrip("\n")
# Check if underline consists entirely of punctuation characters and is at least 3 characters long.
if len(underline) >= 3 and re.fullmatch(r'[-=~\^\+"\'`]+', underline):
# Here you could differentiate levels based on the punctuation (e.g., '=' -> level 1, '-' -> level 2),
# for simplicity, we'll set a default level (e.g., 1)
level = 1
heading_text = text_line.strip()
anchor = re.sub(r'\s+', '-', heading_text.lower()) anchor = re.sub(r'\s+', '-', heading_text.lower())
anchor = re.sub(r'[^a-z0-9\-]', '', anchor) anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
headings.append({'level': level, 'text': heading_text, 'anchor': anchor}) headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
@ -56,26 +70,15 @@ def group_headings(headings):
stack = [] stack = []
for heading in headings: for heading in headings:
heading['children'] = [] heading['children'] = []
# Pop headings from the stack that are at or deeper than the current level
while stack and stack[-1]['level'] >= heading['level']: while stack and stack[-1]['level'] >= heading['level']:
stack.pop() stack.pop()
if stack: if stack:
# Append the current heading as a child of the last item in the stack
stack[-1]['children'].append(heading) stack[-1]['children'].append(heading)
else: else:
tree.append(heading) tree.append(heading)
stack.append(heading) stack.append(heading)
return tree return tree
def sort_tree(tree):
"""
Sorts a list of headings (and their children) by their text.
"""
tree.sort(key=lambda x: natural_sort_key(x['text']))
for node in tree:
if node.get('children'):
sort_tree(node['children'])
def add_local_md_headings(app, pagename, templatename, context, doctree): def add_local_md_headings(app, pagename, templatename, context, doctree):
srcdir = app.srcdir srcdir = app.srcdir
directory = os.path.dirname(pagename) directory = os.path.dirname(pagename)
@ -87,22 +90,20 @@ def add_local_md_headings(app, pagename, templatename, context, doctree):
local_md_headings = [] local_md_headings = []
for file in os.listdir(abs_dir): for file in os.listdir(abs_dir):
if file.endswith('.md'): if file.endswith('.md') or file.endswith('.rst'):
filepath = os.path.join(abs_dir, file) filepath = os.path.join(abs_dir, file)
headings = extract_headings_from_file(filepath) headings = extract_headings_from_file(filepath)
for heading in headings: for heading in headings:
base = file[:-3] basename, _ = os.path.splitext(file)
file_link = os.path.join(directory, base) file_link = os.path.join(directory, basename)
file_link += ".html" # Ensure link ends with .html
local_md_headings.append({ local_md_headings.append({
'level': heading['level'], 'level': heading['level'],
'text': heading['text'], 'text': heading['text'],
'link': file_link, 'link': file_link,
'anchor': heading['anchor'] 'anchor': heading['anchor']
}) })
# Proceed with grouping and sorting as before... context['local_md_headings'] = group_headings(local_md_headings)
tree = group_headings(local_md_headings)
#sort_tree(tree)
context['local_md_headings'] = tree
def setup(app): def setup(app):
app.connect('html-page-context', add_local_md_headings) app.connect('html-page-context', add_local_md_headings)