mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-03-29 04:23:34 +01:00
Implemented rst file parsing
This commit is contained in:
parent
6a1d6e40a7
commit
c4943ef3b3
@ -1,3 +1,4 @@
|
|||||||
# Applications by Category
|
Applications by Category
|
||||||
|
=========================
|
||||||
|
|
||||||
.. roles-overview::
|
.. roles-overview::
|
@ -1,4 +1,5 @@
|
|||||||
# Application Glossary
|
Application Glossary
|
||||||
|
====================
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
@ -4,7 +4,7 @@ from sphinx.util import logging
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Set the maximum heading level to include (e.g., include headings up to H3)
|
# Set the maximum heading level to include (e.g., include headings up to H3 for Markdown)
|
||||||
MAX_HEADING_LEVEL = 3
|
MAX_HEADING_LEVEL = 3
|
||||||
|
|
||||||
def natural_sort_key(text):
|
def natural_sort_key(text):
|
||||||
@ -16,30 +16,44 @@ def natural_sort_key(text):
|
|||||||
|
|
||||||
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
|
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
|
||||||
"""
|
"""
|
||||||
Extract Markdown headings (up to max_level) from the file at filepath.
|
Extract headings from a file. For Markdown files, look for lines starting with '#' (up to max_level).
|
||||||
Skips fenced code blocks.
|
For reStructuredText files, look for a line that is immediately followed by an underline made of punctuation.
|
||||||
"""
|
"""
|
||||||
headings = []
|
headings = []
|
||||||
|
ext = os.path.splitext(filepath)[1].lower()
|
||||||
try:
|
try:
|
||||||
with open(filepath, 'r', encoding='utf-8') as f:
|
with open(filepath, 'r', encoding='utf-8') as f:
|
||||||
in_code_block = False
|
if ext == '.md':
|
||||||
for line in f:
|
in_code_block = False
|
||||||
# Toggle code block state if a line starts with ```
|
for line in f:
|
||||||
if line.strip().startswith("```"):
|
# Toggle code block state if a line starts with ```
|
||||||
in_code_block = not in_code_block
|
if line.strip().startswith("```"):
|
||||||
continue
|
in_code_block = not in_code_block
|
||||||
if in_code_block:
|
continue
|
||||||
continue
|
if in_code_block:
|
||||||
# Match Markdown headings: one or more '#' followed by a space and the title.
|
continue
|
||||||
match = re.match(r'^(#{1,})\s+(.*)$', line)
|
# Match Markdown headings: one or more '#' followed by a space and the title.
|
||||||
if match:
|
match = re.match(r'^(#{1,})\s+(.*)$', line)
|
||||||
level = len(match.group(1))
|
if match:
|
||||||
if level <= max_level:
|
level = len(match.group(1))
|
||||||
heading_text = match.group(2).strip()
|
if level <= max_level:
|
||||||
# Create a simple slug for the anchor:
|
heading_text = match.group(2).strip()
|
||||||
# - convert to lowercase
|
# Create a simple slug for the anchor:
|
||||||
# - replace spaces with hyphens
|
anchor = re.sub(r'\s+', '-', heading_text.lower())
|
||||||
# - remove non-alphanumeric characters (except hyphens)
|
anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
|
||||||
|
headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
|
||||||
|
elif ext == '.rst':
|
||||||
|
lines = f.readlines()
|
||||||
|
# Look for headings in reST: a line followed by a line consisting only of punctuation (at least 3 characters)
|
||||||
|
for i in range(len(lines)-1):
|
||||||
|
text_line = lines[i].rstrip("\n")
|
||||||
|
underline = lines[i+1].rstrip("\n")
|
||||||
|
# Check that the underline is at least 3 characters long and consists only of the adornment characters - = ~ ^ + " ' `.
|
||||||
|
if len(underline) >= 3 and re.fullmatch(r'[-=~\^\+"\'`]+', underline):
|
||||||
|
# The underline character could be used to differentiate levels (e.g., '=' -> level 1, '-' -> level 2);
|
||||||
|
# for simplicity, every reST heading is currently assigned level 1.
|
||||||
|
level = 1
|
||||||
|
heading_text = text_line.strip()
|
||||||
anchor = re.sub(r'\s+', '-', heading_text.lower())
|
anchor = re.sub(r'\s+', '-', heading_text.lower())
|
||||||
anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
|
anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
|
||||||
headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
|
headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
|
||||||
@ -56,26 +70,15 @@ def group_headings(headings):
|
|||||||
stack = []
|
stack = []
|
||||||
for heading in headings:
|
for heading in headings:
|
||||||
heading['children'] = []
|
heading['children'] = []
|
||||||
# Pop headings from the stack that are at or deeper than the current level
|
|
||||||
while stack and stack[-1]['level'] >= heading['level']:
|
while stack and stack[-1]['level'] >= heading['level']:
|
||||||
stack.pop()
|
stack.pop()
|
||||||
if stack:
|
if stack:
|
||||||
# Append the current heading as a child of the last item in the stack
|
|
||||||
stack[-1]['children'].append(heading)
|
stack[-1]['children'].append(heading)
|
||||||
else:
|
else:
|
||||||
tree.append(heading)
|
tree.append(heading)
|
||||||
stack.append(heading)
|
stack.append(heading)
|
||||||
return tree
|
return tree
|
||||||
|
|
||||||
def sort_tree(tree):
|
|
||||||
"""
|
|
||||||
Sorts a list of headings (and their children) by their text.
|
|
||||||
"""
|
|
||||||
tree.sort(key=lambda x: natural_sort_key(x['text']))
|
|
||||||
for node in tree:
|
|
||||||
if node.get('children'):
|
|
||||||
sort_tree(node['children'])
|
|
||||||
|
|
||||||
def add_local_md_headings(app, pagename, templatename, context, doctree):
|
def add_local_md_headings(app, pagename, templatename, context, doctree):
|
||||||
srcdir = app.srcdir
|
srcdir = app.srcdir
|
||||||
directory = os.path.dirname(pagename)
|
directory = os.path.dirname(pagename)
|
||||||
@ -87,22 +90,20 @@ def add_local_md_headings(app, pagename, templatename, context, doctree):
|
|||||||
|
|
||||||
local_md_headings = []
|
local_md_headings = []
|
||||||
for file in os.listdir(abs_dir):
|
for file in os.listdir(abs_dir):
|
||||||
if file.endswith('.md'):
|
if file.endswith('.md') or file.endswith('.rst'):
|
||||||
filepath = os.path.join(abs_dir, file)
|
filepath = os.path.join(abs_dir, file)
|
||||||
headings = extract_headings_from_file(filepath)
|
headings = extract_headings_from_file(filepath)
|
||||||
for heading in headings:
|
for heading in headings:
|
||||||
base = file[:-3]
|
basename, _ = os.path.splitext(file)
|
||||||
file_link = os.path.join(directory, base)
|
file_link = os.path.join(directory, basename)
|
||||||
|
file_link += ".html" # Ensure link ends with .html
|
||||||
local_md_headings.append({
|
local_md_headings.append({
|
||||||
'level': heading['level'],
|
'level': heading['level'],
|
||||||
'text': heading['text'],
|
'text': heading['text'],
|
||||||
'link': file_link,
|
'link': file_link,
|
||||||
'anchor': heading['anchor']
|
'anchor': heading['anchor']
|
||||||
})
|
})
|
||||||
# Proceed with grouping and sorting as before...
|
context['local_md_headings'] = group_headings(local_md_headings)
|
||||||
tree = group_headings(local_md_headings)
|
|
||||||
#sort_tree(tree)
|
|
||||||
context['local_md_headings'] = tree
|
|
||||||
|
|
||||||
def setup(app):
|
def setup(app):
|
||||||
app.connect('html-page-context', add_local_md_headings)
|
app.connect('html-page-context', add_local_md_headings)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user