mirror of
				https://github.com/kevinveenbirkenbach/computer-playbook.git
				synced 2025-11-03 19:58:14 +00:00 
			
		
		
		
	Implemented rst file parsing
This commit is contained in:
		@@ -1,3 +1,4 @@
 | 
				
			|||||||
# Applications by Category
 | 
					Applications by Category
 | 
				
			||||||
 | 
					=========================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. roles-overview::
 | 
					.. roles-overview::
 | 
				
			||||||
@@ -1,4 +1,5 @@
 | 
				
			|||||||
# Application Glossary
 | 
					Application Glossary
 | 
				
			||||||
 | 
					====================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. toctree::
 | 
					.. toctree::
 | 
				
			||||||
   :maxdepth: 1
 | 
					   :maxdepth: 1
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4,7 +4,7 @@ from sphinx.util import logging
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
logger = logging.getLogger(__name__)
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Set the maximum heading level to include (e.g., include headings up to H3)
 | 
					# Set the maximum heading level to include (e.g., include headings up to H3 for Markdown)
 | 
				
			||||||
MAX_HEADING_LEVEL = 3
 | 
					MAX_HEADING_LEVEL = 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def natural_sort_key(text):
 | 
					def natural_sort_key(text):
 | 
				
			||||||
@@ -16,30 +16,44 @@ def natural_sort_key(text):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
 | 
					def extract_headings_from_file(filepath, max_level=MAX_HEADING_LEVEL):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Extract Markdown headings (up to max_level) from the file at filepath.
 | 
					    Extract headings from a file. For Markdown files, look for lines starting with '#' (up to max_level).
 | 
				
			||||||
    Skips fenced code blocks.
 | 
					    For reStructuredText files, look for a line that is immediately followed by an underline made of punctuation.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    headings = []
 | 
					    headings = []
 | 
				
			||||||
 | 
					    ext = os.path.splitext(filepath)[1].lower()
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        with open(filepath, 'r', encoding='utf-8') as f:
 | 
					        with open(filepath, 'r', encoding='utf-8') as f:
 | 
				
			||||||
            in_code_block = False
 | 
					            if ext == '.md':
 | 
				
			||||||
            for line in f:
 | 
					                in_code_block = False
 | 
				
			||||||
                # Toggle code block state if a line starts with ```
 | 
					                for line in f:
 | 
				
			||||||
                if line.strip().startswith("```"):
 | 
					                    # Toggle code block state if a line starts with ```
 | 
				
			||||||
                    in_code_block = not in_code_block
 | 
					                    if line.strip().startswith("```"):
 | 
				
			||||||
                    continue
 | 
					                        in_code_block = not in_code_block
 | 
				
			||||||
                if in_code_block:
 | 
					                        continue
 | 
				
			||||||
                    continue
 | 
					                    if in_code_block:
 | 
				
			||||||
                # Match Markdown headings: one or more '#' followed by a space and the title.
 | 
					                        continue
 | 
				
			||||||
                match = re.match(r'^(#{1,})\s+(.*)$', line)
 | 
					                    # Match Markdown headings: one or more '#' followed by a space and the title.
 | 
				
			||||||
                if match:
 | 
					                    match = re.match(r'^(#{1,})\s+(.*)$', line)
 | 
				
			||||||
                    level = len(match.group(1))
 | 
					                    if match:
 | 
				
			||||||
                    if level <= max_level:
 | 
					                        level = len(match.group(1))
 | 
				
			||||||
                        heading_text = match.group(2).strip()
 | 
					                        if level <= max_level:
 | 
				
			||||||
                        # Create a simple slug for the anchor:
 | 
					                            heading_text = match.group(2).strip()
 | 
				
			||||||
                        # - convert to lowercase
 | 
					                            # Create a simple slug for the anchor:
 | 
				
			||||||
                        # - replace spaces with hyphens
 | 
					                            anchor = re.sub(r'\s+', '-', heading_text.lower())
 | 
				
			||||||
                        # - remove non-alphanumeric characters (except hyphens)
 | 
					                            anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
 | 
				
			||||||
 | 
					                            headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
 | 
				
			||||||
 | 
					            elif ext == '.rst':
 | 
				
			||||||
 | 
					                lines = f.readlines()
 | 
				
			||||||
 | 
					                # Look for headings in reST: a line followed by a line consisting only of punctuation (at least 3 characters)
 | 
				
			||||||
 | 
					                for i in range(len(lines)-1):
 | 
				
			||||||
 | 
					                    text_line = lines[i].rstrip("\n")
 | 
				
			||||||
 | 
					                    underline = lines[i+1].rstrip("\n")
 | 
				
			||||||
 | 
					                    # Check if underline consists entirely of punctuation characters and is at least 3 characters long.
 | 
				
			||||||
 | 
					                    if len(underline) >= 3 and re.fullmatch(r'[-=~\^\+"\'`]+', underline):
 | 
				
			||||||
 | 
					                        # Here you could differentiate levels based on the punctuation (e.g., '=' -> level 1, '-' -> level 2),
 | 
				
			||||||
 | 
					                        # but for simplicity every heading is assigned a default level of 1.
 | 
				
			||||||
 | 
					                        level = 1
 | 
				
			||||||
 | 
					                        heading_text = text_line.strip()
 | 
				
			||||||
                        anchor = re.sub(r'\s+', '-', heading_text.lower())
 | 
					                        anchor = re.sub(r'\s+', '-', heading_text.lower())
 | 
				
			||||||
                        anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
 | 
					                        anchor = re.sub(r'[^a-z0-9\-]', '', anchor)
 | 
				
			||||||
                        headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
 | 
					                        headings.append({'level': level, 'text': heading_text, 'anchor': anchor})
 | 
				
			||||||
@@ -56,26 +70,15 @@ def group_headings(headings):
 | 
				
			|||||||
    stack = []
 | 
					    stack = []
 | 
				
			||||||
    for heading in headings:
 | 
					    for heading in headings:
 | 
				
			||||||
        heading['children'] = []
 | 
					        heading['children'] = []
 | 
				
			||||||
        # Pop headings from the stack that are at or deeper than the current level
 | 
					 | 
				
			||||||
        while stack and stack[-1]['level'] >= heading['level']:
 | 
					        while stack and stack[-1]['level'] >= heading['level']:
 | 
				
			||||||
            stack.pop()
 | 
					            stack.pop()
 | 
				
			||||||
        if stack:
 | 
					        if stack:
 | 
				
			||||||
            # Append the current heading as a child of the last item in the stack
 | 
					 | 
				
			||||||
            stack[-1]['children'].append(heading)
 | 
					            stack[-1]['children'].append(heading)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            tree.append(heading)
 | 
					            tree.append(heading)
 | 
				
			||||||
        stack.append(heading)
 | 
					        stack.append(heading)
 | 
				
			||||||
    return tree
 | 
					    return tree
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def sort_tree(tree):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Sorts a list of headings (and their children) by their text.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    tree.sort(key=lambda x: natural_sort_key(x['text']))
 | 
					 | 
				
			||||||
    for node in tree:
 | 
					 | 
				
			||||||
        if node.get('children'):
 | 
					 | 
				
			||||||
            sort_tree(node['children'])
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def add_local_md_headings(app, pagename, templatename, context, doctree):
 | 
					def add_local_md_headings(app, pagename, templatename, context, doctree):
 | 
				
			||||||
    srcdir = app.srcdir
 | 
					    srcdir = app.srcdir
 | 
				
			||||||
    directory = os.path.dirname(pagename)
 | 
					    directory = os.path.dirname(pagename)
 | 
				
			||||||
@@ -87,22 +90,20 @@ def add_local_md_headings(app, pagename, templatename, context, doctree):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    local_md_headings = []
 | 
					    local_md_headings = []
 | 
				
			||||||
    for file in os.listdir(abs_dir):
 | 
					    for file in os.listdir(abs_dir):
 | 
				
			||||||
        if file.endswith('.md'):
 | 
					        if file.endswith('.md') or file.endswith('.rst'):
 | 
				
			||||||
            filepath = os.path.join(abs_dir, file)
 | 
					            filepath = os.path.join(abs_dir, file)
 | 
				
			||||||
            headings = extract_headings_from_file(filepath)
 | 
					            headings = extract_headings_from_file(filepath)
 | 
				
			||||||
            for heading in headings:
 | 
					            for heading in headings:
 | 
				
			||||||
                base = file[:-3]
 | 
					                basename, _ = os.path.splitext(file)
 | 
				
			||||||
                file_link = os.path.join(directory, base)
 | 
					                file_link = os.path.join(directory, basename)
 | 
				
			||||||
 | 
					                file_link += ".html"  # Ensure link ends with .html
 | 
				
			||||||
                local_md_headings.append({
 | 
					                local_md_headings.append({
 | 
				
			||||||
                    'level': heading['level'],
 | 
					                    'level': heading['level'],
 | 
				
			||||||
                    'text': heading['text'],
 | 
					                    'text': heading['text'],
 | 
				
			||||||
                    'link': file_link,
 | 
					                    'link': file_link,
 | 
				
			||||||
                    'anchor': heading['anchor']
 | 
					                    'anchor': heading['anchor']
 | 
				
			||||||
                })
 | 
					                })
 | 
				
			||||||
    # Proceed with grouping and sorting as before...
 | 
					    context['local_md_headings'] = group_headings(local_md_headings)
 | 
				
			||||||
    tree = group_headings(local_md_headings)
 | 
					 | 
				
			||||||
    #sort_tree(tree)
 | 
					 | 
				
			||||||
    context['local_md_headings'] = tree
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def setup(app):
 | 
					def setup(app):
 | 
				
			||||||
    app.connect('html-page-context', add_local_md_headings)
 | 
					    app.connect('html-page-context', add_local_md_headings)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user