mirror of
				https://github.com/kevinveenbirkenbach/computer-playbook.git
				synced 2025-11-03 19:58:14 +00:00 
			
		
		
		
	feat(filter_plugins/url_join): add query parameter support
- Support query elements starting with '?' or '&' * First query element normalized to '?', subsequent to '&' * Each query element must be exactly one 'key=value' pair * Query elements may only appear after path elements * Once query starts, no more path elements are allowed - Extend test suite with success and failure cases for query handling See: https://chatgpt.com/share/68b537ea-d198-800f-927a-940c4de832f2
This commit is contained in:
		@@ -2,6 +2,10 @@
 | 
				
			|||||||
Ansible filter plugin that safely joins URL components from a list.
 | 
					Ansible filter plugin that safely joins URL components from a list.
 | 
				
			||||||
- Requires a valid '<scheme>://' in the first element (any RFC-3986-ish scheme)
 | 
					- Requires a valid '<scheme>://' in the first element (any RFC-3986-ish scheme)
 | 
				
			||||||
- Preserves the double slash after the scheme, collapses other duplicate slashes
 | 
					- Preserves the double slash after the scheme, collapses other duplicate slashes
 | 
				
			||||||
 | 
					- Supports query parts introduced by elements starting with '?' or '&'
 | 
				
			||||||
 | 
					  * first query element uses '?', subsequent use '&' (regardless of given prefix)
 | 
				
			||||||
 | 
					  * each query element must be exactly one 'key=value' pair
 | 
				
			||||||
 | 
					  * query elements may only appear after path elements; once query starts, no more path parts
 | 
				
			||||||
- Raises specific AnsibleFilterError messages for common misuse
 | 
					- Raises specific AnsibleFilterError messages for common misuse
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -9,6 +13,7 @@ import re
 | 
				
			|||||||
from ansible.errors import AnsibleFilterError
 | 
					from ansible.errors import AnsibleFilterError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$')
 | 
					_SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$')
 | 
				
			||||||
 | 
					_QUERY_PAIR_RE = re.compile(r'^[^&=?#]+=[^&?#]*$')  # key=value (no '&', no extra '?' or '#')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _to_str_or_error(obj, index):
 | 
					def _to_str_or_error(obj, index):
 | 
				
			||||||
    """Cast to str, raising a specific AnsibleFilterError with index context."""
 | 
					    """Cast to str, raising a specific AnsibleFilterError with index context."""
 | 
				
			||||||
@@ -21,22 +26,20 @@ def _to_str_or_error(obj, index):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def url_join(parts):
 | 
					def url_join(parts):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Join a list of URL parts, URL-aware.
 | 
					    Join a list of URL parts, URL-aware (scheme, path, query).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Args:
 | 
					    Args:
 | 
				
			||||||
        parts (list): List/tuple of URL segments. First element MUST include '<scheme>://'.
 | 
					        parts (list|tuple): URL segments. First element MUST include '<scheme>://'.
 | 
				
			||||||
 | 
					            Path elements are plain strings.
 | 
				
			||||||
 | 
					            Query elements must start with '?' or '&' and contain exactly one 'key=value'.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Returns:
 | 
					    Returns:
 | 
				
			||||||
        str: Joined URL.
 | 
					        str: Joined URL.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    Raises (all via AnsibleFilterError with specific messages):
 | 
					    Raises:
 | 
				
			||||||
        - Input is None/empty
 | 
					        AnsibleFilterError: with specific, descriptive messages.
 | 
				
			||||||
        - Input is not a list/tuple
 | 
					 | 
				
			||||||
        - First element missing/invalid scheme
 | 
					 | 
				
			||||||
        - Part cannot be converted to string (includes index)
 | 
					 | 
				
			||||||
        - Additional scheme found in a later element
 | 
					 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    # Basic input validation
 | 
					    # --- basic input validation ---
 | 
				
			||||||
    if parts is None:
 | 
					    if parts is None:
 | 
				
			||||||
        raise AnsibleFilterError("url_join: parts must be a non-empty list; got None")
 | 
					        raise AnsibleFilterError("url_join: parts must be a non-empty list; got None")
 | 
				
			||||||
    if not isinstance(parts, (list, tuple)):
 | 
					    if not isinstance(parts, (list, tuple)):
 | 
				
			||||||
@@ -46,7 +49,7 @@ def url_join(parts):
 | 
				
			|||||||
    if len(parts) == 0:
 | 
					    if len(parts) == 0:
 | 
				
			||||||
        raise AnsibleFilterError("url_join: parts must be a non-empty list")
 | 
					        raise AnsibleFilterError("url_join: parts must be a non-empty list")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # First element must carry the scheme
 | 
					    # --- first element must carry a scheme ---
 | 
				
			||||||
    first_raw = parts[0]
 | 
					    first_raw = parts[0]
 | 
				
			||||||
    if first_raw is None:
 | 
					    if first_raw is None:
 | 
				
			||||||
        raise AnsibleFilterError(
 | 
					        raise AnsibleFilterError(
 | 
				
			||||||
@@ -64,36 +67,76 @@ def url_join(parts):
 | 
				
			|||||||
    scheme = m.group(1)                    # e.g., 'https://', 'ftp://', 'myapp+v1://'
 | 
					    scheme = m.group(1)                    # e.g., 'https://', 'ftp://', 'myapp+v1://'
 | 
				
			||||||
    after_scheme = m.group(2).lstrip('/')  # strip only leading slashes right after scheme
 | 
					    after_scheme = m.group(2).lstrip('/')  # strip only leading slashes right after scheme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Normalize all parts to strings (with index-aware errors)
 | 
					    # --- iterate parts: collect path parts until first query part; then only query parts allowed ---
 | 
				
			||||||
    normalized = []
 | 
					    path_parts = []
 | 
				
			||||||
 | 
					    query_pairs = []
 | 
				
			||||||
 | 
					    in_query = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for i, p in enumerate(parts):
 | 
					    for i, p in enumerate(parts):
 | 
				
			||||||
        if p is None:
 | 
					        if p is None:
 | 
				
			||||||
            # Skip None parts silently (like path_join behavior)
 | 
					            # skip None silently (consistent with path_join-ish behavior)
 | 
				
			||||||
            continue
 | 
					            continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        s = _to_str_or_error(p, i)
 | 
					        s = _to_str_or_error(p, i)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # disallow additional scheme in later parts
 | 
				
			||||||
        if i > 0 and "://" in s:
 | 
					        if i > 0 and "://" in s:
 | 
				
			||||||
            raise AnsibleFilterError(
 | 
					            raise AnsibleFilterError(
 | 
				
			||||||
                f"url_join: only the first element may contain a scheme; part at index {i} "
 | 
					                f"url_join: only the first element may contain a scheme; part at index {i} "
 | 
				
			||||||
                f"looks like a URL with scheme ('{s}')."
 | 
					                f"looks like a URL with scheme ('{s}')."
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        normalized.append(s)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Replace first element with remainder after scheme
 | 
					        # first element: replace with remainder after scheme and continue
 | 
				
			||||||
    if normalized:
 | 
					        if i == 0:
 | 
				
			||||||
        normalized[0] = after_scheme
 | 
					            s = after_scheme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # check if this is a query element (starts with ? or &)
 | 
				
			||||||
 | 
					        if s.startswith('?') or s.startswith('&'):
 | 
				
			||||||
 | 
					            in_query = True
 | 
				
			||||||
 | 
					            raw_pair = s[1:]  # strip the leading ? or &
 | 
				
			||||||
 | 
					            if raw_pair == '':
 | 
				
			||||||
 | 
					                raise AnsibleFilterError(
 | 
				
			||||||
 | 
					                    f"url_join: query element at index {i} is empty; expected '?key=value' or '&key=value'"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            # Disallow multiple pairs in a single element; enforce exactly one key=value
 | 
				
			||||||
 | 
					            if '&' in raw_pair:
 | 
				
			||||||
 | 
					                raise AnsibleFilterError(
 | 
				
			||||||
 | 
					                    f"url_join: query element at index {i} must contain exactly one 'key=value' pair "
 | 
				
			||||||
 | 
					                    f"without '&'; got '{s}'"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            if not _QUERY_PAIR_RE.match(raw_pair):
 | 
				
			||||||
 | 
					                raise AnsibleFilterError(
 | 
				
			||||||
 | 
					                    f"url_join: query element at index {i} must match 'key=value' (no extra '?', '&', '#'); got '{s}'"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            query_pairs.append(raw_pair)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
        # This can only happen if all parts were None, but we gated that earlier
 | 
					            # non-query element
 | 
				
			||||||
        return scheme
 | 
					            if in_query:
 | 
				
			||||||
 | 
					                # once query started, no more path parts allowed
 | 
				
			||||||
 | 
					                raise AnsibleFilterError(
 | 
				
			||||||
 | 
					                    f"url_join: path element found at index {i} after query parameters started; "
 | 
				
			||||||
 | 
					                    f"query parts must come last"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					            # normal path part: strip slashes to avoid duplicate '/'
 | 
				
			||||||
 | 
					            path_parts.append(s.strip('/'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Strip slashes at both ends of each part, then filter out empties
 | 
					    # normalize path: remove empty chunks
 | 
				
			||||||
    stripped = [p.strip('/') for p in normalized]
 | 
					    path_parts = [p for p in path_parts if p != '']
 | 
				
			||||||
    stripped = [p for p in stripped if p != '']
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # If everything is empty after stripping, return just the scheme
 | 
					    # --- build result ---
 | 
				
			||||||
    if not stripped:
 | 
					    # path portion
 | 
				
			||||||
        return scheme
 | 
					    if path_parts:
 | 
				
			||||||
 | 
					        joined_path = "/".join(path_parts)
 | 
				
			||||||
 | 
					        base = scheme + joined_path
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        # no path beyond scheme
 | 
				
			||||||
 | 
					        base = scheme
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return scheme + "/".join(stripped)
 | 
					    # query portion
 | 
				
			||||||
 | 
					    if query_pairs:
 | 
				
			||||||
 | 
					        base = base + "?" + "&".join(query_pairs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return base
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FilterModule(object):
 | 
					class FilterModule(object):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -13,7 +13,7 @@ from url_join import url_join
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestUrlJoinFilter(unittest.TestCase):
 | 
					class TestUrlJoinFilter(unittest.TestCase):
 | 
				
			||||||
    # --- success cases ---
 | 
					    # --- success cases (path only) ---
 | 
				
			||||||
    def test_http_basic(self):
 | 
					    def test_http_basic(self):
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
            url_join(['http://example.com', 'foo', 'bar']),
 | 
					            url_join(['http://example.com', 'foo', 'bar']),
 | 
				
			||||||
@@ -32,31 +32,41 @@ class TestUrlJoinFilter(unittest.TestCase):
 | 
				
			|||||||
            'myapp+v1://host/section/item'
 | 
					            'myapp+v1://host/section/item'
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_scheme_with_path_in_first(self):
 | 
					    def test_only_scheme(self):
 | 
				
			||||||
 | 
					        self.assertEqual(url_join(['https://']), 'https://')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # --- success cases with query ---
 | 
				
			||||||
 | 
					    def test_query_normalization_first_q_then_amp(self):
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
            url_join(['https://example.com/base/', '/deep/', 'leaf']),
 | 
					            url_join(['https://example.com', 'api', '?a=1', '&b=2']),
 | 
				
			||||||
            'https://example.com/base/deep/leaf'
 | 
					            'https://example.com/api?a=1&b=2'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_ignores_given_prefix_order(self):
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?a=1', '?b=2', '&c=3']),
 | 
				
			||||||
 | 
					            'https://example.com?a=1&b=2&c=3'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_after_path_with_slashes(self):
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            url_join(['https://example.com/', '/x/', 'y/', '?q=ok']),
 | 
				
			||||||
 | 
					            'https://example.com/x/y?q=ok'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_with_numeric_value(self):
 | 
				
			||||||
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?n=123']),
 | 
				
			||||||
 | 
					            'https://example.com?n=123'
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_none_in_list(self):
 | 
					    def test_none_in_list(self):
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
            url_join(['https://example.com', None, 'foo']),
 | 
					            url_join(['https://example.com', None, 'foo', None, '?a=1', None]),
 | 
				
			||||||
            'https://example.com/foo'
 | 
					            'https://example.com/foo?a=1'
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_numeric_parts(self):
 | 
					    # --- error cases ---
 | 
				
			||||||
        self.assertEqual(
 | 
					 | 
				
			||||||
            url_join(['https://example.com', 123, '456']),
 | 
					 | 
				
			||||||
            'https://example.com/123/456'
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_only_scheme_returns_scheme(self):
 | 
					 | 
				
			||||||
        self.assertEqual(
 | 
					 | 
				
			||||||
            url_join(['https://']),
 | 
					 | 
				
			||||||
            'https://'
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # --- error cases with specific messages ---
 | 
					 | 
				
			||||||
    def test_none_input_raises(self):
 | 
					    def test_none_input_raises(self):
 | 
				
			||||||
        with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"):
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"):
 | 
				
			||||||
            url_join(None)
 | 
					            url_join(None)
 | 
				
			||||||
@@ -81,6 +91,26 @@ class TestUrlJoinFilter(unittest.TestCase):
 | 
				
			|||||||
        with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"):
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"):
 | 
				
			||||||
            url_join(['https://example.com', 'https://elsewhere'])
 | 
					            url_join(['https://example.com', 'https://elsewhere'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_path_after_query_raises(self):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"path element .* after query parameters started"):
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?a=1', 'still/path'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_element_empty_raises(self):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"query element .* is empty"):
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_element_multiple_pairs_raises(self):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"must contain exactly one 'key=value' pair"):
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?a=1&b=2'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_element_missing_equal_raises(self):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"):
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '&a'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_query_element_bad_chars_raises(self):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"):
 | 
				
			||||||
 | 
					            url_join(['https://example.com', '?a#=1'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_unstringifiable_first_part_raises(self):
 | 
					    def test_unstringifiable_first_part_raises(self):
 | 
				
			||||||
        class Bad:
 | 
					        class Bad:
 | 
				
			||||||
            def __str__(self):
 | 
					            def __str__(self):
 | 
				
			||||||
@@ -92,8 +122,8 @@ class TestUrlJoinFilter(unittest.TestCase):
 | 
				
			|||||||
        class Bad:
 | 
					        class Bad:
 | 
				
			||||||
            def __str__(self):
 | 
					            def __str__(self):
 | 
				
			||||||
                raise ValueError("boom")
 | 
					                raise ValueError("boom")
 | 
				
			||||||
        with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 2"):
 | 
					        with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 3"):
 | 
				
			||||||
            url_join(['https://example.com', 'ok', Bad()])
 | 
					            url_join(['https://example.com', 'ok', '?a=1', Bad()])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user