From 3bfab9ef8e13942f8f84fef6e08e4db692b31e28 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Mon, 1 Sep 2025 08:16:22 +0200 Subject: [PATCH] feat(filter_plugins/url_join): add query parameter support - Support query elements starting with '?' or '&' * First query element normalized to '?', subsequent to '&' * Each query element must be exactly one 'key=value' pair * Query elements may only appear after path elements * Once query starts, no more path elements are allowed - Extend test suite with success and failure cases for query handling See: https://chatgpt.com/share/68b537ea-d198-800f-927a-940c4de832f2 --- filter_plugins/url_join.py | 97 ++++++++++++++++------ tests/unit/filter_plugins/test_url_join.py | 72 +++++++++++----- 2 files changed, 121 insertions(+), 48 deletions(-) diff --git a/filter_plugins/url_join.py b/filter_plugins/url_join.py index 253c5d8f..bfa62109 100644 --- a/filter_plugins/url_join.py +++ b/filter_plugins/url_join.py @@ -2,6 +2,10 @@ Ansible filter plugin that safely joins URL components from a list. - Requires a valid '://' in the first element (any RFC-3986-ish scheme) - Preserves the double slash after the scheme, collapses other duplicate slashes +- Supports query parts introduced by elements starting with '?' or '&' + * first query element uses '?', subsequent use '&' (regardless of given prefix) + * each query element must be exactly one 'key=value' pair + * query elements may only appear after path elements; once query starts, no more path parts - Raises specific AnsibleFilterError messages for common misuse """ @@ -9,6 +13,7 @@ import re from ansible.errors import AnsibleFilterError _SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$') +_QUERY_PAIR_RE = re.compile(r'^[^&=?#]+=[^&?#]*$') # key=value (no '&', no extra '?' or '#') def _to_str_or_error(obj, index): """Cast to str, raising a specific AnsibleFilterError with index context.""" @@ -21,22 +26,20 @@ def _to_str_or_error(obj, index): def url_join(parts): """ - Join a list of URL parts, URL-aware. + Join a list of URL parts, URL-aware (scheme, path, query). Args: - parts (list): List/tuple of URL segments. First element MUST include '://'. + parts (list|tuple): URL segments. First element MUST include '://'. + Path elements are plain strings. + Query elements must start with '?' or '&' and contain exactly one 'key=value'. Returns: str: Joined URL. - Raises (all via AnsibleFilterError with specific messages): - - Input is None/empty - - Input is not a list/tuple - - First element missing/invalid scheme - - Part cannot be converted to string (includes index) - - Additional scheme found in a later element + Raises: + AnsibleFilterError: with specific, descriptive messages. """ - # Basic input validation + # --- basic input validation --- if parts is None: raise AnsibleFilterError("url_join: parts must be a non-empty list; got None") if not isinstance(parts, (list, tuple)): @@ -46,7 +49,7 @@ def url_join(parts): if len(parts) == 0: raise AnsibleFilterError("url_join: parts must be a non-empty list") - # First element must carry the scheme + # --- first element must carry a scheme --- first_raw = parts[0] if first_raw is None: raise AnsibleFilterError( @@ -64,36 +67,76 @@ def url_join(parts): scheme = m.group(1) # e.g., 'https://', 'ftp://', 'myapp+v1://' after_scheme = m.group(2).lstrip('/') # strip only leading slashes right after scheme - # Normalize all parts to strings (with index-aware errors) - normalized = [] + # --- iterate parts: collect path parts until first query part; then only query parts allowed --- + path_parts = [] + query_pairs = [] + in_query = False + for i, p in enumerate(parts): if p is None: - # Skip None parts silently (like path_join behavior) + # skip None silently (consistent with path_join-ish behavior) continue + s = _to_str_or_error(p, i) + + # disallow additional scheme in later parts if i > 0 and "://" in s: raise AnsibleFilterError( f"url_join: only the first element may contain a scheme; part at index {i} " f"looks like a URL with scheme ('{s}')." ) - normalized.append(s) - # Replace first element with remainder after scheme - if normalized: - normalized[0] = after_scheme + # first element: replace with remainder after scheme and continue + if i == 0: + s = after_scheme + + # check if this is a query element (starts with ? or &) + if s.startswith('?') or s.startswith('&'): + in_query = True + raw_pair = s[1:] # strip the leading ? or & + if raw_pair == '': + raise AnsibleFilterError( + f"url_join: query element at index {i} is empty; expected '?key=value' or '&key=value'" + ) + # Disallow multiple pairs in a single element; enforce exactly one key=value + if '&' in raw_pair: + raise AnsibleFilterError( + f"url_join: query element at index {i} must contain exactly one 'key=value' pair " + f"without '&'; got '{s}'" + ) + if not _QUERY_PAIR_RE.match(raw_pair): + raise AnsibleFilterError( + f"url_join: query element at index {i} must match 'key=value' (no extra '?', '&', '#'); got '{s}'" + ) + query_pairs.append(raw_pair) + else: + # non-query element + if in_query: + # once query started, no more path parts allowed + raise AnsibleFilterError( + f"url_join: path element found at index {i} after query parameters started; " + f"query parts must come last" + ) + # normal path part: strip slashes to avoid duplicate '/' + path_parts.append(s.strip('/')) + + # normalize path: remove empty chunks + path_parts = [p for p in path_parts if p != ''] + + # --- build result --- + # path portion + if path_parts: + joined_path = "/".join(path_parts) + base = scheme + joined_path else: - # This can only happen if all parts were None, but we gated that earlier - return scheme + # no path beyond scheme + base = scheme - # Strip slashes at both ends of each part, then filter out empties - stripped = [p.strip('/') for p in normalized] - stripped = [p for p in stripped if p != ''] + # query portion + if query_pairs: + base = base + "?" + "&".join(query_pairs) - # If everything is empty after stripping, return just the scheme - if not stripped: - return scheme - - return scheme + "/".join(stripped) + return base class FilterModule(object): diff --git a/tests/unit/filter_plugins/test_url_join.py b/tests/unit/filter_plugins/test_url_join.py index 8a98fcc7..3cd09f60 100644 --- a/tests/unit/filter_plugins/test_url_join.py +++ b/tests/unit/filter_plugins/test_url_join.py @@ -13,7 +13,7 @@ from url_join import url_join class TestUrlJoinFilter(unittest.TestCase): - # --- success cases --- + # --- success cases (path only) --- def test_http_basic(self): self.assertEqual( url_join(['http://example.com', 'foo', 'bar']), @@ -32,31 +32,41 @@ class TestUrlJoinFilter(unittest.TestCase): 'myapp+v1://host/section/item' ) - def test_scheme_with_path_in_first(self): + def test_only_scheme(self): + self.assertEqual(url_join(['https://']), 'https://') + + # --- success cases with query --- + def test_query_normalization_first_q_then_amp(self): self.assertEqual( - url_join(['https://example.com/base/', '/deep/', 'leaf']), - 'https://example.com/base/deep/leaf' + url_join(['https://example.com', 'api', '?a=1', '&b=2']), + 'https://example.com/api?a=1&b=2' + ) + + def test_query_ignores_given_prefix_order(self): + self.assertEqual( + url_join(['https://example.com', '?a=1', '?b=2', '&c=3']), + 'https://example.com?a=1&b=2&c=3' + ) + + def test_query_after_path_with_slashes(self): + self.assertEqual( + url_join(['https://example.com/', '/x/', 'y/', '?q=ok']), + 'https://example.com/x/y?q=ok' + ) + + def test_query_with_numeric_value(self): + self.assertEqual( + url_join(['https://example.com', '?n=123']), + 'https://example.com?n=123' ) def test_none_in_list(self): self.assertEqual( - url_join(['https://example.com', None, 'foo']), - 'https://example.com/foo' + url_join(['https://example.com', None, 'foo', None, '?a=1', None]), + 'https://example.com/foo?a=1' ) - def test_numeric_parts(self): - self.assertEqual( - url_join(['https://example.com', 123, '456']), - 'https://example.com/123/456' - ) - - def test_only_scheme_returns_scheme(self): - self.assertEqual( - url_join(['https://']), - 'https://' - ) - - # --- error cases with specific messages --- + # --- error cases --- def test_none_input_raises(self): with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"): url_join(None) @@ -81,6 +91,26 @@ class TestUrlJoinFilter(unittest.TestCase): with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"): url_join(['https://example.com', 'https://elsewhere']) + def test_path_after_query_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"path element .* after query parameters started"): + url_join(['https://example.com', '?a=1', 'still/path']) + + def test_query_element_empty_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"query element .* is empty"): + url_join(['https://example.com', '?']) + + def test_query_element_multiple_pairs_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"must contain exactly one 'key=value' pair"): + url_join(['https://example.com', '?a=1&b=2']) + + def test_query_element_missing_equal_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"): + url_join(['https://example.com', '&a']) + + def test_query_element_bad_chars_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"): + url_join(['https://example.com', '?a#=1']) + def test_unstringifiable_first_part_raises(self): class Bad: def __str__(self): @@ -92,8 +122,8 @@ class TestUrlJoinFilter(unittest.TestCase): class Bad: def __str__(self): raise ValueError("boom") - with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 2"): - url_join(['https://example.com', 'ok', Bad()]) + with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 3"): + url_join(['https://example.com', 'ok', '?a=1', Bad()]) if __name__ == '__main__':