mirror of
				https://github.com/kevinveenbirkenbach/computer-playbook.git
				synced 2025-10-31 18:29:21 +00:00 
			
		
		
		
	feat(filter_plugins/url_join): add query parameter support
- Support query elements starting with '?' or '&' * First query element normalized to '?', subsequent to '&' * Each query element must be exactly one 'key=value' pair * Query elements may only appear after path elements * Once query starts, no more path elements are allowed - Extend test suite with success and failure cases for query handling See: https://chatgpt.com/share/68b537ea-d198-800f-927a-940c4de832f2
This commit is contained in:
		| @@ -2,6 +2,10 @@ | ||||
| Ansible filter plugin that safely joins URL components from a list. | ||||
| - Requires a valid '<scheme>://' in the first element (any RFC-3986-ish scheme) | ||||
| - Preserves the double slash after the scheme, collapses other duplicate slashes | ||||
| - Supports query parts introduced by elements starting with '?' or '&' | ||||
|   * first query element uses '?', subsequent use '&' (regardless of given prefix) | ||||
|   * each query element must be exactly one 'key=value' pair | ||||
|   * query elements may only appear after path elements; once query starts, no more path parts | ||||
| - Raises specific AnsibleFilterError messages for common misuse | ||||
| """ | ||||
|  | ||||
| @@ -9,6 +13,7 @@ import re | ||||
| from ansible.errors import AnsibleFilterError | ||||
|  | ||||
| _SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$') | ||||
| _QUERY_PAIR_RE = re.compile(r'^[^&=?#]+=[^&?#]*$')  # key=value (no '&', no extra '?' or '#') | ||||
|  | ||||
| def _to_str_or_error(obj, index): | ||||
|     """Cast to str, raising a specific AnsibleFilterError with index context.""" | ||||
| @@ -21,22 +26,20 @@ def _to_str_or_error(obj, index): | ||||
|  | ||||
| def url_join(parts): | ||||
|     """ | ||||
|     Join a list of URL parts, URL-aware. | ||||
|     Join a list of URL parts, URL-aware (scheme, path, query). | ||||
|  | ||||
|     Args: | ||||
|         parts (list): List/tuple of URL segments. First element MUST include '<scheme>://'. | ||||
|         parts (list|tuple): URL segments. First element MUST include '<scheme>://'. | ||||
|             Path elements are plain strings. | ||||
|             Query elements must start with '?' or '&' and contain exactly one 'key=value'. | ||||
|  | ||||
|     Returns: | ||||
|         str: Joined URL. | ||||
|  | ||||
|     Raises (all via AnsibleFilterError with specific messages): | ||||
|         - Input is None/empty | ||||
|         - Input is not a list/tuple | ||||
|         - First element missing/invalid scheme | ||||
|         - Part cannot be converted to string (includes index) | ||||
|         - Additional scheme found in a later element | ||||
|     Raises: | ||||
|         AnsibleFilterError: with specific, descriptive messages. | ||||
|     """ | ||||
|     # Basic input validation | ||||
|     # --- basic input validation --- | ||||
|     if parts is None: | ||||
|         raise AnsibleFilterError("url_join: parts must be a non-empty list; got None") | ||||
|     if not isinstance(parts, (list, tuple)): | ||||
| @@ -46,7 +49,7 @@ def url_join(parts): | ||||
|     if len(parts) == 0: | ||||
|         raise AnsibleFilterError("url_join: parts must be a non-empty list") | ||||
|  | ||||
|     # First element must carry the scheme | ||||
|     # --- first element must carry a scheme --- | ||||
|     first_raw = parts[0] | ||||
|     if first_raw is None: | ||||
|         raise AnsibleFilterError( | ||||
| @@ -64,36 +67,76 @@ def url_join(parts): | ||||
|     scheme = m.group(1)                    # e.g., 'https://', 'ftp://', 'myapp+v1://' | ||||
|     after_scheme = m.group(2).lstrip('/')  # strip only leading slashes right after scheme | ||||
|  | ||||
|     # Normalize all parts to strings (with index-aware errors) | ||||
|     normalized = [] | ||||
|     # --- iterate parts: collect path parts until first query part; then only query parts allowed --- | ||||
|     path_parts = [] | ||||
|     query_pairs = [] | ||||
|     in_query = False | ||||
|  | ||||
|     for i, p in enumerate(parts): | ||||
|         if p is None: | ||||
|             # Skip None parts silently (like path_join behavior) | ||||
|             # skip None silently (consistent with path_join-ish behavior) | ||||
|             continue | ||||
|  | ||||
|         s = _to_str_or_error(p, i) | ||||
|  | ||||
|         # disallow additional scheme in later parts | ||||
|         if i > 0 and "://" in s: | ||||
|             raise AnsibleFilterError( | ||||
|                 f"url_join: only the first element may contain a scheme; part at index {i} " | ||||
|                 f"looks like a URL with scheme ('{s}')." | ||||
|             ) | ||||
|         normalized.append(s) | ||||
|  | ||||
|     # Replace first element with remainder after scheme | ||||
|     if normalized: | ||||
|         normalized[0] = after_scheme | ||||
|         # first element: replace with remainder after scheme and continue | ||||
|         if i == 0: | ||||
|             s = after_scheme | ||||
|  | ||||
|         # check if this is a query element (starts with ? or &) | ||||
|         if s.startswith('?') or s.startswith('&'): | ||||
|             in_query = True | ||||
|             raw_pair = s[1:]  # strip the leading ? or & | ||||
|             if raw_pair == '': | ||||
|                 raise AnsibleFilterError( | ||||
|                     f"url_join: query element at index {i} is empty; expected '?key=value' or '&key=value'" | ||||
|                 ) | ||||
|             # Disallow multiple pairs in a single element; enforce exactly one key=value | ||||
|             if '&' in raw_pair: | ||||
|                 raise AnsibleFilterError( | ||||
|                     f"url_join: query element at index {i} must contain exactly one 'key=value' pair " | ||||
|                     f"without '&'; got '{s}'" | ||||
|                 ) | ||||
|             if not _QUERY_PAIR_RE.match(raw_pair): | ||||
|                 raise AnsibleFilterError( | ||||
|                     f"url_join: query element at index {i} must match 'key=value' (no extra '?', '&', '#'); got '{s}'" | ||||
|                 ) | ||||
|             query_pairs.append(raw_pair) | ||||
|         else: | ||||
|             # non-query element | ||||
|             if in_query: | ||||
|                 # once query started, no more path parts allowed | ||||
|                 raise AnsibleFilterError( | ||||
|                     f"url_join: path element found at index {i} after query parameters started; " | ||||
|                     f"query parts must come last" | ||||
|                 ) | ||||
|             # normal path part: strip slashes to avoid duplicate '/' | ||||
|             path_parts.append(s.strip('/')) | ||||
|  | ||||
|     # normalize path: remove empty chunks | ||||
|     path_parts = [p for p in path_parts if p != ''] | ||||
|  | ||||
|     # --- build result --- | ||||
|     # path portion | ||||
|     if path_parts: | ||||
|         joined_path = "/".join(path_parts) | ||||
|         base = scheme + joined_path | ||||
|     else: | ||||
|         # This can only happen if all parts were None, but we gated that earlier | ||||
|         return scheme | ||||
|         # no path beyond scheme | ||||
|         base = scheme | ||||
|  | ||||
|     # Strip slashes at both ends of each part, then filter out empties | ||||
|     stripped = [p.strip('/') for p in normalized] | ||||
|     stripped = [p for p in stripped if p != ''] | ||||
|     # query portion | ||||
|     if query_pairs: | ||||
|         base = base + "?" + "&".join(query_pairs) | ||||
|  | ||||
|     # If everything is empty after stripping, return just the scheme | ||||
|     if not stripped: | ||||
|         return scheme | ||||
|  | ||||
|     return scheme + "/".join(stripped) | ||||
|     return base | ||||
|  | ||||
|  | ||||
| class FilterModule(object): | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from url_join import url_join | ||||
|  | ||||
|  | ||||
| class TestUrlJoinFilter(unittest.TestCase): | ||||
|     # --- success cases --- | ||||
|     # --- success cases (path only) --- | ||||
|     def test_http_basic(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['http://example.com', 'foo', 'bar']), | ||||
| @@ -32,31 +32,41 @@ class TestUrlJoinFilter(unittest.TestCase): | ||||
|             'myapp+v1://host/section/item' | ||||
|         ) | ||||
|  | ||||
|     def test_scheme_with_path_in_first(self): | ||||
|     def test_only_scheme(self): | ||||
|         self.assertEqual(url_join(['https://']), 'https://') | ||||
|  | ||||
|     # --- success cases with query --- | ||||
|     def test_query_normalization_first_q_then_amp(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com/base/', '/deep/', 'leaf']), | ||||
|             'https://example.com/base/deep/leaf' | ||||
|             url_join(['https://example.com', 'api', '?a=1', '&b=2']), | ||||
|             'https://example.com/api?a=1&b=2' | ||||
|         ) | ||||
|  | ||||
|     def test_query_ignores_given_prefix_order(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com', '?a=1', '?b=2', '&c=3']), | ||||
|             'https://example.com?a=1&b=2&c=3' | ||||
|         ) | ||||
|  | ||||
|     def test_query_after_path_with_slashes(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com/', '/x/', 'y/', '?q=ok']), | ||||
|             'https://example.com/x/y?q=ok' | ||||
|         ) | ||||
|  | ||||
|     def test_query_with_numeric_value(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com', '?n=123']), | ||||
|             'https://example.com?n=123' | ||||
|         ) | ||||
|  | ||||
|     def test_none_in_list(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com', None, 'foo']), | ||||
|             'https://example.com/foo' | ||||
|             url_join(['https://example.com', None, 'foo', None, '?a=1', None]), | ||||
|             'https://example.com/foo?a=1' | ||||
|         ) | ||||
|  | ||||
|     def test_numeric_parts(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://example.com', 123, '456']), | ||||
|             'https://example.com/123/456' | ||||
|         ) | ||||
|  | ||||
|     def test_only_scheme_returns_scheme(self): | ||||
|         self.assertEqual( | ||||
|             url_join(['https://']), | ||||
|             'https://' | ||||
|         ) | ||||
|  | ||||
|     # --- error cases with specific messages --- | ||||
|     # --- error cases --- | ||||
|     def test_none_input_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"): | ||||
|             url_join(None) | ||||
| @@ -81,6 +91,26 @@ class TestUrlJoinFilter(unittest.TestCase): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"): | ||||
|             url_join(['https://example.com', 'https://elsewhere']) | ||||
|  | ||||
|     def test_path_after_query_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"path element .* after query parameters started"): | ||||
|             url_join(['https://example.com', '?a=1', 'still/path']) | ||||
|  | ||||
|     def test_query_element_empty_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"query element .* is empty"): | ||||
|             url_join(['https://example.com', '?']) | ||||
|  | ||||
|     def test_query_element_multiple_pairs_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"must contain exactly one 'key=value' pair"): | ||||
|             url_join(['https://example.com', '?a=1&b=2']) | ||||
|  | ||||
|     def test_query_element_missing_equal_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"): | ||||
|             url_join(['https://example.com', '&a']) | ||||
|  | ||||
|     def test_query_element_bad_chars_raises(self): | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"): | ||||
|             url_join(['https://example.com', '?a#=1']) | ||||
|  | ||||
|     def test_unstringifiable_first_part_raises(self): | ||||
|         class Bad: | ||||
|             def __str__(self): | ||||
| @@ -92,8 +122,8 @@ class TestUrlJoinFilter(unittest.TestCase): | ||||
|         class Bad: | ||||
|             def __str__(self): | ||||
|                 raise ValueError("boom") | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 2"): | ||||
|             url_join(['https://example.com', 'ok', Bad()]) | ||||
|         with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 3"): | ||||
|             url_join(['https://example.com', 'ok', '?a=1', Bad()]) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
		Reference in New Issue
	
	Block a user