mirror of
https://github.com/kevinveenbirkenbach/computer-playbook.git
synced 2025-09-09 11:47:14 +02:00
feat(filter_plugins/url_join): add query parameter support
- Support query elements starting with '?' or '&'
  * First query element normalized to '?', subsequent to '&'
  * Each query element must be exactly one 'key=value' pair
  * Query elements may only appear after path elements
  * Once query starts, no more path elements are allowed
- Extend test suite with success and failure cases for query handling

See: https://chatgpt.com/share/68b537ea-d198-800f-927a-940c4de832f2
This commit is contained in:
@@ -2,6 +2,10 @@
|
|||||||
Ansible filter plugin that safely joins URL components from a list.
|
Ansible filter plugin that safely joins URL components from a list.
|
||||||
- Requires a valid '<scheme>://' in the first element (any RFC-3986-ish scheme)
|
- Requires a valid '<scheme>://' in the first element (any RFC-3986-ish scheme)
|
||||||
- Preserves the double slash after the scheme, collapses other duplicate slashes
|
- Preserves the double slash after the scheme, collapses other duplicate slashes
|
||||||
|
- Supports query parts introduced by elements starting with '?' or '&'
|
||||||
|
* first query element uses '?', subsequent use '&' (regardless of given prefix)
|
||||||
|
* each query element must be exactly one 'key=value' pair
|
||||||
|
* query elements may only appear after path elements; once query starts, no more path parts
|
||||||
- Raises specific AnsibleFilterError messages for common misuse
|
- Raises specific AnsibleFilterError messages for common misuse
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -9,6 +13,7 @@ import re
|
|||||||
from ansible.errors import AnsibleFilterError
|
from ansible.errors import AnsibleFilterError
|
||||||
|
|
||||||
_SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$')
|
_SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$')
|
||||||
|
_QUERY_PAIR_RE = re.compile(r'^[^&=?#]+=[^&?#]*$') # key=value (no '&', no extra '?' or '#')
|
||||||
|
|
||||||
def _to_str_or_error(obj, index):
|
def _to_str_or_error(obj, index):
|
||||||
"""Cast to str, raising a specific AnsibleFilterError with index context."""
|
"""Cast to str, raising a specific AnsibleFilterError with index context."""
|
||||||
@@ -21,22 +26,20 @@ def _to_str_or_error(obj, index):
|
|||||||
|
|
||||||
def url_join(parts):
|
def url_join(parts):
|
||||||
"""
|
"""
|
||||||
Join a list of URL parts, URL-aware.
|
Join a list of URL parts, URL-aware (scheme, path, query).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
parts (list): List/tuple of URL segments. First element MUST include '<scheme>://'.
|
parts (list|tuple): URL segments. First element MUST include '<scheme>://'.
|
||||||
|
Path elements are plain strings.
|
||||||
|
Query elements must start with '?' or '&' and contain exactly one 'key=value'.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: Joined URL.
|
str: Joined URL.
|
||||||
|
|
||||||
Raises (all via AnsibleFilterError with specific messages):
|
Raises:
|
||||||
- Input is None/empty
|
AnsibleFilterError: with specific, descriptive messages.
|
||||||
- Input is not a list/tuple
|
|
||||||
- First element missing/invalid scheme
|
|
||||||
- Part cannot be converted to string (includes index)
|
|
||||||
- Additional scheme found in a later element
|
|
||||||
"""
|
"""
|
||||||
# Basic input validation
|
# --- basic input validation ---
|
||||||
if parts is None:
|
if parts is None:
|
||||||
raise AnsibleFilterError("url_join: parts must be a non-empty list; got None")
|
raise AnsibleFilterError("url_join: parts must be a non-empty list; got None")
|
||||||
if not isinstance(parts, (list, tuple)):
|
if not isinstance(parts, (list, tuple)):
|
||||||
@@ -46,7 +49,7 @@ def url_join(parts):
|
|||||||
if len(parts) == 0:
|
if len(parts) == 0:
|
||||||
raise AnsibleFilterError("url_join: parts must be a non-empty list")
|
raise AnsibleFilterError("url_join: parts must be a non-empty list")
|
||||||
|
|
||||||
# First element must carry the scheme
|
# --- first element must carry a scheme ---
|
||||||
first_raw = parts[0]
|
first_raw = parts[0]
|
||||||
if first_raw is None:
|
if first_raw is None:
|
||||||
raise AnsibleFilterError(
|
raise AnsibleFilterError(
|
||||||
@@ -64,36 +67,76 @@ def url_join(parts):
|
|||||||
scheme = m.group(1) # e.g., 'https://', 'ftp://', 'myapp+v1://'
|
scheme = m.group(1) # e.g., 'https://', 'ftp://', 'myapp+v1://'
|
||||||
after_scheme = m.group(2).lstrip('/') # strip only leading slashes right after scheme
|
after_scheme = m.group(2).lstrip('/') # strip only leading slashes right after scheme
|
||||||
|
|
||||||
# Normalize all parts to strings (with index-aware errors)
|
# --- iterate parts: collect path parts until first query part; then only query parts allowed ---
|
||||||
normalized = []
|
path_parts = []
|
||||||
|
query_pairs = []
|
||||||
|
in_query = False
|
||||||
|
|
||||||
for i, p in enumerate(parts):
|
for i, p in enumerate(parts):
|
||||||
if p is None:
|
if p is None:
|
||||||
# Skip None parts silently (like path_join behavior)
|
# skip None silently (consistent with path_join-ish behavior)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
s = _to_str_or_error(p, i)
|
s = _to_str_or_error(p, i)
|
||||||
|
|
||||||
|
# disallow additional scheme in later parts
|
||||||
if i > 0 and "://" in s:
|
if i > 0 and "://" in s:
|
||||||
raise AnsibleFilterError(
|
raise AnsibleFilterError(
|
||||||
f"url_join: only the first element may contain a scheme; part at index {i} "
|
f"url_join: only the first element may contain a scheme; part at index {i} "
|
||||||
f"looks like a URL with scheme ('{s}')."
|
f"looks like a URL with scheme ('{s}')."
|
||||||
)
|
)
|
||||||
normalized.append(s)
|
|
||||||
|
|
||||||
# Replace first element with remainder after scheme
|
# first element: replace with remainder after scheme and continue
|
||||||
if normalized:
|
if i == 0:
|
||||||
normalized[0] = after_scheme
|
s = after_scheme
|
||||||
|
|
||||||
|
# check if this is a query element (starts with ? or &)
|
||||||
|
if s.startswith('?') or s.startswith('&'):
|
||||||
|
in_query = True
|
||||||
|
raw_pair = s[1:] # strip the leading ? or &
|
||||||
|
if raw_pair == '':
|
||||||
|
raise AnsibleFilterError(
|
||||||
|
f"url_join: query element at index {i} is empty; expected '?key=value' or '&key=value'"
|
||||||
|
)
|
||||||
|
# Disallow multiple pairs in a single element; enforce exactly one key=value
|
||||||
|
if '&' in raw_pair:
|
||||||
|
raise AnsibleFilterError(
|
||||||
|
f"url_join: query element at index {i} must contain exactly one 'key=value' pair "
|
||||||
|
f"without '&'; got '{s}'"
|
||||||
|
)
|
||||||
|
if not _QUERY_PAIR_RE.match(raw_pair):
|
||||||
|
raise AnsibleFilterError(
|
||||||
|
f"url_join: query element at index {i} must match 'key=value' (no extra '?', '&', '#'); got '{s}'"
|
||||||
|
)
|
||||||
|
query_pairs.append(raw_pair)
|
||||||
else:
|
else:
|
||||||
# This can only happen if all parts were None, but we gated that earlier
|
# non-query element
|
||||||
return scheme
|
if in_query:
|
||||||
|
# once query started, no more path parts allowed
|
||||||
|
raise AnsibleFilterError(
|
||||||
|
f"url_join: path element found at index {i} after query parameters started; "
|
||||||
|
f"query parts must come last"
|
||||||
|
)
|
||||||
|
# normal path part: strip slashes to avoid duplicate '/'
|
||||||
|
path_parts.append(s.strip('/'))
|
||||||
|
|
||||||
# Strip slashes at both ends of each part, then filter out empties
|
# normalize path: remove empty chunks
|
||||||
stripped = [p.strip('/') for p in normalized]
|
path_parts = [p for p in path_parts if p != '']
|
||||||
stripped = [p for p in stripped if p != '']
|
|
||||||
|
|
||||||
# If everything is empty after stripping, return just the scheme
|
# --- build result ---
|
||||||
if not stripped:
|
# path portion
|
||||||
return scheme
|
if path_parts:
|
||||||
|
joined_path = "/".join(path_parts)
|
||||||
|
base = scheme + joined_path
|
||||||
|
else:
|
||||||
|
# no path beyond scheme
|
||||||
|
base = scheme
|
||||||
|
|
||||||
return scheme + "/".join(stripped)
|
# query portion
|
||||||
|
if query_pairs:
|
||||||
|
base = base + "?" + "&".join(query_pairs)
|
||||||
|
|
||||||
|
return base
|
||||||
|
|
||||||
|
|
||||||
class FilterModule(object):
|
class FilterModule(object):
|
||||||
|
@@ -13,7 +13,7 @@ from url_join import url_join
|
|||||||
|
|
||||||
|
|
||||||
class TestUrlJoinFilter(unittest.TestCase):
|
class TestUrlJoinFilter(unittest.TestCase):
|
||||||
# --- success cases ---
|
# --- success cases (path only) ---
|
||||||
def test_http_basic(self):
|
def test_http_basic(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
url_join(['http://example.com', 'foo', 'bar']),
|
url_join(['http://example.com', 'foo', 'bar']),
|
||||||
@@ -32,31 +32,41 @@ class TestUrlJoinFilter(unittest.TestCase):
|
|||||||
'myapp+v1://host/section/item'
|
'myapp+v1://host/section/item'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_scheme_with_path_in_first(self):
|
def test_only_scheme(self):
|
||||||
|
self.assertEqual(url_join(['https://']), 'https://')
|
||||||
|
|
||||||
|
# --- success cases with query ---
|
||||||
|
def test_query_normalization_first_q_then_amp(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
url_join(['https://example.com/base/', '/deep/', 'leaf']),
|
url_join(['https://example.com', 'api', '?a=1', '&b=2']),
|
||||||
'https://example.com/base/deep/leaf'
|
'https://example.com/api?a=1&b=2'
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_query_ignores_given_prefix_order(self):
|
||||||
|
self.assertEqual(
|
||||||
|
url_join(['https://example.com', '?a=1', '?b=2', '&c=3']),
|
||||||
|
'https://example.com?a=1&b=2&c=3'
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_query_after_path_with_slashes(self):
|
||||||
|
self.assertEqual(
|
||||||
|
url_join(['https://example.com/', '/x/', 'y/', '?q=ok']),
|
||||||
|
'https://example.com/x/y?q=ok'
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_query_with_numeric_value(self):
|
||||||
|
self.assertEqual(
|
||||||
|
url_join(['https://example.com', '?n=123']),
|
||||||
|
'https://example.com?n=123'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_none_in_list(self):
|
def test_none_in_list(self):
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
url_join(['https://example.com', None, 'foo']),
|
url_join(['https://example.com', None, 'foo', None, '?a=1', None]),
|
||||||
'https://example.com/foo'
|
'https://example.com/foo?a=1'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_numeric_parts(self):
|
# --- error cases ---
|
||||||
self.assertEqual(
|
|
||||||
url_join(['https://example.com', 123, '456']),
|
|
||||||
'https://example.com/123/456'
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_only_scheme_returns_scheme(self):
|
|
||||||
self.assertEqual(
|
|
||||||
url_join(['https://']),
|
|
||||||
'https://'
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- error cases with specific messages ---
|
|
||||||
def test_none_input_raises(self):
|
def test_none_input_raises(self):
|
||||||
with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"):
|
with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"):
|
||||||
url_join(None)
|
url_join(None)
|
||||||
@@ -81,6 +91,26 @@ class TestUrlJoinFilter(unittest.TestCase):
|
|||||||
with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"):
|
with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"):
|
||||||
url_join(['https://example.com', 'https://elsewhere'])
|
url_join(['https://example.com', 'https://elsewhere'])
|
||||||
|
|
||||||
|
def test_path_after_query_raises(self):
|
||||||
|
with self.assertRaisesRegex(AnsibleFilterError, r"path element .* after query parameters started"):
|
||||||
|
url_join(['https://example.com', '?a=1', 'still/path'])
|
||||||
|
|
||||||
|
def test_query_element_empty_raises(self):
|
||||||
|
with self.assertRaisesRegex(AnsibleFilterError, r"query element .* is empty"):
|
||||||
|
url_join(['https://example.com', '?'])
|
||||||
|
|
||||||
|
def test_query_element_multiple_pairs_raises(self):
|
||||||
|
with self.assertRaisesRegex(AnsibleFilterError, r"must contain exactly one 'key=value' pair"):
|
||||||
|
url_join(['https://example.com', '?a=1&b=2'])
|
||||||
|
|
||||||
|
def test_query_element_missing_equal_raises(self):
|
||||||
|
with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"):
|
||||||
|
url_join(['https://example.com', '&a'])
|
||||||
|
|
||||||
|
def test_query_element_bad_chars_raises(self):
|
||||||
|
with self.assertRaisesRegex(AnsibleFilterError, r"must match 'key=value'"):
|
||||||
|
url_join(['https://example.com', '?a#=1'])
|
||||||
|
|
||||||
def test_unstringifiable_first_part_raises(self):
|
def test_unstringifiable_first_part_raises(self):
|
||||||
class Bad:
|
class Bad:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
@@ -92,8 +122,8 @@ class TestUrlJoinFilter(unittest.TestCase):
|
|||||||
class Bad:
|
class Bad:
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
raise ValueError("boom")
|
raise ValueError("boom")
|
||||||
with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 2"):
|
with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 3"):
|
||||||
url_join(['https://example.com', 'ok', Bad()])
|
url_join(['https://example.com', 'ok', '?a=1', Bad()])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
Reference in New Issue
Block a user