From f1870c07beda1abd36de5a52d24af7d144feb504 Mon Sep 17 00:00:00 2001 From: Kevin Veen-Birkenbach Date: Mon, 1 Sep 2025 08:06:48 +0200 Subject: [PATCH] refactor(filter_plugins/url_join): enforce mandatory scheme and raise specific AnsibleFilterError messages Improved url_join filter: - Requires first element to contain a valid '://' - Raises specific errors for None, empty list, wrong type, missing scheme, extra schemes in later parts, or string conversion failures - Provides clearer error messages with index context in parts See: https://chatgpt.com/share/68b537ea-d198-800f-927a-940c4de832f2 --- filter_plugins/url_join.py | 103 +++++++++++++++++++++ tests/unit/filter_plugins/test_url_join.py | 100 ++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 filter_plugins/url_join.py create mode 100644 tests/unit/filter_plugins/test_url_join.py diff --git a/filter_plugins/url_join.py b/filter_plugins/url_join.py new file mode 100644 index 00000000..253c5d8f --- /dev/null +++ b/filter_plugins/url_join.py @@ -0,0 +1,103 @@ +""" +Ansible filter plugin that safely joins URL components from a list. +- Requires a valid '://' in the first element (any RFC-3986-ish scheme) +- Preserves the double slash after the scheme, collapses other duplicate slashes +- Raises specific AnsibleFilterError messages for common misuse +""" + +import re +from ansible.errors import AnsibleFilterError + +_SCHEME_RE = re.compile(r'^([a-zA-Z][a-zA-Z0-9+.\-]*://)(.*)$') + +def _to_str_or_error(obj, index): + """Cast to str, raising a specific AnsibleFilterError with index context.""" + try: + return str(obj) + except Exception as e: + raise AnsibleFilterError( + f"url_join: unable to convert part at index {index} to string: {e}" + ) + +def url_join(parts): + """ + Join a list of URL parts, URL-aware. + + Args: + parts (list): List/tuple of URL segments. First element MUST include '://'. + + Returns: + str: Joined URL. + + Raises (all via AnsibleFilterError with specific messages): + - Input is None/empty + - Input is not a list/tuple + - First element missing/invalid scheme + - Part cannot be converted to string (includes index) + - Additional scheme found in a later element + """ + # Basic input validation + if parts is None: + raise AnsibleFilterError("url_join: parts must be a non-empty list; got None") + if not isinstance(parts, (list, tuple)): + raise AnsibleFilterError( + f"url_join: parts must be a list/tuple; got {type(parts).__name__}" + ) + if len(parts) == 0: + raise AnsibleFilterError("url_join: parts must be a non-empty list") + + # First element must carry the scheme + first_raw = parts[0] + if first_raw is None: + raise AnsibleFilterError( + "url_join: first element must include a scheme like 'https://'; got None" + ) + + first_str = _to_str_or_error(first_raw, 0) + m = _SCHEME_RE.match(first_str) + if not m: + raise AnsibleFilterError( + "url_join: first element must start with '://', e.g. 'https://example.com'; " + f"got '{first_str}'" + ) + + scheme = m.group(1) # e.g., 'https://', 'ftp://', 'myapp+v1://' + after_scheme = m.group(2).lstrip('/') # strip only leading slashes right after scheme + + # Normalize all parts to strings (with index-aware errors) + normalized = [] + for i, p in enumerate(parts): + if p is None: + # Skip None parts silently (like path_join behavior) + continue + s = _to_str_or_error(p, i) + if i > 0 and "://" in s: + raise AnsibleFilterError( + f"url_join: only the first element may contain a scheme; part at index {i} " + f"looks like a URL with scheme ('{s}')." + ) + normalized.append(s) + + # Replace first element with remainder after scheme + if normalized: + normalized[0] = after_scheme + else: + # This can only happen if all parts were None, but we gated that earlier + return scheme + + # Strip slashes at both ends of each part, then filter out empties + stripped = [p.strip('/') for p in normalized] + stripped = [p for p in stripped if p != ''] + + # If everything is empty after stripping, return just the scheme + if not stripped: + return scheme + + return scheme + "/".join(stripped) + + +class FilterModule(object): + def filters(self): + return { + 'url_join': url_join, + } diff --git a/tests/unit/filter_plugins/test_url_join.py b/tests/unit/filter_plugins/test_url_join.py new file mode 100644 index 00000000..8a98fcc7 --- /dev/null +++ b/tests/unit/filter_plugins/test_url_join.py @@ -0,0 +1,100 @@ +import unittest +import sys +import os + +# Ensure filter_plugins directory is on the path +sys.path.insert( + 0, + os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../filter_plugins')) +) + +from ansible.errors import AnsibleFilterError +from url_join import url_join + + +class TestUrlJoinFilter(unittest.TestCase): + # --- success cases --- + def test_http_basic(self): + self.assertEqual( + url_join(['http://example.com', 'foo', 'bar']), + 'http://example.com/foo/bar' + ) + + def test_https_slashes(self): + self.assertEqual( + url_join(['https://example.com/', '/api/', '/v1/', '/users/']), + 'https://example.com/api/v1/users' + ) + + def test_custom_scheme(self): + self.assertEqual( + url_join(['myapp+v1://host/', '//section', 'item']), + 'myapp+v1://host/section/item' + ) + + def test_scheme_with_path_in_first(self): + self.assertEqual( + url_join(['https://example.com/base/', '/deep/', 'leaf']), + 'https://example.com/base/deep/leaf' + ) + + def test_none_in_list(self): + self.assertEqual( + url_join(['https://example.com', None, 'foo']), + 'https://example.com/foo' + ) + + def test_numeric_parts(self): + self.assertEqual( + url_join(['https://example.com', 123, '456']), + 'https://example.com/123/456' + ) + + def test_only_scheme_returns_scheme(self): + self.assertEqual( + url_join(['https://']), + 'https://' + ) + + # --- error cases with specific messages --- + def test_none_input_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list; got None"): + url_join(None) + + def test_empty_list_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a non-empty list"): + url_join([]) + + def test_non_list_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"parts must be a list/tuple; got str"): + url_join("https://example.com") + + def test_first_none_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"first element must include a scheme"): + url_join([None, 'foo']) + + def test_no_scheme_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"must start with '://'"): + url_join(['example.com', 'foo']) + + def test_additional_scheme_in_later_part_raises(self): + with self.assertRaisesRegex(AnsibleFilterError, r"only the first element may contain a scheme"): + url_join(['https://example.com', 'https://elsewhere']) + + def test_unstringifiable_first_part_raises(self): + class Bad: + def __str__(self): + raise ValueError("boom") + with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 0"): + url_join([Bad(), 'x']) + + def test_unstringifiable_later_part_raises(self): + class Bad: + def __str__(self): + raise ValueError("boom") + with self.assertRaisesRegex(AnsibleFilterError, r"unable to convert part at index 2"): + url_join(['https://example.com', 'ok', Bad()]) + + +if __name__ == '__main__': + unittest.main()