Skip to content

Commit

Permalink
add workaround patch of enum with colon failing CWL validation (relat…
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault committed Nov 23, 2024
1 parent bc032fa commit 39b9570
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 11 deletions.
5 changes: 5 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ Changes:

Fixes:
------
- Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times)
leading to their invalid interpretation as namespaced strings (i.e.: ``<ns>:<value>``), in turn failing validation
and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect
the expected URI representation of ``enum`` values by the `CWL` parser (relates to
`common-workflow-language/cwltool#2071 <https://github.com/common-workflow-language/cwltool/issues/2071>`_).
- Fix `CWL` conversion from a `OGC API - Processes` definition specifying an `I/O` with ``schema`` explicitly
indicating a ``type: array`` and nested ``enum``, even if ``minOccurs: 1`` is omitted or explicitly set.
- Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation.
Expand Down
90 changes: 87 additions & 3 deletions tests/processes/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,36 +2236,42 @@ def test_ogcapi2cwl_process_without_extra():


@pytest.mark.parametrize(
["input_str", "input_int", "input_float"],
["input_str", "input_int", "input_float", "input_time"],
[
# OpenAPI schema references
(
{"schema": {"type": "string", "enum": ["a", "b", "c"]}},
{"schema": {"type": "integer", "enum": [1, 2, 3]}},
{"schema": {"type": "number", "format": "float", "enum": [1.2, 3.4]}},
{"schema": {"type": "string", "format": "time", "enum": ["12:00", "24:00"]}},
),
# OGC-API input definitions
(
{"data_type": "string", "allowed_values": ["a", "b", "c"]},
{"data_type": "integer", "allowed_values": [1, 2, 3]},
{"data_type": "float", "allowed_values": [1.2, 3.4]},
{"data_type": "string", "allowed_values": ["12:00", "24:00"]},
),
]
)
def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float):
def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, input_time):
"""
Test that a :term:`CWL` with pseudo-``Enum`` type has the necessary :term:`CWL` requirements to perform validation.
Test that a :term:`CWL` with pseudo-``enum`` type has the necessary :term:`CWL` requirements to perform validation.
.. seealso::
- :func:`test_any2cwl_io_enum_convert`
- :func:`test_any2cwl_io_enum_validate`
- :func:`weaver.processes.convert._convert_cwl_io_enum`
- :func:`weaver.processes.convert._get_cwl_js_value_from`
- :func:`weaver.processes.convert._patch_cwl_enum_js_requirement`
"""
href = "https://remote-server.com/processes/test-process"
body = {
"inputs": {
"enum-str": input_str,
"enum-int": input_int,
"enum-float": input_float,
"enum-time": input_time,
},
"outputs": {
"output": {"schema": {"type": "string", "contentMediaType": ContentType.TEXT_PLAIN}},
Expand Down Expand Up @@ -2295,6 +2301,84 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float):
assert "values.includes(self)" in cwl_value_from
assert "self.every(item => values.includes(item))" not in cwl_value_from

assert cwl["inputs"]["enum-time"]["type"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]}
assert "inputBinding" not in cwl["inputs"]["enum-time"]


@pytest.mark.parametrize(
["cwl_io", "wps_io_expected"],
[
(
{
"name": "test",
"type": "enum",
"symbols": ["#12:00", "#24:00"],
},
{
"id": "test",
"allowed_values": ["12:00", "24:00"],
}
),
(
{
"name": "test",
"type": {
"type": "enum",
"symbols": ["#12:00", "#24:00"],
}
},
{
"id": "test",
"allowed_values": ["12:00", "24:00"],
}
),
(
{
"name": "test",
"type": {
"type": "array",
"items": {
"type": "enum",
"symbols": ["#12:00", "#24:00"],
}
}
},
{
"id": "test",
"allowed_values": ["12:00", "24:00"],
"min_occurs": 1,
}
),
(
{
"name": "test",
"type": [
"null",
{
"type": "array",
"items": {
"type": "enum",
"symbols": ["#12:00", "#24:00"],
}
}
]
},
{
"id": "test",
"allowed_values": ["12:00", "24:00"],
"min_occurs": 1,
}
)
]
)
def test_patched_cwl_enum_colon_back_conversion(cwl_io, wps_io_expected):
"""
Given a :term:`CWL` ``enum`` that contains ``:`` characters patched with prefixed ``#`` test the inverse conversion.
"""
wps_io = cwl2wps_io(cwl_io, IO_INPUT)
wps_allow = [allow.value for allow in wps_io.allowed_values]
assert wps_allow == ["12:00", "24:00"]


@mocked_remote_server_requests_wps1([
resources.TEST_REMOTE_SERVER_URL,
Expand Down
2 changes: 1 addition & 1 deletion weaver/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ def get_cwl_file_format(media_type, make_reference=False, must_exist=True, allow
``must_exist=False`` before providing it to the `CWL` I/O definition. Setting ``must_exist=False`` should be
used only for literal string comparison or pre-processing steps to evaluate formats.
:param media_type: Some reference, namespace'd or literal (possibly extended) media-type string.
:param media_type: Some reference, namespaced or literal (possibly extended) media-type string.
:param make_reference: Construct the full URL reference to the resolved media-type. Otherwise, return tuple details.
:param must_exist:
Return result only if it can be resolved to an official media-type (or synonym if enabled), otherwise ``None``.
Expand Down
48 changes: 41 additions & 7 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,10 +857,12 @@ def _get_cwl_js_value_from(cwl_io_symbols, allow_unique, allow_array):
def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, allow_array):
# type: (Union[str, Type[null]], List[AnyValueType], IO_Select_Type, bool, bool) -> CWL_IO_Type
"""
Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``Enum``-like functionality.
Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``enum``-like functionality.
In the event of an explicit ``string`` as base type, :term:`CWL` directly supports ``type: enum``. Other basic
types are not directly supported, and must instead perform manual validation against the set of allowed values.
types are not directly supported, and must instead perform manual validation against the set of allowed values,
using JavaScript evaluation applied by ``valueFrom`` against the the submitted values to replicate the automatic
behavior performed by ``enum`` type.
.. seealso::
- https://github.com/common-workflow-language/cwl-v1.2/issues/267
Expand All @@ -871,15 +873,35 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a
Because ``valueFrom`` can only be used with ``inputBinding``, any output providing a set of allowed values
that are not ``string``-based will be ignored when converted to :term:`CWL` :term:`I/O`.
.. seealso::
- :func:`_get_cwl_js_value_from` defines the ``enum``-like ``valueFrom`` checks
- :func:`_patch_cwl_enum_js_requirement` must be called on the entire :term:`CWL` to inject the
relevant :data:`CWL_REQUIREMENT_INLINE_JAVASCRIPT`, since it might not be defined in the original :term:`CWL`.
Another edge-case that happens even if the base type is a ``string`` is when the enum ``symbols``
contain an entry with a ``:`` character (e.g.: as in the case of a time ``HH:MM`` string).
In such case, :mod:`schema_salad` incorrectly parses it as if a namespaced :term:`URL` (i.e.: ``<ns>:<value>``)
was specified. Because of this, the symbol strings get extended to an unresolvable namespace, which leads to a
partial and incorrect enum value (e.g.: ``abc:def`` becomes ``input-id#def``). This causes the resulting ``enum``
to never accept the submitted values, or worst, causes the entire :term:`CWL` to fail validation if duplicate
values are obtained (e.g.: ``12:00`` and ``24:00`` both extend to ``input-id#00``, causing a duplicate ``00``).
To handle this case, the ``symbols`` can be updated with a prefixed ``#`` character, making :mod:`schema_salad`
interpret it as if it was already a :term:`URI` relative to the input, which is what it aims to generate, and
therefore interprets the rest of the string (including the ``:``) literally.
.. seealso::
- https://github.com/common-workflow-language/cwltool/issues/2071
:param cwl_io_type: Basic type for which allowed values should apply.
:param cwl_io_symbols: Allowed values to restrict the :term:`I/O` definition.
:return: Converted definition as CWL Enum or with relevant value validation as applicable for the type.
"""
if cwl_io_type not in PACKAGE_BASIC_TYPES:
return {}
if cwl_io_type == "string":
any_colon_symbol = any(":" in str(symbol) for symbol in cwl_io_symbols)
if cwl_io_type == "string" and not any_colon_symbol:
return {"type": {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols}}
if cwl_io_type not in PACKAGE_NUMERIC_TYPES:
if cwl_io_type not in PACKAGE_NUMERIC_TYPES and not (cwl_io_type == "string" and any_colon_symbol):
LOGGER.warning(
"Could not resolve conversion of CWL I/O as Enum for type '%s'. "
"Ignoring value validation against specified allowed values: %s.",
Expand All @@ -889,9 +911,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a
return {"type": cwl_io_type}

if not (
(all(isinstance(value, bool) for value in cwl_io_symbols) and cwl_io_type == "boolean") or
(all(isinstance(value, int) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_INTEGER_TYPES) or
(all(isinstance(value, float) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_FLOATING_TYPES)
(cwl_io_type == "string" and all(isinstance(value, str) for value in cwl_io_symbols)) or
(cwl_io_type == "boolean" and all(isinstance(value, bool) for value in cwl_io_symbols)) or
(cwl_io_type in PACKAGE_INTEGER_TYPES and all(isinstance(value, int) for value in cwl_io_symbols)) or
(cwl_io_type in PACKAGE_FLOATING_TYPES and all(isinstance(value, float) for value in cwl_io_symbols))
):
LOGGER.warning(
"Incompatible CWL I/O type '%s' detected for specified allowed values: %s. "
Expand All @@ -901,6 +924,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a
)
cwl_io_type = "Any"

if any_colon_symbol:
cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols]
return {"type": cwl_io_type, "symbols": cwl_io_symbols}

if io_select != IO_INPUT:
return {"type": cwl_io_type}

Expand Down Expand Up @@ -1391,6 +1418,13 @@ def parse_cwl_enum_type(io_info):
f"Unsupported I/O 'enum' base type: `{type(first_allow)!s}`, from definition: `{io_info!r}`."
)

if io_type == "string":
# un-patch enum causing CWL parsing errors caused by ':' (see _convert_cwl_io_enum)
io_allow = [
allow[1:] if allow.startswith("#") and ":" in allow else allow
for allow in io_allow
]

io_def = CWLIODefinition(
type=io_type, # type: ignore
enum=True,
Expand Down

0 comments on commit 39b9570

Please sign in to comment.