From 04baaa092023ec3e3490b57fc38d1658a0d24872 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 18 Sep 2023 21:32:12 -0400 Subject: [PATCH] =?UTF-8?q?[wip]=20workaround=20fix=20for=20CWL=20type=20[?= =?UTF-8?q?null,=20enum,=20array-enum]=C2=A0without=20explicit=20name=20(r?= =?UTF-8?q?elates=20to=20https://github.com/common-workflow-language/cwlto?= =?UTF-8?q?ol/issues/1908)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.rst | 4 + .../Finch_EnsembleGridPointWetdays/deploy.yml | 287 ++++++++++++++++++ weaver/processes/convert.py | 33 +- weaver/processes/wps_package.py | 51 +++- weaver/typedefs.py | 7 + 5 files changed, 375 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index df920a2b4..23de40308 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -38,6 +38,10 @@ Fixes: Links will only be listed within the returned ``processSummary`` to respect the `OGC API - Processes` schema. - Fix `CLI` not removing embedded ``links`` in ``processSummary`` from ``deploy`` operation response when ``-nL``/``--no-links`` option is specified. +- Fix `CWL` definitions combining nested ``enum`` types as ``["null", , {type: array, items: ]`` without an + explicit ``name`` or ``SchemaDefRequirement`` causing failing ``schema_salad`` resolution under ``cwltool``. A patch + is applied for the moment to inject a temporary ``name`` to let the `CWL` engine succeed schema validation (relates + to `common-workflow-language/cwltool#1908 `_). .. _changes_4.31.0: diff --git a/tests/functional/application-packages/Finch_EnsembleGridPointWetdays/deploy.yml b/tests/functional/application-packages/Finch_EnsembleGridPointWetdays/deploy.yml index eadbae766..c4b5a5ac1 100644 --- a/tests/functional/application-packages/Finch_EnsembleGridPointWetdays/deploy.yml +++ b/tests/functional/application-packages/Finch_EnsembleGridPointWetdays/deploy.yml @@ -1,6 +1,293 @@ processDescription: process: id: Finch_EnsembleGridPointWetdays +# inputs: +# lat: +# schema: +# oneOf: +# - type: string +# - type: array +# items: string +# lon: +# schema: +# oneOf: +# - type: string +# - type: array +# items: string +# start_date: +# schema: +# required: false +# type: string +# end_date: +# schema: +# required: false +# type: string +# ensemble_percentiles: +# schema: +# required: false +# type: string +# default: 10,50,90 +# average: +# schema: +# required: false +# type: boolean +# default: false +# dataset: +# schema: +# required: false +# type: string +# enum: +# - humidex-daily +# - candcs-u5 +# - candcs-u6 +# - bccaqv2 +# default: candcs-u5 +# scenario: +# schema: +# required: false +# oneOf: +# - type: string +# enum: +# - ssp126 +# - rcp85 +# - rcp45 +# - rcp26 +# - ssp585 +# - ssp245 +# - type: array +# items: +# type: string +# enum: +# - ssp126 +# - rcp85 +# - rcp45 +# - rcp26 +# - ssp585 +# - ssp245 +# models: +# schema: +# required: false +# default: all +# oneOf: +# - type: string +# enum: +# - KACE-1-0-G +# - CCSM4 +# - MIROC5 +# - EC-Earth3-Veg +# - TaiESM1 +# - GFDL-ESM4 +# - GFDL-CM3 +# - CanESM5 +# - HadGEM3-GC31-LL +# - INM-CM4-8 +# - IPSL-CM5A-MR +# - EC-Earth3 +# - GFDL-ESM2G +# - humidex_models +# - GFDL-ESM2M +# - MIROC-ESM +# - CSIRO-Mk3-6-0 +# - MPI-ESM-LR +# - NorESM1-M +# - CNRM-CM5 +# - all +# - GISS-E2-1-G +# - 24models +# - MPI-ESM1-2-HR +# - CNRM-ESM2-1 +# - CNRM-CM6-1 +# - CanESM2 +# - FGOALS-g3 +# - NorESM1-ME +# - IPSL-CM6A-LR +# - CMCC-ESM2 +# - pcic12 +# - EC-Earth3-Veg-LR +# - ACCESS-ESM1-5 +# - MRI-CGCM3 +# - MIROC-ESM-CHEM +# - NorESM2-MM +# - bcc-csm1-1-m +# - BNU-ESM +# - UKESM1-0-LL +# - CESM1-CAM5 +# - MIROC-ES2L +# - MRI-ESM2-0 +# - HadGEM2-ES +# - MIROC6 +# - MPI-ESM-MR +# - INM-CM5-0 +# - bcc-csm1-1 +# - BCC-CSM2-MR +# - ACCESS-CM2 +# - NorESM2-LM +# - IPSL-CM5A-LR +# - FGOALS-g2 +# - HadGEM2-AO +# - 26models +# - MPI-ESM1-2-LR +# - KIOST-ESM +# - type: array +# items: +# type: string +# enum: +# - KACE-1-0-G +# - CCSM4 +# - MIROC5 +# - EC-Earth3-Veg +# - TaiESM1 +# - GFDL-ESM4 +# - GFDL-CM3 +# - CanESM5 +# - HadGEM3-GC31-LL +# - INM-CM4-8 +# - IPSL-CM5A-MR +# - EC-Earth3 +# - GFDL-ESM2G +# - humidex_models +# - GFDL-ESM2M +# - MIROC-ESM +# - CSIRO-Mk3-6-0 +# - MPI-ESM-LR +# - NorESM1-M +# - CNRM-CM5 +# - all +# - GISS-E2-1-G +# - 24models +# - MPI-ESM1-2-HR +# - CNRM-ESM2-1 +# - CNRM-CM6-1 +# - CanESM2 +# - FGOALS-g3 +# - NorESM1-ME +# - IPSL-CM6A-LR +# - CMCC-ESM2 +# - pcic12 +# - EC-Earth3-Veg-LR +# - ACCESS-ESM1-5 +# - MRI-CGCM3 +# - MIROC-ESM-CHEM +# - NorESM2-MM +# - bcc-csm1-1-m +# - BNU-ESM +# - UKESM1-0-LL +# - CESM1-CAM5 +# - MIROC-ES2L +# - MRI-ESM2-0 +# - HadGEM2-ES +# - MIROC6 +# - MPI-ESM-MR +# - INM-CM5-0 +# - bcc-csm1-1 +# - BCC-CSM2-MR +# - ACCESS-CM2 +# - NorESM2-LM +# - IPSL-CM5A-LR +# - FGOALS-g2 +# - HadGEM2-AO +# - 26models +# - MPI-ESM1-2-LR +# - KIOST-ESM +# thresh: +# schema: +# required: false +# type: string +# default: "1.0 mm/day" +# freq: +# schema: +# required: false +# default: YS +# type: string +# enum: +# - YS +# - QS-DEC +# - AS-JUL +# - MS +# op: +# schema: +# type: string +# required: false +# default: '>=' +# enum: +# - '>=' +# - '>' +# - gt +# - ge +# month: +# schema: +# required: false +# oneOf: +# - type: integer +# enum: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] +# - type: array +# items: +# type: integer +# enum: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ] +# season: +# schema: +# required: false +# type: string +# enum: +# - SON +# - MAM +# - JJA +# - DJF +# check_missing: +# schema: +# required: false +# type: string +# enum: +# - pct +# - at_least_n +# - wmo +# - skip +# - from_context +# - any +# missing_options: +# schema: +# required: false +# type: string +# contentMediaType: application/json +# cf_compliance: +# schema: +# required: false +# default: warn +# type: string +# enum: +# - raise +# - log +# - warn +# data_validation: +# schema: +# required: false +# default: raise +# type: string +# enum: +# - raise +# - log +# - warn +# output_name: +# schema: +# required: false +# type: string +# output_format: +# schema: +# required: false +# default: netcdf +# type: string +# enum: +# - csv +# - netcdf +# csv_precision: +# schema: +# required: false +# type: integer +# outputs: +# output: +# schema: +# type: string +# contentMediaType: text/plain jobControlOptions: - async-execute outputTransmission: diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 9016e0cb0..f4db64649 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -122,14 +122,14 @@ AnySettingsContainer, AnyValueType, CWL, - CWL_Input_Type, CWL_IO_ComplexType, CWL_IO_DataType, CWL_IO_EnumSymbols, CWL_IO_FileValue, CWL_IO_LiteralType, + CWL_IO_Type, CWL_IO_Value, - CWL_Output_Type, + CWL_SchemaNames, ExecutionInputs, ExecutionInputsList, ExecutionInputsMap, @@ -176,7 +176,6 @@ "supported_formats": NotRequired[List[JSON_Format]], }, total=False) JSON_IO_ListOrMap = Union[List[JSON], Dict[str, Union[JSON, str]]] - CWL_IO_Type = Union[CWL_Input_Type, CWL_Output_Type] PKG_IO_Type = Union[JSON_IO_Type, WPS_IO_Type] ANY_IO_Type = Union[CWL_IO_Type, JSON_IO_Type, WPS_IO_Type, OWS_IO_Type] ANY_Format_Type = Union[Dict[str, Optional[str]], Format] @@ -1175,6 +1174,28 @@ def get_cwl_io_type_name(io_type): return io_type +def resolve_cwl_io_type_schema(io_info, cwl_schema_names=None): + # type: (CWL_IO_Type, Optional[CWLSchemaNames]) -> CWL_IO_Type + """ + Reverse :term:`CWL` schema references by name back to their full :term:`CWL` I/O definition. + + .. seealso:: + - :meth:`weaver.processes.wps_package.WpsPackage.make_inputs` + - :meth:`weaver.processes.wps_package.WpsPackage.update_cwl_schema_names` + """ + if not isinstance(io_info, dict) or not cwl_schema_names: + return io_info + io_type = io_info.get("type") + io_item = io_info.get("items") + if io_type == "array" and isinstance(io_item, str) and io_item in cwl_schema_names: + io_info = io_info.copy() # avoid undoing CWL tool parsing/resolution + io_info["items"] = cwl_schema_names[io_item]._props + elif isinstance(io_type, str) and io_type in cwl_schema_names: + io_info = io_info.copy() # avoid undoing CWL tool parsing/resolution + io_info["type"] = cwl_schema_names[io_type]._props + return io_info + + @dataclass class CWLIODefinition(object): """ @@ -1272,8 +1293,8 @@ def __iter__(self): """ -def get_cwl_io_type(io_info, strict=True): - # type: (CWL_IO_Type, bool) -> CWLIODefinition +def get_cwl_io_type(io_info, strict=True, cwl_schema_names=None): + # type: (CWL_IO_Type, bool, Optional[CWL_SchemaNames]) -> CWLIODefinition """ Obtains the basic type of the CWL input and identity if it is optional. @@ -1290,6 +1311,7 @@ def get_cwl_io_type(io_info, strict=True): :param io_info: :term:`CWL` definition to parse. :param strict: Indicates if only pure :term:`CWL` definition is allowed, or allow implicit data-type conversions. + :param cwl_schema_names: Mapping of CWL type schema references to resolve in long form if used in a definition. :return: tuple of guessed base type and flag indicating if it can be null (optional input). """ io_type = get_cwl_io_type_name(io_info["type"]) @@ -1316,6 +1338,7 @@ def get_cwl_io_type(io_info, strict=True): io_base_type = None for i, typ in enumerate(io_type, start=int(is_null)): typ = get_cwl_io_type_name(typ) + typ = resolve_cwl_io_type_schema(typ, cwl_schema_names) io_name = io_info["name"] sub_type = {"type": typ, "name": f"{io_name}[{i}]"} # type: CWL_IO_Type array_io_def = parse_cwl_array_type(sub_type, strict=strict) diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 0ba7b9d74..14b40ccad 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -158,10 +158,12 @@ CWL, CWL_AnyRequirements, CWL_IO_ComplexType, + CWL_IO_Type, CWL_Requirement, CWL_RequirementNames, CWL_RequirementsDict, CWL_Results, + CWL_SchemaNames, CWL_ToolPathObject, CWL_WorkflowStepPackage, CWL_WorkflowStepPackageMap, @@ -1542,6 +1544,47 @@ def update_requirements(self): if self.package.get("baseCommand") == "python": self.package["baseCommand"] = os.path.join(active_python_path, "python") + def update_cwl_schema_names(self): + """ + Detect duplicate :term:`CWL` schema types not referred by name to provide one and avoid resolution failure. + + Doing this resolution avoids reused definitions being considered as "conflicts" because of missing ``name``. + To avoid introducing a real conflict, names are injected only under corresponding :term:`CWL` I/O by ID. + The most common type of definition resolve this way is when :term:`CWL` ``Enum`` is reused for single and + array-based definitions simultaneously. + + .. seealso:: + - :func:`weaver.processes.convert.resolve_cwl_io_type_schema` + - :meth:`weaver.processes.wps_package.WpsPackage.make_inputs` + + .. fixme:: + Workaround for https://github.com/common-workflow-language/cwltool/issues/1908. + """ + for io_select in ["inputs", "outputs"]: + if isinstance(self.package[io_select], dict): + io_items = self.package[io_select] # type: Dict[str, CWL_IO_Type] + else: + io_items = {item["id"]: item for item in self.package[io_select]} # type: Dict[str, CWL_IO_Type] + for io_name, io_def in io_items.items(): + if isinstance(io_def["type"], list): + item_enum = None + array_enum = None + for io_item in io_def["type"]: + if not isinstance(io_item, dict): + continue + if io_item.get("type") == "enum": + item_enum = io_item + continue + if io_item.get("type") != "array": + continue + if not isinstance(io_item.get("items", {}), dict): + continue + if io_item["items"].get("type") == "enum": + array_enum = io_item["items"] + # only apply the name reference if not already provided (eg: explicit name defined in original CWL) + if item_enum and array_enum and item_enum == array_enum and "name" not in io_item: + item_enum["name"] = array_enum["name"] = f"{io_name}{uuid.uuid4()}" + def update_effective_user(self): # type: () -> None """ @@ -1732,6 +1775,7 @@ def _handler(self, request, response): self.update_effective_user() self.update_requirements() + self.update_cwl_schema_names() runtime_params = self.setup_runtime() self.logger.debug("Using cwltool.RuntimeContext args:\n%s", json.dumps(runtime_params, indent=2)) @@ -1767,7 +1811,8 @@ def _handler(self, request, response): eoimage_data_sources, accept_mime_types, settings=self.settings) - cwl_inputs = self.make_inputs(request.inputs, cwl_inputs_info) + cwl_schema_refs = package_inst.t.names.names + cwl_inputs = self.make_inputs(request.inputs, cwl_inputs_info, cwl_schema_refs) self.update_status("Convert package inputs done.", PACKAGE_PROGRESS_CONVERT_INPUT, Status.RUNNING) except PackageException as exc: raise self.exception_message(type(exc), None, str(exc)) # re-raise as is, but with extra log entry @@ -1838,6 +1883,7 @@ def must_fetch(self, input_ref, input_type): def make_inputs(self, wps_inputs, # type: Dict[str, Deque[WPS_Input_Type]] cwl_inputs_info, # type: Dict[str, CWL_Input_Type] + cwl_schema_names, # type: Dict[str, CWL_SchemaNames] ): # type: (...) -> Dict[str, ValueType] """ Converts :term:`WPS` input values to corresponding :term:`CWL` input values for processing by the package. @@ -1848,6 +1894,7 @@ def make_inputs(self, :param wps_inputs: Actual :term:`WPS` inputs parsed from execution request. :param cwl_inputs_info: Expected CWL input definitions for mapping. + :param cwl_schema_names: Mapping of CWL type schema references to resolve 'type: ' if used in a definition. :return: :term:`CWL` input values. """ cwl_inputs = {} @@ -1859,7 +1906,7 @@ def make_inputs(self, # process single occurrences input_i = input_occurs[0] # handle as reference/data - io_def = get_cwl_io_type(cwl_inputs_info[input_id]) + io_def = get_cwl_io_type(cwl_inputs_info[input_id], cwl_schema_names=cwl_schema_names) if isinstance(input_i, ComplexInput) or io_def.type in PACKAGE_COMPLEX_TYPES: # extend array data that allow max_occur > 1 # drop invalid inputs returned as None diff --git a/weaver/typedefs.py b/weaver/typedefs.py index e4ed88825..b1ad7d987 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -167,6 +167,13 @@ }, total=False) CWL_Inputs = Union[List[CWL_Input_Type], Dict[str, CWL_Input_Type]] CWL_Outputs = Union[List[CWL_Output_Type], Dict[str, CWL_Output_Type]] + CWL_IO_Type = Union[CWL_Input_Type, CWL_Output_Type] + + class CWL_SchemaName(Protocol): + name: str + _props: CWL_IO_Type + + CWL_SchemaNames = Dict[str, CWL_SchemaName] # 'requirements' includes 'hints' CWL_Requirement = TypedDict("CWL_Requirement", {