diff --git a/.github/config/wordlist.txt b/.github/config/wordlist.txt index ec05e6b..5e02299 100644 --- a/.github/config/wordlist.txt +++ b/.github/config/wordlist.txt @@ -160,6 +160,8 @@ loadlistingrequirement localhost lookahead loopback +loopinput +loopoutputmethod maccallum macos mappredicate @@ -194,6 +196,7 @@ outputbinding outputenumschema outputeval outputformat +outputmethod outputschema outputsink outputsource diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 1e8ce6d..4f04189 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -57,7 +57,7 @@ Reporting If you are being harassed by a member of the CWL Project, notice that someone else is being harassed, or have any other concerns, please contact the CWL Leadership Team at leadership@commonwl.org. If person who is harassing -you is on the team, they will recurse themselves from handling your incident. We +you is on the team, they will recuse themselves from handling your incident. We will respond as promptly as we can. This code of conduct applies to CWL Project spaces, but if you are being diff --git a/Workflow.yml b/Workflow.yml index 459f8d5..f711fe1 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -275,7 +275,7 @@ $graph: - type: record name: WorkflowStepInput extends: [Identified, InputSink, LoadContents, Labeled] - docParent: "#WorkflowStep" + docParent: "#AbstractWorkflowStep" doc: | The input of a workflow step connects an upstream parameter (from the workflow inputs, or the outputs of other workflows steps) with the input @@ -301,7 +301,7 @@ $graph: in the workflow or workflow step requirements. If the sink parameter is an array, or named in a [workflow - scatter](#WorkflowStep) operation, there may be multiple inbound + scatter](#ScatterWorkflowStep) operation, there may be multiple inbound data links listed in the `source` field. The values from the input links are merged depending on the method specified in the `linkMerge` field. 
If both `linkMerge` and `pickValue` are null @@ -338,7 +338,7 @@ $graph: 3. Before `scatter` or `valueFrom`. This is specifically intended to be useful in combination with - [conditional execution](#WorkflowStep), where several upstream + [conditional execution](#AbstractWorkflowStep), where several upstream steps may be connected to a single input (`source` is a list), and skipped steps produce null values. @@ -430,7 +430,7 @@ $graph: - type: record name: WorkflowStepOutput - docParent: "#WorkflowStep" + docParent: "#AbstractWorkflowStep" extends: Identified doc: | Associate an output parameter of the underlying process with a workflow @@ -451,7 +451,7 @@ $graph: type: ["null", File, Directory, Any] doc: | The default value for this parameter to use if either there is no - `source` field, or the value produced by the `source` is `null`. The + `outputSource` field, or the value produced by the `source` is `null`. The default must be applied prior to scattering or evaluating `valueFrom`. jsonldPredicate: _id: "sld:default" @@ -483,8 +483,8 @@ $graph: - name: ScatterMethod type: enum - docParent: "#WorkflowStep" - doc: The scatter method, as described in [workflow step scatter](#WorkflowStep). + docParent: "#ScatterWorkflowStep" + doc: The scatter method, as described in [workflow step scatter](#ScatterWorkflowStep). symbols: - dotproduct - nested_crossproduct @@ -493,14 +493,14 @@ $graph: - name: LoopOutputMethod type: enum - docParent: "#WorkflowStep" - doc: The loop output method, as described in [workflow step loop](#WorkflowStep). + docParent: "#LoopWorkflowStep" + doc: The loop output method, as described in [workflow step loop](#LoopWorkflowStep). symbols: - last - all -- name: WorkflowStep +- name: AbstractWorkflowStep type: record extends: [Identified, Labeled, sld:Documented] docParent: "#Workflow" @@ -510,96 +510,23 @@ $graph: `Workflow`) in the `run` field and connects the input and output parameters of the underlying process to workflow parameters. 
- # Scatter/gather - - To use scatter/gather, - [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified - in the workflow or workflow step requirements. - - A "scatter" operation specifies that the associated workflow step or - subworkflow should execute separately over a list of input elements. Each - job making up a scatter operation is independent and may be executed - concurrently. - - The `scatter` field specifies one or more input parameters which will be - scattered. An input parameter may be listed more than once. The declared - type of each input parameter implicitly becomes an array of items of the - input parameter type. If a parameter is listed more than once, it becomes - a nested array. As a result, upstream parameters which are connected to - scattered parameters must be arrays. - - All output parameter types are also implicitly wrapped in arrays. Each job - in the scatter results in an entry in the output array. - - If any scattered parameter runtime value is an empty array, all outputs are - set to empty arrays and no work is done for the step, according to - applicable scattering rules. - - If `scatter` declares more than one input parameter, `scatterMethod` - describes how to decompose the input into a discrete set of jobs. - - * **dotproduct** specifies that each of the input arrays are aligned and one - element taken from each array to construct each job. It is an error - if all input arrays are not the same length. - - * **nested_crossproduct** specifies the Cartesian product of the inputs, - producing a job for every combination of the scattered inputs. The - output must be nested arrays for each level of scattering, in the - order that the input arrays are listed in the `scatter` field. - - * **flat_crossproduct** specifies the Cartesian product of the inputs, - producing a job for every combination of the scattered inputs. 
The - output arrays must be flattened to a single level, but otherwise listed in the - order that the input arrays are listed in the `scatter` field. - - # Conditional and iterative execution (Optional) + # Conditional execution (Optional) Conditional execution makes execution of a step conditional on an expression. A step that is not executed is "skipped". A skipped step produces `null` for all output parameters. - The condition is evaluated after `scatter`, using the input object - of each individual scatter job. This means over a set of scatter - jobs, some may be executed and some may be skipped. When the - results are gathered, skipped steps must be `null` in the output - arrays. - The `when` field controls conditional execution. This is an expression that must be evaluated with `inputs` bound to the step input object (or individual scatter job), and returns a boolean value. It is an error if this expression returns a value other than `true` or `false`. - - The `loop` field controls iterative execution. It defines the input - parameters of the loop iterations after the first one (inputs of the - first iteration are the step input parameters, as usual). If no - `loop` rule is specified for a given step `in` field, the initial - value is kept constant among all iterations. - - When a `loop` field is present, the `when` field is mandatory. It is - evaluated before each loop iteration and acts as a termination condition: - as soon as the `when` expression evaluates to `false`, the loop terminates - and the step outputs are propagated to the subsequent workflow steps. - - The `outputMethod` field describes how to deal with loop outputs after - termination: - * **last** specifies that only the last computed element for each output - parameter should be propagated to the subsequenct steps. This is the - default value. 
- - * **all** specifies that a single ordered array with all output values - computed at the end of each loop iteration should be propagated to the - subsequent steps. - - Conditionals and iterative execution in CWL are an optional features - and are not required to be implemented by all consumers of CWL documents. - An implementation that does not support conditionals must return a - fatal error when attempting to execute a workflow that uses - conditional constructs the implementation does not support. - - At this time, the `loop` field is not compatible with the `scatter` field. - Combining the two in the same step will produce an error. + Conditional execution in CWL is an optional feature and is not required + to be implemented by all consumers of CWL documents. An implementation that + does not support conditional executions must return a fatal error when + attempting to execute a workflow that uses conditional constructs the + implementation does not support. # Subworkflows @@ -675,6 +602,68 @@ $graph: If defined, only run the step when the expression evaluates to `true`. If `false` the step is skipped. A skipped step produces a `null` on each output. + + +- name: SimpleWorkflowStep + type: record + extends: AbstractWorkflowStep + docParent: "#Workflow" + + +- name: ScatterWorkflowStep + type: record + extends: AbstractWorkflowStep + docParent: "#Workflow" + doc: | + To use scatter/gather, + [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified + in the workflow or workflow step requirements. + + A "scatter" operation specifies that the associated workflow step or + subworkflow should execute separately over a list of input elements. Each + job making up a scatter operation is independent and may be executed + concurrently. + + The `scatter` field specifies one or more input parameters which will be + scattered. An input parameter may be listed more than once.
The declared + type of each input parameter implicitly becomes an array of items of the + input parameter type. If a parameter is listed more than once, it becomes + a nested array. As a result, upstream parameters which are connected to + scattered parameters must be arrays. + + All output parameter types are also implicitly wrapped in arrays. Each job + in the scatter results in an entry in the output array. + + If any scattered parameter runtime value is an empty array, all outputs are + set to empty arrays and no work is done for the step, according to + applicable scattering rules. + + If `scatter` declares more than one input parameter, `scatterMethod` + describes how to decompose the input into a discrete set of jobs. + + * **dotproduct** specifies that each of the input arrays are aligned and one + element taken from each array to construct each job. It is an error + if all input arrays are not the same length. + + * **nested_crossproduct** specifies the Cartesian product of the inputs, + producing a job for every combination of the scattered inputs. The + output must be nested arrays for each level of scattering, in the + order that the input arrays are listed in the `scatter` field. + + * **flat_crossproduct** specifies the Cartesian product of the inputs, + producing a job for every combination of the scattered inputs. The + output arrays must be flattened to a single level, but otherwise listed in the + order that the input arrays are listed in the `scatter` field. + + # Conditional execution (Optional) + + The condition is evaluated after `scatter`, using the input object + of each individual scatter job. This means over a set of scatter + jobs, some may be executed and some may be skipped. When the + results are gathered, skipped steps must be `null` in the output + arrays. + + fields: - name: scatter type: - string? 
@@ -691,6 +680,45 @@ $graph: jsonldPredicate: "_id": "cwl:scatterMethod" "_type": "@vocab" + + +- name: LoopWorkflowStep + type: record + extends: AbstractWorkflowStep + docParent: "#Workflow" + doc: | + # Iterative execution (Optional) + + The `loop` field controls iterative execution. It defines the input + parameters of the loop iterations after the first one (inputs of the + first iteration are the step input parameters, as usual). If no + `loop` rule is specified for a given step `in` field, the initial + value is kept constant among all iterations. + + When a `loop` field is present, the `when` field is mandatory. It is + evaluated before each loop iteration and acts as a termination condition: + as soon as the `when` expression evaluates to `false`, the loop terminates + and the step outputs are propagated to the subsequent workflow steps. + + The `outputMethod` field describes how to deal with loop outputs after + termination: + + * **last** specifies that only the last computed element for each output + parameter should be propagated to the subsequent steps. This is the + default value. + + * **all** specifies that an array with all output values computed at the + end of each loop iteration should be propagated to the subsequent steps. + Elements in the array must be ordered according to the loop iterations + that produced them. + + Iterative execution in CWL is an optional feature and is not required + to be implemented by all consumers of CWL documents. An implementation that + does not support iterative executions must return a fatal error when + attempting to execute a workflow that uses iterative constructs the + implementation does not support. + + fields: - name: loop doc: | Defines the input parameters of the loop iterations after the first one @@ -704,11 +732,21 @@ $graph: mapPredicate: outputSource - name: outputMethod doc: | - Required if `loop` is defined. + If not specified, the default method is "last". type: LoopOutputMethod?
+ default: last jsonldPredicate: "_id": "cwl:outputMethod" "_type": "@vocab" + - name: when + type: + - Expression + jsonldPredicate: "cwl:when" + doc: | + Only run the next iteration when the expression evaluates to `true`. + If the first iteration evaluates to `false` the step is skipped. + A skipped step produces a `null` on each output if the `outputMethod` + is set to `last`, and an empty array if the `outputMethod` is set to `all`. - name: Workflow @@ -791,7 +829,7 @@ $graph: concurrently, provided that dependencies between steps are met. type: - type: array - items: "#WorkflowStep" + items: [LoopWorkflowStep, ScatterWorkflowStep, SimpleWorkflowStep] jsonldPredicate: mapSubject: id @@ -801,7 +839,7 @@ $graph: extends: ProcessRequirement doc: | Indicates that the workflow platform must support nested workflows in - the `run` field of [WorkflowStep](#WorkflowStep). + the `run` field of [AbstractWorkflowStep](#AbstractWorkflowStep). fields: - name: "class" type: @@ -819,7 +857,7 @@ $graph: extends: ProcessRequirement doc: | Indicates that the workflow platform must support the `scatter` and - `scatterMethod` fields of [WorkflowStep](#WorkflowStep). + `scatterMethod` fields of [ScatterWorkflowStep](#ScatterWorkflowStep). fields: - name: "class" type: diff --git a/design-documents/conditionals-2019.md b/design-documents/conditionals-2019.md index 34f7d69..ca581c3 100644 --- a/design-documents/conditionals-2019.md +++ b/design-documents/conditionals-2019.md @@ -10,7 +10,7 @@ This is a documentation of the design and design decisions for conditionals as o ![dual scatter nested](conditionals/conditional-patterns-3.png) ![dual scatter flattened](conditionals/conditional-patterns-4.png) -The design adds a new field `when` to a `WorkflowStep`. This field is an expression that +The design adds a new field `when` to an `AbstractWorkflowStep`. This field is an expression that evaluates to `True` or `False`.
The executor runs the step if the value is `True`, skips it if `False`. A skipped step produces `null` values on all its outputs. @@ -114,7 +114,7 @@ outputs: pickValue: first_non_null ``` -The new syntax adds a single field to `WorkflowStep` (`when`) and a new +The new syntax adds a single field to `AbstractWorkflowStep` (`when`) and a new operator called `pickValue` to the `WorkflowStepInput` and `WorkflowOutputParameter`. This is a fairly non-intrusive modification, fully backwards compatible (it's an addition, not a modification) and allows diff --git a/json-schema/cwl.yaml b/json-schema/cwl.yaml index 7ba8c34..944dae2 100644 --- a/json-schema/cwl.yaml +++ b/json-schema/cwl.yaml @@ -503,7 +503,7 @@ $defs: description: | A 'scatter' operation specifies that the associated Workflow step should execute separately over a list of input elements. Each job making up a scatter operation is independent and may be executed concurrently - (see also: https://www.commonwl.org/v1.2/Workflow.html#WorkflowStep). + (see also: https://www.commonwl.org/v1.2/Workflow.html#ScatterWorkflowStep). $comment: Fields 'scatter' and 'scatterMethod' at the root of a 'WorkflowStep', not within the requirement. 
properties: class: diff --git a/tests/conditionals/test-index.yaml b/tests/conditionals/test-index.yaml index cee57e9..f646a56 100644 --- a/tests/conditionals/test-index.yaml +++ b/tests/conditionals/test-index.yaml @@ -20,7 +20,7 @@ tags: [ conditional, inline_javascript, workflow ] - id: direct_required - doc: Conditional using intermediate WorkflowStep.in input + doc: Conditional using intermediate AbstractWorkflowStep.in input tool: cond-wf-002.cwl job: val.1.job.yaml output: @@ -186,7 +186,7 @@ out1: [ "foo 2", "foo 4", "foo 6", "bar 1", "bar 3", "bar 5"] - tags: [ conditional, inline_javascript, scatter, multiple, workflow ] + tags: [ conditional, inline_javascript, scatter, multiple_input, workflow ] - id: direct_optional_null_result_nojs doc: simplest conditional pattern (true), no javascript @@ -205,7 +205,7 @@ tags: [ conditional, workflow ] - id: direct_required_nojs - doc: Conditional using intermediate WorkflowStep.in input; no javascript + doc: Conditional using intermediate AbstractWorkflowStep.in input; no javascript tool: cond-wf-002_nojs.cwl job: val.1.job.yaml output: @@ -371,7 +371,7 @@ out1: [ "foo 2", "foo 4", "foo 6", "bar 1", "bar 3", "bar 5"] - tags: [ conditional, scatter, multiple, workflow ] + tags: [ conditional, scatter, multiple_input, workflow ] - id: cond-with-defaults-1 @@ -395,7 +395,7 @@ "size": 34 } ] - tags: [ conditional, scatter, multiple, workflow ] + tags: [ conditional, scatter, multiple_input, workflow ] - id: cond-with-defaults-2 doc: "Default inputs, choose step to run based on what was provided, second case" @@ -411,4 +411,4 @@ "size": 12 } ] - tags: [ conditional, scatter, multiple, workflow ] + tags: [ conditional, scatter, multiple_input, workflow ]