comparison planemo/lib/python3.7/site-packages/galaxy/tool_util/cwl/representation.py @ 0:d30785e31577 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:18:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d30785e31577
1 """ This module is responsible for converting between Galaxy's tool
2 input description and the CWL description for a job json. """
3
4 import collections
5 import json
6 import logging
7 import os
8
9 from six import string_types
10
11 from galaxy.exceptions import RequestParameterInvalidException
12 from galaxy.util import safe_makedirs, string_as_bool
13 from galaxy.util.bunch import Bunch
14 from .util import set_basename_and_derived_properties
15
16
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Sentinel meaning "no value was supplied for this input", as opposed to an
# explicit None value.
NOT_PRESENT = object()

# Sentinel used as the Galaxy parameter type of a representation that has no
# corresponding Galaxy input (see the "null" entry of TYPE_REPRESENTATIONS).
NO_GALAXY_INPUT = object()

# Galaxy tool parameter type names used by the type representations below.
INPUT_TYPE = Bunch(
    DATA="data",
    INTEGER="integer",
    FLOAT="float",
    TEXT="text",
    BOOLEAN="boolean",
    SELECT="select",
    FIELD="field",
    CONDITIONAL="conditional",
    # NOTE: "COLLECTON" is misspelled, but the attribute is referenced by name
    # elsewhere in this module, so the spelling is kept.
    DATA_COLLECTON="data_collection",
)
34
# There are two approaches to mapping CWL tool state to Galaxy tool state:
# one is to map CWL types to compound Galaxy tool parameter combinations
# with conditionals, and the other is to use a new Galaxy parameter type that
# allows unions, optional specifications, etc. The problem with the former
# is that it doesn't work with workflow parameters, for instance, and is
# very complex on the backend. The problem with the latter is that the GUI
# for this parameter type is currently undefined.
USE_FIELD_TYPES = True

# There are two approaches to mapping CWL workflow inputs to Galaxy workflow
# steps. The first is to simply map everything to expressions, stick them into
# files and use data inputs; the second is to use parameter_input steps with
# field types. We dispatch on USE_FIELD_TYPES for now to choose between them,
# but the two settings may diverge later.
# There are open issues with each approach:
# - Mapping everything to files makes the GUI harder to imagine but the backend
#   easier to manage in some ways.
USE_STEP_PARAMETERS = USE_FIELD_TYPES
53
# One way a CWL value can be represented as a Galaxy input:
#   name              - identifier of the representation (looked up by
#                       type_representation_from_name)
#   galaxy_param_type - an INPUT_TYPE value, or NO_GALAXY_INPUT
#   label             - short human-readable description
#   collection_type   - collection type string for the collection-backed
#                       representations ("record", "list"), otherwise None
TypeRepresentation = collections.namedtuple("TypeRepresentation", ["name", "galaxy_param_type", "label", "collection_type"])

# Registry of every known representation. The order is significant:
# type_descriptions_for_field_types returns matches in this order and
# FIELD_TYPE_REPRESENTATION below relies on "field" being the last entry.
TYPE_REPRESENTATIONS = [
    TypeRepresentation("null", NO_GALAXY_INPUT, "no input", None),
    TypeRepresentation("integer", INPUT_TYPE.INTEGER, "an integer", None),
    TypeRepresentation("float", INPUT_TYPE.FLOAT, "a decimal number", None),
    TypeRepresentation("double", INPUT_TYPE.FLOAT, "a decimal number", None),
    TypeRepresentation("file", INPUT_TYPE.DATA, "a dataset", None),
    TypeRepresentation("directory", INPUT_TYPE.DATA, "a directory", None),
    TypeRepresentation("boolean", INPUT_TYPE.BOOLEAN, "a boolean", None),
    TypeRepresentation("text", INPUT_TYPE.TEXT, "a simple text field", None),
    TypeRepresentation("record", INPUT_TYPE.DATA_COLLECTON, "record as a dataset collection", "record"),
    TypeRepresentation("json", INPUT_TYPE.TEXT, "arbitrary JSON structure", None),
    TypeRepresentation("array", INPUT_TYPE.DATA_COLLECTON, "as a dataset list", "list"),
    TypeRepresentation("enum", INPUT_TYPE.TEXT, "enum value", None),  # TODO: make this a select...
    TypeRepresentation("field", INPUT_TYPE.FIELD, "arbitrary JSON structure", None),
]
# The "field" representation; it must stay the last entry of the list above.
FIELD_TYPE_REPRESENTATION = TYPE_REPRESENTATIONS[-1]
# Attached to the namedtuple class as a method: true when the representation
# is backed by an actual Galaxy parameter (i.e. it is not the "null" entry).
TypeRepresentation.uses_param = lambda self: self.galaxy_param_type is not NO_GALAXY_INPUT
72
# Maps a CWL type name to the names of the TypeRepresentation entries that may
# be used for it. Which table is built depends on USE_FIELD_TYPES.
if not USE_FIELD_TYPES:
    # Without field types, "Any" is expanded into every concrete
    # representation it could take.
    # NOTE(review): unlike the table below, this one has no "enum" or
    # "double" entries - confirm whether that is intentional.
    CWL_TYPE_TO_REPRESENTATIONS = {
        "Any": ["integer", "float", "file", "boolean", "text", "record", "json"],
        "array": ["array"],
        "string": ["text"],
        "boolean": ["boolean"],
        "int": ["integer"],
        "float": ["float"],
        "File": ["file"],
        "Directory": ["directory"],
        "null": ["null"],
        "record": ["record"],
    }
else:
    # With field types, "Any" is handled by the single "field" representation.
    CWL_TYPE_TO_REPRESENTATIONS = {
        "Any": ["field"],
        "array": ["array"],
        "string": ["text"],
        "boolean": ["boolean"],
        "int": ["integer"],
        "float": ["float"],
        "File": ["file"],
        "Directory": ["directory"],
        "null": ["null"],
        "record": ["record"],
        "enum": ["enum"],
        "double": ["double"],
    }
101
102
def type_representation_from_name(type_representation_name):
    """Return the ``TypeRepresentation`` registered under the given name.

    :param type_representation_name: value compared against the ``name`` of
        each entry in ``TYPE_REPRESENTATIONS``.
    :returns: the first entry whose ``name`` equals the argument.
    :raises AssertionError: if no entry has that name.
    """
    for type_representation in TYPE_REPRESENTATIONS:
        if type_representation.name == type_representation_name:
            return type_representation

    # Raised explicitly instead of via ``assert False``: assert statements are
    # removed when Python runs with -O, which would make an unknown name fall
    # through and return None to callers that immediately dereference it.
    raise AssertionError("Unknown type representation name [%s]" % (type_representation_name,))
109
110
def type_descriptions_for_field_types(field_types):
    """Collect the type representations usable for a list of CWL field types.

    Each element of ``field_types`` is either a CWL type name or a dict with a
    truthy ``"type"`` entry, in which case that entry is used as the type name.

    :returns: the matching ``TypeRepresentation`` objects, ordered as they
        appear in ``TYPE_REPRESENTATIONS`` and without duplicates.
    :raises Exception: if a type cannot be looked up (it is unhashable) or has
        no entry in ``CWL_TYPE_TO_REPRESENTATIONS``.
    """
    wanted_names = set()
    for entry in field_types:
        cwl_type = entry
        if isinstance(entry, dict) and entry.get("type"):
            cwl_type = entry.get("type")

        try:
            names_for_type = CWL_TYPE_TO_REPRESENTATIONS.get(cwl_type)
        except TypeError:
            # Unhashable type descriptions (e.g. a list or dict) cannot be
            # used as a lookup key.
            raise Exception("Failed to convert field_type %s" % cwl_type)
        if names_for_type is None:
            raise Exception("Failed to convert type %s" % cwl_type)
        wanted_names.update(names_for_type)

    # Walk the registry rather than the set so the result keeps registry order.
    return [
        representation
        for representation in TYPE_REPRESENTATIONS
        if representation.name in wanted_names
    ]
129
130
def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper):
    """Build the CWL job JSON value for a wrapped Galaxy dataset.

    - ``expression.json`` datasets: the JSON stored in the dataset file is
      loaded and returned as-is.
    - ``directory`` datasets: delegated to ``dataset_wrapper_to_directory_json``.
    - anything else: a ``{"class": "File", ...}`` object is returned. When the
      dataset's extra files path contains a ``__secondary_files__`` directory,
      the primary file and each secondary file are symlinked into
      ``inputs_dir`` and the returned object points at those links.

    :param inputs_dir: directory in which the symlinks are created (created on
        demand, only when secondary files exist).
    :param dataset_wrapper: wrapped dataset; ``str()`` of it is used as its path.
    """
    if dataset_wrapper.ext == "expression.json":
        with open(dataset_wrapper.file_name, "r") as handle:
            return json.load(handle)

    if dataset_wrapper.ext == "directory":
        return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)

    secondary_root = os.path.join(dataset_wrapper.extra_files_path, "__secondary_files__")
    location = str(dataset_wrapper)
    file_json = {"class": "File"}

    if os.path.exists(secondary_root):
        # Link the primary file and its secondary files side by side in
        # inputs_dir.
        safe_makedirs(inputs_dir)
        staged_primary = os.path.join(inputs_dir, os.path.basename(location))
        os.symlink(location, staged_primary)

        staged_secondaries = []
        for entry_name in os.listdir(secondary_root):
            source = os.path.join(secondary_root, entry_name)
            link = os.path.join(inputs_dir, entry_name)
            log.info("linking [%s] to [%s]" % (source, link))
            os.symlink(source, link)
            entry_class = "Directory" if os.path.isdir(os.path.realpath(source)) else "File"
            staged_secondaries.append({"class": entry_class, "location": link})

        file_json["secondaryFiles"] = staged_secondaries
        location = staged_primary

    file_json["location"] = location

    # Verify it isn't a NoneDataset
    if dataset_wrapper.unsanitized:
        file_json["size"] = int(dataset_wrapper.get_size())

    display_name = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    set_basename_and_derived_properties(file_json, display_name)
    return file_json
169
170
def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
    """Build the CWL ``Directory`` JSON value for a wrapped directory dataset.

    :param inputs_dir: accepted for signature parity with
        ``dataset_wrapper_to_file_json``; not used here.
    :param dataset_wrapper: wrapped dataset whose ``ext`` must be "directory".
    :returns: dict with ``location`` (the extra files path), ``class``,
        ``name`` and the ``archive_*`` entries describing the archive file.
    """
    assert dataset_wrapper.ext == "directory"

    # The archive's file name is assumed to contain the directory name: the
    # part before the final extension is used as that name.
    archive_name = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    root, extension = os.path.splitext(archive_name)

    # Best effort: if the archive path cannot be read, report no location.
    try:
        archive_path = dataset_wrapper.unsanitized.file_name
    except Exception:
        archive_path = None

    directory_json = {}
    directory_json["location"] = dataset_wrapper.extra_files_path
    directory_json["class"] = "Directory"
    directory_json["name"] = root
    directory_json["archive_location"] = archive_path
    directory_json["archive_nameext"] = extension
    directory_json["archive_nameroot"] = root
    return directory_json
193
194
def collection_wrapper_to_array(inputs_dir, wrapped_value):
    """Convert each element of a wrapped collection to file JSON, as a list.

    Elements are converted with ``dataset_wrapper_to_file_json`` in iteration
    order.
    """
    return [dataset_wrapper_to_file_json(inputs_dir, element) for element in wrapped_value]
200
201
def collection_wrapper_to_record(inputs_dir, wrapped_value):
    """Convert a wrapped collection to an ordered mapping of key to file JSON.

    Keys and values come from ``wrapped_value.items()``; each value is
    converted with ``dataset_wrapper_to_file_json`` and insertion order is kept.
    """
    return collections.OrderedDict(
        (key, dataset_wrapper_to_file_json(inputs_dir, element))
        for key, element in wrapped_value.items()
    )
207
208
def to_cwl_job(tool, param_dict, local_working_directory):
    """Convert Galaxy tool state into a CWL job JSON dictionary.

    tool is Galaxy's representation of the tool and param_dict is the
    parameter dictionary with wrapped values.

    :param tool: Galaxy tool; its ``_cwl_tool_proxy.input_fields()`` and
        ``inputs`` are read.
    :param param_dict: mapping from input name to (wrapped) parameter value.
    :param local_working_directory: an ``_inputs`` directory beneath this path
        is handed to the dataset conversion helpers for linking files.
    :returns: dict mapping CWL input names to plain JSON-style values.
    """
    tool_proxy = tool._cwl_tool_proxy
    input_fields = tool_proxy.input_fields()
    inputs = tool.inputs
    input_json = {}

    inputs_dir = os.path.join(local_working_directory, "_inputs")

    def simple_value(input, param_dict_value, type_representation_name=None):
        # Convert one wrapped Galaxy value to its CWL value according to the
        # named type representation. The ``input`` argument is not used.
        type_representation = type_representation_from_name(type_representation_name)
        # Hmm... cwl_type isn't really the cwl type in every case,
        # like in the case of json for instance.

        if type_representation.galaxy_param_type == NO_GALAXY_INPUT:
            assert param_dict_value is None
            return None

        if type_representation.name == "file":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "directory":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "integer":
            # Go through str() because the value is a wrapper object.
            return int(str(param_dict_value))
        elif type_representation.name == "long":
            # NOTE(review): TYPE_REPRESENTATIONS has no "long" entry, so this
            # branch looks unreachable - confirm.
            return int(str(param_dict_value))
        elif type_representation.name in ["float", "double"]:
            return float(str(param_dict_value))
        elif type_representation.name == "boolean":
            return string_as_bool(param_dict_value)
        elif type_representation.name == "text":
            return str(param_dict_value)
        elif type_representation.name == "enum":
            return str(param_dict_value)
        elif type_representation.name == "json":
            raw_value = param_dict_value.value
            return json.loads(raw_value)
        elif type_representation.name == "field":
            if param_dict_value is None:
                return None
            if hasattr(param_dict_value, "value"):
                # Is InputValueWrapper
                rval = param_dict_value.value
                if isinstance(rval, dict) and "src" in rval and rval["src"] == "json":
                    # needed for wf_step_connect_undeclared_param, so non-file defaults?
                    return rval["value"]
                return rval
            elif not param_dict_value.is_collection:
                # Is DatasetFilenameWrapper
                return dataset_wrapper_to_file_json(inputs_dir, param_dict_value)
            else:
                # Is DatasetCollectionWrapper
                hdca_wrapper = param_dict_value
                # NOTE(review): the first test compares ``collection_type``
                # itself to a string while the second compares
                # ``collection_type.collection_type`` - confirm both are
                # intended. Any other collection type falls through and
                # yields None.
                if hdca_wrapper.collection_type == "list":
                    # TODO: generalize to lists of lists and lists of non-files...
                    return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
                elif hdca_wrapper.collection_type.collection_type == "record":
                    return collection_wrapper_to_record(inputs_dir, hdca_wrapper)

        elif type_representation.name == "array":
            # TODO: generalize to lists of lists and lists of non-files...
            return collection_wrapper_to_array(inputs_dir, param_dict_value)
        elif type_representation.name == "record":
            return collection_wrapper_to_record(inputs_dir, param_dict_value)
        else:
            return str(param_dict_value)

    for input_name, input in inputs.items():
        if input.type == "repeat":
            # The repeat is named "<cwl name>_repeat"; stripping the suffix
            # gives both the key inside each instance and the CWL input name.
            only_input = next(iter(input.inputs.values()))
            array_value = []
            for instance in param_dict[input_name]:
                # NOTE(review): no type representation name is passed here, so
                # simple_value looks up the name None, which is not in
                # TYPE_REPRESENTATIONS - confirm this path is still used.
                array_value.append(simple_value(only_input, instance[input_name[:-len("_repeat")]]))
            input_json[input_name[:-len("_repeat")]] = array_value
        elif input.type == "conditional":
            # The selected case names the type representation; the value lives
            # under the "_cwl__value_" key of the same conditional.
            assert input_name in param_dict, "No value for %s in %s" % (input_name, param_dict)
            current_case = param_dict[input_name]["_cwl__type_"]
            if str(current_case) != "null":  # str because it is a wrapped...
                case_index = input.get_current_case(current_case)
                case_input = input.cases[case_index].inputs["_cwl__value_"]
                case_value = param_dict[input_name]["_cwl__value_"]
                input_json[input_name] = simple_value(case_input, case_value, current_case)
        else:
            # Plain parameter: find the CWL field of the same name (the last
            # match wins) and derive the representation from its type.
            matched_field = None
            for field in input_fields:
                if field["name"] == input_name:
                    matched_field = field
            # NOTE(review): matched_field stays None when no field has this
            # name, and field_to_field_type subscripts its argument.
            field_type = field_to_field_type(matched_field)
            if isinstance(field_type, list):
                # A list of several types is only handled via the "field"
                # representation.
                assert USE_FIELD_TYPES
                type_descriptions = [FIELD_TYPE_REPRESENTATION]
            else:
                type_descriptions = type_descriptions_for_field_types([field_type])
            assert len(type_descriptions) == 1
            type_description_name = type_descriptions[0].name
            input_json[input_name] = simple_value(input, param_dict[input_name], type_description_name)

    log.debug("Galaxy Tool State is CWL State is %s" % input_json)
    return input_json
312
313
def to_galaxy_parameters(tool, as_dict):
    """Translate a Galaxified CWL job description into a Galaxy tool request.

    Tool is Galaxy's representation of the tool and as_dict is a Galaxified
    representation of the input json (no paths, HDA references for instance).

    :param tool: Galaxy tool; its ``inputs`` mapping drives the translation.
    :param as_dict: mapping from input name to value.
    :returns: flat dict of Galaxy request parameters. Conditionals contribute
        ``"<name>|_cwl__type_"`` and, unless the type is "null",
        ``"<name>|_cwl__value_"``; repeats contribute one
        ``"<name>_repeat_<index>|<child>"`` key per array element.
    :raises RequestParameterInvalidException: if a conditional input has no
        value and no "null" case, or if no case matches the value's type.
    """
    inputs = tool.inputs
    galaxy_request = {}

    def from_simple_value(input, param_dict_value, type_representation_name=None):
        # Only the "json" representation needs encoding; every other value is
        # passed through unchanged. The ``input`` argument is not used.
        if type_representation_name == "json":
            return json.dumps(param_dict_value)
        else:
            return param_dict_value

    for input_name, input in inputs.items():
        as_dict_value = as_dict.get(input_name, NOT_PRESENT)
        galaxy_input_type = input.type

        if galaxy_input_type == "repeat":
            if input_name not in as_dict:
                continue

            only_input = next(iter(input.inputs.values()))
            for index, value in enumerate(as_dict_value):
                # Each element gets its own repeat instance index so that
                # later elements do not overwrite earlier ones.
                key = "%s_repeat_%d|%s" % (input_name, index, only_input.name)
                galaxy_value = from_simple_value(only_input, value)
                galaxy_request[key] = galaxy_value
        elif galaxy_input_type == "conditional":
            # Pick the conditional case from the Python type of the value,
            # restricted to the cases this conditional actually offers. The
            # order of the tests matters (e.g. bool is tested before int).
            case_strings = input.case_strings
            # TODO: less crazy handling of defaults...
            if (as_dict_value is NOT_PRESENT or as_dict_value is None) and "null" in case_strings:
                type_representation_name = "null"
            elif (as_dict_value is NOT_PRESENT or as_dict_value is None):
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]. Non-null property must be set." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            elif isinstance(as_dict_value, bool) and "boolean" in case_strings:
                type_representation_name = "boolean"
            elif isinstance(as_dict_value, int) and "integer" in case_strings:
                type_representation_name = "integer"
            elif isinstance(as_dict_value, int) and "long" in case_strings:
                type_representation_name = "long"
            elif isinstance(as_dict_value, (int, float)) and "float" in case_strings:
                type_representation_name = "float"
            elif isinstance(as_dict_value, (int, float)) and "double" in case_strings:
                type_representation_name = "double"
            elif isinstance(as_dict_value, string_types) and "string" in case_strings:
                type_representation_name = "string"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "file" in case_strings:
                type_representation_name = "file"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "directory" in case_strings:
                # TODO: can't disambiguate with above if both are available...
                type_representation_name = "directory"
            elif "field" in case_strings:
                type_representation_name = "field"
            elif "json" in case_strings and as_dict_value is not None:
                type_representation_name = "json"
            else:
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            galaxy_request["%s|_cwl__type_" % input_name] = type_representation_name
            if type_representation_name != "null":
                current_case_index = input.get_current_case(type_representation_name)
                current_case_inputs = input.cases[current_case_index].inputs
                current_case_input = current_case_inputs["_cwl__value_"]
                galaxy_value = from_simple_value(current_case_input, as_dict_value, type_representation_name)
                galaxy_request["%s|_cwl__value_" % input_name] = galaxy_value
        elif as_dict_value is NOT_PRESENT:
            # Nothing supplied for a plain parameter: leave it out.
            continue
        else:
            galaxy_value = from_simple_value(input, as_dict_value)
            galaxy_request[input_name] = galaxy_value

    log.info("Converted galaxy_request is %s" % galaxy_request)
    return galaxy_request
393
394
def field_to_field_type(field):
    """Extract the effective type description from a CWL input field.

    A dict-valued ``field["type"]`` is replaced by its own ``"type"`` entry.
    A one-element list collapses to that element; longer lists are returned
    unchanged.

    :raises Exception: if the type is an empty list.
    """
    declared = field["type"]
    if isinstance(declared, dict):
        declared = declared["type"]

    if not isinstance(declared, list):
        return declared
    if not declared:
        raise Exception("Zero-length type list encountered, invalid CWL?")
    return declared[0] if len(declared) == 1 else declared