Mercurial > repos > guerler > springsuite
comparison: planemo/lib/python3.7/site-packages/galaxy/tool_util/cwl/representation.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author: guerler
date: Fri, 31 Jul 2020 00:18:57 -0400
1 """ This module is responsible for converting between Galaxy's tool | |
2 input description and the CWL description for a job json. """ | |
3 | |
4 import collections | |
5 import json | |
6 import logging | |
7 import os | |
8 | |
9 from six import string_types | |
10 | |
11 from galaxy.exceptions import RequestParameterInvalidException | |
12 from galaxy.util import safe_makedirs, string_as_bool | |
13 from galaxy.util.bunch import Bunch | |
14 from .util import set_basename_and_derived_properties | |
15 | |
16 | |
17 log = logging.getLogger(__name__) | |
18 | |
19 NOT_PRESENT = object() | |
20 | |
21 NO_GALAXY_INPUT = object() | |
22 | |
23 INPUT_TYPE = Bunch( | |
24 DATA="data", | |
25 INTEGER="integer", | |
26 FLOAT="float", | |
27 TEXT="text", | |
28 BOOLEAN="boolean", | |
29 SELECT="select", | |
30 FIELD="field", | |
31 CONDITIONAL="conditional", | |
32 DATA_COLLECTON="data_collection", | |
33 ) | |
34 | |
# There are two approaches to mapping CWL tool state to Galaxy tool state:
# one is to map CWL types to compound combinations of Galaxy tool parameters
# with conditionals, and the other is to use a new Galaxy parameter type that
# allows unions, optional specifications, etc. The problem with the former is
# that it doesn't work with workflow parameters, for instance, and is very
# complex on the backend. The problem with the latter is that the GUI for
# this parameter type is currently undefined.
USE_FIELD_TYPES = True

# There are two approaches to mapping CWL workflow inputs to Galaxy workflow
# steps. The first is to simply map everything to expressions, stick them into
# files, and use data inputs; the second is to use parameter_input steps with
# field types. We dispatch on USE_FIELD_TYPES to choose for now, but the two
# flags may diverge later.
# There are open issues with each approach:
# - Mapping everything to files makes the GUI harder to imagine but the backend
#   easier to manage in some ways.
USE_STEP_PARAMETERS = USE_FIELD_TYPES

TypeRepresentation = collections.namedtuple("TypeRepresentation", ["name", "galaxy_param_type", "label", "collection_type"])
TYPE_REPRESENTATIONS = [
    TypeRepresentation("null", NO_GALAXY_INPUT, "no input", None),
    TypeRepresentation("integer", INPUT_TYPE.INTEGER, "an integer", None),
    TypeRepresentation("float", INPUT_TYPE.FLOAT, "a decimal number", None),
    TypeRepresentation("double", INPUT_TYPE.FLOAT, "a decimal number", None),
    TypeRepresentation("file", INPUT_TYPE.DATA, "a dataset", None),
    TypeRepresentation("directory", INPUT_TYPE.DATA, "a directory", None),
    TypeRepresentation("boolean", INPUT_TYPE.BOOLEAN, "a boolean", None),
    TypeRepresentation("text", INPUT_TYPE.TEXT, "a simple text field", None),
    TypeRepresentation("record", INPUT_TYPE.DATA_COLLECTON, "record as a dataset collection", "record"),
    TypeRepresentation("json", INPUT_TYPE.TEXT, "arbitrary JSON structure", None),
    TypeRepresentation("array", INPUT_TYPE.DATA_COLLECTON, "as a dataset list", "list"),
    TypeRepresentation("enum", INPUT_TYPE.TEXT, "enum value", None),  # TODO: make this a select...
    TypeRepresentation("field", INPUT_TYPE.FIELD, "arbitrary JSON structure", None),
]
FIELD_TYPE_REPRESENTATION = TYPE_REPRESENTATIONS[-1]
TypeRepresentation.uses_param = lambda self: self.galaxy_param_type is not NO_GALAXY_INPUT

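# Map each CWL type name to the TypeRepresentation names it may be presented
# as on the Galaxy side; the mapping differs depending on whether field-typed
# parameters are in use (see USE_FIELD_TYPES above).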
if not USE_FIELD_TYPES:
    CWL_TYPE_TO_REPRESENTATIONS = {
        "Any": ["integer", "float", "file", "boolean", "text", "record", "json"],
        "array": ["array"],
        "string": ["text"],
        "boolean": ["boolean"],
        "int": ["integer"],
        "float": ["float"],
        "File": ["file"],
        "Directory": ["directory"],
        "null": ["null"],
        "record": ["record"],
    }
else:
    CWL_TYPE_TO_REPRESENTATIONS = {
        "Any": ["field"],
        "array": ["array"],
        "string": ["text"],
        "boolean": ["boolean"],
        "int": ["integer"],
        "float": ["float"],
        "File": ["file"],
        "Directory": ["directory"],
        "null": ["null"],
        "record": ["record"],
        "enum": ["enum"],
        "double": ["double"],
    }


def type_representation_from_name(type_representation_name):
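    """Look up the TypeRepresentation with the given name in TYPE_REPRESENTATIONS."""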
    for type_representation in TYPE_REPRESENTATIONS:
        if type_representation.name == type_representation_name:
            return type_representation

    assert False


def type_descriptions_for_field_types(field_types):
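    """Return the TypeRepresentations that the supplied CWL field types can map to."""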
    type_representation_names = set()
    for field_type in field_types:
        if isinstance(field_type, dict) and field_type.get("type"):
            field_type = field_type.get("type")

        try:
            type_representation_names_for_field_type = CWL_TYPE_TO_REPRESENTATIONS.get(field_type)
        except TypeError:
            raise Exception("Failed to convert field_type %s" % field_type)
        if type_representation_names_for_field_type is None:
            raise Exception("Failed to convert type %s" % field_type)
        type_representation_names.update(type_representation_names_for_field_type)
    type_representations = []
    for type_representation in TYPE_REPRESENTATIONS:
        if type_representation.name in type_representation_names:
            type_representations.append(type_representation)
    return type_representations


def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper):
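    """Build the CWL JSON description (File, expression value, or Directory) for a wrapped Galaxy dataset.

    Secondary files stored under the dataset's extra files path are symlinked
    into inputs_dir and listed under "secondaryFiles".
    """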
    if dataset_wrapper.ext == "expression.json":
        with open(dataset_wrapper.file_name, "r") as f:
            return json.load(f)

    if dataset_wrapper.ext == "directory":
        return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)

    extra_files_path = dataset_wrapper.extra_files_path
    secondary_files_path = os.path.join(extra_files_path, "__secondary_files__")
    path = str(dataset_wrapper)
    raw_file_object = {"class": "File"}

    if os.path.exists(secondary_files_path):
        safe_makedirs(inputs_dir)
        name = os.path.basename(path)
        new_input_path = os.path.join(inputs_dir, name)
        os.symlink(path, new_input_path)
        secondary_files = []
        for secondary_file_name in os.listdir(secondary_files_path):
            secondary_file_path = os.path.join(secondary_files_path, secondary_file_name)
            target = os.path.join(inputs_dir, secondary_file_name)
            log.info("linking [%s] to [%s]" % (secondary_file_path, target))
            os.symlink(secondary_file_path, target)
            is_dir = os.path.isdir(os.path.realpath(secondary_file_path))
            secondary_files.append({"class": "File" if not is_dir else "Directory", "location": target})

        raw_file_object["secondaryFiles"] = secondary_files
        path = new_input_path

    raw_file_object["location"] = path

    # Verify it isn't a NoneDataset
    if dataset_wrapper.unsanitized:
        raw_file_object["size"] = int(dataset_wrapper.get_size())

    set_basename_and_derived_properties(raw_file_object, str(dataset_wrapper.created_from_basename or dataset_wrapper.name))
    return raw_file_object


def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
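    """Build the CWL Directory JSON description for a wrapped dataset of the 'directory' datatype."""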
    assert dataset_wrapper.ext == "directory"

    # get directory name
    archive_name = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    nameroot, nameext = os.path.splitext(archive_name)
    directory_name = nameroot  # assume archive file name contains the directory name

    # get archive location
    try:
        archive_location = dataset_wrapper.unsanitized.file_name
    except Exception:
        archive_location = None

    directory_json = {"location": dataset_wrapper.extra_files_path,
                      "class": "Directory",
                      "name": directory_name,
                      "archive_location": archive_location,
                      "archive_nameext": nameext,
                      "archive_nameroot": nameroot}

    return directory_json


def collection_wrapper_to_array(inputs_dir, wrapped_value):
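    """Convert a wrapped list collection to a CWL array of File JSON objects."""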
    rval = []
    for value in wrapped_value:
        rval.append(dataset_wrapper_to_file_json(inputs_dir, value))
    return rval


def collection_wrapper_to_record(inputs_dir, wrapped_value):
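    """Convert a wrapped record collection to an ordered mapping of element name to File JSON."""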
    rval = collections.OrderedDict()
    for key, value in wrapped_value.items():
        rval[key] = dataset_wrapper_to_file_json(inputs_dir, value)
    return rval


def to_cwl_job(tool, param_dict, local_working_directory):
    """ tool is Galaxy's representation of the tool and param_dict is the
    parameter dictionary with wrapped values.
    """
    tool_proxy = tool._cwl_tool_proxy
    input_fields = tool_proxy.input_fields()
    inputs = tool.inputs
    input_json = {}

    inputs_dir = os.path.join(local_working_directory, "_inputs")

    def simple_value(input, param_dict_value, type_representation_name=None):
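        """Convert a single wrapped Galaxy parameter value to its CWL job JSON equivalent."""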
        type_representation = type_representation_from_name(type_representation_name)
        # Hmm... cwl_type isn't really the cwl type in every case,
        # like in the case of json for instance.

        if type_representation.galaxy_param_type is NO_GALAXY_INPUT:
            assert param_dict_value is None
            return None

        if type_representation.name == "file":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "directory":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "integer":
            return int(str(param_dict_value))
        elif type_representation.name == "long":
            return int(str(param_dict_value))
        elif type_representation.name in ["float", "double"]:
            return float(str(param_dict_value))
        elif type_representation.name == "boolean":
            return string_as_bool(param_dict_value)
        elif type_representation.name == "text":
            return str(param_dict_value)
        elif type_representation.name == "enum":
            return str(param_dict_value)
        elif type_representation.name == "json":
            raw_value = param_dict_value.value
            return json.loads(raw_value)
        elif type_representation.name == "field":
            if param_dict_value is None:
                return None
            if hasattr(param_dict_value, "value"):
                # Is InputValueWrapper
                rval = param_dict_value.value
                if isinstance(rval, dict) and "src" in rval and rval["src"] == "json":
                    # needed for wf_step_connect_undeclared_param, so non-file defaults?
                    return rval["value"]
                return rval
            elif not param_dict_value.is_collection:
                # Is DatasetFilenameWrapper
                return dataset_wrapper_to_file_json(inputs_dir, param_dict_value)
            else:
                # Is DatasetCollectionWrapper
                hdca_wrapper = param_dict_value
                if hdca_wrapper.collection_type == "list":
                    # TODO: generalize to lists of lists and lists of non-files...
                    return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
                elif hdca_wrapper.collection_type.collection_type == "record":
                    return collection_wrapper_to_record(inputs_dir, hdca_wrapper)

        elif type_representation.name == "array":
            # TODO: generalize to lists of lists and lists of non-files...
            return collection_wrapper_to_array(inputs_dir, param_dict_value)
        elif type_representation.name == "record":
            return collection_wrapper_to_record(inputs_dir, param_dict_value)
        else:
            return str(param_dict_value)

    for input_name, input in inputs.items():
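        # Galaxy repeats encode CWL arrays, Galaxy conditionals encode CWL
        # union types (dispatched on the synthetic _cwl__type_ case), and
        # everything else is converted directly via simple_value().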
        if input.type == "repeat":
            only_input = next(iter(input.inputs.values()))
            array_value = []
            for instance in param_dict[input_name]:
                array_value.append(simple_value(only_input, instance[input_name[:-len("_repeat")]]))
            input_json[input_name[:-len("_repeat")]] = array_value
        elif input.type == "conditional":
            assert input_name in param_dict, "No value for %s in %s" % (input_name, param_dict)
            current_case = param_dict[input_name]["_cwl__type_"]
            if str(current_case) != "null":  # str because it is a wrapped...
                case_index = input.get_current_case(current_case)
                case_input = input.cases[case_index].inputs["_cwl__value_"]
                case_value = param_dict[input_name]["_cwl__value_"]
                input_json[input_name] = simple_value(case_input, case_value, current_case)
        else:
            matched_field = None
            for field in input_fields:
                if field["name"] == input_name:
                    matched_field = field
            field_type = field_to_field_type(matched_field)
            if isinstance(field_type, list):
                assert USE_FIELD_TYPES
                type_descriptions = [FIELD_TYPE_REPRESENTATION]
            else:
                type_descriptions = type_descriptions_for_field_types([field_type])
            assert len(type_descriptions) == 1
            type_description_name = type_descriptions[0].name
            input_json[input_name] = simple_value(input, param_dict[input_name], type_description_name)

    log.debug("Converted Galaxy tool state to CWL job state %s" % input_json)
    return input_json


def to_galaxy_parameters(tool, as_dict):
    """ tool is Galaxy's representation of the tool and as_dict is a Galaxified
    representation of the input JSON (HDA references instead of file paths, for instance).
    """
    inputs = tool.inputs
    galaxy_request = {}

    def from_simple_value(input, param_dict_value, type_representation_name=None):
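        """Convert a CWL job value into the value Galaxy expects for this parameter (JSON values are serialized to a string)."""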
        if type_representation_name == "json":
            return json.dumps(param_dict_value)
        else:
            return param_dict_value

    for input_name, input in inputs.items():
        as_dict_value = as_dict.get(input_name, NOT_PRESENT)
        galaxy_input_type = input.type

        if galaxy_input_type == "repeat":
            if input_name not in as_dict:
                continue

            only_input = next(iter(input.inputs.values()))
            for index, value in enumerate(as_dict_value):
                key = "%s_repeat_0|%s" % (input_name, only_input.name)
                galaxy_value = from_simple_value(only_input, value)
                galaxy_request[key] = galaxy_value
        elif galaxy_input_type == "conditional":
            case_strings = input.case_strings
            # TODO: less crazy handling of defaults...
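            # Sniff which conditional case (CWL union member) the incoming
            # value belongs to by checking its Python type against the case
            # strings this conditional offers.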
            if (as_dict_value is NOT_PRESENT or as_dict_value is None) and "null" in case_strings:
                type_representation_name = "null"
            elif (as_dict_value is NOT_PRESENT or as_dict_value is None):
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]. Non-null property must be set." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            elif isinstance(as_dict_value, bool) and "boolean" in case_strings:
                type_representation_name = "boolean"
            elif isinstance(as_dict_value, int) and "integer" in case_strings:
                type_representation_name = "integer"
            elif isinstance(as_dict_value, int) and "long" in case_strings:
                type_representation_name = "long"
            elif isinstance(as_dict_value, (int, float)) and "float" in case_strings:
                type_representation_name = "float"
            elif isinstance(as_dict_value, (int, float)) and "double" in case_strings:
                type_representation_name = "double"
            elif isinstance(as_dict_value, string_types) and "string" in case_strings:
                type_representation_name = "string"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "file" in case_strings:
                type_representation_name = "file"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "directory" in case_strings:
                # TODO: can't disambiguate with the above if both are available...
                type_representation_name = "directory"
            elif "field" in case_strings:
                type_representation_name = "field"
            elif "json" in case_strings and as_dict_value is not None:
                type_representation_name = "json"
            else:
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            galaxy_request["%s|_cwl__type_" % input_name] = type_representation_name
            if type_representation_name != "null":
                current_case_index = input.get_current_case(type_representation_name)
                current_case_inputs = input.cases[current_case_index].inputs
                current_case_input = current_case_inputs["_cwl__value_"]
                galaxy_value = from_simple_value(current_case_input, as_dict_value, type_representation_name)
                galaxy_request["%s|_cwl__value_" % input_name] = galaxy_value
        elif as_dict_value is NOT_PRESENT:
            continue
        else:
            galaxy_value = from_simple_value(input, as_dict_value)
            galaxy_request[input_name] = galaxy_value

    log.info("Converted galaxy_request is %s" % galaxy_request)
    return galaxy_request


def field_to_field_type(field):
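    """Extract the CWL type from a field definition, unwrapping nested dicts and single-entry type lists."""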
    field_type = field["type"]
    if isinstance(field_type, dict):
        field_type = field_type["type"]
    if isinstance(field_type, list):
        field_type_length = len(field_type)
        if field_type_length == 0:
            raise Exception("Zero-length type list encountered, invalid CWL?")
        elif field_type_length == 1:
            field_type = field_type[0]

    return field_type