Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/galaxy/tool_util/cwl/representation.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler | 
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:d30785e31577 | 
|---|---|
| 1 """ This module is responsible for converting between Galaxy's tool | |
| 2 input description and the CWL description for a job json. """ | |
| 3 | |
| 4 import collections | |
| 5 import json | |
| 6 import logging | |
| 7 import os | |
| 8 | |
| 9 from six import string_types | |
| 10 | |
| 11 from galaxy.exceptions import RequestParameterInvalidException | |
| 12 from galaxy.util import safe_makedirs, string_as_bool | |
| 13 from galaxy.util.bunch import Bunch | |
| 14 from .util import set_basename_and_derived_properties | |
| 15 | |
| 16 | |
log = logging.getLogger(__name__)

# Sentinel meaning "no value was supplied for this key", as distinct from an
# explicit None value.
NOT_PRESENT = object()

# Sentinel used as the galaxy_param_type of a representation that has no
# corresponding Galaxy input.
NO_GALAXY_INPUT = object()

# Names of the Galaxy tool parameter types referenced by this module.
INPUT_TYPE = Bunch(
    DATA="data",
    INTEGER="integer",
    FLOAT="float",
    TEXT="text",
    BOOLEAN="boolean",
    SELECT="select",
    FIELD="field",
    CONDITIONAL="conditional",
    # Historical misspelling - kept because existing code references it.
    DATA_COLLECTON="data_collection",
    # Correctly spelled alias for new code.
    DATA_COLLECTION="data_collection",
)
| 34 | |
| 35 # There are two approaches to mapping CWL tool state to Galaxy tool state | |
| 36 # one is to map CWL types to compound Galaxy tool parameters combinations | |
| 37 # with conditionals and the other is to use a new Galaxy parameter type that | |
| 38 # allows unions, optional specifications, etc.... The problem with the former | |
| 39 # is that it doesn't work with the workflow parameters for instance and is | |
| 40 # very complex on the backend. The problem with the latter is that the GUI | |
| 41 # for this parameter type is undefined currently. | |
| 42 USE_FIELD_TYPES = True | |
| 43 | |
| 44 # There are two approaches to mapping CWL workflow inputs to Galaxy workflow | |
| 45 # steps. The first is to simply map everything to expressions and stick them into | |
| 46 # files and use data inputs - the second is to use parameter_input steps with | |
| 47 # fields types. We are dispatching on USE_FIELD_TYPES for now - to choose but | |
| 48 # may diverge later? | |
| 49 # There are open issues with each approach: | |
| 50 # - Mapping everything to files makes the GUI harder to imagine but the backend | |
| 51 # easier to manage in some ways. | |
| 52 USE_STEP_PARAMETERS = USE_FIELD_TYPES | |
| 53 | |
# One way of presenting a CWL value as a Galaxy input: the representation
# name, the Galaxy parameter type backing it, a human readable label and,
# for collection backed representations, the collection type.
TypeRepresentation = collections.namedtuple(
    "TypeRepresentation",
    ["name", "galaxy_param_type", "label", "collection_type"],
)


def _type_representation_uses_param(self):
    """Return True unless this representation has no Galaxy input."""
    return self.galaxy_param_type is not NO_GALAXY_INPUT


TypeRepresentation.uses_param = _type_representation_uses_param

TYPE_REPRESENTATIONS = list(map(TypeRepresentation._make, (
    ("null", NO_GALAXY_INPUT, "no input", None),
    ("integer", INPUT_TYPE.INTEGER, "an integer", None),
    ("float", INPUT_TYPE.FLOAT, "a decimal number", None),
    ("double", INPUT_TYPE.FLOAT, "a decimal number", None),
    ("file", INPUT_TYPE.DATA, "a dataset", None),
    ("directory", INPUT_TYPE.DATA, "a directory", None),
    ("boolean", INPUT_TYPE.BOOLEAN, "a boolean", None),
    ("text", INPUT_TYPE.TEXT, "a simple text field", None),
    ("record", INPUT_TYPE.DATA_COLLECTON, "record as a dataset collection", "record"),
    ("json", INPUT_TYPE.TEXT, "arbitrary JSON structure", None),
    ("array", INPUT_TYPE.DATA_COLLECTON, "as a dataset list", "list"),
    ("enum", INPUT_TYPE.TEXT, "enum value", None),  # TODO: make this a select...
    ("field", INPUT_TYPE.FIELD, "arbitrary JSON structure", None),
)))
# The "field" representation must stay the last entry of the list above.
FIELD_TYPE_REPRESENTATION = TYPE_REPRESENTATIONS[-1]
| 72 | |
# Maps a CWL type name to the names of the type representations (entries of
# TYPE_REPRESENTATIONS) that can be used for a value of that type.
CWL_TYPE_TO_REPRESENTATIONS = {
    "Any": (
        ["field"] if USE_FIELD_TYPES
        else ["integer", "float", "file", "boolean", "text", "record", "json"]
    ),
    "array": ["array"],
    "string": ["text"],
    "boolean": ["boolean"],
    "int": ["integer"],
    "float": ["float"],
    "File": ["file"],
    "Directory": ["directory"],
    "null": ["null"],
    "record": ["record"],
}
if USE_FIELD_TYPES:
    # These two CWL types are only mapped when field types are enabled.
    CWL_TYPE_TO_REPRESENTATIONS["enum"] = ["enum"]
    CWL_TYPE_TO_REPRESENTATIONS["double"] = ["double"]
| 101 | |
| 102 | |
def type_representation_from_name(type_representation_name):
    """Return the entry of TYPE_REPRESENTATIONS with the given name.

    Raises AssertionError when no representation carries that name.
    """
    for type_representation in TYPE_REPRESENTATIONS:
        if type_representation.name == type_representation_name:
            return type_representation

    # Raised explicitly instead of ``assert False`` so an unknown name still
    # fails under ``python -O`` rather than silently returning None.
    raise AssertionError("Unknown type representation name [%s]" % (type_representation_name,))
| 109 | |
| 110 | |
def type_descriptions_for_field_types(field_types):
    """Return the type representations usable for the given CWL field types.

    Each entry of field_types is either a type name or a dict whose "type"
    key holds the name. The result lists matching entries in the order they
    appear in TYPE_REPRESENTATIONS. Raises Exception for a type that cannot
    be looked up or has no mapping.
    """
    wanted_names = set()
    for field_type in field_types:
        if isinstance(field_type, dict):
            nested_type = field_type.get("type")
            if nested_type:
                field_type = nested_type

        try:
            names_for_type = CWL_TYPE_TO_REPRESENTATIONS.get(field_type)
        except TypeError:
            # Unhashable type descriptions cannot be used as a lookup key.
            raise Exception("Failed to convert field_type %s" % field_type)
        if names_for_type is None:
            raise Exception("Failed to convert type %s" % field_type)
        wanted_names.update(names_for_type)

    return [
        representation
        for representation in TYPE_REPRESENTATIONS
        if representation.name in wanted_names
    ]
| 129 | |
| 130 | |
def dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper):
    """Build the CWL job value for a wrapped dataset.

    Datasets with ext "expression.json" yield their parsed JSON content and
    datasets with ext "directory" are delegated to
    dataset_wrapper_to_directory_json. Anything else becomes a CWL "File"
    object. When the dataset's extra files contain a "__secondary_files__"
    directory, the dataset and each secondary file are symlinked into
    inputs_dir and the File object points at those links.
    """
    extension = dataset_wrapper.ext
    if extension == "expression.json":
        with open(dataset_wrapper.file_name, "r") as handle:
            return json.load(handle)
    if extension == "directory":
        return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)

    secondary_root = os.path.join(dataset_wrapper.extra_files_path, "__secondary_files__")
    location = str(dataset_wrapper)
    file_json = {"class": "File"}

    if os.path.exists(secondary_root):
        safe_makedirs(inputs_dir)
        staged_path = os.path.join(inputs_dir, os.path.basename(location))
        os.symlink(location, staged_path)

        staged_secondary_files = []
        for entry_name in os.listdir(secondary_root):
            source = os.path.join(secondary_root, entry_name)
            target = os.path.join(inputs_dir, entry_name)
            log.info("linking [%s] to [%s]" % (source, target))
            os.symlink(source, target)
            if os.path.isdir(os.path.realpath(source)):
                entry_class = "Directory"
            else:
                entry_class = "File"
            staged_secondary_files.append({"class": entry_class, "location": target})

        file_json["secondaryFiles"] = staged_secondary_files
        # From here on the File object refers to the staged symlink.
        location = staged_path

    file_json["location"] = location

    # Verify it isn't a NoneDataset
    if dataset_wrapper.unsanitized:
        file_json["size"] = int(dataset_wrapper.get_size())

    basename = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    set_basename_and_derived_properties(file_json, basename)
    return file_json
| 169 | |
| 170 | |
def dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper):
    """Build the CWL "Directory" object for a wrapped directory dataset.

    inputs_dir is accepted but not used by this function. The directory
    name is the dataset's basename (created_from_basename, falling back to
    name) with its final extension removed. Raises AssertionError when the
    dataset's ext is not "directory".
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if dataset_wrapper.ext != "directory":
        raise AssertionError(
            "Expected a dataset with ext [directory], got [%s]" % (dataset_wrapper.ext,)
        )

    # get directory name
    archive_name = str(dataset_wrapper.created_from_basename or dataset_wrapper.name)
    nameroot, nameext = os.path.splitext(archive_name)
    directory_name = nameroot  # assume archive file name contains the directory name

    # get archive location - best effort, the dataset may have no backing file
    try:
        archive_location = dataset_wrapper.unsanitized.file_name
    except Exception:
        archive_location = None

    return {
        "location": dataset_wrapper.extra_files_path,
        "class": "Directory",
        "name": directory_name,
        "archive_location": archive_location,
        "archive_nameext": nameext,
        "archive_nameroot": nameroot,
    }
| 193 | |
| 194 | |
def collection_wrapper_to_array(inputs_dir, wrapped_value):
    """Convert each element of a wrapped collection with dataset_wrapper_to_file_json.

    Returns the converted values as a list, in iteration order.
    """
    return [dataset_wrapper_to_file_json(inputs_dir, element) for element in wrapped_value]
| 200 | |
| 201 | |
def collection_wrapper_to_record(inputs_dir, wrapped_value):
    """Convert a wrapped collection into an ordered mapping of converted values.

    Keys come from wrapped_value.items(); each value is converted with
    dataset_wrapper_to_file_json and the key order is preserved.
    """
    return collections.OrderedDict(
        (key, dataset_wrapper_to_file_json(inputs_dir, element))
        for key, element in wrapped_value.items()
    )
| 207 | |
| 208 | |
def to_cwl_job(tool, param_dict, local_working_directory):
    """Convert Galaxy tool state into the CWL job JSON dictionary.

    tool is Galaxy's representation of the tool and param_dict is the
    parameter dictionary with wrapped values. The path
    "<local_working_directory>/_inputs" is passed to the dataset conversion
    helpers as their inputs directory.

    Returns a dict keyed by CWL input name. Raises Exception when a plain
    (non-repeat, non-conditional) Galaxy input has no CWL input field of the
    same name.
    """
    tool_proxy = tool._cwl_tool_proxy
    input_fields = tool_proxy.input_fields()
    inputs = tool.inputs
    input_json = {}

    inputs_dir = os.path.join(local_working_directory, "_inputs")

    def simple_value(input, param_dict_value, type_representation_name=None):
        """Convert one wrapped Galaxy value according to a type representation."""
        type_representation = type_representation_from_name(type_representation_name)
        # Hmm... cwl_type isn't really the cwl type in every case,
        # like in the case of json for instance.

        if type_representation.galaxy_param_type == NO_GALAXY_INPUT:
            assert param_dict_value is None
            return None

        if type_representation.name == "file":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_file_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "directory":
            dataset_wrapper = param_dict_value
            return dataset_wrapper_to_directory_json(inputs_dir, dataset_wrapper)
        elif type_representation.name == "integer":
            return int(str(param_dict_value))
        elif type_representation.name == "long":
            return int(str(param_dict_value))
        elif type_representation.name in ["float", "double"]:
            return float(str(param_dict_value))
        elif type_representation.name == "boolean":
            return string_as_bool(param_dict_value)
        elif type_representation.name == "text":
            return str(param_dict_value)
        elif type_representation.name == "enum":
            return str(param_dict_value)
        elif type_representation.name == "json":
            raw_value = param_dict_value.value
            return json.loads(raw_value)
        elif type_representation.name == "field":
            if param_dict_value is None:
                return None
            if hasattr(param_dict_value, "value"):
                # Is InputValueWrapper
                rval = param_dict_value.value
                if isinstance(rval, dict) and "src" in rval and rval["src"] == "json":
                    # needed for wf_step_connect_undeclared_param, so non-file defaults?
                    return rval["value"]
                return rval
            elif not param_dict_value.is_collection:
                # Is DatasetFilenameWrapper
                return dataset_wrapper_to_file_json(inputs_dir, param_dict_value)
            else:
                # Is DatasetCollectionWrapper
                hdca_wrapper = param_dict_value
                if hdca_wrapper.collection_type == "list":
                    # TODO: generalize to lists of lists and lists of non-files...
                    return collection_wrapper_to_array(inputs_dir, hdca_wrapper)
                elif hdca_wrapper.collection_type.collection_type == "record":
                    return collection_wrapper_to_record(inputs_dir, hdca_wrapper)
                # Any other collection type falls through and yields None.

        elif type_representation.name == "array":
            # TODO: generalize to lists of lists and lists of non-files...
            return collection_wrapper_to_array(inputs_dir, param_dict_value)
        elif type_representation.name == "record":
            return collection_wrapper_to_record(inputs_dir, param_dict_value)
        else:
            return str(param_dict_value)

    repeat_suffix_length = len("_repeat")
    for input_name, input in inputs.items():
        if input.type == "repeat":
            # The CWL name is the repeat's name without its "_repeat" suffix.
            cwl_name = input_name[:-repeat_suffix_length]
            only_input = next(iter(input.inputs.values()))
            # NOTE(review): simple_value is called without a type
            # representation name here, and no representation is named None,
            # so this lookup cannot succeed - confirm whether repeat inputs
            # are still expected to be supported.
            input_json[cwl_name] = [
                simple_value(only_input, instance[cwl_name])
                for instance in param_dict[input_name]
            ]
        elif input.type == "conditional":
            assert input_name in param_dict, "No value for %s in %s" % (input_name, param_dict)
            current_case = param_dict[input_name]["_cwl__type_"]
            if str(current_case) != "null":  # str because it is a wrapped...
                case_index = input.get_current_case(current_case)
                case_input = input.cases[case_index].inputs["_cwl__value_"]
                case_value = param_dict[input_name]["_cwl__value_"]
                input_json[input_name] = simple_value(case_input, case_value, current_case)
        else:
            matched_field = None
            for field in input_fields:
                if field["name"] == input_name:
                    matched_field = field
            if matched_field is None:
                # Fail with a clear message instead of an opaque TypeError
                # from subscripting None further down.
                raise Exception("No CWL input field found for Galaxy input [%s]" % (input_name,))
            field_type = field_to_field_type(matched_field)
            if isinstance(field_type, list):
                assert USE_FIELD_TYPES
                type_descriptions = [FIELD_TYPE_REPRESENTATION]
            else:
                type_descriptions = type_descriptions_for_field_types([field_type])
            assert len(type_descriptions) == 1
            type_description_name = type_descriptions[0].name
            input_json[input_name] = simple_value(input, param_dict[input_name], type_description_name)

    # Arguments are passed separately so formatting only happens when debug
    # logging is enabled.
    log.debug("Galaxy Tool State is CWL State is %s", input_json)
    return input_json
| 312 | |
| 313 | |
def to_galaxy_parameters(tool, as_dict):
    """Convert a CWL job dictionary into a flat Galaxy tool request.

    Tool is Galaxy's representation of the tool and as_dict is a Galaxified
    representation of the input json (no paths, HDA references for instance).

    Returns a dict of Galaxy request keys to values. Raises
    RequestParameterInvalidException when a conditional input's value cannot
    be matched to one of its cases, or is missing/None while the conditional
    has no "null" case.
    """
    inputs = tool.inputs
    galaxy_request = {}

    def from_simple_value(input, param_dict_value, type_representation_name=None):
        """Serialize "json" values to a string; pass everything else through."""
        if type_representation_name == "json":
            return json.dumps(param_dict_value)
        else:
            return param_dict_value

    for input_name, input in inputs.items():
        as_dict_value = as_dict.get(input_name, NOT_PRESENT)
        galaxy_input_type = input.type

        if galaxy_input_type == "repeat":
            if input_name not in as_dict:
                continue

            only_input = next(iter(input.inputs.values()))
            for index, value in enumerate(as_dict_value):
                # Each element gets its own indexed key; a fixed index would
                # make every element overwrite the previous one.
                key = "%s_repeat_%d|%s" % (input_name, index, only_input.name)
                galaxy_value = from_simple_value(only_input, value)
                galaxy_request[key] = galaxy_value
        elif galaxy_input_type == "conditional":
            case_strings = input.case_strings
            # TODO: less crazy handling of defaults...
            if (as_dict_value is NOT_PRESENT or as_dict_value is None) and "null" in case_strings:
                type_representation_name = "null"
            elif (as_dict_value is NOT_PRESENT or as_dict_value is None):
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]. Non-null property must be set." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            elif isinstance(as_dict_value, bool) and "boolean" in case_strings:
                # Checked before the integer cases because bool is an int subclass.
                type_representation_name = "boolean"
            elif isinstance(as_dict_value, int) and "integer" in case_strings:
                type_representation_name = "integer"
            elif isinstance(as_dict_value, int) and "long" in case_strings:
                type_representation_name = "long"
            elif isinstance(as_dict_value, (int, float)) and "float" in case_strings:
                type_representation_name = "float"
            elif isinstance(as_dict_value, (int, float)) and "double" in case_strings:
                type_representation_name = "double"
            elif isinstance(as_dict_value, string_types) and "string" in case_strings:
                type_representation_name = "string"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "file" in case_strings:
                type_representation_name = "file"
            elif isinstance(as_dict_value, dict) and "src" in as_dict_value and "id" in as_dict_value and "directory" in case_strings:
                # TODO: can't disambiguate with above if both are available...
                type_representation_name = "directory"
            elif "field" in case_strings:
                type_representation_name = "field"
            elif "json" in case_strings and as_dict_value is not None:
                type_representation_name = "json"
            else:
                raise RequestParameterInvalidException(
                    "Cannot translate CWL datatype - value [%s] of type [%s] with case_strings [%s]." % (
                        as_dict_value, type(as_dict_value), case_strings
                    )
                )
            galaxy_request["%s|_cwl__type_" % input_name] = type_representation_name
            if type_representation_name != "null":
                current_case_index = input.get_current_case(type_representation_name)
                current_case_inputs = input.cases[current_case_index].inputs
                current_case_input = current_case_inputs["_cwl__value_"]
                galaxy_value = from_simple_value(current_case_input, as_dict_value, type_representation_name)
                galaxy_request["%s|_cwl__value_" % input_name] = galaxy_value
        elif as_dict_value is NOT_PRESENT:
            continue
        else:
            galaxy_value = from_simple_value(input, as_dict_value)
            galaxy_request[input_name] = galaxy_value

    # Arguments are passed separately so formatting only happens when info
    # logging is enabled.
    log.info("Converted galaxy_request is %s", galaxy_request)
    return galaxy_request
| 393 | |
| 394 | |
def field_to_field_type(field):
    """Return the effective type declared by a CWL input field.

    A dict-valued "type" is replaced by its own "type" entry. A list with a
    single element is unwrapped to that element; longer lists are returned
    unchanged. Raises Exception for an empty type list.
    """
    declared = field["type"]
    if isinstance(declared, dict):
        declared = declared["type"]

    if not isinstance(declared, list):
        return declared
    if not declared:
        raise Exception("Zero-length type list encountered, invalid CWL?")
    return declared[0] if len(declared) == 1 else declared
