comparison keras_deep_learning.py @ 0:af2624d5ab32 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author bgruening
date Sat, 01 May 2021 01:24:32 +0000
parents
children 9349ed2749c6
import argparse
import json
import pickle
import warnings
from ast import literal_eval

import keras
import pandas as pd
import six
from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr
from keras.models import Model, Sequential

safe_eval = SafeEval()


def _handle_shape(literal):
    """
    Evaluate an integer or a list/tuple of integers from a string

    Parameters
    ----------
    literal : str
    """
    literal = literal.strip()
    if not literal:
        return None
    try:
        return literal_eval(literal)
    except NameError as e:
        print(e)
        return literal
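# Illustrative use of _handle_shape (hypothetical inputs; shown as comments,
# not executed):
#   _handle_shape("(100, 100, 3)")  ->  (100, 100, 3)
#   _handle_shape("32")             ->  32
#   _handle_shape("")               ->  None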


def _handle_regularizer(literal):
    """
    Construct regularizer from string literal

    Parameters
    ----------
    literal : str. E.g. '(0.1, 0)'
    """
    literal = literal.strip()
    if not literal:
        return None

    l1, l2 = literal_eval(literal)

    if not l1 and not l2:
        return None

    if l1 is None:
        l1 = 0.0
    if l2 is None:
        l2 = 0.0

    return keras.regularizers.l1_l2(l1=l1, l2=l2)
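# Illustrative use of _handle_regularizer (hypothetical inputs):
#   _handle_regularizer("(0.1, 0)")  ->  L1L2 regularizer with l1=0.1, l2=0
#   _handle_regularizer("(0, 0)")    ->  None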


def _handle_constraint(config):
    """
    Construct constraint from galaxy tool parameters.
    Assumes a correctly formatted dictionary.

    Parameters
    ----------
    config : dict. E.g.
        "bias_constraint":
            {"constraint_options":
                {"max_value": 1.0,
                 "min_value": 0.0,
                 "axis": "[0, 1, 2]"
                },
             "constraint_type":
                "MinMaxNorm"
            }
    """
    constraint_type = config["constraint_type"]
    if constraint_type in ("None", ""):
        return None

    klass = getattr(keras.constraints, constraint_type)
    options = config.get("constraint_options", {})
    if "axis" in options:
        options["axis"] = literal_eval(options["axis"])

    return klass(**options)
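# Illustrative use of _handle_constraint, mirroring the docstring example
# (hypothetical parameter dict; shown as comments, not executed):
#   _handle_constraint({"constraint_type": "MinMaxNorm",
#                       "constraint_options": {"max_value": 1.0,
#                                              "min_value": 0.0,
#                                              "axis": "[0, 1, 2]"}})
#       ->  keras.constraints.MinMaxNorm(max_value=1.0, min_value=0.0,
#                                        axis=[0, 1, 2])
#   _handle_constraint({"constraint_type": "None"})  ->  None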


def _handle_lambda(literal):
    # Lambda/function strings are not evaluated (no support for lambda eval);
    # always return None.
    return None


def _handle_layer_parameters(params):
    """
    Handle all kinds of layer parameter values
    """
    for key, value in six.iteritems(params):
        if value in ("None", ""):
            params[key] = None
            continue

        if type(value) in [int, float, bool] or (
            type(value) is str and value.isalpha()
        ):
            continue

        if (
            key
            in [
                "input_shape",
                "noise_shape",
                "shape",
                "batch_shape",
                "target_shape",
                "dims",
                "kernel_size",
                "strides",
                "dilation_rate",
                "output_padding",
                "cropping",
                "size",
                "padding",
                "pool_size",
                "axis",
                "shared_axes",
            ]
            and isinstance(value, str)
        ):
            params[key] = _handle_shape(value)

        elif key.endswith("_regularizer") and isinstance(value, dict):
            params[key] = _handle_regularizer(value)

        elif key.endswith("_constraint") and isinstance(value, dict):
            params[key] = _handle_constraint(value)

        elif key == "function":  # No support for lambda/function eval
            params.pop(key)

    return params
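# Illustrative use of _handle_layer_parameters (hypothetical layer options):
#   _handle_layer_parameters({"filters": 32, "activation": "relu",
#                             "kernel_size": "(3, 3)", "bias_constraint": "None"})
#       ->  {"filters": 32, "activation": "relu",
#            "kernel_size": (3, 3), "bias_constraint": None}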


def get_sequential_model(config):
    """
    Construct keras Sequential model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    model = Sequential()
    input_shape = _handle_shape(config["input_shape"])
    layers = config["layers"]
    for layer in layers:
        options = layer["layer_selection"]
        layer_type = options.pop("layer_type")
        klass = getattr(keras.layers, layer_type)
        kwargs = options.pop("kwargs", "")

        # layer parameters need special care
        options = _handle_layer_parameters(options)

        if kwargs:
            kwargs = safe_eval("dict(" + kwargs + ")")
            options.update(kwargs)

        # add input_shape to the first layer only
        if not getattr(model, "_layers") and input_shape is not None:
            options["input_shape"] = input_shape

        model.add(klass(**options))

    return model
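# A minimal sketch of the `config` this function expects, derived from the
# code above (layer values are hypothetical):
#   config = {
#       "input_shape": "(784,)",
#       "layers": [
#           {"layer_selection": {"layer_type": "Dense", "units": 32,
#                                "activation": "relu", "kwargs": ""}},
#           {"layer_selection": {"layer_type": "Dense", "units": 10,
#                                "activation": "softmax", "kwargs": ""}},
#       ],
#   }
#   model = get_sequential_model(config)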


def get_functional_model(config):
    """
    Construct keras functional model from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    layers = config["layers"]
    all_layers = []
    for layer in layers:
        options = layer["layer_selection"]
        layer_type = options.pop("layer_type")
        klass = getattr(keras.layers, layer_type)
        inbound_nodes = options.pop("inbound_nodes", None)
        kwargs = options.pop("kwargs", "")

        # layer parameters need special care
        options = _handle_layer_parameters(options)

        if kwargs:
            kwargs = safe_eval("dict(" + kwargs + ")")
            options.update(kwargs)

        # merge layers
        if "merging_layers" in options:
            idxs = literal_eval(options.pop("merging_layers"))
            merging_layers = [all_layers[i - 1] for i in idxs]
            new_layer = klass(**options)(merging_layers)
        # non-input layers
        elif inbound_nodes is not None:
            new_layer = klass(**options)(all_layers[inbound_nodes - 1])
        # input layers
        else:
            new_layer = klass(**options)

        all_layers.append(new_layer)

    input_indexes = _handle_shape(config["input_layers"])
    input_layers = [all_layers[i - 1] for i in input_indexes]

    output_indexes = _handle_shape(config["output_layers"])
    output_layers = [all_layers[i - 1] for i in output_indexes]

    return Model(inputs=input_layers, outputs=output_layers)
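# A minimal sketch of a functional `config` (hypothetical values); indexes in
# "inbound_nodes", "input_layers" and "output_layers" are 1-based:
#   config = {
#       "input_layers": "[1]",
#       "output_layers": "[2]",
#       "layers": [
#           {"layer_selection": {"layer_type": "Input",
#                                "shape": "(784,)", "kwargs": ""}},
#           {"layer_selection": {"layer_type": "Dense", "inbound_nodes": 1,
#                                "units": 10, "activation": "softmax",
#                                "kwargs": ""}},
#       ],
#   }
#   model = get_functional_model(config)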


def get_batch_generator(config):
    """
    Construct keras online data generator from Galaxy tool parameters

    Parameters
    ----------
    config : dictionary, galaxy tool parameters loaded by JSON
    """
    generator_type = config.pop("generator_type")
    if generator_type == "none":
        return None

    klass = try_get_attr("galaxy_ml.preprocessors", generator_type)

    if generator_type == "GenomicIntervalBatchGenerator":
        config["ref_genome_path"] = "to_be_determined"
        config["intervals_path"] = "to_be_determined"
        config["target_path"] = "to_be_determined"
        config["features"] = "to_be_determined"
    else:
        config["fasta_path"] = "to_be_determined"

    return klass(**config)
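# Illustrative behaviour (hypothetical config; the "to_be_determined" paths
# are filled in downstream by the Galaxy tool):
#   get_batch_generator({"generator_type": "none"})  ->  None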


def config_keras_model(inputs, outfile):
    """
    Configure Keras model layers and output the model as JSON

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from the `keras_model_config` tool.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    model_type = inputs["model_selection"]["model_type"]
    layers_config = inputs["model_selection"]

    if model_type == "sequential":
        model = get_sequential_model(layers_config)
    else:
        model = get_functional_model(layers_config)

    json_string = model.to_json()

    with open(outfile, "w") as f:
        json.dump(json.loads(json_string), f, indent=2)
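# A minimal sketch of how this function is driven (hypothetical inputs; the
# nested "layers" follow the structure shown after get_sequential_model):
#   inputs = {"model_selection": {"model_type": "sequential",
#                                 "input_shape": "(784,)",
#                                 "layers": [...]}}
#   config_keras_model(inputs, "keras_model.json")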


def build_keras_model(
    inputs,
    outfile,
    model_json,
    infile_weights=None,
    batch_mode=False,
    outfile_params=None,
):
    """
    Build model for the `keras_model_builder` tool

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from the `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.
    """
    with open(model_json, "r") as f:
        json_model = json.load(f)

    config = json_model["config"]

    options = {}

    if json_model["class_name"] == "Sequential":
        options["model_type"] = "sequential"
        klass = Sequential
    elif json_model["class_name"] == "Model":
        options["model_type"] = "functional"
        klass = Model
    else:
        raise ValueError("Unknown Keras model class: %s" % json_model["class_name"])

    # load prefitted model
    if inputs["mode_selection"]["mode_type"] == "prefitted":
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model
    else:
        cls_name = inputs["mode_selection"]["learning_type"]
        klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name)

        options["loss"] = inputs["mode_selection"]["compile_params"]["loss"]
        options["optimizer"] = (
            inputs["mode_selection"]["compile_params"]["optimizer_selection"][
                "optimizer_type"
            ]
        ).lower()

        options.update(
            inputs["mode_selection"]["compile_params"]["optimizer_selection"][
                "optimizer_options"
            ]
        )

        train_metrics = inputs["mode_selection"]["compile_params"]["metrics"]
        if train_metrics[-1] == "none":
            train_metrics = train_metrics[:-1]
        options["metrics"] = train_metrics

        options.update(inputs["mode_selection"]["fit_params"])
        options["seed"] = inputs["mode_selection"]["random_seed"]

        if batch_mode:
            generator = get_batch_generator(
                inputs["mode_selection"]["generator_selection"]
            )
            options["data_batch_generator"] = generator
            options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"]
            options["class_positive_factor"] = inputs["mode_selection"][
                "class_positive_factor"
            ]
        estimator = klass(config, **options)
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for h_param in hyper_params:
                if h_param[1].endswith("verbose"):
                    h_param[0] = "@"
            df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"])
            df.to_csv(outfile_params, sep="\t", index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, "wb") as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
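# A minimal sketch of a prefitted-mode call (hypothetical paths and inputs):
#   inputs = {"mode_selection": {"mode_type": "prefitted"}}
#   build_keras_model(inputs=inputs, outfile="keras_model.pkl",
#                     model_json="keras_model.json",
#                     infile_weights="weights.h5")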


if __name__ == "__main__":
    warnings.simplefilter("ignore")

    aparser = argparse.ArgumentParser()
    aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
    aparser.add_argument("-m", "--model_json", dest="model_json")
    aparser.add_argument("-t", "--tool_id", dest="tool_id")
    aparser.add_argument("-w", "--infile_weights", dest="infile_weights")
    aparser.add_argument("-o", "--outfile", dest="outfile")
    aparser.add_argument("-p", "--outfile_params", dest="outfile_params")
    args = aparser.parse_args()

    input_json_path = args.inputs
    with open(input_json_path, "r") as param_handler:
        inputs = json.load(param_handler)

    tool_id = args.tool_id
    outfile = args.outfile
    outfile_params = args.outfile_params
    model_json = args.model_json
    infile_weights = args.infile_weights

    # for keras_model_config tool
    if tool_id == "keras_model_config":
        config_keras_model(inputs, outfile)

    # for keras_model_builder tool
    else:
        batch_mode = False
        if tool_id == "keras_batch_models":
            batch_mode = True

        build_keras_model(
            inputs=inputs,
            model_json=model_json,
            infile_weights=infile_weights,
            batch_mode=batch_mode,
            outfile=outfile,
            outfile_params=outfile_params,
        )
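# Example invocations (paths are placeholders):
#   python keras_deep_learning.py --inputs inputs.json \
#       --tool_id keras_model_config --outfile keras_model.json
#   python keras_deep_learning.py --inputs inputs.json \
#       --tool_id keras_model_builder --model_json keras_model.json \
#       --outfile keras_model.pkl --outfile_params params.tsv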