Mercurial > repos > bgruening > sklearn_sample_generator
comparison keras_deep_learning.py @ 37:0f460421b212 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author | bgruening |
---|---|
date | Sat, 01 May 2021 01:34:53 +0000 |
parents | 999e07f0a9fa |
children | 7f8fa89929e0 |
comparison
equal
deleted
inserted
replaced
36:999e07f0a9fa | 37:0f460421b212 |
---|---|
8 import pandas as pd | 8 import pandas as pd |
9 import six | 9 import six |
10 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr | 10 from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr |
11 from keras.models import Model, Sequential | 11 from keras.models import Model, Sequential |
12 | 12 |
13 | |
14 safe_eval = SafeEval() | 13 safe_eval = SafeEval() |
15 | 14 |
16 | 15 |
17 def _handle_shape(literal): | 16 def _handle_shape(literal): |
18 """Eval integer or list/tuple of integers from string | 17 """ |
18 Eval integer or list/tuple of integers from string | |
19 | 19 |
20 Parameters: | 20 Parameters: |
21 ----------- | 21 ----------- |
22 literal : str. | 22 literal : str. |
23 """ | 23 """ |
30 print(e) | 30 print(e) |
31 return literal | 31 return literal |
32 | 32 |
33 | 33 |
34 def _handle_regularizer(literal): | 34 def _handle_regularizer(literal): |
35 """Construct regularizer from string literal | 35 """ |
36 Construct regularizer from string literal | |
36 | 37 |
37 Parameters | 38 Parameters |
38 ---------- | 39 ---------- |
39 literal : str. E.g. '(0.1, 0)' | 40 literal : str. E.g. '(0.1, 0)' |
40 """ | 41 """ |
46 | 47 |
47 if not l1 and not l2: | 48 if not l1 and not l2: |
48 return None | 49 return None |
49 | 50 |
50 if l1 is None: | 51 if l1 is None: |
51 l1 = 0. | 52 l1 = 0.0 |
52 if l2 is None: | 53 if l2 is None: |
53 l2 = 0. | 54 l2 = 0.0 |
54 | 55 |
55 return keras.regularizers.l1_l2(l1=l1, l2=l2) | 56 return keras.regularizers.l1_l2(l1=l1, l2=l2) |
56 | 57 |
57 | 58 |
58 def _handle_constraint(config): | 59 def _handle_constraint(config): |
59 """Construct constraint from galaxy tool parameters. | 60 """ |
61 Construct constraint from galaxy tool parameters. | |
60 Suppose correct dictionary format | 62 Suppose correct dictionary format |
61 | 63 |
62 Parameters | 64 Parameters |
63 ---------- | 65 ---------- |
64 config : dict. E.g. | 66 config : dict. E.g. |
70 }, | 72 }, |
71 "constraint_type": | 73 "constraint_type": |
72 "MinMaxNorm" | 74 "MinMaxNorm" |
73 } | 75 } |
74 """ | 76 """ |
75 constraint_type = config['constraint_type'] | 77 constraint_type = config["constraint_type"] |
76 if constraint_type in ('None', ''): | 78 if constraint_type in ("None", ""): |
77 return None | 79 return None |
78 | 80 |
79 klass = getattr(keras.constraints, constraint_type) | 81 klass = getattr(keras.constraints, constraint_type) |
80 options = config.get('constraint_options', {}) | 82 options = config.get("constraint_options", {}) |
81 if 'axis' in options: | 83 if "axis" in options: |
82 options['axis'] = literal_eval(options['axis']) | 84 options["axis"] = literal_eval(options["axis"]) |
83 | 85 |
84 return klass(**options) | 86 return klass(**options) |
85 | 87 |
86 | 88 |
87 def _handle_lambda(literal): | 89 def _handle_lambda(literal): |
88 return None | 90 return None |
89 | 91 |
90 | 92 |
91 def _handle_layer_parameters(params): | 93 def _handle_layer_parameters(params): |
92 """Access to handle all kinds of parameters | 94 """ |
95 Access to handle all kinds of parameters | |
93 """ | 96 """ |
94 for key, value in six.iteritems(params): | 97 for key, value in six.iteritems(params): |
95 if value in ('None', ''): | 98 if value in ("None", ""): |
96 params[key] = None | 99 params[key] = None |
97 continue | 100 continue |
98 | 101 |
99 if type(value) in [int, float, bool]\ | 102 if type(value) in [int, float, bool] or ( |
100 or (type(value) is str and value.isalpha()): | 103 type(value) is str and value.isalpha() |
104 ): | |
101 continue | 105 continue |
102 | 106 |
103 if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', | 107 if ( |
104 'target_shape', 'dims', 'kernel_size', 'strides', | 108 key |
105 'dilation_rate', 'output_padding', 'cropping', 'size', | 109 in [ |
106 'padding', 'pool_size', 'axis', 'shared_axes'] \ | 110 "input_shape", |
107 and isinstance(value, str): | 111 "noise_shape", |
112 "shape", | |
113 "batch_shape", | |
114 "target_shape", | |
115 "dims", | |
116 "kernel_size", | |
117 "strides", | |
118 "dilation_rate", | |
119 "output_padding", | |
120 "cropping", | |
121 "size", | |
122 "padding", | |
123 "pool_size", | |
124 "axis", | |
125 "shared_axes", | |
126 ] | |
127 and isinstance(value, str) | |
128 ): | |
108 params[key] = _handle_shape(value) | 129 params[key] = _handle_shape(value) |
109 | 130 |
110 elif key.endswith('_regularizer') and isinstance(value, dict): | 131 elif key.endswith("_regularizer") and isinstance(value, dict): |
111 params[key] = _handle_regularizer(value) | 132 params[key] = _handle_regularizer(value) |
112 | 133 |
113 elif key.endswith('_constraint') and isinstance(value, dict): | 134 elif key.endswith("_constraint") and isinstance(value, dict): |
114 params[key] = _handle_constraint(value) | 135 params[key] = _handle_constraint(value) |
115 | 136 |
116 elif key == 'function': # No support for lambda/function eval | 137 elif key == "function": # No support for lambda/function eval |
117 params.pop(key) | 138 params.pop(key) |
118 | 139 |
119 return params | 140 return params |
120 | 141 |
121 | 142 |
122 def get_sequential_model(config): | 143 def get_sequential_model(config): |
123 """Construct keras Sequential model from Galaxy tool parameters | 144 """ |
145 Construct keras Sequential model from Galaxy tool parameters | |
124 | 146 |
125 Parameters: | 147 Parameters: |
126 ----------- | 148 ----------- |
127 config : dictionary, galaxy tool parameters loaded by JSON | 149 config : dictionary, galaxy tool parameters loaded by JSON |
128 """ | 150 """ |
129 model = Sequential() | 151 model = Sequential() |
130 input_shape = _handle_shape(config['input_shape']) | 152 input_shape = _handle_shape(config["input_shape"]) |
131 layers = config['layers'] | 153 layers = config["layers"] |
132 for layer in layers: | 154 for layer in layers: |
133 options = layer['layer_selection'] | 155 options = layer["layer_selection"] |
134 layer_type = options.pop('layer_type') | 156 layer_type = options.pop("layer_type") |
135 klass = getattr(keras.layers, layer_type) | 157 klass = getattr(keras.layers, layer_type) |
136 kwargs = options.pop('kwargs', '') | 158 kwargs = options.pop("kwargs", "") |
137 | 159 |
138 # parameters needs special care | 160 # parameters needs special care |
139 options = _handle_layer_parameters(options) | 161 options = _handle_layer_parameters(options) |
140 | 162 |
141 if kwargs: | 163 if kwargs: |
142 kwargs = safe_eval('dict(' + kwargs + ')') | 164 kwargs = safe_eval("dict(" + kwargs + ")") |
143 options.update(kwargs) | 165 options.update(kwargs) |
144 | 166 |
145 # add input_shape to the first layer only | 167 # add input_shape to the first layer only |
146 if not getattr(model, '_layers') and input_shape is not None: | 168 if not getattr(model, "_layers") and input_shape is not None: |
147 options['input_shape'] = input_shape | 169 options["input_shape"] = input_shape |
148 | 170 |
149 model.add(klass(**options)) | 171 model.add(klass(**options)) |
150 | 172 |
151 return model | 173 return model |
152 | 174 |
153 | 175 |
154 def get_functional_model(config): | 176 def get_functional_model(config): |
155 """Construct keras functional model from Galaxy tool parameters | 177 """ |
178 Construct keras functional model from Galaxy tool parameters | |
156 | 179 |
157 Parameters | 180 Parameters |
158 ----------- | 181 ----------- |
159 config : dictionary, galaxy tool parameters loaded by JSON | 182 config : dictionary, galaxy tool parameters loaded by JSON |
160 """ | 183 """ |
161 layers = config['layers'] | 184 layers = config["layers"] |
162 all_layers = [] | 185 all_layers = [] |
163 for layer in layers: | 186 for layer in layers: |
164 options = layer['layer_selection'] | 187 options = layer["layer_selection"] |
165 layer_type = options.pop('layer_type') | 188 layer_type = options.pop("layer_type") |
166 klass = getattr(keras.layers, layer_type) | 189 klass = getattr(keras.layers, layer_type) |
167 inbound_nodes = options.pop('inbound_nodes', None) | 190 inbound_nodes = options.pop("inbound_nodes", None) |
168 kwargs = options.pop('kwargs', '') | 191 kwargs = options.pop("kwargs", "") |
169 | 192 |
170 # parameters needs special care | 193 # parameters needs special care |
171 options = _handle_layer_parameters(options) | 194 options = _handle_layer_parameters(options) |
172 | 195 |
173 if kwargs: | 196 if kwargs: |
174 kwargs = safe_eval('dict(' + kwargs + ')') | 197 kwargs = safe_eval("dict(" + kwargs + ")") |
175 options.update(kwargs) | 198 options.update(kwargs) |
176 | 199 |
177 # merge layers | 200 # merge layers |
178 if 'merging_layers' in options: | 201 if "merging_layers" in options: |
179 idxs = literal_eval(options.pop('merging_layers')) | 202 idxs = literal_eval(options.pop("merging_layers")) |
180 merging_layers = [all_layers[i - 1] for i in idxs] | 203 merging_layers = [all_layers[i - 1] for i in idxs] |
181 new_layer = klass(**options)(merging_layers) | 204 new_layer = klass(**options)(merging_layers) |
182 # non-input layers | 205 # non-input layers |
183 elif inbound_nodes is not None: | 206 elif inbound_nodes is not None: |
184 new_layer = klass(**options)(all_layers[inbound_nodes - 1]) | 207 new_layer = klass(**options)(all_layers[inbound_nodes - 1]) |
186 else: | 209 else: |
187 new_layer = klass(**options) | 210 new_layer = klass(**options) |
188 | 211 |
189 all_layers.append(new_layer) | 212 all_layers.append(new_layer) |
190 | 213 |
191 input_indexes = _handle_shape(config['input_layers']) | 214 input_indexes = _handle_shape(config["input_layers"]) |
192 input_layers = [all_layers[i - 1] for i in input_indexes] | 215 input_layers = [all_layers[i - 1] for i in input_indexes] |
193 | 216 |
194 output_indexes = _handle_shape(config['output_layers']) | 217 output_indexes = _handle_shape(config["output_layers"]) |
195 output_layers = [all_layers[i - 1] for i in output_indexes] | 218 output_layers = [all_layers[i - 1] for i in output_indexes] |
196 | 219 |
197 return Model(inputs=input_layers, outputs=output_layers) | 220 return Model(inputs=input_layers, outputs=output_layers) |
198 | 221 |
199 | 222 |
200 def get_batch_generator(config): | 223 def get_batch_generator(config): |
201 """Construct keras online data generator from Galaxy tool parameters | 224 """ |
225 Construct keras online data generator from Galaxy tool parameters | |
202 | 226 |
203 Parameters | 227 Parameters |
204 ----------- | 228 ----------- |
205 config : dictionary, galaxy tool parameters loaded by JSON | 229 config : dictionary, galaxy tool parameters loaded by JSON |
206 """ | 230 """ |
207 generator_type = config.pop('generator_type') | 231 generator_type = config.pop("generator_type") |
208 if generator_type == 'none': | 232 if generator_type == "none": |
209 return None | 233 return None |
210 | 234 |
211 klass = try_get_attr('galaxy_ml.preprocessors', generator_type) | 235 klass = try_get_attr("galaxy_ml.preprocessors", generator_type) |
212 | 236 |
213 if generator_type == 'GenomicIntervalBatchGenerator': | 237 if generator_type == "GenomicIntervalBatchGenerator": |
214 config['ref_genome_path'] = 'to_be_determined' | 238 config["ref_genome_path"] = "to_be_determined" |
215 config['intervals_path'] = 'to_be_determined' | 239 config["intervals_path"] = "to_be_determined" |
216 config['target_path'] = 'to_be_determined' | 240 config["target_path"] = "to_be_determined" |
217 config['features'] = 'to_be_determined' | 241 config["features"] = "to_be_determined" |
218 else: | 242 else: |
219 config['fasta_path'] = 'to_be_determined' | 243 config["fasta_path"] = "to_be_determined" |
220 | 244 |
221 return klass(**config) | 245 return klass(**config) |
222 | 246 |
223 | 247 |
224 def config_keras_model(inputs, outfile): | 248 def config_keras_model(inputs, outfile): |
225 """ config keras model layers and output JSON | 249 """ |
250 config keras model layers and output JSON | |
226 | 251 |
227 Parameters | 252 Parameters |
228 ---------- | 253 ---------- |
229 inputs : dict | 254 inputs : dict |
230 loaded galaxy tool parameters from `keras_model_config` | 255 loaded galaxy tool parameters from `keras_model_config` |
231 tool. | 256 tool. |
232 outfile : str | 257 outfile : str |
233 Path to galaxy dataset containing keras model JSON. | 258 Path to galaxy dataset containing keras model JSON. |
234 """ | 259 """ |
235 model_type = inputs['model_selection']['model_type'] | 260 model_type = inputs["model_selection"]["model_type"] |
236 layers_config = inputs['model_selection'] | 261 layers_config = inputs["model_selection"] |
237 | 262 |
238 if model_type == 'sequential': | 263 if model_type == "sequential": |
239 model = get_sequential_model(layers_config) | 264 model = get_sequential_model(layers_config) |
240 else: | 265 else: |
241 model = get_functional_model(layers_config) | 266 model = get_functional_model(layers_config) |
242 | 267 |
243 json_string = model.to_json() | 268 json_string = model.to_json() |
244 | 269 |
245 with open(outfile, 'w') as f: | 270 with open(outfile, "w") as f: |
246 json.dump(json.loads(json_string), f, indent=2) | 271 json.dump(json.loads(json_string), f, indent=2) |
247 | 272 |
248 | 273 |
249 def build_keras_model(inputs, outfile, model_json, infile_weights=None, | 274 def build_keras_model( |
250 batch_mode=False, outfile_params=None): | 275 inputs, |
251 """ for `keras_model_builder` tool | 276 outfile, |
277 model_json, | |
278 infile_weights=None, | |
279 batch_mode=False, | |
280 outfile_params=None, | |
281 ): | |
282 """ | |
283 for `keras_model_builder` tool | |
252 | 284 |
253 Parameters | 285 Parameters |
254 ---------- | 286 ---------- |
255 inputs : dict | 287 inputs : dict |
256 loaded galaxy tool parameters from `keras_model_builder` tool. | 288 loaded galaxy tool parameters from `keras_model_builder` tool. |
263 batch_mode : bool, default=False | 295 batch_mode : bool, default=False |
264 Whether to build online batch classifier. | 296 Whether to build online batch classifier. |
265 outfile_params : str, default=None | 297 outfile_params : str, default=None |
266 File path to search parameters output. | 298 File path to search parameters output. |
267 """ | 299 """ |
268 with open(model_json, 'r') as f: | 300 with open(model_json, "r") as f: |
269 json_model = json.load(f) | 301 json_model = json.load(f) |
270 | 302 |
271 config = json_model['config'] | 303 config = json_model["config"] |
272 | 304 |
273 options = {} | 305 options = {} |
274 | 306 |
275 if json_model['class_name'] == 'Sequential': | 307 if json_model["class_name"] == "Sequential": |
276 options['model_type'] = 'sequential' | 308 options["model_type"] = "sequential" |
277 klass = Sequential | 309 klass = Sequential |
278 elif json_model['class_name'] == 'Model': | 310 elif json_model["class_name"] == "Model": |
279 options['model_type'] = 'functional' | 311 options["model_type"] = "functional" |
280 klass = Model | 312 klass = Model |
281 else: | 313 else: |
282 raise ValueError("Unknow Keras model class: %s" | 314 raise ValueError("Unknow Keras model class: %s" % json_model["class_name"]) |
283 % json_model['class_name']) | |
284 | 315 |
285 # load prefitted model | 316 # load prefitted model |
286 if inputs['mode_selection']['mode_type'] == 'prefitted': | 317 if inputs["mode_selection"]["mode_type"] == "prefitted": |
287 estimator = klass.from_config(config) | 318 estimator = klass.from_config(config) |
288 estimator.load_weights(infile_weights) | 319 estimator.load_weights(infile_weights) |
289 # build train model | 320 # build train model |
290 else: | 321 else: |
291 cls_name = inputs['mode_selection']['learning_type'] | 322 cls_name = inputs["mode_selection"]["learning_type"] |
292 klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name) | 323 klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name) |
293 | 324 |
294 options['loss'] = (inputs['mode_selection'] | 325 options["loss"] = inputs["mode_selection"]["compile_params"]["loss"] |
295 ['compile_params']['loss']) | 326 options["optimizer"] = ( |
296 options['optimizer'] =\ | 327 inputs["mode_selection"]["compile_params"]["optimizer_selection"][ |
297 (inputs['mode_selection']['compile_params'] | 328 "optimizer_type" |
298 ['optimizer_selection']['optimizer_type']).lower() | 329 ] |
299 | 330 ).lower() |
300 options.update((inputs['mode_selection']['compile_params'] | 331 |
301 ['optimizer_selection']['optimizer_options'])) | 332 options.update( |
302 | 333 ( |
303 train_metrics = inputs['mode_selection']['compile_params']['metrics'] | 334 inputs["mode_selection"]["compile_params"]["optimizer_selection"][ |
304 if train_metrics[-1] == 'none': | 335 "optimizer_options" |
336 ] | |
337 ) | |
338 ) | |
339 | |
340 train_metrics = inputs["mode_selection"]["compile_params"]["metrics"] | |
341 if train_metrics[-1] == "none": | |
305 train_metrics = train_metrics[:-1] | 342 train_metrics = train_metrics[:-1] |
306 options['metrics'] = train_metrics | 343 options["metrics"] = train_metrics |
307 | 344 |
308 options.update(inputs['mode_selection']['fit_params']) | 345 options.update(inputs["mode_selection"]["fit_params"]) |
309 options['seed'] = inputs['mode_selection']['random_seed'] | 346 options["seed"] = inputs["mode_selection"]["random_seed"] |
310 | 347 |
311 if batch_mode: | 348 if batch_mode: |
312 generator = get_batch_generator(inputs['mode_selection'] | 349 generator = get_batch_generator( |
313 ['generator_selection']) | 350 inputs["mode_selection"]["generator_selection"] |
314 options['data_batch_generator'] = generator | 351 ) |
315 options['prediction_steps'] = \ | 352 options["data_batch_generator"] = generator |
316 inputs['mode_selection']['prediction_steps'] | 353 options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"] |
317 options['class_positive_factor'] = \ | 354 options["class_positive_factor"] = inputs["mode_selection"][ |
318 inputs['mode_selection']['class_positive_factor'] | 355 "class_positive_factor" |
356 ] | |
319 estimator = klass(config, **options) | 357 estimator = klass(config, **options) |
320 if outfile_params: | 358 if outfile_params: |
321 hyper_params = get_search_params(estimator) | 359 hyper_params = get_search_params(estimator) |
322 # TODO: remove this after making `verbose` tunable | 360 # TODO: remove this after making `verbose` tunable |
323 for h_param in hyper_params: | 361 for h_param in hyper_params: |
324 if h_param[1].endswith('verbose'): | 362 if h_param[1].endswith("verbose"): |
325 h_param[0] = '@' | 363 h_param[0] = "@" |
326 df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value']) | 364 df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"]) |
327 df.to_csv(outfile_params, sep='\t', index=False) | 365 df.to_csv(outfile_params, sep="\t", index=False) |
328 | 366 |
329 print(repr(estimator)) | 367 print(repr(estimator)) |
330 # save model by pickle | 368 # save model by pickle |
331 with open(outfile, 'wb') as f: | 369 with open(outfile, "wb") as f: |
332 pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) | 370 pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) |
333 | 371 |
334 | 372 |
335 if __name__ == '__main__': | 373 if __name__ == "__main__": |
336 warnings.simplefilter('ignore') | 374 warnings.simplefilter("ignore") |
337 | 375 |
338 aparser = argparse.ArgumentParser() | 376 aparser = argparse.ArgumentParser() |
339 aparser.add_argument("-i", "--inputs", dest="inputs", required=True) | 377 aparser.add_argument("-i", "--inputs", dest="inputs", required=True) |
340 aparser.add_argument("-m", "--model_json", dest="model_json") | 378 aparser.add_argument("-m", "--model_json", dest="model_json") |
341 aparser.add_argument("-t", "--tool_id", dest="tool_id") | 379 aparser.add_argument("-t", "--tool_id", dest="tool_id") |
343 aparser.add_argument("-o", "--outfile", dest="outfile") | 381 aparser.add_argument("-o", "--outfile", dest="outfile") |
344 aparser.add_argument("-p", "--outfile_params", dest="outfile_params") | 382 aparser.add_argument("-p", "--outfile_params", dest="outfile_params") |
345 args = aparser.parse_args() | 383 args = aparser.parse_args() |
346 | 384 |
347 input_json_path = args.inputs | 385 input_json_path = args.inputs |
348 with open(input_json_path, 'r') as param_handler: | 386 with open(input_json_path, "r") as param_handler: |
349 inputs = json.load(param_handler) | 387 inputs = json.load(param_handler) |
350 | 388 |
351 tool_id = args.tool_id | 389 tool_id = args.tool_id |
352 outfile = args.outfile | 390 outfile = args.outfile |
353 outfile_params = args.outfile_params | 391 outfile_params = args.outfile_params |
354 model_json = args.model_json | 392 model_json = args.model_json |
355 infile_weights = args.infile_weights | 393 infile_weights = args.infile_weights |
356 | 394 |
357 # for keras_model_config tool | 395 # for keras_model_config tool |
358 if tool_id == 'keras_model_config': | 396 if tool_id == "keras_model_config": |
359 config_keras_model(inputs, outfile) | 397 config_keras_model(inputs, outfile) |
360 | 398 |
361 # for keras_model_builder tool | 399 # for keras_model_builder tool |
362 else: | 400 else: |
363 batch_mode = False | 401 batch_mode = False |
364 if tool_id == 'keras_batch_models': | 402 if tool_id == "keras_batch_models": |
365 batch_mode = True | 403 batch_mode = True |
366 | 404 |
367 build_keras_model(inputs=inputs, | 405 build_keras_model( |
368 model_json=model_json, | 406 inputs=inputs, |
369 infile_weights=infile_weights, | 407 model_json=model_json, |
370 batch_mode=batch_mode, | 408 infile_weights=infile_weights, |
371 outfile=outfile, | 409 batch_mode=batch_mode, |
372 outfile_params=outfile_params) | 410 outfile=outfile, |
411 outfile_params=outfile_params, | |
412 ) |