Mercurial > repos > bgruening > sklearn_train_test_split
comparison keras_deep_learning.py @ 0:0985b0dd6f1a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit eb703290e2589561ea215c84aa9f71bcfe1712c6"
author | bgruening |
---|---|
date | Fri, 01 Nov 2019 17:26:59 -0400 |
parents | |
children | 5a092779412e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0985b0dd6f1a |
---|---|
1 import argparse | |
2 import json | |
3 import keras | |
4 import pandas as pd | |
5 import pickle | |
6 import six | |
7 import warnings | |
8 | |
9 from ast import literal_eval | |
10 from keras.models import Sequential, Model | |
11 from galaxy_ml.utils import try_get_attr, get_search_params, SafeEval | |
12 | |
13 | |
14 safe_eval = SafeEval() | |
15 | |
16 | |
17 def _handle_shape(literal): | |
18 """Eval integer or list/tuple of integers from string | |
19 | |
20 Parameters: | |
21 ----------- | |
22 literal : str. | |
23 """ | |
24 literal = literal.strip() | |
25 if not literal: | |
26 return None | |
27 try: | |
28 return literal_eval(literal) | |
29 except NameError as e: | |
30 print(e) | |
31 return literal | |
32 | |
33 | |
34 def _handle_regularizer(literal): | |
35 """Construct regularizer from string literal | |
36 | |
37 Parameters | |
38 ---------- | |
39 literal : str. E.g. '(0.1, 0)' | |
40 """ | |
41 literal = literal.strip() | |
42 if not literal: | |
43 return None | |
44 | |
45 l1, l2 = literal_eval(literal) | |
46 | |
47 if not l1 and not l2: | |
48 return None | |
49 | |
50 if l1 is None: | |
51 l1 = 0. | |
52 if l2 is None: | |
53 l2 = 0. | |
54 | |
55 return keras.regularizers.l1_l2(l1=l1, l2=l2) | |
56 | |
57 | |
58 def _handle_constraint(config): | |
59 """Construct constraint from galaxy tool parameters. | |
60 Suppose correct dictionary format | |
61 | |
62 Parameters | |
63 ---------- | |
64 config : dict. E.g. | |
65 "bias_constraint": | |
66 {"constraint_options": | |
67 {"max_value":1.0, | |
68 "min_value":0.0, | |
69 "axis":"[0, 1, 2]" | |
70 }, | |
71 "constraint_type": | |
72 "MinMaxNorm" | |
73 } | |
74 """ | |
75 constraint_type = config['constraint_type'] | |
76 if constraint_type == 'None': | |
77 return None | |
78 | |
79 klass = getattr(keras.constraints, constraint_type) | |
80 options = config.get('constraint_options', {}) | |
81 if 'axis' in options: | |
82 options['axis'] = literal_eval(options['axis']) | |
83 | |
84 return klass(**options) | |
85 | |
86 | |
87 def _handle_lambda(literal): | |
88 return None | |
89 | |
90 | |
91 def _handle_layer_parameters(params): | |
92 """Access to handle all kinds of parameters | |
93 """ | |
94 for key, value in six.iteritems(params): | |
95 if value == 'None': | |
96 params[key] = None | |
97 continue | |
98 | |
99 if type(value) in [int, float, bool]\ | |
100 or (type(value) is str and value.isalpha()): | |
101 continue | |
102 | |
103 if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', | |
104 'target_shape', 'dims', 'kernel_size', 'strides', | |
105 'dilation_rate', 'output_padding', 'cropping', 'size', | |
106 'padding', 'pool_size', 'axis', 'shared_axes'] \ | |
107 and isinstance(value, str): | |
108 params[key] = _handle_shape(value) | |
109 | |
110 elif key.endswith('_regularizer') and isinstance(value, dict): | |
111 params[key] = _handle_regularizer(value) | |
112 | |
113 elif key.endswith('_constraint') and isinstance(value, dict): | |
114 params[key] = _handle_constraint(value) | |
115 | |
116 elif key == 'function': # No support for lambda/function eval | |
117 params.pop(key) | |
118 | |
119 return params | |
120 | |
121 | |
def get_sequential_model(config):
    """Assemble a keras Sequential model from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded by JSON. Reads 'input_shape' and
        'layers'; each entry's 'layer_selection' dict is consumed
        (its 'layer_type' and 'kwargs' keys are popped).

    Returns
    -------
    The populated ``Sequential`` instance.
    """
    model = Sequential()
    input_shape = _handle_shape(config['input_shape'])

    for entry in config['layers']:
        layer_opts = entry['layer_selection']
        type_name = layer_opts.pop('layer_type')
        layer_cls = getattr(keras.layers, type_name)
        extra = layer_opts.pop('kwargs', '')

        # convert raw tool values into constructor arguments
        layer_opts = _handle_layer_parameters(layer_opts)

        # free-form keyword arguments override the structured ones
        if extra:
            layer_opts.update(safe_eval('dict(' + extra + ')'))

        # only the very first layer receives the input shape
        if not model._layers and input_shape is not None:
            layer_opts['input_shape'] = input_shape

        model.add(layer_cls(**layer_opts))

    return model
152 | |
153 | |
def get_functional_model(config):
    """Assemble a keras functional model from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded by JSON. Reads 'layers',
        'input_layers' and 'output_layers'. Layers are referred to by
        1-based position in 'layers'.

    Returns
    -------
    A ``Model`` wired from the selected input and output layers.
    """
    built = []

    def pick(indexes):
        # positions are 1-based in the tool parameters
        return [built[i-1] for i in indexes]

    for entry in config['layers']:
        layer_opts = entry['layer_selection']
        type_name = layer_opts.pop('layer_type')
        layer_cls = getattr(keras.layers, type_name)
        inbound_nodes = layer_opts.pop('inbound_nodes', None)
        extra = layer_opts.pop('kwargs', '')

        # convert raw tool values into constructor arguments
        layer_opts = _handle_layer_parameters(layer_opts)

        if extra:
            layer_opts.update(safe_eval('dict(' + extra + ')'))

        if 'merging_layers' in layer_opts:
            # merge layer: called on a list of earlier layers
            idxs = literal_eval(layer_opts.pop('merging_layers'))
            merging_layers = [built[i-1] for i in idxs]
            new_layer = layer_cls(**layer_opts)(merging_layers)
        elif inbound_nodes is not None:
            # regular layer: called on one earlier layer
            new_layer = layer_cls(**layer_opts)(built[inbound_nodes-1])
        else:
            # input layer: nothing to connect to
            new_layer = layer_cls(**layer_opts)

        built.append(new_layer)

    input_layers = pick(_handle_shape(config['input_layers']))
    output_layers = pick(_handle_shape(config['output_layers']))

    return Model(inputs=input_layers, outputs=output_layers)
198 | |
199 | |
def get_batch_generator(config):
    """Create a keras online data generator from Galaxy tool parameters.

    Parameters
    ----------
    config : dict
        Galaxy tool parameters loaded by JSON. 'generator_type' is
        popped and the remaining entries, plus placeholder path values,
        become the generator's keyword arguments.

    Returns
    -------
    An instance of the generator class named by 'generator_type',
    resolved through ``try_get_attr`` on 'galaxy_ml.preprocessors'.
    """
    generator_type = config.pop('generator_type')
    klass = try_get_attr('galaxy_ml.preprocessors', generator_type)

    # Data locations are filled with a placeholder string here.
    if generator_type == 'GenomicIntervalBatchGenerator':
        pending = ('ref_genome_path', 'intervals_path',
                   'target_path', 'features')
    else:
        pending = ('fasta_path',)
    for name in pending:
        config[name] = 'to_be_determined'

    return klass(**config)
219 | |
220 | |
def config_keras_model(inputs, outfile):
    """Build the keras model layers and write them out as JSON.

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_config`
        tool.
    outfile : str
        Path to galaxy dataset containing keras model JSON.
    """
    selection = inputs['model_selection']
    is_sequential = selection['model_type'] == 'sequential'

    builder = get_sequential_model if is_sequential \
        else get_functional_model

    # Serialize before opening the file so a failed build leaves the
    # output untouched.
    json_string = builder(selection).to_json()

    with open(outfile, 'w') as handle:
        handle.write(json_string)
244 | |
245 | |
def build_keras_model(inputs, outfile, model_json, infile_weights=None,
                      batch_mode=False, outfile_params=None):
    """ for `keras_model_builder` tool

    Parameters
    ----------
    inputs : dict
        loaded galaxy tool parameters from `keras_model_builder` tool.
    outfile : str
        Path to galaxy dataset containing the keras_galaxy model output.
    model_json : str
        Path to dataset containing keras model JSON.
    infile_weights : str or None
        If string, path to dataset containing model weights.
        Passed to ``load_weights`` in 'prefitted' mode.
    batch_mode : bool, default=False
        Whether to build online batch classifier.
    outfile_params : str, default=None
        File path to search parameters output.

    Raises
    ------
    ValueError
        If the model JSON names a class other than 'Sequential' or
        'Model'.
    """
    with open(model_json, 'r') as f:
        json_model = json.load(f)

    config = json_model['config']
    class_name = json_model['class_name']

    options = {}

    if class_name == 'Sequential':
        options['model_type'] = 'sequential'
        klass = Sequential
    elif class_name == 'Model':
        options['model_type'] = 'functional'
        klass = Model
    else:
        raise ValueError("Unknown Keras model class: %s" % class_name)

    mode = inputs['mode_selection']

    # load prefitted model
    if mode['mode_type'] == 'prefitted':
        estimator = klass.from_config(config)
        estimator.load_weights(infile_weights)
    # build train model
    else:
        cls_name = mode['learning_type']
        klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name)

        compile_params = mode['compile_params']
        optimizer = compile_params['optimizer_selection']

        options['loss'] = compile_params['loss']
        options['optimizer'] = optimizer['optimizer_type'].lower()
        options.update(optimizer['optimizer_options'])

        # metrics arrive as comma-separated text; a trailing 'none'
        # entry is dropped
        train_metrics = compile_params['metrics'].split(',')
        if train_metrics[-1] == 'none':
            train_metrics = train_metrics[:-1]
        options['metrics'] = train_metrics

        options.update(mode['fit_params'])
        options['seed'] = mode['random_seed']

        if batch_mode:
            generator = get_batch_generator(mode['generator_selection'])
            options['data_batch_generator'] = generator
            options['prediction_steps'] = mode['prediction_steps']
            options['class_positive_factor'] = \
                mode['class_positive_factor']

        estimator = klass(config, **options)

        # NOTE(review): nesting of this section under the training branch
        # was reconstructed from statement order -- confirm against the
        # original file.
        if outfile_params:
            hyper_params = get_search_params(estimator)
            # TODO: remove this after making `verbose` tunable
            for h_param in hyper_params:
                if h_param[1].endswith('verbose'):
                    h_param[0] = '@'
            df = pd.DataFrame(hyper_params,
                              columns=['', 'Parameter', 'Value'])
            df.to_csv(outfile_params, sep='\t', index=False)

    print(repr(estimator))
    # save model by pickle
    with open(outfile, 'wb') as f:
        pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL)
331 | |
332 | |
if __name__ == '__main__':
    # Command-line entry point shared by the keras model tools.
    warnings.simplefilter('ignore')

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--inputs", dest="inputs", required=True)
    parser.add_argument("-m", "--model_json", dest="model_json")
    parser.add_argument("-t", "--tool_id", dest="tool_id")
    parser.add_argument("-w", "--infile_weights", dest="infile_weights")
    parser.add_argument("-o", "--outfile", dest="outfile")
    parser.add_argument("-p", "--outfile_params", dest="outfile_params")
    args = parser.parse_args()

    # tool parameters are handed over as a JSON file
    with open(args.inputs, 'r') as param_handler:
        inputs = json.load(param_handler)

    if args.tool_id == 'keras_model_config':
        # keras_model_config tool: only emit the architecture JSON
        config_keras_model(inputs, args.outfile)
    else:
        # keras_model_builder tool; batch mode is selected by tool id
        build_keras_model(
            inputs=inputs,
            model_json=args.model_json,
            infile_weights=args.infile_weights,
            batch_mode=(args.tool_id == 'keras_batch_models'),
            outfile=args.outfile,
            outfile_params=args.outfile_params)