Mercurial > repos > bgruening > sklearn_train_test_eval
diff keras_deep_learning.py @ 11:caf7d2b71a48 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author | bgruening |
---|---|
date | Sat, 01 May 2021 01:47:26 +0000 |
parents | a9e0b963b7bb |
children | 2eb5c017958d |
line wrap: on
line diff
--- a/keras_deep_learning.py Tue Apr 13 22:04:06 2021 +0000 +++ b/keras_deep_learning.py Sat May 01 01:47:26 2021 +0000 @@ -10,12 +10,12 @@ from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr from keras.models import Model, Sequential - safe_eval = SafeEval() def _handle_shape(literal): - """Eval integer or list/tuple of integers from string + """ + Eval integer or list/tuple of integers from string Parameters: ----------- @@ -32,7 +32,8 @@ def _handle_regularizer(literal): - """Construct regularizer from string literal + """ + Construct regularizer from string literal Parameters ---------- @@ -48,15 +49,16 @@ return None if l1 is None: - l1 = 0. + l1 = 0.0 if l2 is None: - l2 = 0. + l2 = 0.0 return keras.regularizers.l1_l2(l1=l1, l2=l2) def _handle_constraint(config): - """Construct constraint from galaxy tool parameters. + """ + Construct constraint from galaxy tool parameters. Suppose correct dictionary format Parameters @@ -72,14 +74,14 @@ "MinMaxNorm" } """ - constraint_type = config['constraint_type'] - if constraint_type in ('None', ''): + constraint_type = config["constraint_type"] + if constraint_type in ("None", ""): return None klass = getattr(keras.constraints, constraint_type) - options = config.get('constraint_options', {}) - if 'axis' in options: - options['axis'] = literal_eval(options['axis']) + options = config.get("constraint_options", {}) + if "axis" in options: + options["axis"] = literal_eval(options["axis"]) return klass(**options) @@ -89,62 +91,82 @@ def _handle_layer_parameters(params): - """Access to handle all kinds of parameters + """ + Access to handle all kinds of parameters """ for key, value in six.iteritems(params): - if value in ('None', ''): + if value in ("None", ""): params[key] = None continue - if type(value) in [int, float, bool]\ - or (type(value) is str and value.isalpha()): + if type(value) in [int, float, bool] or ( + type(value) is str and value.isalpha() + ): continue - if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', - 'target_shape', 'dims', 'kernel_size', 'strides', - 'dilation_rate', 'output_padding', 'cropping', 'size', - 'padding', 'pool_size', 'axis', 'shared_axes'] \ - and isinstance(value, str): + if ( + key + in [ + "input_shape", + "noise_shape", + "shape", + "batch_shape", + "target_shape", + "dims", + "kernel_size", + "strides", + "dilation_rate", + "output_padding", + "cropping", + "size", + "padding", + "pool_size", + "axis", + "shared_axes", + ] + and isinstance(value, str) + ): params[key] = _handle_shape(value) - elif key.endswith('_regularizer') and isinstance(value, dict): + elif key.endswith("_regularizer") and isinstance(value, dict): params[key] = _handle_regularizer(value) - elif key.endswith('_constraint') and isinstance(value, dict): + elif key.endswith("_constraint") and isinstance(value, dict): params[key] = _handle_constraint(value) - elif key == 'function': # No support for lambda/function eval + elif key == "function": # No support for lambda/function eval params.pop(key) return params def get_sequential_model(config): - """Construct keras Sequential model from Galaxy tool parameters + """ + Construct keras Sequential model from Galaxy tool parameters Parameters: ----------- config : dictionary, galaxy tool parameters loaded by JSON """ model = Sequential() - input_shape = _handle_shape(config['input_shape']) - layers = config['layers'] + input_shape = _handle_shape(config["input_shape"]) + layers = config["layers"] for layer in layers: - options = layer['layer_selection'] - layer_type = options.pop('layer_type') + options = layer["layer_selection"] + layer_type = options.pop("layer_type") klass = getattr(keras.layers, layer_type) - kwargs = options.pop('kwargs', '') + kwargs = options.pop("kwargs", "") # parameters needs special care options = _handle_layer_parameters(options) if kwargs: - kwargs = safe_eval('dict(' + kwargs + ')') + kwargs = safe_eval("dict(" + kwargs + ")") options.update(kwargs) # add input_shape to the first layer only - if not getattr(model, '_layers') and input_shape is not None: - options['input_shape'] = input_shape + if not getattr(model, "_layers") and input_shape is not None: + options["input_shape"] = input_shape model.add(klass(**options)) @@ -152,31 +174,32 @@ def get_functional_model(config): - """Construct keras functional model from Galaxy tool parameters + """ + Construct keras functional model from Galaxy tool parameters Parameters ----------- config : dictionary, galaxy tool parameters loaded by JSON """ - layers = config['layers'] + layers = config["layers"] all_layers = [] for layer in layers: - options = layer['layer_selection'] - layer_type = options.pop('layer_type') + options = layer["layer_selection"] + layer_type = options.pop("layer_type") klass = getattr(keras.layers, layer_type) - inbound_nodes = options.pop('inbound_nodes', None) - kwargs = options.pop('kwargs', '') + inbound_nodes = options.pop("inbound_nodes", None) + kwargs = options.pop("kwargs", "") # parameters needs special care options = _handle_layer_parameters(options) if kwargs: - kwargs = safe_eval('dict(' + kwargs + ')') + kwargs = safe_eval("dict(" + kwargs + ")") options.update(kwargs) # merge layers - if 'merging_layers' in options: - idxs = literal_eval(options.pop('merging_layers')) + if "merging_layers" in options: + idxs = literal_eval(options.pop("merging_layers")) merging_layers = [all_layers[i - 1] for i in idxs] new_layer = klass(**options)(merging_layers) # non-input layers @@ -188,41 +211,43 @@ all_layers.append(new_layer) - input_indexes = _handle_shape(config['input_layers']) + input_indexes = _handle_shape(config["input_layers"]) input_layers = [all_layers[i - 1] for i in input_indexes] - output_indexes = _handle_shape(config['output_layers']) + output_indexes = _handle_shape(config["output_layers"]) output_layers = [all_layers[i - 1] for i in output_indexes] return Model(inputs=input_layers, outputs=output_layers) def get_batch_generator(config): - """Construct keras online data generator from Galaxy tool parameters + """ + Construct keras online data generator from Galaxy tool parameters Parameters ----------- config : dictionary, galaxy tool parameters loaded by JSON """ - generator_type = config.pop('generator_type') - if generator_type == 'none': + generator_type = config.pop("generator_type") + if generator_type == "none": return None - klass = try_get_attr('galaxy_ml.preprocessors', generator_type) + klass = try_get_attr("galaxy_ml.preprocessors", generator_type) - if generator_type == 'GenomicIntervalBatchGenerator': - config['ref_genome_path'] = 'to_be_determined' - config['intervals_path'] = 'to_be_determined' - config['target_path'] = 'to_be_determined' - config['features'] = 'to_be_determined' + if generator_type == "GenomicIntervalBatchGenerator": + config["ref_genome_path"] = "to_be_determined" + config["intervals_path"] = "to_be_determined" + config["target_path"] = "to_be_determined" + config["features"] = "to_be_determined" else: - config['fasta_path'] = 'to_be_determined' + config["fasta_path"] = "to_be_determined" return klass(**config) def config_keras_model(inputs, outfile): - """ config keras model layers and output JSON + """ + config keras model layers and output JSON Parameters ---------- @@ -232,23 +257,30 @@ outfile : str Path to galaxy dataset containing keras model JSON. """ - model_type = inputs['model_selection']['model_type'] - layers_config = inputs['model_selection'] + model_type = inputs["model_selection"]["model_type"] + layers_config = inputs["model_selection"] - if model_type == 'sequential': + if model_type == "sequential": model = get_sequential_model(layers_config) else: model = get_functional_model(layers_config) json_string = model.to_json() - with open(outfile, 'w') as f: + with open(outfile, "w") as f: json.dump(json.loads(json_string), f, indent=2) -def build_keras_model(inputs, outfile, model_json, infile_weights=None, - batch_mode=False, outfile_params=None): - """ for `keras_model_builder` tool +def build_keras_model( + inputs, + outfile, + model_json, + infile_weights=None, + batch_mode=False, + outfile_params=None, +): + """ + for `keras_model_builder` tool Parameters ---------- @@ -265,75 +297,81 @@ outfile_params : str, default=None File path to search parameters output. """ - with open(model_json, 'r') as f: + with open(model_json, "r") as f: json_model = json.load(f) - config = json_model['config'] + config = json_model["config"] options = {} - if json_model['class_name'] == 'Sequential': - options['model_type'] = 'sequential' + if json_model["class_name"] == "Sequential": + options["model_type"] = "sequential" klass = Sequential - elif json_model['class_name'] == 'Model': - options['model_type'] = 'functional' + elif json_model["class_name"] == "Model": + options["model_type"] = "functional" klass = Model else: - raise ValueError("Unknow Keras model class: %s" - % json_model['class_name']) + raise ValueError("Unknow Keras model class: %s" % json_model["class_name"]) # load prefitted model - if inputs['mode_selection']['mode_type'] == 'prefitted': + if inputs["mode_selection"]["mode_type"] == "prefitted": estimator = klass.from_config(config) estimator.load_weights(infile_weights) # build train model else: - cls_name = inputs['mode_selection']['learning_type'] - klass = try_get_attr('galaxy_ml.keras_galaxy_models', cls_name) + cls_name = inputs["mode_selection"]["learning_type"] + klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name) - options['loss'] = (inputs['mode_selection'] - ['compile_params']['loss']) - options['optimizer'] =\ - (inputs['mode_selection']['compile_params'] - ['optimizer_selection']['optimizer_type']).lower() + options["loss"] = inputs["mode_selection"]["compile_params"]["loss"] + options["optimizer"] = ( + inputs["mode_selection"]["compile_params"]["optimizer_selection"][ + "optimizer_type" + ] + ).lower() - options.update((inputs['mode_selection']['compile_params'] - ['optimizer_selection']['optimizer_options'])) + options.update( + ( + inputs["mode_selection"]["compile_params"]["optimizer_selection"][ + "optimizer_options" + ] + ) + ) - train_metrics = inputs['mode_selection']['compile_params']['metrics'] - if train_metrics[-1] == 'none': + train_metrics = inputs["mode_selection"]["compile_params"]["metrics"] + if train_metrics[-1] == "none": train_metrics = train_metrics[:-1] - options['metrics'] = train_metrics + options["metrics"] = train_metrics - options.update(inputs['mode_selection']['fit_params']) - options['seed'] = inputs['mode_selection']['random_seed'] + options.update(inputs["mode_selection"]["fit_params"]) + options["seed"] = inputs["mode_selection"]["random_seed"] if batch_mode: - generator = get_batch_generator(inputs['mode_selection'] - ['generator_selection']) - options['data_batch_generator'] = generator - options['prediction_steps'] = \ - inputs['mode_selection']['prediction_steps'] - options['class_positive_factor'] = \ - inputs['mode_selection']['class_positive_factor'] + generator = get_batch_generator( + inputs["mode_selection"]["generator_selection"] + ) + options["data_batch_generator"] = generator + options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"] + options["class_positive_factor"] = inputs["mode_selection"][ + "class_positive_factor" + ] estimator = klass(config, **options) if outfile_params: hyper_params = get_search_params(estimator) # TODO: remove this after making `verbose` tunable for h_param in hyper_params: - if h_param[1].endswith('verbose'): - h_param[0] = '@' - df = pd.DataFrame(hyper_params, columns=['', 'Parameter', 'Value']) - df.to_csv(outfile_params, sep='\t', index=False) + if h_param[1].endswith("verbose"): + h_param[0] = "@" + df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"]) + df.to_csv(outfile_params, sep="\t", index=False) print(repr(estimator)) # save model by pickle - with open(outfile, 'wb') as f: + with open(outfile, "wb") as f: pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) -if __name__ == '__main__': - warnings.simplefilter('ignore') +if __name__ == "__main__": + warnings.simplefilter("ignore") aparser = argparse.ArgumentParser() aparser.add_argument("-i", "--inputs", dest="inputs", required=True) @@ -345,7 +383,7 @@ args = aparser.parse_args() input_json_path = args.inputs - with open(input_json_path, 'r') as param_handler: + with open(input_json_path, "r") as param_handler: inputs = json.load(param_handler) tool_id = args.tool_id @@ -355,18 +393,20 @@ infile_weights = args.infile_weights # for keras_model_config tool - if tool_id == 'keras_model_config': + if tool_id == "keras_model_config": config_keras_model(inputs, outfile) # for keras_model_builder tool else: batch_mode = False - if tool_id == 'keras_batch_models': + if tool_id == "keras_batch_models": batch_mode = True - build_keras_model(inputs=inputs, - model_json=model_json, - infile_weights=infile_weights, - batch_mode=batch_mode, - outfile=outfile, - outfile_params=outfile_params) + build_keras_model( + inputs=inputs, + model_json=model_json, + infile_weights=infile_weights, + batch_mode=batch_mode, + outfile=outfile, + outfile_params=outfile_params, + )