Mercurial > repos > bgruening > sklearn_mlxtend_association_rules
diff keras_deep_learning.py @ 0:af2624d5ab32 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ea12f973df4b97a2691d9e4ce6bf6fae59d57717"
author | bgruening |
---|---|
date | Sat, 01 May 2021 01:24:32 +0000 |
parents | |
children | 9349ed2749c6 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/keras_deep_learning.py Sat May 01 01:24:32 2021 +0000 @@ -0,0 +1,412 @@ +import argparse +import json +import pickle +import warnings +from ast import literal_eval + +import keras +import pandas as pd +import six +from galaxy_ml.utils import get_search_params, SafeEval, try_get_attr +from keras.models import Model, Sequential + +safe_eval = SafeEval() + + +def _handle_shape(literal): + """ + Eval integer or list/tuple of integers from string + + Parameters: + ----------- + literal : str. + """ + literal = literal.strip() + if not literal: + return None + try: + return literal_eval(literal) + except NameError as e: + print(e) + return literal + + +def _handle_regularizer(literal): + """ + Construct regularizer from string literal + + Parameters + ---------- + literal : str. E.g. '(0.1, 0)' + """ + literal = literal.strip() + if not literal: + return None + + l1, l2 = literal_eval(literal) + + if not l1 and not l2: + return None + + if l1 is None: + l1 = 0.0 + if l2 is None: + l2 = 0.0 + + return keras.regularizers.l1_l2(l1=l1, l2=l2) + + +def _handle_constraint(config): + """ + Construct constraint from galaxy tool parameters. + Suppose correct dictionary format + + Parameters + ---------- + config : dict. E.g. + "bias_constraint": + {"constraint_options": + {"max_value":1.0, + "min_value":0.0, + "axis":"[0, 1, 2]" + }, + "constraint_type": + "MinMaxNorm" + } + """ + constraint_type = config["constraint_type"] + if constraint_type in ("None", ""): + return None + + klass = getattr(keras.constraints, constraint_type) + options = config.get("constraint_options", {}) + if "axis" in options: + options["axis"] = literal_eval(options["axis"]) + + return klass(**options) + + +def _handle_lambda(literal): + return None + + +def _handle_layer_parameters(params): + """ + Access to handle all kinds of parameters + """ + for key, value in six.iteritems(params): + if value in ("None", ""): + params[key] = None + continue + + if type(value) in [int, float, bool] or ( + type(value) is str and value.isalpha() + ): + continue + + if ( + key + in [ + "input_shape", + "noise_shape", + "shape", + "batch_shape", + "target_shape", + "dims", + "kernel_size", + "strides", + "dilation_rate", + "output_padding", + "cropping", + "size", + "padding", + "pool_size", + "axis", + "shared_axes", + ] + and isinstance(value, str) + ): + params[key] = _handle_shape(value) + + elif key.endswith("_regularizer") and isinstance(value, dict): + params[key] = _handle_regularizer(value) + + elif key.endswith("_constraint") and isinstance(value, dict): + params[key] = _handle_constraint(value) + + elif key == "function": # No support for lambda/function eval + params.pop(key) + + return params + + +def get_sequential_model(config): + """ + Construct keras Sequential model from Galaxy tool parameters + + Parameters: + ----------- + config : dictionary, galaxy tool parameters loaded by JSON + """ + model = Sequential() + input_shape = _handle_shape(config["input_shape"]) + layers = config["layers"] + for layer in layers: + options = layer["layer_selection"] + layer_type = options.pop("layer_type") + klass = getattr(keras.layers, layer_type) + kwargs = options.pop("kwargs", "") + + # parameters needs special care + options = _handle_layer_parameters(options) + + if kwargs: + kwargs = safe_eval("dict(" + kwargs + ")") + options.update(kwargs) + + # add input_shape to the first layer only + if not getattr(model, "_layers") and input_shape is not None: + options["input_shape"] = input_shape + + model.add(klass(**options)) + + return model + + +def get_functional_model(config): + """ + Construct keras functional model from Galaxy tool parameters + + Parameters + ----------- + config : dictionary, galaxy tool parameters loaded by JSON + """ + layers = config["layers"] + all_layers = [] + for layer in layers: + options = layer["layer_selection"] + layer_type = options.pop("layer_type") + klass = getattr(keras.layers, layer_type) + inbound_nodes = options.pop("inbound_nodes", None) + kwargs = options.pop("kwargs", "") + + # parameters needs special care + options = _handle_layer_parameters(options) + + if kwargs: + kwargs = safe_eval("dict(" + kwargs + ")") + options.update(kwargs) + + # merge layers + if "merging_layers" in options: + idxs = literal_eval(options.pop("merging_layers")) + merging_layers = [all_layers[i - 1] for i in idxs] + new_layer = klass(**options)(merging_layers) + # non-input layers + elif inbound_nodes is not None: + new_layer = klass(**options)(all_layers[inbound_nodes - 1]) + # input layers + else: + new_layer = klass(**options) + + all_layers.append(new_layer) + + input_indexes = _handle_shape(config["input_layers"]) + input_layers = [all_layers[i - 1] for i in input_indexes] + + output_indexes = _handle_shape(config["output_layers"]) + output_layers = [all_layers[i - 1] for i in output_indexes] + + return Model(inputs=input_layers, outputs=output_layers) + + +def get_batch_generator(config): + """ + Construct keras online data generator from Galaxy tool parameters + + Parameters + ----------- + config : dictionary, galaxy tool parameters loaded by JSON + """ + generator_type = config.pop("generator_type") + if generator_type == "none": + return None + + klass = try_get_attr("galaxy_ml.preprocessors", generator_type) + + if generator_type == "GenomicIntervalBatchGenerator": + config["ref_genome_path"] = "to_be_determined" + config["intervals_path"] = "to_be_determined" + config["target_path"] = "to_be_determined" + config["features"] = "to_be_determined" + else: + config["fasta_path"] = "to_be_determined" + + return klass(**config) + + +def config_keras_model(inputs, outfile): + """ + config keras model layers and output JSON + + Parameters + ---------- + inputs : dict + loaded galaxy tool parameters from `keras_model_config` + tool. + outfile : str + Path to galaxy dataset containing keras model JSON. + """ + model_type = inputs["model_selection"]["model_type"] + layers_config = inputs["model_selection"] + + if model_type == "sequential": + model = get_sequential_model(layers_config) + else: + model = get_functional_model(layers_config) + + json_string = model.to_json() + + with open(outfile, "w") as f: + json.dump(json.loads(json_string), f, indent=2) + + +def build_keras_model( + inputs, + outfile, + model_json, + infile_weights=None, + batch_mode=False, + outfile_params=None, +): + """ + for `keras_model_builder` tool + + Parameters + ---------- + inputs : dict + loaded galaxy tool parameters from `keras_model_builder` tool. + outfile : str + Path to galaxy dataset containing the keras_galaxy model output. + model_json : str + Path to dataset containing keras model JSON. + infile_weights : str or None + If string, path to dataset containing model weights. + batch_mode : bool, default=False + Whether to build online batch classifier. + outfile_params : str, default=None + File path to search parameters output. + """ + with open(model_json, "r") as f: + json_model = json.load(f) + + config = json_model["config"] + + options = {} + + if json_model["class_name"] == "Sequential": + options["model_type"] = "sequential" + klass = Sequential + elif json_model["class_name"] == "Model": + options["model_type"] = "functional" + klass = Model + else: + raise ValueError("Unknow Keras model class: %s" % json_model["class_name"]) + + # load prefitted model + if inputs["mode_selection"]["mode_type"] == "prefitted": + estimator = klass.from_config(config) + estimator.load_weights(infile_weights) + # build train model + else: + cls_name = inputs["mode_selection"]["learning_type"] + klass = try_get_attr("galaxy_ml.keras_galaxy_models", cls_name) + + options["loss"] = inputs["mode_selection"]["compile_params"]["loss"] + options["optimizer"] = ( + inputs["mode_selection"]["compile_params"]["optimizer_selection"][ + "optimizer_type" + ] + ).lower() + + options.update( + ( + inputs["mode_selection"]["compile_params"]["optimizer_selection"][ + "optimizer_options" + ] + ) + ) + + train_metrics = inputs["mode_selection"]["compile_params"]["metrics"] + if train_metrics[-1] == "none": + train_metrics = train_metrics[:-1] + options["metrics"] = train_metrics + + options.update(inputs["mode_selection"]["fit_params"]) + options["seed"] = inputs["mode_selection"]["random_seed"] + + if batch_mode: + generator = get_batch_generator( + inputs["mode_selection"]["generator_selection"] + ) + options["data_batch_generator"] = generator + options["prediction_steps"] = inputs["mode_selection"]["prediction_steps"] + options["class_positive_factor"] = inputs["mode_selection"][ + "class_positive_factor" + ] + estimator = klass(config, **options) + if outfile_params: + hyper_params = get_search_params(estimator) + # TODO: remove this after making `verbose` tunable + for h_param in hyper_params: + if h_param[1].endswith("verbose"): + h_param[0] = "@" + df = pd.DataFrame(hyper_params, columns=["", "Parameter", "Value"]) + df.to_csv(outfile_params, sep="\t", index=False) + + print(repr(estimator)) + # save model by pickle + with open(outfile, "wb") as f: + pickle.dump(estimator, f, pickle.HIGHEST_PROTOCOL) + + +if __name__ == "__main__": + warnings.simplefilter("ignore") + + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-m", "--model_json", dest="model_json") + aparser.add_argument("-t", "--tool_id", dest="tool_id") + aparser.add_argument("-w", "--infile_weights", dest="infile_weights") + aparser.add_argument("-o", "--outfile", dest="outfile") + aparser.add_argument("-p", "--outfile_params", dest="outfile_params") + args = aparser.parse_args() + + input_json_path = args.inputs + with open(input_json_path, "r") as param_handler: + inputs = json.load(param_handler) + + tool_id = args.tool_id + outfile = args.outfile + outfile_params = args.outfile_params + model_json = args.model_json + infile_weights = args.infile_weights + + # for keras_model_config tool + if tool_id == "keras_model_config": + config_keras_model(inputs, outfile) + + # for keras_model_builder tool + else: + batch_mode = False + if tool_id == "keras_batch_models": + batch_mode = True + + build_keras_model( + inputs=inputs, + model_json=model_json, + infile_weights=infile_weights, + batch_mode=batch_mode, + outfile=outfile, + outfile_params=outfile_params, + )