Mercurial > repos > bgruening > create_tool_recommendation_model
diff utils.py @ 2:76251d1ccdcc draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
author | bgruening |
---|---|
date | Fri, 11 Oct 2019 18:24:54 -0400 |
parents | 9bf25dbe00ad |
children | 5b3c08710e47 |
line wrap: on
line diff
--- a/utils.py Wed Sep 25 06:42:40 2019 -0400 +++ b/utils.py Fri Oct 11 18:24:54 2019 -0400 @@ -3,11 +3,6 @@ import json import h5py -from keras.models import model_from_json, Sequential -from keras.layers import Dense, GRU, Dropout -from keras.layers.embeddings import Embedding -from keras.layers.core import SpatialDropout1D -from keras.optimizers import RMSprop from keras import backend as K @@ -37,17 +32,6 @@ workflows_file.write(workflow_paths_unique) -def load_saved_model(model_config, model_weights): - """ - Load the saved trained model using the saved network and its weights - """ - # load the network - loaded_model = model_from_json(model_config) - # load the saved weights into the model - loaded_model.set_weights(model_weights) - return loaded_model - - def format_tool_id(tool_link): """ Extract tool id from tool link @@ -57,22 +41,6 @@ return tool_id -def get_HDF5(hf, d_key): - """ - Read h5 file to get train and test data - """ - return hf.get(d_key).value - - -def save_HDF5(hf_file, d_key, data, d_type=""): - """ - Save datasets as h5 file - """ - if (d_type == 'json'): - data = json.dumps(data) - hf_file.create_dataset(d_key, data=data) - - def set_trained_model(dump_file, model_values): """ Create an h5 file with the trained weights and associated dicts @@ -100,44 +68,6 @@ os.remove(file_path) -def extract_configuration(config_object): - config_loss = dict() - for index, item in enumerate(config_object): - config_loss[index] = list() - d_config = dict() - d_config['loss'] = item['result']['loss'] - d_config['params_config'] = item['misc']['vals'] - config_loss[index].append(d_config) - return config_loss - - -def get_best_parameters(mdl_dict): - """ - Get param values (defaults as well) - """ - lr = float(mdl_dict.get("learning_rate", "0.001")) - embedding_size = int(mdl_dict.get("embedding_size", "512")) - dropout = float(mdl_dict.get("dropout", "0.2")) - recurrent_dropout = float(mdl_dict.get("recurrent_dropout", "0.2")) - spatial_dropout = float(mdl_dict.get("spatial_dropout", "0.2")) - units = int(mdl_dict.get("units", "512")) - batch_size = int(mdl_dict.get("batch_size", "512")) - activation_recurrent = mdl_dict.get("activation_recurrent", "elu") - activation_output = mdl_dict.get("activation_output", "sigmoid") - - return { - "lr": lr, - "embedding_size": embedding_size, - "dropout": dropout, - "recurrent_dropout": recurrent_dropout, - "spatial_dropout": spatial_dropout, - "units": units, - "batch_size": batch_size, - "activation_recurrent": activation_recurrent, - "activation_output": activation_output, - } - - def weighted_loss(class_weights): """ Create a weighted loss function. Penalise the misclassification @@ -152,27 +82,6 @@ return weighted_binary_crossentropy -def set_recurrent_network(mdl_dict, reverse_dictionary, class_weights): - """ - Create a RNN network and set its parameters - """ - dimensions = len(reverse_dictionary) + 1 - model_params = get_best_parameters(mdl_dict) - - # define the architecture of the neural network - model = Sequential() - model.add(Embedding(dimensions, model_params["embedding_size"], mask_zero=True)) - model.add(SpatialDropout1D(model_params["spatial_dropout"])) - model.add(GRU(model_params["units"], dropout=model_params["spatial_dropout"], recurrent_dropout=model_params["recurrent_dropout"], activation=model_params["activation_recurrent"], return_sequences=True)) - model.add(Dropout(model_params["dropout"])) - model.add(GRU(model_params["units"], dropout=model_params["spatial_dropout"], recurrent_dropout=model_params["recurrent_dropout"], activation=model_params["activation_recurrent"], return_sequences=False)) - model.add(Dropout(model_params["dropout"])) - model.add(Dense(dimensions, activation=model_params["activation_output"])) - optimizer = RMSprop(lr=model_params["lr"]) - model.compile(loss=weighted_loss(class_weights), optimizer=optimizer) - return model, model_params - - def compute_precision(model, x, y, reverse_data_dictionary, next_compatible_tools, usage_scores, actual_classes_pos, topk): """ Compute absolute and compatible precision