# HG changeset patch # User bgruening # Date 1568390924 14400 # Node ID cc49634df38fe5097ebb8928061babe897264960 # Parent 68aaa903052a85188c298186489d94f54ad534fb "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16" diff -r 68aaa903052a -r cc49634df38f keras_deep_learning.py --- a/keras_deep_learning.py Fri Aug 09 07:09:06 2019 -0400 +++ b/keras_deep_learning.py Fri Sep 13 12:08:44 2019 -0400 @@ -8,7 +8,10 @@ from ast import literal_eval from keras.models import Sequential, Model -from galaxy_ml.utils import try_get_attr, get_search_params +from galaxy_ml.utils import try_get_attr, get_search_params, SafeEval + + +safe_eval = SafeEval() def _handle_shape(literal): @@ -100,13 +103,14 @@ if key in ['input_shape', 'noise_shape', 'shape', 'batch_shape', 'target_shape', 'dims', 'kernel_size', 'strides', 'dilation_rate', 'output_padding', 'cropping', 'size', - 'padding', 'pool_size', 'axis', 'shared_axes']: + 'padding', 'pool_size', 'axis', 'shared_axes'] \ + and isinstance(value, str): params[key] = _handle_shape(value) - elif key.endswith('_regularizer'): + elif key.endswith('_regularizer') and isinstance(value, dict): params[key] = _handle_regularizer(value) - elif key.endswith('_constraint'): + elif key.endswith('_constraint') and isinstance(value, dict): params[key] = _handle_constraint(value) elif key == 'function': # No support for lambda/function eval @@ -129,12 +133,15 @@ options = layer['layer_selection'] layer_type = options.pop('layer_type') klass = getattr(keras.layers, layer_type) - other_options = options.pop('layer_options', {}) - options.update(other_options) + kwargs = options.pop('kwargs', '') # parameters needs special care options = _handle_layer_parameters(options) + if kwargs: + kwargs = safe_eval('dict(' + kwargs + ')') + options.update(kwargs) + # add input_shape to the first layer only if not getattr(model, '_layers') and input_shape is not None: options['input_shape'] = input_shape @@ -158,11 +165,15 @@ layer_type = options.pop('layer_type') klass = getattr(keras.layers, layer_type) inbound_nodes = options.pop('inbound_nodes', None) - other_options = options.pop('layer_options', {}) - options.update(other_options) + kwargs = options.pop('kwargs', '') # parameters needs special care options = _handle_layer_parameters(options) + + if kwargs: + kwargs = safe_eval('dict(' + kwargs + ')') + options.update(kwargs) + # merge layers if 'merging_layers' in options: idxs = literal_eval(options.pop('merging_layers')) diff -r 68aaa903052a -r cc49634df38f keras_macros.xml --- a/keras_macros.xml Fri Aug 09 07:09:06 2019 -0400 +++ b/keras_macros.xml Fri Sep 13 12:08:44 2019 -0400 @@ -1,5 +1,5 @@ - 0.4.0 + 0.4.2 @@ -24,34 +24,34 @@ - + - + - + - + - - + + - + - + @@ -109,133 +109,120 @@ - - - - - - - + + + + + + + - - - - - + + + + + - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + - - + + - - - - - - - - - - - - + + + + + + + + + + - + - - - - - - + + + + + + - + - - - + + + - - - - - - - - + + + + + + + + -
- - - - - - - - - -
+ +
- - + @@ -301,19 +288,13 @@ - - - - + - - - - + @@ -322,260 +303,81 @@ - -
- - - - - - - - - - - - - \ - - - - - - -
+ +
- -
- - - - - - - - - - - - - \ - - - - - - -
+ +
- -
- - - - - - - - - - - - - - - - - - - - - - - -
+ +
- -
- - - - - - - - - - - - - - - - - - - - - - - -
+ +
- + - -
- - - - - - - - - - - - - - - - - - - - - -
+ +
- -
- - - - - - - - - - - - - - - - - - - - -
+ +
- -
- - - - - - - - - - - - - - - - - - - -
+ +
- -
- - - - - - - - - - - - - - - - - - - - -
+ +
- + - - - - + - - - - + @@ -586,23 +388,13 @@ - - - - - - - - + - - - - + @@ -613,19 +405,13 @@ - - - - + - - - - + @@ -634,30 +420,14 @@ - - - - - - - - - + - - - - - - - - - + @@ -665,45 +435,21 @@ - - - - - - - - - +
- - - - - - - - - + - - - - - - - - - + @@ -711,15 +457,7 @@ - - - - - - - - - +
@@ -776,92 +514,90 @@ - -
- - - - - - - - - - \ - - - - - - -
+ +
- -
- - - - - - - - - - - - \ - - - - - - -
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + - + - + -
- -
+
+ + + + + + + + + + + + + + + + + + + -
- - - - - - -
+
@@ -901,7 +637,7 @@
- + @@ -939,13 +675,34 @@ + + + + + + + + + + + + + + + + + + + + + diff -r 68aaa903052a -r cc49634df38f main_macros.xml --- a/main_macros.xml Fri Aug 09 07:09:06 2019 -0400 +++ b/main_macros.xml Fri Sep 13 12:08:44 2019 -0400 @@ -1,12 +1,12 @@ - 1.0.7.10 + 1.0.7.12 0.2.0 python - Galaxy-ML + Galaxy-ML @@ -1379,7 +1379,7 @@ - + diff -r 68aaa903052a -r cc49634df38f ml_visualization_ex.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ml_visualization_ex.py Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,305 @@ +import argparse +import json +import numpy as np +import pandas as pd +import plotly +import plotly.graph_objs as go +import warnings + +from keras.models import model_from_json +from keras.utils import plot_model +from sklearn.feature_selection.base import SelectorMixin +from sklearn.metrics import precision_recall_curve, average_precision_score +from sklearn.metrics import roc_curve, auc +from sklearn.pipeline import Pipeline +from galaxy_ml.utils import load_model, read_columns, SafeEval + + +safe_eval = SafeEval() + + +def main(inputs, infile_estimator=None, infile1=None, + infile2=None, outfile_result=None, + outfile_object=None, groups=None, + ref_seq=None, intervals=None, + targets=None, fasta_path=None, + model_config=None): + """ + Parameter + --------- + inputs : str + File path to galaxy tool parameter + + infile_estimator : str, default is None + File path to estimator + + infile1 : str, default is None + File path to dataset containing features or true labels. + + infile2 : str, default is None + File path to dataset containing target values or predicted + probabilities. + + outfile_result : str, default is None + File path to save the results, either cv_results or test result + + outfile_object : str, default is None + File path to save searchCV object + + groups : str, default is None + File path to dataset containing groups labels + + ref_seq : str, default is None + File path to dataset containing genome sequence file + + intervals : str, default is None + File path to dataset containing interval file + + targets : str, default is None + File path to dataset compressed target bed file + + fasta_path : str, default is None + File path to dataset containing fasta file + + model_config : str, default is None + File path to dataset containing JSON config for neural networks + """ + warnings.simplefilter('ignore') + + with open(inputs, 'r') as param_handler: + params = json.load(param_handler) + + title = params['plotting_selection']['title'].strip() + plot_type = params['plotting_selection']['plot_type'] + if plot_type == 'feature_importances': + with open(infile_estimator, 'rb') as estimator_handler: + estimator = load_model(estimator_handler) + + column_option = (params['plotting_selection'] + ['column_selector_options'] + ['selected_column_selector_option']) + if column_option in ['by_index_number', 'all_but_by_index_number', + 'by_header_name', 'all_but_by_header_name']: + c = (params['plotting_selection'] + ['column_selector_options']['col1']) + else: + c = None + + _, input_df = read_columns(infile1, c=c, + c_option=column_option, + return_df=True, + sep='\t', header='infer', + parse_dates=True) + + feature_names = input_df.columns.values + + if isinstance(estimator, Pipeline): + for st in estimator.steps[:-1]: + if isinstance(st[-1], SelectorMixin): + mask = st[-1].get_support() + feature_names = feature_names[mask] + estimator = estimator.steps[-1][-1] + + if hasattr(estimator, 'coef_'): + coefs = estimator.coef_ + else: + coefs = getattr(estimator, 'feature_importances_', None) + if coefs is None: + raise RuntimeError('The classifier does not expose ' + '"coef_" or "feature_importances_" ' + 'attributes') + + threshold = params['plotting_selection']['threshold'] + if threshold is not None: + mask = (coefs > threshold) | (coefs < -threshold) + coefs = coefs[mask] + feature_names = feature_names[mask] + + # sort + indices = np.argsort(coefs)[::-1] + + trace = go.Bar(x=feature_names[indices], + y=coefs[indices]) + layout = go.Layout(title=title or "Feature Importances") + fig = go.Figure(data=[trace], layout=layout) + + elif plot_type == 'pr_curve': + df1 = pd.read_csv(infile1, sep='\t', header=None) + df2 = pd.read_csv(infile2, sep='\t', header=None) + + precision = {} + recall = {} + ap = {} + + pos_label = params['plotting_selection']['pos_label'].strip() \ + or None + for col in df1.columns: + y_true = df1[col].values + y_score = df2[col].values + + precision[col], recall[col], _ = precision_recall_curve( + y_true, y_score, pos_label=pos_label) + ap[col] = average_precision_score( + y_true, y_score, pos_label=pos_label or 1) + + if len(df1.columns) > 1: + precision["micro"], recall["micro"], _ = precision_recall_curve( + df1.values.ravel(), df2.values.ravel(), pos_label=pos_label) + ap['micro'] = average_precision_score( + df1.values, df2.values, average='micro', pos_label=pos_label or 1) + + data = [] + for key in precision.keys(): + trace = go.Scatter( + x=recall[key], + y=precision[key], + mode='lines', + name='%s (area = %.2f)' % (key, ap[key]) if key == 'micro' + else 'column %s (area = %.2f)' % (key, ap[key]) + ) + data.append(trace) + + layout = go.Layout( + title=title or "Precision-Recall curve", + xaxis=dict(title='Recall'), + yaxis=dict(title='Precision') + ) + + fig = go.Figure(data=data, layout=layout) + + elif plot_type == 'roc_curve': + df1 = pd.read_csv(infile1, sep='\t', header=None) + df2 = pd.read_csv(infile2, sep='\t', header=None) + + fpr = {} + tpr = {} + roc_auc = {} + + pos_label = params['plotting_selection']['pos_label'].strip() \ + or None + for col in df1.columns: + y_true = df1[col].values + y_score = df2[col].values + + fpr[col], tpr[col], _ = roc_curve( + y_true, y_score, pos_label=pos_label) + roc_auc[col] = auc(fpr[col], tpr[col]) + + if len(df1.columns) > 1: + fpr["micro"], tpr["micro"], _ = roc_curve( + df1.values.ravel(), df2.values.ravel(), pos_label=pos_label) + roc_auc['micro'] = auc(fpr["micro"], tpr["micro"]) + + data = [] + for key in fpr.keys(): + trace = go.Scatter( + x=fpr[key], + y=tpr[key], + mode='lines', + name='%s (area = %.2f)' % (key, roc_auc[key]) if key == 'micro' + else 'column %s (area = %.2f)' % (key, roc_auc[key]) + ) + data.append(trace) + + trace = go.Scatter(x=[0, 1], y=[0, 1], + mode='lines', + line=dict(color='black', dash='dash'), + showlegend=False) + data.append(trace) + + layout = go.Layout( + title=title or "Receiver operating characteristic curve", + xaxis=dict(title='False Positive Rate'), + yaxis=dict(title='True Positive Rate') + ) + + fig = go.Figure(data=data, layout=layout) + + elif plot_type == 'rfecv_gridscores': + input_df = pd.read_csv(infile1, sep='\t', header='infer') + scores = input_df.iloc[:, 0] + steps = params['plotting_selection']['steps'].strip() + steps = safe_eval(steps) + + data = go.Scatter( + x=list(range(len(scores))), + y=scores, + text=[str(_) for _ in steps] if steps else None, + mode='lines' + ) + layout = go.Layout( + xaxis=dict(title="Number of features selected"), + yaxis=dict(title="Cross validation score"), + title=title or None + ) + + fig = go.Figure(data=[data], layout=layout) + + elif plot_type == 'learning_curve': + input_df = pd.read_csv(infile1, sep='\t', header='infer') + plot_std_err = params['plotting_selection']['plot_std_err'] + data1 = go.Scatter( + x=input_df['train_sizes_abs'], + y=input_df['mean_train_scores'], + error_y=dict( + array=input_df['std_train_scores'] + ) if plot_std_err else None, + mode='lines', + name="Train Scores", + ) + data2 = go.Scatter( + x=input_df['train_sizes_abs'], + y=input_df['mean_test_scores'], + error_y=dict( + array=input_df['std_test_scores'] + ) if plot_std_err else None, + mode='lines', + name="Test Scores", + ) + layout = dict( + xaxis=dict( + title='No. of samples' + ), + yaxis=dict( + title='Performance Score' + ), + title=title or 'Learning Curve' + ) + fig = go.Figure(data=[data1, data2], layout=layout) + + elif plot_type == 'keras_plot_model': + with open(model_config, 'r') as f: + model_str = f.read() + model = model_from_json(model_str) + plot_model(model, to_file="output.png") + __import__('os').rename('output.png', 'output') + + return 0 + + plotly.offline.plot(fig, filename="output.html", + auto_open=False) + # to be discovered by `from_work_dir` + __import__('os').rename('output.html', 'output') + + +if __name__ == '__main__': + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-e", "--estimator", dest="infile_estimator") + aparser.add_argument("-X", "--infile1", dest="infile1") + aparser.add_argument("-y", "--infile2", dest="infile2") + aparser.add_argument("-O", "--outfile_result", dest="outfile_result") + aparser.add_argument("-o", "--outfile_object", dest="outfile_object") + aparser.add_argument("-g", "--groups", dest="groups") + aparser.add_argument("-r", "--ref_seq", dest="ref_seq") + aparser.add_argument("-b", "--intervals", dest="intervals") + aparser.add_argument("-t", "--targets", dest="targets") + aparser.add_argument("-f", "--fasta_path", dest="fasta_path") + aparser.add_argument("-c", "--model_config", dest="model_config") + args = aparser.parse_args() + + main(args.inputs, args.infile_estimator, args.infile1, args.infile2, + args.outfile_result, outfile_object=args.outfile_object, + groups=args.groups, ref_seq=args.ref_seq, intervals=args.intervals, + targets=args.targets, fasta_path=args.fasta_path, + model_config=args.model_config) diff -r 68aaa903052a -r cc49634df38f model_prediction.py --- a/model_prediction.py Fri Aug 09 07:09:06 2019 -0400 +++ b/model_prediction.py Fri Sep 13 12:08:44 2019 -0400 @@ -2,11 +2,13 @@ import json import numpy as np import pandas as pd +import tabix import warnings from scipy.io import mmread from sklearn.pipeline import Pipeline +from galaxy_ml.externals.selene_sdk.sequences import Genome from galaxy_ml.utils import (load_model, read_columns, get_module, try_get_attr) @@ -138,53 +140,108 @@ pred_data_generator.fit() - preds = estimator.model_.predict_generator( - pred_data_generator.flow(batch_size=32), - workers=N_JOBS, - use_multiprocessing=True) + variants = pred_data_generator.variants + # TODO : remove the following block after galaxy-ml v0.7.13 + blacklist_tabix = getattr(pred_data_generator.reference_genome_, + '_blacklist_tabix', None) + clean_variants = [] + if blacklist_tabix: + start_radius = pred_data_generator.start_radius_ + end_radius = pred_data_generator.end_radius_ + + for chrom, pos, name, ref, alt, strand in variants: + center = pos + len(ref) // 2 + start = center - start_radius + end = center + end_radius - if preds.min() < 0. or preds.max() > 1.: - warnings.warn('Network returning invalid probability values. ' - 'The last layer might not normalize predictions ' - 'into probabilities ' - '(like softmax or sigmoid would).') + if isinstance(pred_data_generator.reference_genome_, Genome): + if "chr" not in chrom: + chrom = "chr" + chrom + if "MT" in chrom: + chrom = chrom[:-1] + try: + rows = blacklist_tabix.query(chrom, start, end) + found = 0 + for row in rows: + found = 1 + break + if found: + continue + except tabix.TabixError: + pass - if params['method'] == 'predict_proba' and preds.shape[1] == 1: - # first column is probability of class 0 and second is of class 1 - preds = np.hstack([1 - preds, preds]) + clean_variants.append((chrom, pos, name, ref, alt, strand)) + else: + clean_variants = variants + + setattr(pred_data_generator, 'variants', clean_variants) + + variants = np.array(clean_variants) + # predict 1600 sample at once then write to file + gen_flow = pred_data_generator.flow(batch_size=1600) + + file_writer = open(outfile_predict, 'w') + header_row = '\t'.join(['chrom', 'pos', 'name', 'ref', + 'alt', 'strand']) + file_writer.write(header_row) + header_done = False - elif params['method'] == 'predict': - if preds.shape[-1] > 1: - # if the last activation is `softmax`, the sum of all - # probibilities will 1, the classification is considered as - # multi-class problem, otherwise, we take it as multi-label. - act = getattr(estimator.model_.layers[-1], 'activation', None) - if act and act.__name__ == 'softmax': - classes = preds.argmax(axis=-1) + steps_done = 0 + + # TODO: multiple threading + try: + while steps_done < len(gen_flow): + index_array = next(gen_flow.index_generator) + batch_X = gen_flow._get_batches_of_transformed_samples( + index_array) + + if params['method'] == 'predict': + batch_preds = estimator.predict( + batch_X, + # The presence of `pred_data_generator` below is to + # override model carrying data_generator if there + # is any. + data_generator=pred_data_generator) else: - preds = (preds > 0.5).astype('int32') - else: - classes = (preds > 0.5).astype('int32') + batch_preds = estimator.predict_proba( + batch_X, + # The presence of `pred_data_generator` below is to + # override model carrying data_generator if there + # is any. + data_generator=pred_data_generator) + + if batch_preds.ndim == 1: + batch_preds = batch_preds[:, np.newaxis] + + batch_meta = variants[index_array] + batch_out = np.column_stack([batch_meta, batch_preds]) - preds = estimator.classes_[classes] + if not header_done: + heads = np.arange(batch_preds.shape[-1]).astype(str) + heads_str = '\t'.join(heads) + file_writer.write("\t%s\n" % heads_str) + header_done = True + + for row in batch_out: + row_str = '\t'.join(row) + file_writer.write("%s\n" % row_str) + + steps_done += 1 + + finally: + file_writer.close() + # TODO: make api `pred_data_generator.close()` + pred_data_generator.close() + return 0 # end input # output - if input_type == 'variant_effect': # TODO: save in batchs - rval = pd.DataFrame(preds) - meta = pd.DataFrame( - pred_data_generator.variants, - columns=['chrom', 'pos', 'name', 'ref', 'alt', 'strand']) - - rval = pd.concat([meta, rval], axis=1) - - elif len(preds.shape) == 1: + if len(preds.shape) == 1: rval = pd.DataFrame(preds, columns=['Predicted']) else: rval = pd.DataFrame(preds) - rval.to_csv(outfile_predict, sep='\t', - header=True, index=False) + rval.to_csv(outfile_predict, sep='\t', header=True, index=False) if __name__ == '__main__': diff -r 68aaa903052a -r cc49634df38f search_model_validation.py --- a/search_model_validation.py Fri Aug 09 07:09:06 2019 -0400 +++ b/search_model_validation.py Fri Sep 13 12:08:44 2019 -0400 @@ -213,6 +213,16 @@ with open(inputs, 'r') as param_handler: params = json.load(param_handler) + # conflict param checker + if params['outer_split']['split_mode'] == 'nested_cv' \ + and params['save'] != 'nope': + raise ValueError("Save best estimator is not possible for nested CV!") + + if not (params['search_schemes']['options']['refit']) \ + and params['save'] != 'nope': + raise ValueError("Save best estimator is not possible when refit " + "is False!") + params_builder = params['search_schemes']['search_params_builder'] with open(infile_estimator, 'rb') as estimator_handler: @@ -542,7 +552,6 @@ del main_est.validation_data if getattr(main_est, 'data_generator_', None): del main_est.data_generator_ - del main_est.data_batch_generator with open(outfile_object, 'wb') as output_handler: pickle.dump(best_estimator_, output_handler, diff -r 68aaa903052a -r cc49634df38f test-data/grid_scores_.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/grid_scores_.tabular Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,18 @@ +grid_scores_ +0.7634899597102532 +0.7953981831108754 +0.7937021172447345 +0.7951323776809974 +0.793206654688313 +0.8046265123256906 +0.7972524937034748 +0.8106427221191455 +0.8072746749161711 +0.8146665413082648 +0.8155998800333571 +0.8056801877422021 +0.8123573954396127 +0.8155472512482351 +0.8164562575257928 +0.8151250518677203 +0.8107710182153142 diff -r 68aaa903052a -r cc49634df38f test-data/keras02.json --- a/test-data/keras02.json Fri Aug 09 07:09:06 2019 -0400 +++ b/test-data/keras02.json Fri Sep 13 12:08:44 2019 -0400 @@ -1,1 +1,1 @@ -{"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 100], "dtype": "int32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "embedding_1", "class_name": "Embedding", "config": {"name": "embedding_1", "trainable": true, "batch_input_shape": [null, 100], "dtype": "float32", "input_dim": 10000, "output_dim": 512, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 100}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "lstm_1", "class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "linear", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, "inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"name": "dense_5", "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0], ["aux_input", 0, 0]], "output_layers": [["dense_1", 0, 0], ["dense_5", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file +{"class_name": "Model", "config": {"name": "model_1", "layers": [{"name": "main_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 100], "dtype": "int32", "sparse": false, "name": "main_input"}, "inbound_nodes": []}, {"name": "embedding_1", "class_name": "Embedding", "config": {"name": "embedding_1", "trainable": true, "batch_input_shape": [null, 100], "dtype": "float32", "input_dim": 10000, "output_dim": 512, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 100}, "inbound_nodes": [[["main_input", 0, 0, {}]]]}, {"name": "lstm_1", "class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "units": 32, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 1}, "inbound_nodes": [[["embedding_1", 0, 0, {}]]]}, {"name": "dense_1", "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["lstm_1", 0, 0, {}]]]}, {"name": "aux_input", "class_name": "InputLayer", "config": {"batch_input_shape": [null, 5], "dtype": "float32", "sparse": false, "name": "aux_input"}, "inbound_nodes": []}, {"name": "concatenate_1", "class_name": "Concatenate", "config": {"name": "concatenate_1", "trainable": true, "axis": -1}, "inbound_nodes": [[["dense_1", 0, 0, {}], ["aux_input", 0, 0, {}]]]}, {"name": "dense_2", "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["concatenate_1", 0, 0, {}]]]}, {"name": "dense_3", "class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_2", 0, 0, {}]]]}, {"name": "dense_4", "class_name": "Dense", "config": {"name": "dense_4", "trainable": true, "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_3", 0, 0, {}]]]}, {"name": "dense_5", "class_name": "Dense", "config": {"name": "dense_5", "trainable": true, "units": 1, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_4", 0, 0, {}]]]}], "input_layers": [["main_input", 0, 0], ["aux_input", 0, 0]], "output_layers": [["dense_1", 0, 0], ["dense_5", 0, 0]]}, "keras_version": "2.2.4", "backend": "tensorflow"} \ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/keras_batch_model01 Binary file test-data/keras_batch_model01 has changed diff -r 68aaa903052a -r cc49634df38f test-data/keras_batch_model02 Binary file test-data/keras_batch_model02 has changed diff -r 68aaa903052a -r cc49634df38f test-data/keras_batch_model03 Binary file test-data/keras_batch_model03 has changed diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis01.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_vis01.html Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,14 @@ +
\ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis02.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_vis02.html Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,14 @@ +
\ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis03.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_vis03.html Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,14 @@ +
\ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis04.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_vis04.html Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,14 @@ +
\ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis05.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_vis05.html Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,14 @@ +
\ No newline at end of file diff -r 68aaa903052a -r cc49634df38f test-data/ml_vis05.png Binary file test-data/ml_vis05.png has changed diff -r 68aaa903052a -r cc49634df38f test-data/pipeline14 Binary file test-data/pipeline14 has changed diff -r 68aaa903052a -r cc49634df38f test-data/pipeline16 Binary file test-data/pipeline16 has changed diff -r 68aaa903052a -r cc49634df38f test-data/y_score.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/y_score.tabular Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,75 @@ +0.04521016253284027 +-0.0017878318955413253 +-0.3380009790698638 +-0.15416229901482092 +-0.008989122568787922 +0.3775746361984437 +-0.20342288788672414 +0.21787658306027935 +-0.5322523189136876 +-0.6361907868807346 +-0.036875765955103335 +-0.24857077769453662 +-0.5305978020035378 +-0.5288479779433272 +-0.22579627342382325 +0.4905346629557697 +-0.12238193946346121 +-0.42773421293023084 +0.16878080982659216 +0.051637548704625946 +0.023623352380110763 +-0.3553978552068183 +-0.4597636722184091 +-0.36924223816393 +-0.539585171546133 +-0.4138055622986405 +-0.25401950905817183 +0.35124248378117207 +-0.5767911246317095 +-0.4452974937020068 +0.13456824841567622 +-0.08366761511503285 +-0.5855411774730717 +0.4493951821813167 +-0.0008118901312900162 +-0.375188782981553 +-0.052180286682808386 +-0.3624923116131733 +-0.3212899940903371 +-0.6326134385656439 +-0.5951558341213625 +-0.026698968757988106 +-0.6389295278289815 +-0.4665622957151918 +0.24683878631472084 +0.06670297201702563 +-0.09995075976356604 +-0.0026791784207790825 +-0.26843502542172126 +-0.23167967546053814 +-0.5500853075669638 +-0.07278578744420061 +-0.1908269856404199 +-0.10431209677312014 +-0.40541232698507823 +-1.3031302463301446 +-0.10509162333664135 +-0.06155868232417461 +-0.4347097510343062 +-0.8391150198454305 +-0.5372307413404114 +-0.46030478301666744 +-0.11618205513493052 +-0.021278188504645024 +-0.16029035414173087 +-0.35975375227600914 +-0.4814892536194141 +-0.1385760560857231 +0.3409736022465082 +-0.5355178831501075 +0.22534151535735567 +0.07294052191693523 +-0.3386178239054628 +0.15540977852505278 +0.07383896651967975 diff -r 68aaa903052a -r cc49634df38f test-data/y_true.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/y_true.tabular Fri Sep 13 12:08:44 2019 -0400 @@ -0,0 +1,75 @@ +0 +1 +0 +0 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +1 +0 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +1 +1 +0 +1 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +0 +1 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +1 +1 +0 +1 +0 +0 +0 +1 diff -r 68aaa903052a -r cc49634df38f train_test_eval.py --- a/train_test_eval.py Fri Aug 09 07:09:06 2019 -0400 +++ b/train_test_eval.py Fri Sep 13 12:08:44 2019 -0400 @@ -403,7 +403,6 @@ del main_est.validation_data if getattr(main_est, 'data_generator_', None): del main_est.data_generator_ - del main_est.data_batch_generator with open(outfile_object, 'wb') as output_handler: pickle.dump(estimator, output_handler,