Mercurial > repos > bgruening > ml_visualization_ex
changeset 6:222c02df5d55 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9e28f4466084464d38d3f8db2aff07974be4ba69"
author | bgruening |
---|---|
date | Wed, 11 Mar 2020 13:59:57 -0400 |
parents | c7655b5a94af |
children | 05143043ca13 |
files | main_macros.xml ml_visualization_ex.py ml_visualization_ex.xml test-data/ml_confusion_predicted.tabular test-data/ml_confusion_true.tabular test-data/ml_confusion_viz.png test-data/predicted_header.tabular test-data/true_header.tabular |
diffstat | 8 files changed, 259 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/main_macros.xml Wed Jan 22 08:03:54 2020 -0500 +++ b/main_macros.xml Wed Mar 11 13:59:57 2020 -0400 @@ -1,5 +1,5 @@ <macros> - <token name="@VERSION@">1.0.8.1</token> + <token name="@VERSION@">1.0.8.2</token> <xml name="python_requirements"> <requirements>
--- a/ml_visualization_ex.py Wed Jan 22 08:03:54 2020 -0500 +++ b/ml_visualization_ex.py Wed Mar 11 13:59:57 2020 -0400 @@ -13,7 +13,7 @@ from keras.utils import plot_model from sklearn.feature_selection.base import SelectorMixin from sklearn.metrics import precision_recall_curve, average_precision_score -from sklearn.metrics import roc_curve, auc +from sklearn.metrics import roc_curve, auc, confusion_matrix from sklearn.pipeline import Pipeline from galaxy_ml.utils import load_model, read_columns, SafeEval @@ -266,12 +266,29 @@ os.path.join(folder, "output")) +def get_dataframe(file_path, plot_selection, header_name, column_name): + header = 'infer' if plot_selection[header_name] else None + column_option = plot_selection[column_name]["selected_column_selector_option"] + if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: + col = plot_selection[column_name]["col1"] + else: + col = None + _, input_df = read_columns(file_path, c=col, + c_option=column_option, + return_df=True, + sep='\t', header=header, + parse_dates=True) + return input_df + + def main(inputs, infile_estimator=None, infile1=None, infile2=None, outfile_result=None, outfile_object=None, groups=None, ref_seq=None, intervals=None, targets=None, fasta_path=None, - model_config=None): + model_config=None, true_labels=None, + predicted_labels=None, plot_color=None, + title=None): """ Parameter --------- @@ -311,6 +328,18 @@ model_config : str, default is None File path to dataset containing JSON config for neural networks + + true_labels : str, default is None + File path to dataset containing true labels + + predicted_labels : str, default is None + File path to dataset containing true predicted labels + + plot_color : str, default is None + Color of the confusion matrix heatmap + + title : str, default is None + Title of the confusion matrix heatmap """ warnings.simplefilter('ignore') @@ -543,6 +572,32 @@ return 0 + elif plot_type == 'classification_confusion_matrix': + plot_selection = params["plotting_selection"] + input_true = get_dataframe(true_labels, plot_selection, "header_true", "column_selector_options_true") + header_predicted = 'infer' if plot_selection["header_predicted"] else None + input_predicted = pd.read_csv(predicted_labels, sep='\t', parse_dates=True, header=header_predicted) + true_classes = input_true.iloc[:, -1].copy() + predicted_classes = input_predicted.iloc[:, -1].copy() + axis_labels = list(set(true_classes)) + c_matrix = confusion_matrix(true_classes, predicted_classes) + fig, ax = plt.subplots(figsize=(7, 7)) + im = plt.imshow(c_matrix, cmap=plot_color) + for i in range(len(c_matrix)): + for j in range(len(c_matrix)): + ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k") + ax.set_ylabel('True class labels') + ax.set_xlabel('Predicted class labels') + ax.set_title(title) + ax.set_xticks(axis_labels) + ax.set_yticks(axis_labels) + fig.colorbar(im, ax=ax) + fig.tight_layout() + plt.savefig("output.png", dpi=125) + os.rename('output.png', 'output') + + return 0 + # save pdf file to disk # fig.write_image("image.pdf", format='pdf') # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2) @@ -562,10 +617,17 @@ aparser.add_argument("-t", "--targets", dest="targets") aparser.add_argument("-f", "--fasta_path", dest="fasta_path") aparser.add_argument("-c", "--model_config", dest="model_config") + aparser.add_argument("-tl", "--true_labels", dest="true_labels") + aparser.add_argument("-pl", "--predicted_labels", dest="predicted_labels") + aparser.add_argument("-pc", "--plot_color", dest="plot_color") + aparser.add_argument("-pt", "--title", dest="title") args = aparser.parse_args() main(args.inputs, args.infile_estimator, args.infile1, args.infile2, args.outfile_result, outfile_object=args.outfile_object, groups=args.groups, ref_seq=args.ref_seq, intervals=args.intervals, targets=args.targets, fasta_path=args.fasta_path, - model_config=args.model_config) + model_config=args.model_config, true_labels=args.true_labels, + predicted_labels=args.predicted_labels, + plot_color=args.plot_color, + title=args.title)
--- a/ml_visualization_ex.xml Wed Jan 22 08:03:54 2020 -0500 +++ b/ml_visualization_ex.xml Wed Mar 11 13:59:57 2020 -0400 @@ -21,6 +21,11 @@ --infile1 '$plotting_selection.infile1' #elif $plotting_selection.plot_type == 'keras_plot_model' --model_config '$plotting_selection.infile_model_config' + #elif $plotting_selection.plot_type == 'classification_confusion_matrix' + --true_labels '$plotting_selection.infile_true' + --predicted_labels '$plotting_selection.infile_predicted' + --plot_color '$plotting_selection.plot_color' + --title '$plotting_selection.title' #end if ]]> </command> @@ -36,6 +41,7 @@ <option value="rfecv_gridscores">Number of features vs. Recursive Feature Elimination gridscores with corss-validation</option> <option value="feature_importances">Feature Importances plot</option> <option value="keras_plot_model">keras plot model - plot configuration of a neural network model</option> + <option value="classification_confusion_matrix">Confusion matrix for classes</option> </param> <when value="learning_curve"> <param name="infile1" type="data" format="tabular" label="Select the dataset containing values for plotting learning curve." help="This dataset should be the output of tool model_validation->learning_curve."/> @@ -96,6 +102,33 @@ <param name="title" type="hidden" value="" optional="true" label="Plot title" help="Optional. If change is desired."/> <param name="plot_format" type="hidden" value="png" label="The output format and library"/> </when> + + <when value="classification_confusion_matrix"> + <param name="infile_true" type="data" format="tabular" label="Select dataset containing true labels"/> + <param name="header_true" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> + <conditional name="column_selector_options_true"> + <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" + col_name="col1" infile="infile_true"/> + </conditional> + + <param name="infile_predicted" type="data" format="tabular" label="Select dataset containing predicted labels"/> + <param name="header_predicted" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> + <param name="title" type="text" value="Confusion matrix between true and predicted labels" label="Plot title"/> + <param name="plot_format" type="hidden" value="png" label="The output format and library"/> + <param name="plot_color" type="select" label="Choose plot color"> + <option value="Greys">Greys</option> + <option value="Purples">Purples</option> + <option value="Blues">Blues</option> + <option value="Greens" selected="true">Greens</option> + <option value="Oranges">Oranges</option> + <option value="Reds">Reds</option> + <option value="Summer">Summer</option> + <option value="Autumn">Autumn</option> + <option value="RdYlGn">RdYlGn</option> + <option value="Spectral">Spectral</option> + <option value="winter">winter</option> + </param> + </when> </conditional> </inputs> <outputs> @@ -140,6 +173,28 @@ <param name="infile_model_config" value="deepsear_1feature.json" ftype="json"/> <output name="output" file="ml_vis05.png" compare="sim_size" delta="20000"/> </test> + <test> + <param name="plot_type" value="classification_confusion_matrix"/> + <param name="infile_true" value="ml_confusion_true.tabular" ftype="tabular"/> + <param name="header_true" value="False"/> + <param name="selected_column_selector_option" value="all_columns"/> + <param name="infile_predicted" value="ml_confusion_predicted.tabular" ftype="tabular"/> + <param name="header_predicted" value="False"/> + <param name="title" value="Confusion matrix"/> + <param name="plot_color" value="winter" /> + <output name="output" file="ml_confusion_viz.png" compare="sim_size"/> + </test> + <test> + <param name="plot_type" value="classification_confusion_matrix"/> + <param name="infile_true" value="true_header.tabular" ftype="tabular"/> + <param name="header_true" value="True"/> + <param name="selected_column_selector_option" value="all_columns"/> + <param name="infile_predicted" value="predicted_header.tabular" ftype="tabular"/> + <param name="header_predicted" value="True"/> + <param name="title" value="Confusion matrix"/> + <param name="plot_color" value="winter" /> + <output name="output" file="ml_confusion_viz.png" compare="sim_size"/> + </test> </tests> <help> <![CDATA[
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_confusion_predicted.tabular Wed Mar 11 13:59:57 2020 -0400 @@ -0,0 +1,34 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ml_confusion_true.tabular Wed Mar 11 13:59:57 2020 -0400 @@ -0,0 +1,34 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +0 +0 +1 +1 +0 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0