Mercurial > repos > bgruening > ml_visualization_ex
view ml_visualization_ex.xml @ 5:c7655b5a94af draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 756f8be9c3cd437e131e6410cd625c24fe078e8c"
author | bgruening |
---|---|
date | Wed, 22 Jan 2020 08:03:54 -0500 |
parents | 6b94d76a1397 |
children | 222c02df5d55 |
line wrap: on
line source
<!--
    Galaxy tool wrapper: Machine Learning Visualization Extension.

    Runs ml_visualization_ex.py from the tool directory. The script receives the
    tool parameters through the "inputs" configfile plus the datasets selected in
    the "plotting_selection" conditional, and the result is collected from the
    file named "output" in the job working directory.
-->
<tool id="ml_visualization_ex" name="Machine Learning Visualization Extension" version="@VERSION@">
    <description>includes several types of plotting for machine learning</description>
    <macros>
        <import>main_macros.xml</import>
        <import>keras_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <!-- Which dataset options are passed depends on the selected plot type. -->
    <command>
        <![CDATA[
        python '$__tool_directory__/ml_visualization_ex.py'
            --inputs '$inputs'
            #if $plotting_selection.plot_type in ['learning_curve', 'rfecv_gridscores']
            --infile1 '$plotting_selection.infile1'
            #elif $plotting_selection.plot_type in ['pr_curve', 'roc_curve']
            --infile1 '$plotting_selection.infile1'
            --infile2 '$plotting_selection.infile2'
            #elif $plotting_selection.plot_type == 'feature_importances'
            --estimator '$plotting_selection.infile_estimator'
            --infile1 '$plotting_selection.infile1'
            #elif $plotting_selection.plot_type == 'keras_plot_model'
            --model_config '$plotting_selection.infile_model_config'
            #end if
        ]]>
    </command>
    <!-- Per the Galaxy tool schema, this writes the tool parameters to a JSON file exposed as $inputs. -->
    <configfiles>
        <inputs name="inputs" />
    </configfiles>
    <inputs>
        <conditional name="plotting_selection">
            <param name="plot_type" type="select" label="Select a plotting type">
                <option value="learning_curve" selected="true">Learning curve</option>
                <option value="pr_curve">2-class / multi-label Precision Recall curve</option>
                <option value="roc_curve">2-class / multi-label Receiver Operating Characteristic (ROC) curve</option>
                <option value="rfecv_gridscores">Number of features vs. Recursive Feature Elimination gridscores with cross-validation</option>
                <option value="feature_importances">Feature Importances plot</option>
                <option value="keras_plot_model">keras plot model - plot configuration of a neural network model</option>
            </param>
            <!-- Learning curve: one tabular input. -->
            <when value="learning_curve">
                <param name="infile1" type="data" format="tabular"
                       label="Select the dataset containing values for plotting learning curve."
                       help="This dataset should be the output of tool model_validation->learning_curve."/>
                <param name="plot_std_err" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="false"
                       label="Whether to plot standard error bar?"/>
                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
                <param name="plot_format" type="select" label="The output format and library">
                    <option value="html" selected="true">An interactive html by plotly</option>
                    <!--<option value="png">PNG image by matplotlib</option> TODO-->
                </param>
            </when>
            <!-- Precision recall curve: true labels plus predicted probabilities. -->
            <when value="pr_curve">
                <param name="infile1" type="data" format="tabular"
                       label="Select the dataset containing true labels."
                       help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
                <param name="infile2" type="data" format="tabular"
                       label="Select the dataset containing predicted probabilities."
                       help="y_preds. Headered. For 2-class, single column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
                <param name="pos_label" type="text" value="" optional="true" label="pos_label"
                       help="The label of positive class. If not specified, it will be 1 by default."/>
                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
                <param name="report_minimum_n_positives" type="integer" value="" optional="true"
                       label="Report minimum number of positives"
                       help="For multiple label binary classifications, those whose number of true positives is less than the threshold will be ignored."/>
                <param name="plot_format" type="select" label="The output format and library">
                    <option value="plotly_html" selected="true">An interactive html by plotly</option>
                    <option value="matplotlib_svg">SVG image by matplotlib</option>
                </param>
            </when>
            <!-- ROC curve: same inputs as the precision recall curve, plus drop_intermediate. -->
            <when value="roc_curve">
                <param name="infile1" type="data" format="tabular"
                       label="Select the dataset containing true labels."
                       help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
                <param name="infile2" type="data" format="tabular"
                       label="Select the dataset containing predicted probabilities."
                       help="y_preds. Headered. For 2-class, single column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
                <param name="pos_label" type="text" value="" optional="true" label="pos_label"
                       help="The label of positive class. If not specified, it will be 1 by default."/>
                <param name="drop_intermediate" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="true"
                       label="drop_intermediate"
                       help="Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve."/>
                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
                <param name="report_minimum_n_positives" type="integer" value="" optional="true"
                       label="Report minimum number of positives"
                       help="For multiple label binary classifications, those whose number of true positives is less than the threshold will be ignored."/>
                <param name="plot_format" type="select" label="The output format and library">
                    <option value="plotly_html" selected="true">An interactive html by plotly</option>
                    <option value="matplotlib_svg">SVG image by matplotlib</option>
                </param>
            </when>
            <!-- RFECV grid scores: one single-column tabular input. -->
            <when value="rfecv_gridscores">
                <param name="infile1" type="data" format="tabular"
                       label="Select the dataset containing grid_scores from a fitted RFECV object."
                       help="Headered. Single column. Could be Output from `estimator_attributes->grid_scores_`."/>
                <param name="steps" type="text" value="" optional="true" label="Step IDs"
                       help="List, containing hover labels for each grid_score. For example: `list(range(10)) + [15, 20]`."/>
                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
                <param name="plot_format" type="select" label="The output format and library">
                    <option value="html" selected="true">An interactive html by plotly</option>
                    <!--<option value="png">PNG image by matplotlib</option> TODO-->
                </param>
            </when>
            <!-- Feature importances: a fitted estimator plus a tabular file whose header row names the features. -->
            <when value="feature_importances">
                <param name="infile_estimator" type="data" format="zip" label="Select the dataset containing fitted estimator/pipeline" />
                <param name="infile1" type="data" format="tabular"
                       label="Select the dataset containing feature names"
                       help="Make sure the headers (first row) are feature names."/>
                <conditional name="column_selector_options">
                    <expand macro="samples_column_selector_options" multiple="true"/>
                </conditional>
                <param name="threshold" type="float" value="" optional="true" min="0."
                       label="Threshold value"
                       help="Features with importance below the threshold value will be ignored."/>
                <param name="title" type="text" value="" optional="true" label="Custom plot title" help="Optional."/>
                <param name="plot_format" type="select" label="The output format and library">
                    <option value="html" selected="true">An interactive html by plotly</option>
                    <!--<option value="png">PNG image by matplotlib</option> TODO-->
                </param>
            </when>
            <!-- Keras model plot: title and plot_format are hidden; plot_format is fixed to png. -->
            <when value="keras_plot_model">
                <param name="infile_model_config" type="data" format="json" label="Select the JSON dataset containing configuration for a neural network model"/>
                <param name="title" type="hidden" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
                <param name="plot_format" type="hidden" value="png" label="The output format and library"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!--
            The output datatype defaults to html and follows the selected plot_format:
            png for the keras model plot, svg for the matplotlib renderings of the
            precision recall and ROC curves.
        -->
        <data name="output" format="html" from_work_dir="output" label="${plotting_selection.plot_type} plot on ${on_string}">
            <change_format>
                <when input="plotting_selection.plot_format" value="png" format="png"/>
                <when input="plotting_selection.plot_format" value="matplotlib_svg" format="svg"/>
            </change_format>
        </data>
    </outputs>
    <!-- One test per plot type; outputs are compared by size only (sim_size). -->
    <tests>
        <test>
            <param name="plot_type" value="roc_curve"/>
            <param name="infile1" value="y_true.tabular" ftype="tabular"/>
            <param name="infile2" value="y_score.tabular" ftype="tabular"/>
            <output name="output" file="ml_vis04.html" compare="sim_size"/>
        </test>
        <test>
            <param name="plot_type" value="feature_importances"/>
            <param name="infile_estimator" value="best_estimator_.zip" ftype="zip"/>
            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
            <param name="selected_column_selector_option" value="all_columns"/>
            <output name="output" file="ml_vis01.html" compare="sim_size"/>
        </test>
        <test>
            <param name="plot_type" value="learning_curve"/>
            <param name="infile1" value="mv_result03.tabular" ftype="tabular"/>
            <output name="output" file="ml_vis02.html" compare="sim_size"/>
        </test>
        <test>
            <param name="plot_type" value="pr_curve"/>
            <param name="infile1" value="y_true.tabular" ftype="tabular"/>
            <param name="infile2" value="y_score.tabular" ftype="tabular"/>
            <output name="output" file="ml_vis03.html" compare="sim_size"/>
        </test>
        <test>
            <param name="plot_type" value="rfecv_gridscores"/>
            <param name="infile1" value="grid_scores_.tabular" ftype="tabular"/>
            <output name="output" file="ml_vis05.html" compare="sim_size"/>
        </test>
        <test>
            <param name="plot_type" value="keras_plot_model"/>
            <param name="infile_model_config" value="deepsear_1feature.json" ftype="json"/>
            <output name="output" file="ml_vis05.png" compare="sim_size" delta="20000"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**

This tool outputs several machine learning visualization plots using Plotly, including 'feature_importances', 'learning curve', 'precision recall curve', 'roc_curve', and 'number of features vs. rfecv gridscores'. This tool also outputs a configuration diagram for a deep learning model using the Keras model JSON file as input.

**Feature importances**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/feature_importances.png
    :width: 400
    :alt: feature importances

**Learning curve**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/learning_curve.png
    :width: 400
    :alt: learning curve

**Precision recall curve**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/pr_curve.png
    :width: 400
    :alt: precision recall curve

**Receiver operating characteristic curve**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/roc_curve.png
    :width: 400
    :alt: Receiver operating characteristic curve

**Number of features vs. rfecv gridscores**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/rfecv_gridscore.png
    :width: 400
    :alt: Number of features vs. rfecv gridscores

**Deep learning model configuration**

.. image:: https://raw.githubusercontent.com/goeckslab/Galaxy-ML/master/galaxy_ml/tools/images/deepsea.png
    :width: 400
    :alt: deepsea model configuration

        ]]>
    </help>
    <expand macro="sklearn_citation">
        <citation type="bibtex">
            @online{plotly,
            author = {Plotly Technologies Inc.},
            title = {Collaborative data science},
            publisher = {Plotly Technologies Inc.},
            address = {Montreal, QC},
            year = {2015},
            url = {https://plot.ly}}
        </citation>
    </expand>
</tool>