Mercurial > repos > bgruening > ml_visualization_ex

diff ml_visualization_ex.xml @ 8:6cf6f27547cb draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author: bgruening
date: Tue, 13 Apr 2021 19:09:17 +0000
parents: 222c02df5d55
children: 9c19cf3c4ea0
--- a/ml_visualization_ex.xml	Fri Oct 02 08:55:27 2020 +0000
+++ b/ml_visualization_ex.xml	Tue Apr 13 19:09:17 2021 +0000
@@ -1,11 +1,11 @@
-<tool id="ml_visualization_ex" name="Machine Learning Visualization Extension" version="@VERSION@">
+<tool id="ml_visualization_ex" name="Machine Learning Visualization Extension" version="@VERSION@" profile="20.05">
     <description>includes several types of plotting for machine learning</description>
     <macros>
         <import>main_macros.xml</import>
         <import>keras_macros.xml</import>
     </macros>
-    <expand macro="python_requirements"/>
-    <expand macro="macro_stdio"/>
+    <expand macro="python_requirements" />
+    <expand macro="macro_stdio" />
     <version_command>echo "@VERSION@"</version_command>
     <command>
         <![CDATA[
@@ -44,41 +44,41 @@
                 <option value="classification_confusion_matrix">Confusion matrix for classes</option>
             </param>
             <when value="learning_curve">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing values for plotting learning curve." help="This dataset should be the output of tool model_validation->learning_curve."/>
-                <param name="plot_std_err" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="false" label="Whether to plot standard error bar?"/>
-                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing values for plotting learning curve." help="This dataset should be the output of tool model_validation->learning_curve." />
+                <param name="plot_std_err" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="false" label="Whether to plot standard error bar?" />
+                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
                 <param name="plot_format" type="select" label="The output format and library">
                     <option value="html" selected="true">An interactive html by plotly</option>
                     <!--<option value="png">PNG image by matplotlib</option> TODO-->
                 </param>
             </when>
             <when value="pr_curve">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
-                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="y_preds. Headered. For 2-class, sinle column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
-                <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default."/>
-                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
-                <param name="report_minimum_n_positives" type="integer" value="" optional="true" label="Report minimum number of positives" help="For mulitple label binary classifications, whose number of true postives is less than the threhold will be ignored."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label." />
+                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="y_preds. Headered. For 2-class, sinle column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label." />
+                <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default." />
+                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
+                <param name="report_minimum_n_positives" type="integer" value="" optional="true" label="Report minimum number of positives" help="For mulitple label binary classifications, whose number of true postives is less than the threhold will be ignored." />
                 <param name="plot_format" type="select" label="The output format and library">
                     <option value="plotly_html" selected="true">An interactive html by plotly</option>
                     <option value="matplotlib_svg">SVG image by matplotlib</option>
                 </param>
             </when>
             <when value="roc_curve">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
-                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="y_preds. Headered. For 2-class, sinle column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
-                <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default."/>
-                <param name="drop_intermediate" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="true" label="drop_intermediate" help="Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve."/>
-                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
-                <param name="report_minimum_n_positives" type="integer" value="" optional="true" label="Report minimum number of positives" help="For mulitple label binary classifications, whose number of true postives is less than the threhold will be ignored."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="y_true. Headered. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label." />
+                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="y_preds. Headered. For 2-class, sinle column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label." />
+                <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default." />
+                <param name="drop_intermediate" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="true" label="drop_intermediate" help="Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve." />
+                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
+                <param name="report_minimum_n_positives" type="integer" value="" optional="true" label="Report minimum number of positives" help="For mulitple label binary classifications, whose number of true postives is less than the threhold will be ignored." />
                 <param name="plot_format" type="select" label="The output format and library">
                     <option value="plotly_html" selected="true">An interactive html by plotly</option>
                     <option value="matplotlib_svg">SVG image by matplotlib</option>
                 </param>
             </when>
             <when value="rfecv_gridscores">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing grid_scores from a fitted RFECV object." help="Headered. Single column. Could be Output from `estimator_attributes->grid_scores_`."/>
-                <param name="steps" type="text" value="" optional="true" label="Step IDs" help="List, containing hover labels for each grid_score. For example: `list(range(10)) + [15, 20]`."/>
-                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing grid_scores from a fitted RFECV object." help="Headered. Single column. Could be Output from `estimator_attributes->grid_scores_`." />
+                <param name="steps" type="text" value="" optional="true" label="Step IDs" help="List, containing hover labels for each grid_score. For example: `list(range(10)) + [15, 20]`." />
+                <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
                 <param name="plot_format" type="select" label="The output format and library">
                     <option value="html" selected="true">An interactive html by plotly</option>
                     <!--<option value="png">PNG image by matplotlib</option> TODO-->
@@ -86,35 +86,34 @@
             </when>
             <when value="feature_importances">
                 <param name="infile_estimator" type="data" format="zip" label="Select the dataset containing fitted estimator/pipeline" />
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing feature names" help="Make sure the headers (first row) are feature names."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing feature names" help="Make sure the headers (first row) are feature names." />
                 <conditional name="column_selector_options">
-                    <expand macro="samples_column_selector_options" multiple="true"/>
+                    <expand macro="samples_column_selector_options" multiple="true" />
                 </conditional>
-                <param name="threshold" type="float" value="" optional="true" min="0." label="Threshold value" help="Features with importance below the threshold value will be ignored."/>
-                <param name="title" type="text" value="" optional="true" label="Custom plot title" help="Optional."/>
+                <param name="threshold" type="float" value="" optional="true" min="0." label="Threshold value" help="Features with importance below the threshold value will be ignored." />
+                <param name="title" type="text" value="" optional="true" label="Custom plot title" help="Optional." />
                 <param name="plot_format" type="select" label="The output format and library">
                     <option value="html" selected="true">An interactive html by plotly</option>
                     <!--<option value="png">PNG image by matplotlib</option> TODO-->
                 </param>
             </when>
             <when value="keras_plot_model">
-                <param name="infile_model_config" type="data" format="json" label="Select the JSON dataset containing configuration for a neural network model"/>
-                <param name="title" type="hidden" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
-                <param name="plot_format" type="hidden" value="png" label="The output format and library"/>
+                <param name="infile_model_config" type="data" format="json" label="Select the JSON dataset containing configuration for a neural network model" />
+                <param name="title" type="hidden" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
+                <param name="plot_format" type="hidden" value="png" label="The output format and library" />
             </when>
-            
+
             <when value="classification_confusion_matrix">
-                <param name="infile_true" type="data" format="tabular" label="Select dataset containing true labels"/>
+                <param name="infile_true" type="data" format="tabular" label="Select dataset containing true labels" />
                 <param name="header_true" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
                 <conditional name="column_selector_options_true">
-                    <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option"
-                        col_name="col1" infile="infile_true"/>
+                    <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile_true" />
                 </conditional>
-                
-                <param name="infile_predicted" type="data" format="tabular" label="Select dataset containing predicted labels"/>
-                <param name="header_predicted" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> 
-                <param name="title" type="text" value="Confusion matrix between true and predicted labels" label="Plot title"/>
-                <param name="plot_format" type="hidden" value="png" label="The output format and library"/>
+
+                <param name="infile_predicted" type="data" format="tabular" label="Select dataset containing predicted labels" />
+                <param name="header_predicted" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
+                <param name="title" type="text" value="Confusion matrix between true and predicted labels" label="Plot title" />
+                <param name="plot_format" type="hidden" value="png" label="The output format and library" />
                 <param name="plot_color" type="select" label="Choose plot color">
                     <option value="Greys">Greys</option>
                     <option value="Purples">Purples</option>
@@ -134,66 +133,66 @@
     <outputs>
         <data name="output" format="html" from_work_dir="output" label="${plotting_selection.plot_type} plot on ${on_string}">
             <change_format>
-                <when input="plotting_selection.plot_format" value="png" format="png"/>
+                <when input="plotting_selection.plot_format" value="png" format="png" />
             </change_format>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="plot_type" value="roc_curve"/>
-            <param name="infile1" value="y_true.tabular" ftype="tabular"/>
-            <param name="infile2" value="y_score.tabular" ftype="tabular"/>
-            <output name="output" file="ml_vis04.html" compare="sim_size"/>
+            <param name="plot_type" value="roc_curve" />
+            <param name="infile1" value="y_true.tabular" ftype="tabular" />
+            <param name="infile2" value="y_score.tabular" ftype="tabular" />
+            <output name="output" file="ml_vis04.html" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="feature_importances"/>
-            <param name="infile_estimator" value="best_estimator_.zip" ftype="zip"/>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <output name="output" file="ml_vis01.html" compare="sim_size"/>
+            <param name="plot_type" value="feature_importances" />
+            <param name="infile_estimator" value="best_estimator_.zip" ftype="zip" />
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <output name="output" file="ml_vis01.html" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="learning_curve"/>
-            <param name="infile1" value="mv_result03.tabular" ftype="tabular"/>
-            <output name="output" file="ml_vis02.html" compare="sim_size"/>
+            <param name="plot_type" value="learning_curve" />
+            <param name="infile1" value="mv_result03.tabular" ftype="tabular" />
+            <output name="output" file="ml_vis02.html" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="pr_curve"/>
-            <param name="infile1" value="y_true.tabular" ftype="tabular"/>
-            <param name="infile2" value="y_score.tabular" ftype="tabular"/>
-            <output name="output" file="ml_vis03.html" compare="sim_size"/>
+            <param name="plot_type" value="pr_curve" />
+            <param name="infile1" value="y_true.tabular" ftype="tabular" />
+            <param name="infile2" value="y_score.tabular" ftype="tabular" />
+            <output name="output" file="ml_vis03.html" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="rfecv_gridscores"/>
-            <param name="infile1" value="grid_scores_.tabular" ftype="tabular"/>
-            <output name="output" file="ml_vis05.html" compare="sim_size"/>
+            <param name="plot_type" value="rfecv_gridscores" />
+            <param name="infile1" value="grid_scores_.tabular" ftype="tabular" />
+            <output name="output" file="ml_vis05.html" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="keras_plot_model"/>
-            <param name="infile_model_config" value="deepsear_1feature.json" ftype="json"/>
-            <output name="output" file="ml_vis05.png" compare="sim_size" delta="20000"/>
+            <param name="plot_type" value="keras_plot_model" />
+            <param name="infile_model_config" value="deepsear_1feature.json" ftype="json" />
+            <output name="output" file="ml_vis05.png" compare="sim_size" delta="20000" />
         </test>
         <test>
-            <param name="plot_type" value="classification_confusion_matrix"/>
-            <param name="infile_true" value="ml_confusion_true.tabular" ftype="tabular"/>
-            <param name="header_true" value="False"/>
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="infile_predicted" value="ml_confusion_predicted.tabular" ftype="tabular"/>
-            <param name="header_predicted" value="False"/>
-            <param name="title" value="Confusion matrix"/>
+            <param name="plot_type" value="classification_confusion_matrix" />
+            <param name="infile_true" value="ml_confusion_true.tabular" ftype="tabular" />
+            <param name="header_true" value="False" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile_predicted" value="ml_confusion_predicted.tabular" ftype="tabular" />
+            <param name="header_predicted" value="False" />
+            <param name="title" value="Confusion matrix" />
             <param name="plot_color" value="winter" />
-            <output name="output" file="ml_confusion_viz.png" compare="sim_size"/>
+            <output name="output" file="ml_confusion_viz.png" compare="sim_size" />
         </test>
         <test>
-            <param name="plot_type" value="classification_confusion_matrix"/>
-            <param name="infile_true" value="true_header.tabular" ftype="tabular"/>
-            <param name="header_true" value="True"/>
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="infile_predicted" value="predicted_header.tabular" ftype="tabular"/>
-            <param name="header_predicted" value="True"/>
-            <param name="title" value="Confusion matrix"/>
+            <param name="plot_type" value="classification_confusion_matrix" />
+            <param name="infile_true" value="true_header.tabular" ftype="tabular" />
+            <param name="header_true" value="True" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile_predicted" value="predicted_header.tabular" ftype="tabular" />
+            <param name="header_predicted" value="True" />
+            <param name="title" value="Confusion matrix" />
             <param name="plot_color" value="winter" />
-            <output name="output" file="ml_confusion_viz.png" compare="sim_size"/>
+            <output name="output" file="ml_confusion_viz.png" compare="sim_size" />
         </test>
     </tests>
     <help>
author	bgruening
date	Tue, 13 Apr 2021 19:09:17 +0000
parents	222c02df5d55
children	9c19cf3c4ea0