view plotly_ml_performance_plots.xml @ 2:62e3a4e8c54c draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/plotly_ml_performance_plots commit 3752bb86fb42f0c98b6d6cbbd75e8d987f17a3fd"
author bgruening
date Thu, 16 Jan 2020 13:49:49 -0500
parents 4fac53da862f
children 1c5dcef5ce0f
line wrap: on
line source

<tool id="plotly_ml_performance_plots" name="Plot confusion matrix, precision, recall and ROC and AUC curves" version="0.2">
    <description>of tabular data</description>
    <!-- Packages (with pinned versions) that the tool declares for its runtime environment. -->
    <requirements>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="4.3.0">plotly</requirement>
        <requirement type="package" version="0.21.3">scikit-learn</requirement>
        <requirement type="package" version="1.4.1">scipy</requirement>
    </requirements>
    <!-- Report the version of the plotting library from the tool environment.
         The previous "echo $version" expanded an unset shell variable and
         therefore always recorded an empty version string. -->
    <version_command>python -c "import plotly; print(plotly.__version__)"</version_command>
    <!-- Runs plot_ml_performance.py from the tool directory with the three selected
         datasets: -i the table holding the true labels, -j the table holding the
         predicted labels, -k the trained model archive.
         The outputs section collects output_confusion.html, output_prf.html and
         output_roc.html from the working directory, so the script is expected to
         write those files there.
         NOTE(review): detect_errors="aggressive" asks Galaxy for stricter failure
         detection than the default; see the Galaxy tool XML docs for exact rules. -->
    <command detect_errors="aggressive"><![CDATA[
    python '$__tool_directory__/plot_ml_performance.py'
        -i '$infile_input'
        -j '$infile_output'
        -k '$infile_trained_model'
]]>
    </command>
    <!-- Three datasets chosen by the user: true labels, predicted labels and the trained model. -->
    <inputs>
        <param name="infile_input"
               type="data"
               format="tabular"
               label="Select input data file :"
               help="Input data is a matrix (tabular) where each column is a feature and the last column contains the (true or original) class labels."/>
        <param name="infile_output"
               type="data"
               format="tabular"
               label="Select predicted data file :"
               help="Predicted data is a matrix (tabular) where each column is a feature and the last column contains the predicted class labels."/>
        <param name="infile_trained_model"
               type="data"
               format="zip"
               label="Select trained model :"
               help="This file is a final model trained on training data."/>
    </inputs>

    <!-- One HTML dataset per plot, each picked up from a fixed file name in the job working directory. -->
    <outputs>
        <data name="output_confusion"
              format="html"
              from_work_dir="output_confusion.html"
              label="Confusion matrix of tabular data on ${on_string}"/>
        <data name="output_prf"
              format="html"
              from_work_dir="output_prf.html"
              label="Precision, recall and f-score of tabular data on ${on_string}"/>
        <data name="output_roc"
              format="html"
              from_work_dir="output_roc.html"
              label="ROC and AUC curves of tabular data on ${on_string}"/>
    </outputs>
    
    <!-- Every test feeds a label table, a prediction table and a model archive,
         then compares the three HTML outputs to reference files by size only
         (compare="sim_size"). -->
    <tests>
        <!-- Binary labels with the "sgd" prediction and model fixtures. -->
        <test>
            <param name="infile_input" value="binary_test_label.tabular" ftype="tabular"/>
            <param name="infile_output" value="binary_prediction_sgd.tabular" ftype="tabular"/>
            <param name="infile_trained_model" value="model_binary_sgd.zip" ftype="zip"/>
            <output name="output_confusion" file="cnf_binary_sgd.html" compare="sim_size"/>
            <output name="output_prf" file="prf_binary_sgd.html" compare="sim_size"/>
            <output name="output_roc" file="roc_auc_binary_sgd.html" compare="sim_size"/>
        </test>

        <!-- Binary labels with the "linearsvm" prediction and model fixtures. -->
        <test>
            <param name="infile_input" value="binary_test_label.tabular" ftype="tabular"/>
            <param name="infile_output" value="prediction_binary_linearsvm.tabular" ftype="tabular"/>
            <param name="infile_trained_model" value="model_binary_linearsvm.zip" ftype="zip"/>
            <output name="output_confusion" file="cnf_binary_linearsvm.html" compare="sim_size"/>
            <output name="output_prf" file="prf_binary_linearsvm.html" compare="sim_size"/>
            <output name="output_roc" file="roc_auc_binary_linearsvm.html" compare="sim_size"/>
        </test>

        <!-- Binary labels with the "rfc" prediction and model fixtures. -->
        <test>
            <param name="infile_input" value="binary_test_label.tabular" ftype="tabular"/>
            <param name="infile_output" value="prediction_binary_rfc.tabular" ftype="tabular"/>
            <param name="infile_trained_model" value="model_binary_rfc.zip" ftype="zip"/>
            <output name="output_confusion" file="cnf_binary_rfc.html" compare="sim_size"/>
            <output name="output_prf" file="prf_binary_rfc.html" compare="sim_size"/>
            <output name="output_roc" file="roc_auc_binary_rfc.html" compare="sim_size"/>
        </test>

        <!-- Binary labels with the "knn" prediction and model fixtures. -->
        <test>
            <param name="infile_input" value="binary_test_label.tabular" ftype="tabular"/>
            <param name="infile_output" value="prediction_binary_knn.tabular" ftype="tabular"/>
            <param name="infile_trained_model" value="model_binary_knn.zip" ftype="zip"/>
            <output name="output_confusion" file="cnf_binary_knn.html" compare="sim_size"/>
            <output name="output_prf" file="prf_binary_knn.html" compare="sim_size"/>
            <output name="output_roc" file="roc_auc_binary_knn.html" compare="sim_size"/>
        </test>

        <!-- Multi-class labels with the "lr" prediction and model fixtures. -->
        <test>
            <param name="infile_input" value="multi_test_label.tabular" ftype="tabular"/>
            <param name="infile_output" value="prediction_multi_lr.tabular" ftype="tabular"/>
            <param name="infile_trained_model" value="model_multi_lr.zip" ftype="zip"/>
            <output name="output_confusion" file="cnf_multi_lr.html" compare="sim_size"/>
            <output name="output_prf" file="prf_multi_lr.html" compare="sim_size"/>
            <output name="output_roc" file="roc_auc_multi_lr.html" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**


Produce a `heatmap plot (confusion matrix) <https://plot.ly/python/heatmaps/>`_ from tabular files. The input data contains the original/true class labels (last column) and the predicted data contains the predicted class labels (last column). The true and predicted class labels are plotted against each other. The diagonal of this heatmap shows the correctly predicted data. In addition, a plot of precision, recall and f-score and a plot of ROC and AUC curves are produced. Each plot is embedded in an HTML file which
provides rich interactive features. Images can be saved in various formats, such as 'png', 'svg', 'jpeg' and so on.


    ]]>
    </help>
</tool>