changeset 16:a536d2736c2d draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 3c1e6c72303cfd8a5fd014734f18402b97f8ecb5
author bgruening
date Fri, 22 Sep 2023 16:40:38 +0000
parents 6fabdcde0214
children 1588f9076e32
files ml_visualization_ex.py ml_visualization_ex.xml pdb70_cs219.ffdata
diffstat 3 files changed, 132 insertions(+), 189 deletions(-) [+]
line wrap: on
line diff
--- a/ml_visualization_ex.py	Wed Aug 09 13:48:55 2023 +0000
+++ b/ml_visualization_ex.py	Fri Sep 22 16:40:38 2023 +0000
@@ -15,6 +15,7 @@
 from sklearn.metrics import (
     auc,
     average_precision_score,
+    confusion_matrix,
     precision_recall_curve,
     roc_curve,
 )
@@ -258,6 +259,30 @@
     os.rename(os.path.join(folder, "output.svg"), os.path.join(folder, "output"))
 
 
+def get_dataframe(file_path, plot_selection, header_name, column_name):
+    header = "infer" if plot_selection[header_name] else None
+    column_option = plot_selection[column_name]["selected_column_selector_option"]
+    if column_option in [
+        "by_index_number",
+        "all_but_by_index_number",
+        "by_header_name",
+        "all_but_by_header_name",
+    ]:
+        col = plot_selection[column_name]["col1"]
+    else:
+        col = None
+    _, input_df = read_columns(
+        file_path,
+        c=col,
+        c_option=column_option,
+        return_df=True,
+        sep="\t",
+        header=header,
+        parse_dates=True,
+    )
+    return input_df
+
+
 def main(
     inputs,
     infile_estimator=None,
@@ -271,6 +296,10 @@
     targets=None,
     fasta_path=None,
     model_config=None,
+    true_labels=None,
+    predicted_labels=None,
+    plot_color=None,
+    title=None,
 ):
     """
     Parameter
@@ -311,6 +340,18 @@
 
     model_config : str, default is None
         File path to dataset containing JSON config for neural networks
+
+    true_labels : str, default is None
+        File path to dataset containing true labels
+
+    predicted_labels : str, default is None
+        File path to dataset containing predicted labels
+
+    plot_color : str, default is None
+        Color of the confusion matrix heatmap
+
+    title : str, default is None
+        Title of the confusion matrix heatmap
     """
     warnings.simplefilter("ignore")
 
@@ -534,6 +575,36 @@
 
         return 0
 
+    elif plot_type == "classification_confusion_matrix":
+        plot_selection = params["plotting_selection"]
+        input_true = get_dataframe(
+            true_labels, plot_selection, "header_true", "column_selector_options_true"
+        )
+        header_predicted = "infer" if plot_selection["header_predicted"] else None
+        input_predicted = pd.read_csv(
+            predicted_labels, sep="\t", parse_dates=True, header=header_predicted
+        )
+        true_classes = input_true.iloc[:, -1].copy()
+        predicted_classes = input_predicted.iloc[:, -1].copy()
+        axis_labels = list(set(true_classes))
+        c_matrix = confusion_matrix(true_classes, predicted_classes)
+        fig, ax = plt.subplots(figsize=(7, 7))
+        im = plt.imshow(c_matrix, cmap=plot_color)
+        for i in range(len(c_matrix)):
+            for j in range(len(c_matrix)):
+                ax.text(j, i, c_matrix[i, j], ha="center", va="center", color="k")
+        ax.set_ylabel("True class labels")
+        ax.set_xlabel("Predicted class labels")
+        ax.set_title(title)
+        ax.set_xticks(axis_labels)
+        ax.set_yticks(axis_labels)
+        fig.colorbar(im, ax=ax)
+        fig.tight_layout()
+        plt.savefig("output.png", dpi=125)
+        os.rename("output.png", "output")
+
+        return 0
+
     # save pdf file to disk
     # fig.write_image("image.pdf", format='pdf')
     # fig.write_image("image.pdf", format='pdf', width=340*2, height=226*2)
@@ -553,6 +624,10 @@
     aparser.add_argument("-t", "--targets", dest="targets")
     aparser.add_argument("-f", "--fasta_path", dest="fasta_path")
     aparser.add_argument("-c", "--model_config", dest="model_config")
+    aparser.add_argument("-tl", "--true_labels", dest="true_labels")
+    aparser.add_argument("-pl", "--predicted_labels", dest="predicted_labels")
+    aparser.add_argument("-pc", "--plot_color", dest="plot_color")
+    aparser.add_argument("-pt", "--title", dest="title")
     args = aparser.parse_args()
 
     main(
@@ -568,4 +643,8 @@
         targets=args.targets,
         fasta_path=args.fasta_path,
         model_config=args.model_config,
+        true_labels=args.true_labels,
+        predicted_labels=args.predicted_labels,
+        plot_color=args.plot_color,
+        title=args.title,
     )
--- a/ml_visualization_ex.xml	Wed Aug 09 13:48:55 2023 +0000
+++ b/ml_visualization_ex.xml	Fri Sep 22 16:40:38 2023 +0000
@@ -21,6 +21,11 @@
             --infile1 '$plotting_selection.infile1'
             #elif $plotting_selection.plot_type == 'keras_plot_model'
             --model_config '$plotting_selection.infile_model_config'
+            #elif $plotting_selection.plot_type == 'classification_confusion_matrix'
+            --true_labels '$plotting_selection.infile_true'
+            --predicted_labels '$plotting_selection.infile_predicted'
+            --plot_color '$plotting_selection.plot_color'
+            --title '$plotting_selection.title'
             #end if
         ]]>
     </command>
@@ -36,6 +41,7 @@
                 <option value="rfecv_gridscores">Number of features vs. Recursive Feature Elimination gridscores with corss-validation</option>
                 <option value="feature_importances">Feature Importances plot</option>
                 <option value="keras_plot_model">keras plot model - plot configuration of a neural network model</option>
+                <option value="classification_confusion_matrix">Confusion matrix for classes</option>
             </param>
             <when value="learning_curve">
                 <param name="infile1" type="data" format="tabular" label="Select the dataset containing values for plotting learning curve." help="This dataset should be the output of tool model_validation->learning_curve." />
@@ -96,6 +102,31 @@
                 <param name="title" type="hidden" value="" optional="true" label="Plot title" help="Optional. If change is desired." />
                 <param name="plot_format" type="hidden" value="png" label="The output format and library" />
             </when>
+            <when value="classification_confusion_matrix">
+                <param name="infile_true" type="data" format="tabular" label="Select dataset containing true labels" />
+                <param name="header_true" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
+                <conditional name="column_selector_options_true">
+                    <expand macro="samples_column_selector_options" multiple="true" column_option="selected_column_selector_option" col_name="col1" infile="infile_true" />
+                </conditional>
+
+                <param name="infile_predicted" type="data" format="tabular" label="Select dataset containing predicted labels" />
+                <param name="header_predicted" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
+                <param name="title" type="text" value="Confusion matrix between true and predicted labels" label="Plot title" />
+                <param name="plot_format" type="hidden" value="png" label="The output format and library" />
+                <param name="plot_color" type="select" label="Choose plot color">
+                    <option value="Greys">Greys</option>
+                    <option value="Purples">Purples</option>
+                    <option value="Blues">Blues</option>
+                    <option value="Greens" selected="true">Greens</option>
+                    <option value="Oranges">Oranges</option>
+                    <option value="Reds">Reds</option>
+                    <option value="Summer">Summer</option>
+                    <option value="Autumn">Autumn</option>
+                    <option value="RdYlGn">RdYlGn</option>
+                    <option value="Spectral">Spectral</option>
+                    <option value="winter">winter</option>
+                </param>
+            </when>
         </conditional>
     </inputs>
     <outputs>
@@ -140,6 +171,28 @@
             <param name="infile_model_config" value="deepsear_1feature.json" ftype="json" />
             <output name="output" file="ml_vis05.png" compare="sim_size" delta="20000" />
         </test>
+        <test>
+            <param name="plot_type" value="classification_confusion_matrix" />
+            <param name="infile_true" value="ml_confusion_true.tabular" ftype="tabular" />
+            <param name="header_true" value="False" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile_predicted" value="ml_confusion_predicted.tabular" ftype="tabular" />
+            <param name="header_predicted" value="False" />
+            <param name="title" value="Confusion matrix" />
+            <param name="plot_color" value="winter" />
+            <output name="output" file="ml_confusion_viz.png" compare="sim_size" />
+        </test>
+        <test>
+            <param name="plot_type" value="classification_confusion_matrix" />
+            <param name="infile_true" value="true_header.tabular" ftype="tabular" />
+            <param name="header_true" value="True" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="infile_predicted" value="predicted_header.tabular" ftype="tabular" />
+            <param name="header_predicted" value="True" />
+            <param name="title" value="Confusion matrix" />
+            <param name="plot_color" value="winter" />
+            <output name="output" file="ml_confusion_viz.png" compare="sim_size" />
+        </test>
     </tests>
     <help>
         <![CDATA[
--- a/pdb70_cs219.ffdata	Wed Aug 09 13:48:55 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,189 +0,0 @@
-
-
-
-
-<!DOCTYPE HTML>
-<html>
-    <!--base.mako-->
-    
-
-
-    <head>
-        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-        <meta name = "viewport" content = "maximum-scale=1.0">
-        <meta http-equiv="X-UA-Compatible" content="IE=Edge,chrome=1">
-
-        <title>
-            Galaxy
-            | Europe
-            | 
-        </title>
-
-        <link rel="index" href="/"/>
-
-        
-        
-    <link href="/static/style/bootstrap-tour.css?v=1618364054" media="screen" rel="stylesheet" type="text/css" />
-    <link href="/static/dist/base.css?v=1618364054" media="screen" rel="stylesheet" type="text/css" />
-
-        
-    <script src="/static/dist/libs.chunk.js?v=1618364054" type="text/javascript"></script>
-<script src="/static/dist/base.chunk.js?v=1618364054" type="text/javascript"></script>
-<script src="/static/dist/generic.bundled.js?v=1618364054" type="text/javascript"></script>
-
-        
-    <!-- message.mako javascript_app() -->
-    
-
-    
-    <script type="text/javascript">
-        // galaxy_client_app.mako, load
-
-        var bootstrapped;
-        try {
-            bootstrapped = 
-{}
-;
-        } catch(err) {
-            console.warn("Unable to parse bootstrapped variable", err);
-            bootstrapped = {};
-        }
-
-        var options = {
-            root: '/',
-            config: 
-    
-{
-"display_galaxy_brand": true,
-"chunk_upload_size": 104857600,
-"use_remote_user": null,
-"enable_oidc": true,
-"mailing_join_addr": null,
-"select_type_workflow_threshold": -1,
-"myexperiment_target_url": "www.myexperiment.org:80",
-"tool_recommendation_model_path": "https://github.com/galaxyproject/galaxy-test-data/raw/master/tool_recommendation_model.hdf5",
-"simplified_workflow_run_ui_target_history": "current",
-"interactivetools_enable": true,
-"is_admin_user": false,
-"show_welcome_with_login": true,
-"welcome_url": "/static/welcome.html",
-"allow_user_impersonation": true,
-"overwrite_model_recommendations": false,
-"topk_recommendations": 10,
-"user_library_import_dir_available": false,
-"ga_code": null,
-"enable_beta_markdown_export": true,
-"visualizations_visible": true,
-"enable_tool_recommendations": true,
-"enable_unique_workflow_defaults": false,
-"registration_warning_message": "Please register only one account. The usegalaxy.eu service is provided free of charge and has limited computational and data storage resources. <strong>Registration and usage of multiple accounts is tracked and such accounts are subject to termination and data deletion.<\/strong>",
-"logo_src": "/static/favicon.png",
-"enable_quotas": true,
-"server_mail_configured": true,
-"citation_url": "https://galaxyproject.org/citing-galaxy",
-"allow_user_dataset_purge": true,
-"ftp_upload_site": "ftp://ftp.usegalaxy.eu",
-"terms_url": "https://usegalaxy.eu/terms",
-"upload_from_form_button": "always-on",
-"wiki_url": "https://galaxyproject.org/",
-"logo_src_secondary": null,
-"aws_estimate": true,
-"single_user": false,
-"datatypes_disable_auto": false,
-"brand": "Europe",
-"mailing_lists": "https://galaxyproject.org/mailing-lists/",
-"python": [
-3,
-6
-],
-"release_doc_base_url": "https://docs.galaxyproject.org/en/release_",
-"enable_openid": false,
-"cookie_domain": null,
-"message_box_content": "You are using the new UseGalaxy.eu backend server, let us know if you encounter any issues!",
-"admin_tool_recommendations_path": "/opt/galaxy/config/tool_recommendations_overwrite.yml",
-"search_url": "https://galaxyproject.org/search/",
-"remote_user_logout_href": null,
-"default_locale": "auto",
-"screencasts_url": "https://vimeo.com/galaxyproject",
-"quota_url": "https://galaxyproject.org/support/account-quotas/",
-"version_major": "21.01",
-"simplified_workflow_run_ui": "prefer",
-"allow_user_creation": true,
-"lims_doc_url": "https://usegalaxy.org/u/rkchak/p/sts",
-"message_box_visible": false,
-"has_user_tool_filters": true,
-"message_box_class": "info",
-"require_login": false,
-"logo_url": "/",
-"support_url": "https://galaxyproject.org/support/",
-"simplified_workflow_run_ui_job_cache": "off",
-"server_startttime": 1618364054,
-"oidc": {
-"elixir": {
-"icon": "https://elixir-europe.org/sites/default/files/images/login-button-orange.png"
-}
-},
-"version_minor": "",
-"helpsite_url": "https://help.galaxyproject.org/c/usegalaxy-eu-support",
-"file_sources_configured": true,
-"inactivity_box_content": "Your account has not been activated yet.  Feel free to browse around and see what's available, but you won't be able to upload data or run jobs until you have verified your email address.",
-"nginx_upload_path": "/_upload"
-}
-,
-            user: 
-    
-{
-"total_disk_usage": 0,
-"nice_total_disk_usage": "0 bytes",
-"quota_percent": null
-}
-,
-            session_csrf_token: 'c3ae71f65be7de55dd5bd5f97f316000'
-        };
-
-        config.set({
-            options: options,
-            bootstrapped: bootstrapped
-        });
-
-
-    </script>
-
-    
-
-
-
-
-    
-
-    
-    <script type="text/javascript">
-        config.addInitialization(function() {
-            if (parent.handle_minwidth_hint) {
-                parent.handle_minwidth_hint(-1);
-            }
-        });
-    </script>
-
-    </head>
-    <body class="inbound">
-        
-    
-    
-    <div class="message mt-2 alert alert-danger">You are not allowed to access this dataset</div>
-
-
-    </body>
-</html>
-
-
-
-
-
-
-
-
-
-
-
-