changeset 1:693bfd012601 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chopin2 commit a9f99c56d0ff858ca7b37ce00759f62401cbd806
author iuc
date Tue, 09 Apr 2024 11:23:55 +0000
parents d49893faf877
children ee6c2152e092
files chopin2.xml macros.xml test-data/selection.txt test-data/summary.txt test-data/summary_1.txt test-data/summary_2.txt test-data/summary_3.txt
diffstat 7 files changed, 84 insertions(+), 53 deletions(-) [+]
line wrap: on
line diff
--- a/chopin2.xml	Tue Jan 31 16:31:19 2023 +0000
+++ b/chopin2.xml	Tue Apr 09 11:23:55 2024 +0000
@@ -44,13 +44,13 @@
     <inputs>
         <param name="dataset" type="data" format="csv,tabular" 
                label="Select a dataset" 
-               help="Input dataset with features on columns and observations on rows. The first column must contain the observation IDs, while the last column must contain classes. The header line is also required." />
+               help="Input dataset with features on columns and observations on rows. The first column must contain the observation IDs, while the last column must contain classes. The header line is also required."/>
         
         <param name="dimensionality" type="integer" value="10000" min="100" 
                label="Vectors dimensionality" 
                help="Size of hypervectors is usually 10,000 in vector-symbolic architectures. However, lower values could work 
                      with small datasets in terms of number of features and observations. Please note that you may require 
-                     to increase this number in case of datasets with a huge number of features." />
+                     to increase this number in case of datasets with a huge number of features."/>
         
         <param name="levels" type="integer" value="1000" min="2" 
                label="Levels" 
@@ -59,13 +59,13 @@
         
         <param name="retrain" type="integer" value="0" min="0" 
                label="Model retraining iterations" 
-               help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise." />
+               help="Maximum number of retraining iterations. Class hypervectors are retrained to minimize errors caused by noise."/>
         
         <param name="folds" type="integer" value="2" min="2" 
                label="Number of folds for cross-validation" 
                help="This tool makes use of k-folds cross-validation to evaluate the accuracy of the hyperdimensional model. 
                      Make sure to choose a good number of folds for validating the classification model. Please note that higher number 
-                     of folds could significantly increase the running time." />
+                     of folds could significantly increase the running time."/>
 
         <conditional name="feature_selection">
             <param name="enable_fs" type="select" 
@@ -76,20 +76,20 @@
                 <option value="true">Enabled</option>
             </param>
             
-            <when value="false" />
+            <when value="false"/>
 
             <when value="true">
                 <param name="group_min" type="integer" value="1" min="1" 
                        label="Minimum number of selected features" 
-                       help="Tool will stop removing features if its number will reach this value." />
+                       help="Tool will stop removing features if its number will reach this value."/>
                 
                 <param name="accuracy_threshold" type="float" value="60.0" min="0.0" max="100.0" 
                        label="Accuracy threshold" 
-                       help="Stop the execution if the best accuracy reached for a group of features is lower than this value." />
+                       help="Stop the execution if the best accuracy reached for a group of features is lower than this value."/>
                 
                 <param name="accuracy_uncertainty_perc" type="float" value="5.0" min="0.0" max="100.0" 
                        label="Accuracy uncertainty percentage" 
-                       help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage." />
+                       help="Consider non optimal solutions if model accuracy is greater than the best accuracy minus this percentage."/>
             </when>
         </conditional>
     </inputs>
@@ -97,81 +97,81 @@
     <outputs>
         <data format="tabular" name="summary" label="${tool.name} on ${on_string}: Summary" from_work_dir="summary.txt">
             <actions>
-                <action name="column_names" type="metadata" default="Run ID,Group Size,Retraining,Accuracy,Excluded Feature" />
-                <action name="column_types" type="metadata" default="str,int,int,float,str" />
-                <action name="comment_lines" type="metadata" default="7" />
+                <action name="column_names" type="metadata" default="Run ID,Group Size,Retraining,Accuracy,Excluded Feature"/>
+                <action name="column_types" type="metadata" default="str,int,int,float,str"/>
+                <action name="comment_lines" type="metadata" default="7"/>
             </actions>
         </data>
         
         <data format="tabular" name="selection" label="${tool.name} on ${on_string}: Selection" from_work_dir="selection.txt">
             <filter>feature_selection["enable_fs"]</filter>
             <actions>
-                <action name="column_names" type="metadata" default="Selected Features:" />
-                <action name="column_types" type="metadata" default="str" />
-                <action name="comment_lines" type="metadata" default="3" />
+                <action name="column_names" type="metadata" default="Selected Features:"/>
+                <action name="column_types" type="metadata" default="str"/>
+                <action name="comment_lines" type="metadata" default="3"/>
             </actions>
         </data>
     </outputs>
 
     <tests>
-        <test>
-            <param name="dataset" value="iris.csv" />
-            <param name="dimensionality" value="1000" />
-            <param name="levels" value="100" />
-            <param name="retrain" value="10" />
-            <param name="folds" value="5" />
+        <test expect_num_outputs="2">
+            <param name="dataset" value="iris.csv"/>
+            <param name="dimensionality" value="1000"/>
+            <param name="levels" value="100"/>
+            <param name="retrain" value="10"/>
+            <param name="folds" value="5"/>
             
-            <output name="summary" ftype="tabular" value="summary.txt">
+            <output name="summary" ftype="tabular" value="summary_1.txt">
                 <assert_contents>
                     <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/>
-                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
+                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/>
                 </assert_contents>
             </output>
         </test>
 
-        <test>
-            <param name="dataset" value="iris.tabular" />
-            <param name="dimensionality" value="1000" />
-            <param name="levels" value="100" />
-            <param name="retrain" value="10" />
-            <param name="folds" value="5" />
+        <test expect_num_outputs="2">
+            <param name="dataset" value="iris.tabular"/>
+            <param name="dimensionality" value="1000"/>
+            <param name="levels" value="100"/>
+            <param name="retrain" value="10"/>
+            <param name="folds" value="5"/>
             
-            <output name="summary" ftype="tabular" value="summary.txt">
+            <output name="summary" ftype="tabular" value="summary_2.txt">
                 <assert_contents>
                     <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/>
-                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
+                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/>
                 </assert_contents>
             </output>
         </test>
 
-        <test>
-            <param name="dataset" value="iris.csv" />
-            <param name="dimensionality" value="1000" />
-            <param name="levels" value="100" />
-            <param name="retrain" value="10" />
-            <param name="folds" value="5" />
+        <test expect_num_outputs="2">
+            <param name="dataset" value="iris.csv"/>
+            <param name="dimensionality" value="1000"/>
+            <param name="levels" value="100"/>
+            <param name="retrain" value="10"/>
+            <param name="folds" value="5"/>
 
             <conditional name="feature_selection">
-                <param name="enable_fs" value="true" />
-                <param name="group_min" value="1" />
-                <param name="accuracy_threshold" value="60.0" />
-                <param name="accuracy_uncertainty_perc" value="5.0" />
+                <param name="enable_fs" value="true"/>
+                <param name="group_min" value="1"/>
+                <param name="accuracy_threshold" value="60.0"/>
+                <param name="accuracy_uncertainty_perc" value="5.0"/>
             </conditional>
             
-            <output name="summary" ftype="tabular" value="summary.txt">
+            <output name="summary" ftype="tabular" value="summary_3.txt">
                 <assert_contents>
-                    <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy" />
-                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c" />
+                    <has_text_matching expression="# Run ID\tGroup Size\tRetraining\tAccuracy"/>
+                    <has_text text="8f0e142ff27db7f8d2cc66cfcc05e27c"/>
                 </assert_contents>
             </output>
 
             <output name="selection" ftype="tabular" value="selection.txt">
                 <assert_contents>
-                    <has_text text="# Selected Features:" />
-                    <has_text text="PetalLengthCm" />
-                    <has_text text="PetalWidthCm" />
-                    <has_text text="SepalLengthCm" />
-                    <has_text text="SepalWidthCm" />
+                    <has_text text="# Selected Features:"/>
+                    <has_text text="PetalLengthCm"/>
+                    <has_text text="PetalWidthCm"/>
+                    <has_text text="SepalLengthCm"/>
+                    <has_text text="SepalWidthCm"/>
                 </assert_contents>
             </output>
         </test>
--- a/macros.xml	Tue Jan 31 16:31:19 2023 +0000
+++ b/macros.xml	Tue Apr 09 11:23:55 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.0.7</token>
-    <token name="@GALAXY_VERSION@">1</token>
+    <token name="@TOOL_VERSION@">1.0.8.post1</token>
+    <token name="@GALAXY_VERSION@">0</token>
     <token name="@PROFILE@">22.05</token>
 
     <xml name="creator">
--- a/test-data/selection.txt	Tue Jan 31 16:31:19 2023 +0000
+++ b/test-data/selection.txt	Tue Apr 09 11:23:55 2024 +0000
@@ -1,5 +1,5 @@
-# Best group size: 4
-# Best accuracy: 43.33333333333333
+# Best group size: 3
+# Best accuracy: 96.66666666666667
 # Selected Features:
 PetalLengthCm
 PetalWidthCm
--- a/test-data/summary.txt	Tue Jan 31 16:31:19 2023 +0000
+++ b/test-data/summary.txt	Tue Apr 09 11:23:55 2024 +0000
@@ -5,4 +5,7 @@
 # Accuracy threshold (stop condition): 60.0
 # Accuracy uncertainty (percentage): 5.0
 # Run ID	Group Size	Retraining	Accuracy	Excluded Feature
-8f0e142ff27db7f8d2cc66cfcc05e27c	4	2	43.33333333333333	
+8f0e142ff27db7f8d2cc66cfcc05e27c	4	1	96.0	
+1349db4e4ec1dcbf755cda656c5e6212	3	0	96.66666666666667	SepalWidthCm
+7b74a2f91bb30738131e1ff11be19f6d	3	0	96.66666666666667	SepalLengthCm
+879af8de56b640a254522448aff67544	1	0	96.0	PetalLengthCm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary_1.txt	Tue Apr 09 11:23:55 2024 +0000
@@ -0,0 +1,8 @@
+# Dataset: iris
+# Dimensionality: 1000
+# Number of levels: 100
+# Max retraining iterations: 10
+# Accuracy threshold (stop condition): 60.0
+# Accuracy uncertainty (percentage): 5.0
+# Run ID	Group Size	Retraining	Accuracy	Excluded Feature
+8f0e142ff27db7f8d2cc66cfcc05e27c	4	1	96.0	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary_2.txt	Tue Apr 09 11:23:55 2024 +0000
@@ -0,0 +1,8 @@
+# Dataset: iris
+# Dimensionality: 1000
+# Number of levels: 100
+# Max retraining iterations: 10
+# Accuracy threshold (stop condition): 60.0
+# Accuracy uncertainty (percentage): 5.0
+# Run ID	Group Size	Retraining	Accuracy	Excluded Feature
+8f0e142ff27db7f8d2cc66cfcc05e27c	4	1	96.0	
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/summary_3.txt	Tue Apr 09 11:23:55 2024 +0000
@@ -0,0 +1,12 @@
+# Dataset: iris
+# Dimensionality: 1000
+# Number of levels: 100
+# Max retraining iterations: 10
+# Accuracy threshold (stop condition): 60.0
+# Accuracy uncertainty (percentage): 5.0
+# Run ID	Group Size	Retraining	Accuracy	Excluded Feature
+8f0e142ff27db7f8d2cc66cfcc05e27c	4	1	96.0	
+1349db4e4ec1dcbf755cda656c5e6212	3	0	96.66666666666667	SepalWidthCm
+7b74a2f91bb30738131e1ff11be19f6d	3	0	96.66666666666667	SepalLengthCm
+879af8de56b640a254522448aff67544	1	0	96.0	PetalLengthCm
+64e07b74f471e3b056c3e86b2b2af45d	1	0	95.33333333333333	PetalWidthCm