diff feature_selection.xml @ 35:61edd9e5c17f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:10:57 +0000
parents 93f3b307485f
children
line wrap: on
line diff
--- a/feature_selection.xml	Thu Aug 11 09:24:57 2022 +0000
+++ b/feature_selection.xml	Wed Aug 09 13:10:57 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="20.05">
+<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="@PROFILE@">
     <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
     <macros>
         <import>main_macros.xml</import>
@@ -31,6 +31,7 @@
 from imblearn.pipeline import Pipeline as imbPipeline
 from sklearn.pipeline import Pipeline
 
+from galaxy_ml.model_persist import dump_model_to_h5
 from galaxy_ml.utils import (SafeEval, feature_selector,
                              read_columns, get_module)
 
@@ -80,13 +81,14 @@
 else:
     c = None
 X, input_df = read_columns(
-        '$input_options.infile1',
-        c = c,
-        c_option = column_option,
-        return_df = True,
-        sep='\t',
-        header=header,
-        parse_dates=True)
+    '$input_options.infile1',
+    c = c,
+    c_option = column_option,
+    return_df = True,
+    sep='\t',
+    header=header,
+    parse_dates=True,
+)
 X = X.astype(float)
 #elif $input_options.selected_input == 'seq_fasta'
 fasta_file = '$input_options.fasta_file'
@@ -118,12 +120,13 @@
 else:
     c = None
 y = read_columns(
-        '$input_options.infile2',
-        c = c,
-        c_option = column_option,
-        sep='\t',
-        header=header,
-        parse_dates=True)
+    '$input_options.infile2',
+    c = c,
+    c_option = column_option,
+    sep='\t',
+    header=header,
+    parse_dates=True,
+)
 y = y.ravel()
 
 ## Create feature selector
@@ -142,8 +145,7 @@
 res.to_csv(path_or_buf='$outfile', sep='\t', index=False)
 
 #if $save:
-with open('$outfile_selector', 'wb') as output_handler:
-    pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL)
+dump_model_to_h5(new_selector, '$outfile_selector')
 #end if
 
             ]]>
@@ -156,7 +158,7 @@
     </inputs>
     <outputs>
         <data format="tabular" name="outfile" />
-        <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
+        <data format="h5mlm" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
             <filter>save</filter>
         </data>
     </outputs>
@@ -294,13 +296,13 @@
         <test>
             <param name="selected_algorithm" value="SelectFromModel" />
             <param name="input_mode" value="prefitted" />
-            <param name="fitted_estimator" value="rfr_model01" ftype="zip" />
-            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
-            <param name="header1" value="false" />
-            <param name="col1" value="1,2,3,4,5" />
-            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="fitted_estimator" value="searchCV03" ftype="h5mlm" />
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
+            <param name="header1" value="true" />
+            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" />
+            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
             <param name="col2" value="1" />
-            <param name="header2" value="false" />
+            <param name="header2" value="true" />
             <output name="outfile" file="feature_selection_result12" />
         </test>
         <test>