diff estimator_attributes.xml @ 17:a01fa4e8fe4f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 12:54:40 +0000
parents 27fabe5feedc
children
line wrap: on
line diff
--- a/estimator_attributes.xml	Thu Aug 11 09:52:07 2022 +0000
+++ b/estimator_attributes.xml	Wed Aug 09 12:54:40 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="20.05">
+<tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="@PROFILE@">
     <description>get important attributes from an estimator or scikit object</description>
     <macros>
         <import>main_macros.xml</import>
@@ -16,7 +16,6 @@
             <![CDATA[
 import json
 import pandas
-import pickle
 import skrebate
 import sys
 import warnings
@@ -30,15 +29,15 @@
 from imblearn.pipeline import Pipeline as imbPipeline
 from sklearn.pipeline import Pipeline
 
-from galaxy_ml.utils import load_model, get_search_params
+from galaxy_ml.model_persist import load_model_from_h5, dump_model_to_h5
+from galaxy_ml.utils import get_search_params
 
 warnings.simplefilter('ignore')
 
 infile_object = '$infile_object'
 attribute = '$attribute_type'
 
-with open(infile_object, 'rb') as f:
-    est_obj = load_model(f)
+est_obj = load_model_from_h5(infile_object)
 
 if attribute == 'get_params':
     ## get_params()
@@ -48,14 +47,12 @@
 elif attribute == 'final_estimator':
     res = est_obj.steps[-1][-1]
     print(repr(res))
-    with open('$outfile', 'wb') as f:
-        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
+    dump_model_to_h5(res, '$outfile')
 elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']:
     res = getattr(est_obj, attribute)
     print(repr(res))
-    with open('$outfile', 'wb') as f:
-        pickle.dump(res, f, pickle.HIGHEST_PROTOCOL)
-elif attribute in ['oob_score_', 'best_score_', 'n_features_']:
+    dump_model_to_h5(res, '$outfile')
+elif attribute in ['oob_score_', 'best_score_', 'n_features_in_']:
     res = getattr(est_obj, attribute)
     res = pandas.DataFrame([res], columns=[attribute])
     res.to_csv('$outfile', sep='\t', index=False)
@@ -67,8 +64,6 @@
     res = pandas.DataFrame(est_obj.cv_results_)
     res = res[sorted(res.columns)]
     res.to_csv('$outfile', sep='\t', index=False)
-elif attribute == 'save_weights':
-    est_obj.save_weights('$outfile')
 else:
     if attribute == 'get_signature':
         res = est_obj.get_signature()
@@ -87,10 +82,10 @@
         </configfile>
     </configfiles>
     <inputs>
-        <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object" />
+        <param name="infile_object" type="data" format="h5mlm" label="Choose the dataset containing estimator/pipeline object" />
         <param name="attribute_type" type="select" label="Select an attribute retrival type">
             <option value="get_params" selected="true">Estimator - get_params()</option>
-            <option value="feature_importances_">Fitted estimator - feature_importances_ </option>
+            <option value="feature_importances_" >Fitted estimator - feature_importances_ </option>
             <option value="coef_">Fitted estimator - coef_ </option>
             <option value="train_score_">Fitted estimator - train_score_ </option>
             <option value="oob_score_">Fitted estimator - oob_score_ </option>
@@ -107,9 +102,8 @@
             <option value="scores_">Feature_selection - scores_ </option>
             <option value="pvalues_">Feature_selection - pvalues_ </option>
             <option value="ranking_">Feature_selection - ranking_ </option>
-            <option value="n_features_">Feature_selection - n_features_ </option>
+            <option value="n_features_in_">Feature_selection - n_features_in_ </option>
             <option value="grid_scores_">Feature_selection - grid_scores_ </option>
-            <option value="save_weights">KerasGClassifier/KerasGRegressor - save_weights</option>
         </param>
     </inputs>
     <outputs>
@@ -117,61 +111,55 @@
             <change_format>
                 <when input="attribute_type" value="named_steps" format="txt" />
                 <when input="attribute_type" value="best_params_" format="txt" />
-                <when input="attribute_type" value="final_estimator" format="zip" />
-                <when input="attribute_type" value="best_estimator_" format="zip" />
-                <when input="attribute_type" value="init_" format="zip" />
-                <when input="attribute_type" value="classifier_" format="zip" />
-                <when input="attribute_type" value="regressor_" format="zip" />
-                <when input="attribute_type" value="save_weights" format="h5" />
+                <when input="attribute_type" value="final_estimator" format="h5mlm" />
+                <when input="attribute_type" value="best_estimator_" format="h5mlm" />
+                <when input="attribute_type" value="init_" format="h5mlm" />
+                <when input="attribute_type" value="classifier_" format="h5mlm" />
+                <when input="attribute_type" value="regressor_" format="h5mlm" />
             </change_format>
         </data>
     </outputs>
     <tests>
         <test>
-            <param name="infile_object" value="GridSearchCV.zip" ftype="zip" />
+            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="best_score_" />
             <output name="outfile" file="best_score_.tabular" />
         </test>
         <test>
-            <param name="infile_object" value="GridSearchCV.zip" ftype="zip" />
+            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="best_params_" />
             <output name="outfile" file="best_params_.txt" />
         </test>
         <test>
-            <param name="infile_object" value="GridSearchCV.zip" ftype="zip" />
+            <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="best_estimator_" />
-            <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10" />
+            <output name="outfile" file="best_estimator_.h5mlm" compare="sim_size" delta="10" />
+        </test>
+        <test>
+            <param name="infile_object" value="searchCV01" ftype="h5mlm" />
+            <param name="attribute_type" value="final_estimator" />
+            <output name="outfile" file="final_estimator.h5mlm" compare="sim_size" delta="10" />
         </test>
         <test>
-            <param name="infile_object" value="best_estimator_.zip" ftype="zip" />
-            <param name="attribute_type" value="final_estimator" />
-            <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10" />
-        </test>
-        <test>
-            <param name="infile_object" value="best_estimator_.zip" ftype="zip" />
+            <param name="infile_object" value="searchCV01" ftype="h5mlm" />
             <param name="attribute_type" value="named_steps" />
             <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_object" value="final_estimator.zip" ftype="zip" />
+            <param name="infile_object" value="best_estimator_.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="feature_importances_" />
             <output name="outfile" file="feature_importances_.tabular" />
         </test>
         <test>
-            <param name="infile_object" value="RFE.zip" ftype="zip" />
+            <param name="infile_object" value="RFE.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="ranking_" />
             <output name="outfile" file="ranking_.tabular" />
         </test>
         <test>
-            <param name="infile_object" value="LinearRegression02.zip" ftype="zip" />
+            <param name="infile_object" value="LinearRegression01.h5mlm" ftype="h5mlm" />
             <param name="attribute_type" value="get_params" />
             <output name="outfile" value="get_params.tabular" />
         </test>
-        <test>
-            <param name="infile_object" value="fitted_keras_g_regressor01.zip" ftype="zip" />
-            <param name="attribute_type" value="save_weights" />
-            <output name="outfile" value="keras_save_weights01.h5" compare="sim_size" delta="5" />
-        </test>
     </tests>
     <help>
         <![CDATA[