Mercurial > repos > bgruening > sklearn_estimator_attributes
comparison estimator_attributes.xml @ 17:a01fa4e8fe4f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
| field | value |
|---|---|
| author | bgruening |
| date | Wed, 09 Aug 2023 12:54:40 +0000 |
| parents | 27fabe5feedc |
| children | |
comparison (legend: equal, deleted, inserted, replaced)
16:d0352e8b4c10 | 17:a01fa4e8fe4f |
---|---|
1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_estimator_attributes" name="Estimator attributes" version="@VERSION@" profile="@PROFILE@"> |
2 <description>get important attributes from an estimator or scikit object</description> | 2 <description>get important attributes from an estimator or scikit object</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
14 <configfiles> | 14 <configfiles> |
15 <configfile name="main_script"> | 15 <configfile name="main_script"> |
16 <![CDATA[ | 16 <![CDATA[ |
17 import json | 17 import json |
18 import pandas | 18 import pandas |
19 import pickle | |
20 import skrebate | 19 import skrebate |
21 import sys | 20 import sys |
22 import warnings | 21 import warnings |
23 import xgboost | 22 import xgboost |
24 from mlxtend import regressor, classifier | 23 from mlxtend import regressor, classifier |
28 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 27 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
29 svm, linear_model, tree, discriminant_analysis) | 28 svm, linear_model, tree, discriminant_analysis) |
30 from imblearn.pipeline import Pipeline as imbPipeline | 29 from imblearn.pipeline import Pipeline as imbPipeline |
31 from sklearn.pipeline import Pipeline | 30 from sklearn.pipeline import Pipeline |
32 | 31 |
33 from galaxy_ml.utils import load_model, get_search_params | 32 from galaxy_ml.model_persist import load_model_from_h5, dump_model_to_h5 |
33 from galaxy_ml.utils import get_search_params | |
34 | 34 |
35 warnings.simplefilter('ignore') | 35 warnings.simplefilter('ignore') |
36 | 36 |
37 infile_object = '$infile_object' | 37 infile_object = '$infile_object' |
38 attribute = '$attribute_type' | 38 attribute = '$attribute_type' |
39 | 39 |
40 with open(infile_object, 'rb') as f: | 40 est_obj = load_model_from_h5(infile_object) |
41 est_obj = load_model(f) | |
42 | 41 |
43 if attribute == 'get_params': | 42 if attribute == 'get_params': |
44 ## get_params() | 43 ## get_params() |
45 results = get_search_params(est_obj) | 44 results = get_search_params(est_obj) |
46 df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value']) | 45 df = pandas.DataFrame(results, columns=['', 'Parameter', 'Value']) |
47 df.to_csv('$outfile', sep='\t', index=False) | 46 df.to_csv('$outfile', sep='\t', index=False) |
48 elif attribute == 'final_estimator': | 47 elif attribute == 'final_estimator': |
49 res = est_obj.steps[-1][-1] | 48 res = est_obj.steps[-1][-1] |
50 print(repr(res)) | 49 print(repr(res)) |
51 with open('$outfile', 'wb') as f: | 50 dump_model_to_h5(res, '$outfile') |
52 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) | |
53 elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']: | 51 elif attribute in ['best_estimator_', 'init_', 'classifier_', 'regressor_']: |
54 res = getattr(est_obj, attribute) | 52 res = getattr(est_obj, attribute) |
55 print(repr(res)) | 53 print(repr(res)) |
56 with open('$outfile', 'wb') as f: | 54 dump_model_to_h5(res, '$outfile') |
57 pickle.dump(res, f, pickle.HIGHEST_PROTOCOL) | 55 elif attribute in ['oob_score_', 'best_score_', 'n_features_in']: |
58 elif attribute in ['oob_score_', 'best_score_', 'n_features_']: | |
59 res = getattr(est_obj, attribute) | 56 res = getattr(est_obj, attribute) |
60 res = pandas.DataFrame([res], columns=[attribute]) | 57 res = pandas.DataFrame([res], columns=[attribute]) |
61 res.to_csv('$outfile', sep='\t', index=False) | 58 res.to_csv('$outfile', sep='\t', index=False) |
62 elif attribute in ['best_params_', 'named_steps']: | 59 elif attribute in ['best_params_', 'named_steps']: |
63 res = getattr(est_obj, attribute) | 60 res = getattr(est_obj, attribute) |
65 f.write(repr(res)) | 62 f.write(repr(res)) |
66 elif attribute == 'cv_results_': | 63 elif attribute == 'cv_results_': |
67 res = pandas.DataFrame(est_obj.cv_results_) | 64 res = pandas.DataFrame(est_obj.cv_results_) |
68 res = res[sorted(res.columns)] | 65 res = res[sorted(res.columns)] |
69 res.to_csv('$outfile', sep='\t', index=False) | 66 res.to_csv('$outfile', sep='\t', index=False) |
70 elif attribute == 'save_weights': | |
71 est_obj.save_weights('$outfile') | |
72 else: | 67 else: |
73 if attribute == 'get_signature': | 68 if attribute == 'get_signature': |
74 res = est_obj.get_signature() | 69 res = est_obj.get_signature() |
75 else: | 70 else: |
76 res = getattr(est_obj, attribute) | 71 res = getattr(est_obj, attribute) |
85 | 80 |
86 ]]> | 81 ]]> |
87 </configfile> | 82 </configfile> |
88 </configfiles> | 83 </configfiles> |
89 <inputs> | 84 <inputs> |
90 <param name="infile_object" type="data" format="zip" label="Choose the dataset containing estimator/pipeline object" /> | 85 <param name="infile_object" type="data" format="h5mlm" label="Choose the dataset containing estimator/pipeline object" /> |
91 <param name="attribute_type" type="select" label="Select an attribute retrival type"> | 86 <param name="attribute_type" type="select" label="Select an attribute retrival type"> |
92 <option value="get_params" selected="true">Estimator - get_params()</option> | 87 <option value="get_params" selected="true">Estimator - get_params()</option> |
93 <option value="feature_importances_">Fitted estimator - feature_importances_ </option> | 88 <option value="feature_importances_" >Fitted estimator - feature_importances_ </option> |
94 <option value="coef_">Fitted estimator - coef_ </option> | 89 <option value="coef_">Fitted estimator - coef_ </option> |
95 <option value="train_score_">Fitted estimator - train_score_ </option> | 90 <option value="train_score_">Fitted estimator - train_score_ </option> |
96 <option value="oob_score_">Fitted estimator - oob_score_ </option> | 91 <option value="oob_score_">Fitted estimator - oob_score_ </option> |
97 <option value="init_">Fitted estimator - init_ </option> | 92 <option value="init_">Fitted estimator - init_ </option> |
98 <option value="classifier_">Fitted BinarizeTargetClassifier - classifier_</option> | 93 <option value="classifier_">Fitted BinarizeTargetClassifier - classifier_</option> |
105 <option value="best_score_">SearchCV - best_score_ </option> | 100 <option value="best_score_">SearchCV - best_score_ </option> |
106 <option value="best_params_">SearchCV - best_params_ </option> | 101 <option value="best_params_">SearchCV - best_params_ </option> |
107 <option value="scores_">Feature_selection - scores_ </option> | 102 <option value="scores_">Feature_selection - scores_ </option> |
108 <option value="pvalues_">Feature_selection - pvalues_ </option> | 103 <option value="pvalues_">Feature_selection - pvalues_ </option> |
109 <option value="ranking_">Feature_selection - ranking_ </option> | 104 <option value="ranking_">Feature_selection - ranking_ </option> |
110 <option value="n_features_">Feature_selection - n_features_ </option> | 105 <option value="n_features_in">Feature_selection - n_features_in </option> |
111 <option value="grid_scores_">Feature_selection - grid_scores_ </option> | 106 <option value="grid_scores_">Feature_selection - grid_scores_ </option> |
112 <option value="save_weights">KerasGClassifier/KerasGRegressor - save_weights</option> | |
113 </param> | 107 </param> |
114 </inputs> | 108 </inputs> |
115 <outputs> | 109 <outputs> |
116 <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}"> | 110 <data format="tabular" name="outfile" label="${attribute_type} from ${on_string}"> |
117 <change_format> | 111 <change_format> |
118 <when input="attribute_type" value="named_steps" format="txt" /> | 112 <when input="attribute_type" value="named_steps" format="txt" /> |
119 <when input="attribute_type" value="best_params_" format="txt" /> | 113 <when input="attribute_type" value="best_params_" format="txt" /> |
120 <when input="attribute_type" value="final_estimator" format="zip" /> | 114 <when input="attribute_type" value="final_estimator" format="h5mlm" /> |
121 <when input="attribute_type" value="best_estimator_" format="zip" /> | 115 <when input="attribute_type" value="best_estimator_" format="h5mlm" /> |
122 <when input="attribute_type" value="init_" format="zip" /> | 116 <when input="attribute_type" value="init_" format="h5mlm" /> |
123 <when input="attribute_type" value="classifier_" format="zip" /> | 117 <when input="attribute_type" value="classifier_" format="h5mlm" /> |
124 <when input="attribute_type" value="regressor_" format="zip" /> | 118 <when input="attribute_type" value="regressor_" format="h5mlm" /> |
125 <when input="attribute_type" value="save_weights" format="h5" /> | |
126 </change_format> | 119 </change_format> |
127 </data> | 120 </data> |
128 </outputs> | 121 </outputs> |
129 <tests> | 122 <tests> |
130 <test> | 123 <test> |
131 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> | 124 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" /> |
132 <param name="attribute_type" value="best_score_" /> | 125 <param name="attribute_type" value="best_score_" /> |
133 <output name="outfile" file="best_score_.tabular" /> | 126 <output name="outfile" file="best_score_.tabular" /> |
134 </test> | 127 </test> |
135 <test> | 128 <test> |
136 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> | 129 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" /> |
137 <param name="attribute_type" value="best_params_" /> | 130 <param name="attribute_type" value="best_params_" /> |
138 <output name="outfile" file="best_params_.txt" /> | 131 <output name="outfile" file="best_params_.txt" /> |
139 </test> | 132 </test> |
140 <test> | 133 <test> |
141 <param name="infile_object" value="GridSearchCV.zip" ftype="zip" /> | 134 <param name="infile_object" value="GridSearchCV01.h5mlm" ftype="h5mlm" /> |
142 <param name="attribute_type" value="best_estimator_" /> | 135 <param name="attribute_type" value="best_estimator_" /> |
143 <output name="outfile" file="best_estimator_.zip" compare="sim_size" delta="10" /> | 136 <output name="outfile" file="best_estimator_.h5mlm" compare="sim_size" delta="10" /> |
137 </test> | |
138 <test> | |
139 <param name="infile_object" value="searchCV01" ftype="h5mlm" /> | |
140 <param name="attribute_type" value="final_estimator" /> | |
141 <output name="outfile" file="final_estimator.h5mlm" compare="sim_size" delta="10" /> | |
144 </test> | 142 </test> |
145 <test> | 143 <test> |
146 <param name="infile_object" value="best_estimator_.zip" ftype="zip" /> | 144 <param name="infile_object" value="searchCV01" ftype="h5mlm" /> |
147 <param name="attribute_type" value="final_estimator" /> | |
148 <output name="outfile" file="final_estimator.zip" compare="sim_size" delta="10" /> | |
149 </test> | |
150 <test> | |
151 <param name="infile_object" value="best_estimator_.zip" ftype="zip" /> | |
152 <param name="attribute_type" value="named_steps" /> | 145 <param name="attribute_type" value="named_steps" /> |
153 <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" /> | 146 <output name="outfile" file="named_steps.txt" compare="sim_size" delta="5" /> |
154 </test> | 147 </test> |
155 <test> | 148 <test> |
156 <param name="infile_object" value="final_estimator.zip" ftype="zip" /> | 149 <param name="infile_object" value="best_estimator_.h5mlm" ftype="h5mlm" /> |
157 <param name="attribute_type" value="feature_importances_" /> | 150 <param name="attribute_type" value="feature_importances_" /> |
158 <output name="outfile" file="feature_importances_.tabular" /> | 151 <output name="outfile" file="feature_importances_.tabular" /> |
159 </test> | 152 </test> |
160 <test> | 153 <test> |
161 <param name="infile_object" value="RFE.zip" ftype="zip" /> | 154 <param name="infile_object" value="RFE.h5mlm" ftype="h5mlm" /> |
162 <param name="attribute_type" value="ranking_" /> | 155 <param name="attribute_type" value="ranking_" /> |
163 <output name="outfile" file="ranking_.tabular" /> | 156 <output name="outfile" file="ranking_.tabular" /> |
164 </test> | 157 </test> |
165 <test> | 158 <test> |
166 <param name="infile_object" value="LinearRegression02.zip" ftype="zip" /> | 159 <param name="infile_object" value="LinearRegression01.h5mlm" ftype="h5mlm" /> |
167 <param name="attribute_type" value="get_params" /> | 160 <param name="attribute_type" value="get_params" /> |
168 <output name="outfile" value="get_params.tabular" /> | 161 <output name="outfile" value="get_params.tabular" /> |
169 </test> | |
170 <test> | |
171 <param name="infile_object" value="fitted_keras_g_regressor01.zip" ftype="zip" /> | |
172 <param name="attribute_type" value="save_weights" /> | |
173 <output name="outfile" value="keras_save_weights01.h5" compare="sim_size" delta="5" /> | |
174 </test> | 162 </test> |
175 </tests> | 163 </tests> |
176 <help> | 164 <help> |
177 <![CDATA[ | 165 <![CDATA[ |
178 **What it does** | 166 **What it does** |