Mercurial > repos > bgruening > sklearn_data_preprocess
comparison pre_process.xml @ 41:a16f33c6ca64 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 13:29:02 +0000 |
parents | 0e5fcf7ddc75 |
children |
comparison
equal
deleted
inserted
replaced
40:80074b842ebd | 41:a16f33c6ca64 |
---|---|
1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_data_preprocess" name="Preprocess" version="@VERSION@" profile="@PROFILE@"> |
2 <description>raw feature vectors into standardized datasets</description> | 2 <description>raw feature vectors into standardized datasets</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
16 <configfile name="pre_processor_script"> | 16 <configfile name="pre_processor_script"> |
17 <![CDATA[ | 17 <![CDATA[ |
18 import sys | 18 import sys |
19 import json | 19 import json |
20 import pandas | 20 import pandas |
21 import pickle | |
22 | 21 |
23 from scipy.io import mmread | 22 from scipy.io import mmread |
24 from scipy.io import mmwrite | 23 from scipy.io import mmwrite |
25 from sklearn import preprocessing | 24 from sklearn import preprocessing |
| 25 from galaxy_ml.model_persist import dump_model_to_h5 |
26 from galaxy_ml.utils import read_columns, SafeEval | 26 from galaxy_ml.utils import read_columns, SafeEval |
27 | 27 |
28 | 28 |
29 safe_eval = SafeEval() | 29 safe_eval = SafeEval() |
30 | 30 |
79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", | 79 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", |
80 index=False, header=True if header else False) | 80 index=False, header=True if header else False) |
81 #end if | 81 #end if |
82 | 82 |
83 #if $save: | 83 #if $save: |
84 with open("$outfile_fit", 'wb') as out_handler: | 84 dump_model_to_h5(estimator, "$outfile_fit") |
85 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) | |
86 #end if | 85 #end if |
87 ]]> | 86 ]]> |
88 </configfile> | 87 </configfile> |
89 </configfiles> | 88 </configfiles> |
90 <inputs> | 89 <inputs> |
114 </conditional> | 113 </conditional> |
115 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." /> | 114 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolflase" checked="false" label="Save the preprocessor" help="Saves the preprocessor after fitting to the data. The preprocessor can then be passed to other tools and used in later operations." /> |
116 </inputs> | 115 </inputs> |
117 <outputs> | 116 <outputs> |
118 <data format="tabular" name="outfile_transform" from_work_dir="./output" /> | 117 <data format="tabular" name="outfile_transform" from_work_dir="./output" /> |
119 <data format="zip" name="outfile_fit"> | 118 <data format="h5mlm" name="outfile_fit"> |
120 <filter>save</filter> | 119 <filter>save</filter> |
121 </data> | 120 </data> |
122 </outputs> | 121 </outputs> |
123 <tests> | 122 <tests> |
124 <test> | 123 <test> |
125 <param name="infile" value="train.tabular" ftype="tabular" /> | 124 <param name="infile" value="train.tabular" ftype="tabular" /> |
126 <param name="selected_column_selector_option" value="all_columns" /> | 125 <param name="selected_column_selector_option" value="all_columns" /> |
127 <param name="selected_input_type" value="tabular" /> | 126 <param name="selected_input_type" value="tabular" /> |
128 <param name="selected_pre_processor" value="KernelCenterer" /> | 127 <param name="selected_pre_processor" value="QuantileTransformer" /> |
129 <param name="save" value="true" /> | 128 <param name="save" value="true" /> |
| 129 <param name="random_state" value="200" /> |
| 130 <param name="n_quantiles" value="10" /> |
| 131 <param name="subsample" value="100" /> |
130 <output name="outfile_transform" file="prp_result01" ftype="tabular" /> | 132 <output name="outfile_transform" file="prp_result01" ftype="tabular" /> |
131 <output name="outfile_fit" file="prp_model01" ftype="zip" compare="sim_size" delta="5" /> | 133 <output name="outfile_fit" file="prp_model01" ftype="h5mlm" compare="sim_size" delta="5" /> |
132 </test> | 134 </test> |
133 <test> | 135 <test> |
134 <param name="infile" value="train.tabular" ftype="tabular" /> | 136 <param name="infile" value="train.tabular" ftype="tabular" /> |
135 <param name="selected_column_selector_option" value="all_columns" /> | 137 <param name="selected_column_selector_option" value="all_columns" /> |
136 <param name="selected_input_type" value="tabular" /> | 138 <param name="selected_input_type" value="tabular" /> |
137 <param name="selected_pre_processor" value="MinMaxScaler" /> | 139 <param name="selected_pre_processor" value="MinMaxScaler" /> |
138 <param name="save" value="true" /> | 140 <param name="save" value="true" /> |
139 <output name="outfile_transform" file="prp_result02" ftype="tabular" /> | 141 <output name="outfile_transform" file="prp_result02" ftype="tabular" /> |
140 <output name="outfile_fit" file="prp_model02" ftype="zip" compare="sim_size" delta="5" /> | 142 <output name="outfile_fit" file="prp_model02" ftype="h5mlm" compare="sim_size" delta="5" /> |
141 </test> | 143 </test> |
142 <test> | 144 <test> |
143 <param name="infile" value="train.tabular" ftype="tabular" /> | 145 <param name="infile" value="train.tabular" ftype="tabular" /> |
144 <param name="selected_column_selector_option" value="all_columns" /> | 146 <param name="selected_column_selector_option" value="all_columns" /> |
145 <param name="selected_input_type" value="tabular" /> | 147 <param name="selected_input_type" value="tabular" /> |
146 <param name="selected_pre_processor" value="PolynomialFeatures" /> | 148 <param name="selected_pre_processor" value="PolynomialFeatures" /> |
147 <param name="save" value="true" /> | 149 <param name="save" value="true" /> |
148 <output name="outfile_transform" file="prp_result03" ftype="tabular" /> | 150 <output name="outfile_transform" file="prp_result03" ftype="tabular" /> |
149 <output name="outfile_fit" file="prp_model03" ftype="zip" compare="sim_size" delta="5" /> | 151 <output name="outfile_fit" file="prp_model03" ftype="h5mlm" compare="sim_size" delta="5" /> |
150 </test> | 152 </test> |
151 <test> | 153 <test> |
152 <param name="infile" value="train.tabular" ftype="tabular" /> | 154 <param name="infile" value="train.tabular" ftype="tabular" /> |
153 <param name="selected_column_selector_option" value="all_columns" /> | 155 <param name="selected_column_selector_option" value="all_columns" /> |
154 <param name="selected_input_type" value="tabular" /> | 156 <param name="selected_input_type" value="tabular" /> |
155 <param name="selected_pre_processor" value="RobustScaler" /> | 157 <param name="selected_pre_processor" value="RobustScaler" /> |
156 <param name="save" value="true" /> | 158 <param name="save" value="true" /> |
157 <output name="outfile_transform" file="prp_result04" ftype="tabular" /> | 159 <output name="outfile_transform" file="prp_result04" ftype="tabular" /> |
158 <output name="outfile_fit" file="prp_model04" ftype="zip" compare="sim_size" delta="5" /> | 160 <output name="outfile_fit" file="prp_model04" ftype="h5mlm" compare="sim_size" delta="5" /> |
159 </test> | 161 </test> |
160 <test> | 162 <test> |
161 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 163 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
162 <param name="selected_input_type" value="sparse" /> | 164 <param name="selected_input_type" value="sparse" /> |
163 <param name="selected_pre_processor" value="Binarizer" /> | 165 <param name="selected_pre_processor" value="Binarizer" /> |
164 <param name="save" value="true" /> | 166 <param name="save" value="true" /> |
165 <output name="outfile_transform" file="prp_result05" ftype="tabular" /> | 167 <output name="outfile_transform" file="prp_result05" ftype="tabular" /> |
166 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5" /> | 168 <output name="outfile_fit" file="prp_model05" ftype="h5mlm" compare="sim_size" delta="5" /> |
167 </test> | 169 </test> |
168 <test> | 170 <test> |
169 <param name="infile" value="train.tabular" ftype="tabular" /> | 171 <param name="infile" value="train.tabular" ftype="tabular" /> |
170 <param name="selected_input_type" value="tabular" /> | 172 <param name="selected_input_type" value="tabular" /> |
171 <param name="selected_column_selector_option" value="all_columns" /> | 173 <param name="selected_column_selector_option" value="all_columns" /> |
172 <param name="selected_pre_processor" value="StandardScaler" /> | 174 <param name="selected_pre_processor" value="StandardScaler" /> |
173 <param name="save" value="true" /> | 175 <param name="save" value="true" /> |
174 <output name="outfile_transform" file="prp_result07" ftype="tabular" /> | 176 <output name="outfile_transform" file="prp_result07" ftype="tabular" /> |
175 <output name="outfile_fit" file="prp_model07" ftype="zip" compare="sim_size" delta="5" /> | 177 <output name="outfile_fit" file="prp_model07" ftype="h5mlm" compare="sim_size" delta="5" /> |
176 </test> | 178 </test> |
177 <test> | 179 <test> |
178 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 180 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
179 <param name="selected_input_type" value="sparse" /> | 181 <param name="selected_input_type" value="sparse" /> |
180 <param name="selected_pre_processor" value="MaxAbsScaler" /> | 182 <param name="selected_pre_processor" value="MaxAbsScaler" /> |
181 <param name="save" value="true" /> | 183 <param name="save" value="true" /> |
182 <output name="outfile_transform" file="prp_result08" ftype="tabular" /> | 184 <output name="outfile_transform" file="prp_result08" ftype="tabular" /> |
183 <output name="outfile_fit" file="prp_model08" ftype="zip" compare="sim_size" delta="5" /> | 185 <output name="outfile_fit" file="prp_model08" ftype="h5mlm" compare="sim_size" delta="5" /> |
184 </test> | 186 </test> |
185 <test> | 187 <test> |
186 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> | 188 <param name="infile" value="csr_sparse2.mtx" ftype="txt" /> |
187 <param name="selected_input_type" value="sparse" /> | 189 <param name="selected_input_type" value="sparse" /> |
188 <param name="selected_pre_processor" value="Normalizer" /> | 190 <param name="selected_pre_processor" value="Normalizer" /> |
189 <param name="save" value="true" /> | 191 <param name="save" value="true" /> |
190 <output name="outfile_transform" file="prp_result09" ftype="tabular" /> | 192 <output name="outfile_transform" file="prp_result09" ftype="tabular" /> |
191 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5" /> | 193 <output name="outfile_fit" file="prp_model09" ftype="h5mlm" compare="sim_size" delta="5" /> |
192 </test> | 194 </test> |
193 <test> | 195 <test> |
194 <param name="infile" value="regression_X.tabular" ftype="tabular" /> | 196 <param name="infile" value="regression_X.tabular" ftype="tabular" /> |
195 <param name="header1" value="true" /> | 197 <param name="header1" value="true" /> |
196 <param name="selected_column_selector_option" value="all_columns" /> | 198 <param name="selected_column_selector_option" value="all_columns" /> |