Mercurial > repos > bgruening > sklearn_generalized_linear
comparison generalized_linear.xml @ 41:fe181d613429 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 12:18:02 +0000 |
parents | 602edec75e1d |
children |
comparison
equal
deleted
inserted
replaced
40:dc4b5fd604a6 | 41:fe181d613429 |
---|---|
1 <tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="@PROFILE@"> |
2 <description>for classification and regression</description> | 2 <description>for classification and regression</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
16 import sys | 16 import sys |
17 import json | 17 import json |
18 import numpy as np | 18 import numpy as np |
19 import sklearn.linear_model | 19 import sklearn.linear_model |
20 import pandas | 20 import pandas |
21 import pickle | |
22 | 21 |
23 from scipy.io import mmread | 22 from scipy.io import mmread |
24 from galaxy_ml.utils import load_model, get_X_y | 23 from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5 |
24 from galaxy_ml.utils import clean_params, get_X_y | |
25 | 25 |
26 | 26 |
27 input_json_path = sys.argv[1] | 27 input_json_path = sys.argv[1] |
28 with open(input_json_path, "r") as param_handler: | 28 with open(input_json_path, "r") as param_handler: |
29 params = json.load(param_handler) | 29 params = json.load(param_handler) |
36 options = params["selected_tasks"]["selected_algorithms"]["options"] | 36 options = params["selected_tasks"]["selected_algorithms"]["options"] |
37 | 37 |
38 my_class = getattr(sklearn.linear_model, algorithm) | 38 my_class = getattr(sklearn.linear_model, algorithm) |
39 estimator = my_class(**options) | 39 estimator = my_class(**options) |
40 estimator.fit(X,y) | 40 estimator.fit(X,y) |
41 with open("$outfile_fit", 'wb') as out_handler: | 41 dump_model_to_h5(estimator, "$outfile_fit") |
42 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) | |
43 | 42 |
44 #else: | 43 #else: |
45 with open("$selected_tasks.infile_model", 'rb') as model_handler: | 44 classifier_object = load_model_from_h5("$selected_tasks.infile_model") |
46 classifier_object = load_model(model_handler) | 45 classifier_object = clean_params(classifier_object) |
47 header = 'infer' if params["selected_tasks"]["header"] else None | 46 header = 'infer' if params["selected_tasks"]["header"] else None |
48 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) | 47 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) |
49 prediction = classifier_object.predict(data) | 48 prediction = classifier_object.predict(data) |
50 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) | 49 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) |
51 res = pandas.concat([data, prediction_df], axis=1) | 50 res = pandas.concat([data, prediction_df], axis=1) |
54 | 53 |
55 ]]> | 54 ]]> |
56 </configfile> | 55 </configfile> |
57 </configfiles> | 56 </configfiles> |
58 <inputs> | 57 <inputs> |
59 <expand macro="sl_Conditional" model="zip"> | 58 <expand macro="sl_Conditional" model="h5mlm"> |
60 <param name="selected_algorithm" type="select" label="Select a linear model:"> | 59 <param name="selected_algorithm" type="select" label="Select a linear model:"> |
61 <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option> | 60 <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option> |
62 <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option> | 61 <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option> |
63 <option value="LinearRegression">Linear Regression model</option> | 62 <option value="LinearRegression">Linear Regression model</option> |
64 <option value="RidgeClassifier">Ridge classifier</option> | 63 <option value="RidgeClassifier">Ridge classifier</option> |
202 <expand macro="output" /> | 201 <expand macro="output" /> |
203 <tests> | 202 <tests> |
204 <test> | 203 <test> |
205 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> | 204 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> |
206 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> | 205 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> |
207 <param name="selected_column_selector_option" value="all_but_by_index_number" /> | 206 <param name="col1" value="1,2,3,4,5" /> |
208 <param name="col1" value="6" /> | |
209 <param name="col2" value="6" /> | 207 <param name="col2" value="6" /> |
210 <param name="selected_task" value="train" /> | 208 <param name="selected_task" value="train" /> |
211 <param name="selected_algorithm" value="SGDRegressor" /> | 209 <param name="selected_algorithm" value="SGDRegressor" /> |
212 <param name="random_state" value="10" /> | 210 <param name="random_state" value="10" /> |
213 <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" /> | 211 <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" /> |
214 </test> | 212 </test> |
215 <test> | 213 <test> |
216 <param name="infile_model" value="glm_model01" ftype="zip" /> | 214 <param name="infile_model" value="glm_model01" ftype="h5mlm" /> |
217 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> | 215 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> |
218 <param name="selected_task" value="load" /> | 216 <param name="selected_task" value="load" /> |
219 <output name="outfile_predict" file="glm_result01" lines_diff="4" /> | 217 <output name="outfile_predict" file="glm_result01" lines_diff="4" /> |
220 </test> | 218 </test> |
221 <test> | 219 <test> |
227 <param name="selected_algorithm" value="SGDClassifier" /> | 225 <param name="selected_algorithm" value="SGDClassifier" /> |
228 <param name="random_state" value="10" /> | 226 <param name="random_state" value="10" /> |
229 <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" /> | 227 <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" /> |
230 </test> | 228 </test> |
231 <test> | 229 <test> |
232 <param name="infile_model" value="glm_model02" ftype="zip" /> | 230 <param name="infile_model" value="glm_model02" ftype="h5mlm" /> |
233 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 231 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
234 <param name="selected_task" value="load" /> | 232 <param name="selected_task" value="load" /> |
235 <output name="outfile_predict" file="glm_result02" /> | 233 <output name="outfile_predict" file="glm_result02" /> |
236 </test> | 234 </test> |
237 <test> | 235 <test> |
243 <param name="selected_algorithm" value="RidgeClassifier" /> | 241 <param name="selected_algorithm" value="RidgeClassifier" /> |
244 <param name="random_state" value="10" /> | 242 <param name="random_state" value="10" /> |
245 <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" /> | 243 <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" /> |
246 </test> | 244 </test> |
247 <test> | 245 <test> |
248 <param name="infile_model" value="glm_model03" ftype="zip" /> | 246 <param name="infile_model" value="glm_model03" ftype="h5mlm" /> |
249 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 247 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
250 <param name="selected_task" value="load" /> | 248 <param name="selected_task" value="load" /> |
251 <output name="outfile_predict" file="glm_result03" /> | 249 <output name="outfile_predict" file="glm_result03" /> |
252 </test> | 250 </test> |
253 <test> | 251 <test> |
258 <param name="selected_task" value="train" /> | 256 <param name="selected_task" value="train" /> |
259 <param name="selected_algorithm" value="LinearRegression" /> | 257 <param name="selected_algorithm" value="LinearRegression" /> |
260 <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" /> | 258 <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" /> |
261 </test> | 259 </test> |
262 <test> | 260 <test> |
263 <param name="infile_model" value="glm_model04" ftype="zip" /> | 261 <param name="infile_model" value="glm_model04" ftype="h5mlm" /> |
264 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> | 262 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> |
265 <param name="selected_task" value="load" /> | 263 <param name="selected_task" value="load" /> |
266 <output name="outfile_predict" file="glm_result04" lines_diff="8" /> | 264 <output name="outfile_predict" file="glm_result04" lines_diff="8" /> |
267 </test> | 265 </test> |
268 <test> | 266 <test> |
274 <param name="selected_algorithm" value="LogisticRegression" /> | 272 <param name="selected_algorithm" value="LogisticRegression" /> |
275 <param name="random_state" value="10" /> | 273 <param name="random_state" value="10" /> |
276 <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" /> | 274 <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" /> |
277 </test> | 275 </test> |
278 <test> | 276 <test> |
279 <param name="infile_model" value="glm_model05" ftype="zip" /> | 277 <param name="infile_model" value="glm_model05" ftype="h5mlm" /> |
280 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 278 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
281 <param name="selected_task" value="load" /> | 279 <param name="selected_task" value="load" /> |
282 <output name="outfile_predict" file="glm_result05" /> | 280 <output name="outfile_predict" file="glm_result05" /> |
283 </test> | 281 </test> |
284 <test> | 282 <test> |
285 <param name="infile1" value="train.tabular" ftype="tabular" /> | 283 <param name="infile1" value="train.tabular" ftype="tabular" /> |
286 <param name="infile2" value="train.tabular" ftype="tabular" /> | 284 <param name="infile2" value="train.tabular" ftype="tabular" /> |
287 <param name="col1" value="1,2,3,4" /> | 285 <param name="col1" value="1,2,3,4" /> |
288 <param name="col2" value="5" /> | 286 <param name="col2" value="5" /> |
289 <param name="selected_task" value="train" /> | 287 <param name="selected_task" value="train" /> |
290 <param name="selected_algorithm" value="LogisticRegressionCV" /> | 288 <param name="selected_algorithm" value="LogisticRegression" /> |
291 <param name="random_state" value="10" /> | 289 <param name="random_state" value="10" /> |
290 <param name="penalty" value="none" /> | |
291 <param name="solver" value="lbfgs" /> | |
292 <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" /> | 292 <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" /> |
293 </test> | 293 </test> |
294 <test> | 294 <test> |
295 <param name="infile_model" value="glm_model06" ftype="zip" /> | 295 <param name="infile_model" value="glm_model06" ftype="h5mlm" /> |
296 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 296 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
297 <param name="selected_task" value="load" /> | 297 <param name="selected_task" value="load" /> |
298 <output name="outfile_predict" file="glm_result06" /> | 298 <output name="outfile_predict" file="glm_result06" /> |
299 </test> | 299 </test> |
300 <test> | 300 <test> |
306 <param name="selected_algorithm" value="Ridge" /> | 306 <param name="selected_algorithm" value="Ridge" /> |
307 <param name="random_state" value="10" /> | 307 <param name="random_state" value="10" /> |
308 <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" /> | 308 <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" /> |
309 </test> | 309 </test> |
310 <test> | 310 <test> |
311 <param name="infile_model" value="glm_model07" ftype="zip" /> | 311 <param name="infile_model" value="glm_model07" ftype="h5mlm" /> |
312 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> | 312 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> |
313 <param name="selected_task" value="load" /> | 313 <param name="selected_task" value="load" /> |
314 <output name="outfile_predict"> | 314 <output name="outfile_predict"> |
315 <assert_contents> | 315 <assert_contents> |
316 <has_n_columns n="6" /> | 316 <has_n_columns n="6" /> |
317 <has_text text="86.9702122735000" /> | 317 <has_text text="86.9702122735" /> |
318 <has_text text="-1.0173960197" /> | 318 <has_text text="-1.01739601979" /> |
319 <has_text text="0.64184687433" /> | 319 <has_text text="0.641846874331" /> |
320 <has_text text="-0.621522971207000" /> | 320 <has_text text="-0.621522971207" /> |
321 <has_text text="0.39001218449" /> | 321 <has_text text="0.390012184498" /> |
322 <has_text text="0.596382816494397" /> | 322 <has_text text="0.5963828164943976" /> |
323 <has_text text="-47.4101632272" /> | 323 <has_text text="-47.4101632272" /> |
324 <has_text text="-0.732777468453000" /> | 324 <has_text text="-0.732777468453" /> |
325 <has_text text="-1.0610977011" /> | 325 <has_text text="-1.06109770116" /> |
326 <has_text text="-1.099948005770000" /> | 326 <has_text text="-1.09994800577" /> |
327 <has_text text="0.58565796301" /> | 327 <has_text text="0.585657963012" /> |
328 <has_text text="0.262144044202223" /> | 328 <has_text text="0.26214404420222365" /> |
329 <has_text text="-206.99829512" /> | 329 <has_text text="-206.998295124" /> |
330 <has_text text="0.7057412304" /> | 330 <has_text text="0.70574123041" /> |
331 <has_text text="-1.332209237379999" /> | 331 <has_text text="-1.33220923738" /> |
332 </assert_contents> | 332 </assert_contents> |
333 </output> | 333 </output> |
334 </test> | 334 </test> |
335 <test> | 335 <test> |
336 <param name="infile1" value="train.tabular" ftype="tabular" /> | 336 <param name="infile1" value="train.tabular" ftype="tabular" /> |
341 <param name="selected_algorithm" value="Perceptron" /> | 341 <param name="selected_algorithm" value="Perceptron" /> |
342 <param name="random_state" value="10" /> | 342 <param name="random_state" value="10" /> |
343 <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" /> | 343 <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" /> |
344 </test> | 344 </test> |
345 <test> | 345 <test> |
346 <param name="infile_model" value="glm_model08" ftype="zip" /> | 346 <param name="infile_model" value="glm_model08" ftype="h5mlm" /> |
347 <param name="infile_data" value="test.tabular" ftype="tabular" /> | 347 <param name="infile_data" value="test.tabular" ftype="tabular" /> |
348 <param name="selected_task" value="load" /> | 348 <param name="selected_task" value="load" /> |
349 <output name="outfile_predict" file="glm_result08" /> | 349 <output name="outfile_predict" file="glm_result08" /> |
350 </test> | 350 </test> |
351 </tests> | 351 </tests> |