comparison generalized_linear.xml @ 41:fe181d613429 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 12:18:02 +0000
parents 602edec75e1d
children
comparison
equal deleted inserted replaced
40:dc4b5fd604a6 41:fe181d613429
1 <tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="@PROFILE@">
2 <description>for classification and regression</description> 2 <description>for classification and regression</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
16 import sys 16 import sys
17 import json 17 import json
18 import numpy as np 18 import numpy as np
19 import sklearn.linear_model 19 import sklearn.linear_model
20 import pandas 20 import pandas
21 import pickle
22 21
23 from scipy.io import mmread 22 from scipy.io import mmread
24 from galaxy_ml.utils import load_model, get_X_y 23 from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5
24 from galaxy_ml.utils import clean_params, get_X_y
25 25
26 26
27 input_json_path = sys.argv[1] 27 input_json_path = sys.argv[1]
28 with open(input_json_path, "r") as param_handler: 28 with open(input_json_path, "r") as param_handler:
29 params = json.load(param_handler) 29 params = json.load(param_handler)
36 options = params["selected_tasks"]["selected_algorithms"]["options"] 36 options = params["selected_tasks"]["selected_algorithms"]["options"]
37 37
38 my_class = getattr(sklearn.linear_model, algorithm) 38 my_class = getattr(sklearn.linear_model, algorithm)
39 estimator = my_class(**options) 39 estimator = my_class(**options)
40 estimator.fit(X,y) 40 estimator.fit(X,y)
41 with open("$outfile_fit", 'wb') as out_handler: 41 dump_model_to_h5(estimator, "$outfile_fit")
42 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
43 42
44 #else: 43 #else:
45 with open("$selected_tasks.infile_model", 'rb') as model_handler: 44 classifier_object = load_model_from_h5("$selected_tasks.infile_model")
46 classifier_object = load_model(model_handler) 45 classifier_object = clean_params(classifier_object)
47 header = 'infer' if params["selected_tasks"]["header"] else None 46 header = 'infer' if params["selected_tasks"]["header"] else None
48 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) 47 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None)
49 prediction = classifier_object.predict(data) 48 prediction = classifier_object.predict(data)
50 prediction_df = pandas.DataFrame(prediction, columns=["predicted"]) 49 prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
51 res = pandas.concat([data, prediction_df], axis=1) 50 res = pandas.concat([data, prediction_df], axis=1)
54 53
55 ]]> 54 ]]>
56 </configfile> 55 </configfile>
57 </configfiles> 56 </configfiles>
58 <inputs> 57 <inputs>
59 <expand macro="sl_Conditional" model="zip"> 58 <expand macro="sl_Conditional" model="h5mlm">
60 <param name="selected_algorithm" type="select" label="Select a linear model:"> 59 <param name="selected_algorithm" type="select" label="Select a linear model:">
61 <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option> 60 <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option>
62 <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option> 61 <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option>
63 <option value="LinearRegression">Linear Regression model</option> 62 <option value="LinearRegression">Linear Regression model</option>
64 <option value="RidgeClassifier">Ridge classifier</option> 63 <option value="RidgeClassifier">Ridge classifier</option>
202 <expand macro="output" /> 201 <expand macro="output" />
203 <tests> 202 <tests>
204 <test> 203 <test>
205 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> 204 <param name="infile1" value="regression_train.tabular" ftype="tabular" />
206 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> 205 <param name="infile2" value="regression_train.tabular" ftype="tabular" />
207 <param name="selected_column_selector_option" value="all_but_by_index_number" /> 206 <param name="col1" value="1,2,3,4,5" />
208 <param name="col1" value="6" />
209 <param name="col2" value="6" /> 207 <param name="col2" value="6" />
210 <param name="selected_task" value="train" /> 208 <param name="selected_task" value="train" />
211 <param name="selected_algorithm" value="SGDRegressor" /> 209 <param name="selected_algorithm" value="SGDRegressor" />
212 <param name="random_state" value="10" /> 210 <param name="random_state" value="10" />
213 <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" /> 211 <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" />
214 </test> 212 </test>
215 <test> 213 <test>
216 <param name="infile_model" value="glm_model01" ftype="zip" /> 214 <param name="infile_model" value="glm_model01" ftype="h5mlm" />
217 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> 215 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
218 <param name="selected_task" value="load" /> 216 <param name="selected_task" value="load" />
219 <output name="outfile_predict" file="glm_result01" lines_diff="4" /> 217 <output name="outfile_predict" file="glm_result01" lines_diff="4" />
220 </test> 218 </test>
221 <test> 219 <test>
227 <param name="selected_algorithm" value="SGDClassifier" /> 225 <param name="selected_algorithm" value="SGDClassifier" />
228 <param name="random_state" value="10" /> 226 <param name="random_state" value="10" />
229 <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" /> 227 <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" />
230 </test> 228 </test>
231 <test> 229 <test>
232 <param name="infile_model" value="glm_model02" ftype="zip" /> 230 <param name="infile_model" value="glm_model02" ftype="h5mlm" />
233 <param name="infile_data" value="test.tabular" ftype="tabular" /> 231 <param name="infile_data" value="test.tabular" ftype="tabular" />
234 <param name="selected_task" value="load" /> 232 <param name="selected_task" value="load" />
235 <output name="outfile_predict" file="glm_result02" /> 233 <output name="outfile_predict" file="glm_result02" />
236 </test> 234 </test>
237 <test> 235 <test>
243 <param name="selected_algorithm" value="RidgeClassifier" /> 241 <param name="selected_algorithm" value="RidgeClassifier" />
244 <param name="random_state" value="10" /> 242 <param name="random_state" value="10" />
245 <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" /> 243 <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" />
246 </test> 244 </test>
247 <test> 245 <test>
248 <param name="infile_model" value="glm_model03" ftype="zip" /> 246 <param name="infile_model" value="glm_model03" ftype="h5mlm" />
249 <param name="infile_data" value="test.tabular" ftype="tabular" /> 247 <param name="infile_data" value="test.tabular" ftype="tabular" />
250 <param name="selected_task" value="load" /> 248 <param name="selected_task" value="load" />
251 <output name="outfile_predict" file="glm_result03" /> 249 <output name="outfile_predict" file="glm_result03" />
252 </test> 250 </test>
253 <test> 251 <test>
258 <param name="selected_task" value="train" /> 256 <param name="selected_task" value="train" />
259 <param name="selected_algorithm" value="LinearRegression" /> 257 <param name="selected_algorithm" value="LinearRegression" />
260 <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" /> 258 <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" />
261 </test> 259 </test>
262 <test> 260 <test>
263 <param name="infile_model" value="glm_model04" ftype="zip" /> 261 <param name="infile_model" value="glm_model04" ftype="h5mlm" />
264 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> 262 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
265 <param name="selected_task" value="load" /> 263 <param name="selected_task" value="load" />
266 <output name="outfile_predict" file="glm_result04" lines_diff="8" /> 264 <output name="outfile_predict" file="glm_result04" lines_diff="8" />
267 </test> 265 </test>
268 <test> 266 <test>
274 <param name="selected_algorithm" value="LogisticRegression" /> 272 <param name="selected_algorithm" value="LogisticRegression" />
275 <param name="random_state" value="10" /> 273 <param name="random_state" value="10" />
276 <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" /> 274 <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" />
277 </test> 275 </test>
278 <test> 276 <test>
279 <param name="infile_model" value="glm_model05" ftype="zip" /> 277 <param name="infile_model" value="glm_model05" ftype="h5mlm" />
280 <param name="infile_data" value="test.tabular" ftype="tabular" /> 278 <param name="infile_data" value="test.tabular" ftype="tabular" />
281 <param name="selected_task" value="load" /> 279 <param name="selected_task" value="load" />
282 <output name="outfile_predict" file="glm_result05" /> 280 <output name="outfile_predict" file="glm_result05" />
283 </test> 281 </test>
284 <test> 282 <test>
285 <param name="infile1" value="train.tabular" ftype="tabular" /> 283 <param name="infile1" value="train.tabular" ftype="tabular" />
286 <param name="infile2" value="train.tabular" ftype="tabular" /> 284 <param name="infile2" value="train.tabular" ftype="tabular" />
287 <param name="col1" value="1,2,3,4" /> 285 <param name="col1" value="1,2,3,4" />
288 <param name="col2" value="5" /> 286 <param name="col2" value="5" />
289 <param name="selected_task" value="train" /> 287 <param name="selected_task" value="train" />
290 <param name="selected_algorithm" value="LogisticRegressionCV" /> 288 <param name="selected_algorithm" value="LogisticRegression" />
291 <param name="random_state" value="10" /> 289 <param name="random_state" value="10" />
290 <param name="penalty" value="none" />
291 <param name="solver" value="lbfgs" />
292 <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" /> 292 <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" />
293 </test> 293 </test>
294 <test> 294 <test>
295 <param name="infile_model" value="glm_model06" ftype="zip" /> 295 <param name="infile_model" value="glm_model06" ftype="h5mlm" />
296 <param name="infile_data" value="test.tabular" ftype="tabular" /> 296 <param name="infile_data" value="test.tabular" ftype="tabular" />
297 <param name="selected_task" value="load" /> 297 <param name="selected_task" value="load" />
298 <output name="outfile_predict" file="glm_result06" /> 298 <output name="outfile_predict" file="glm_result06" />
299 </test> 299 </test>
300 <test> 300 <test>
306 <param name="selected_algorithm" value="Ridge" /> 306 <param name="selected_algorithm" value="Ridge" />
307 <param name="random_state" value="10" /> 307 <param name="random_state" value="10" />
308 <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" /> 308 <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" />
309 </test> 309 </test>
310 <test> 310 <test>
311 <param name="infile_model" value="glm_model07" ftype="zip" /> 311 <param name="infile_model" value="glm_model07" ftype="h5mlm" />
312 <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> 312 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
313 <param name="selected_task" value="load" /> 313 <param name="selected_task" value="load" />
314 <output name="outfile_predict"> 314 <output name="outfile_predict">
315 <assert_contents> 315 <assert_contents>
316 <has_n_columns n="6" /> 316 <has_n_columns n="6" />
317 <has_text text="86.9702122735000" /> 317 <has_text text="86.9702122735" />
318 <has_text text="-1.0173960197" /> 318 <has_text text="-1.01739601979" />
319 <has_text text="0.64184687433" /> 319 <has_text text="0.641846874331" />
320 <has_text text="-0.621522971207000" /> 320 <has_text text="-0.621522971207" />
321 <has_text text="0.39001218449" /> 321 <has_text text="0.390012184498" />
322 <has_text text="0.596382816494397" /> 322 <has_text text="0.5963828164943976" />
323 <has_text text="-47.4101632272" /> 323 <has_text text="-47.4101632272" />
324 <has_text text="-0.732777468453000" /> 324 <has_text text="-0.732777468453" />
325 <has_text text="-1.0610977011" /> 325 <has_text text="-1.06109770116" />
326 <has_text text="-1.099948005770000" /> 326 <has_text text="-1.09994800577" />
327 <has_text text="0.58565796301" /> 327 <has_text text="0.585657963012" />
328 <has_text text="0.262144044202223" /> 328 <has_text text="0.26214404420222365" />
329 <has_text text="-206.99829512" /> 329 <has_text text="-206.998295124" />
330 <has_text text="0.7057412304" /> 330 <has_text text="0.70574123041" />
331 <has_text text="-1.332209237379999" /> 331 <has_text text="-1.33220923738" />
332 </assert_contents> 332 </assert_contents>
333 </output> 333 </output>
334 </test> 334 </test>
335 <test> 335 <test>
336 <param name="infile1" value="train.tabular" ftype="tabular" /> 336 <param name="infile1" value="train.tabular" ftype="tabular" />
341 <param name="selected_algorithm" value="Perceptron" /> 341 <param name="selected_algorithm" value="Perceptron" />
342 <param name="random_state" value="10" /> 342 <param name="random_state" value="10" />
343 <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" /> 343 <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" />
344 </test> 344 </test>
345 <test> 345 <test>
346 <param name="infile_model" value="glm_model08" ftype="zip" /> 346 <param name="infile_model" value="glm_model08" ftype="h5mlm" />
347 <param name="infile_data" value="test.tabular" ftype="tabular" /> 347 <param name="infile_data" value="test.tabular" ftype="tabular" />
348 <param name="selected_task" value="load" /> 348 <param name="selected_task" value="load" />
349 <output name="outfile_predict" file="glm_result08" /> 349 <output name="outfile_predict" file="glm_result08" />
350 </test> 350 </test>
351 </tests> 351 </tests>