Mercurial > repos > bgruening > sklearn_generalized_linear
diff generalized_linear.xml @ 41:fe181d613429 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 12:18:02 +0000 |
parents | 602edec75e1d |
children |
line wrap: on
line diff
--- a/generalized_linear.xml Thu Aug 11 09:02:29 2022 +0000 +++ b/generalized_linear.xml Wed Aug 09 12:18:02 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="20.05"> +<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="@PROFILE@"> <description>for classification and regression</description> <macros> <import>main_macros.xml</import> @@ -18,10 +18,10 @@ import numpy as np import sklearn.linear_model import pandas -import pickle from scipy.io import mmread -from galaxy_ml.utils import load_model, get_X_y +from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5 +from galaxy_ml.utils import clean_params, get_X_y input_json_path = sys.argv[1] @@ -38,12 +38,11 @@ my_class = getattr(sklearn.linear_model, algorithm) estimator = my_class(**options) estimator.fit(X,y) -with open("$outfile_fit", 'wb') as out_handler: - pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) +dump_model_to_h5(estimator, "$outfile_fit") #else: -with open("$selected_tasks.infile_model", 'rb') as model_handler: - classifier_object = load_model(model_handler) +classifier_object = load_model_from_h5("$selected_tasks.infile_model") +classifier_object = clean_params(classifier_object) header = 'infer' if params["selected_tasks"]["header"] else None data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None) prediction = classifier_object.predict(data) @@ -56,7 +55,7 @@ </configfile> </configfiles> <inputs> - <expand macro="sl_Conditional" model="zip"> + <expand macro="sl_Conditional" model="h5mlm"> <param name="selected_algorithm" type="select" label="Select a linear model:"> <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option> <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option> @@ -204,8 +203,7 @@ <test> <param name="infile1" value="regression_train.tabular" ftype="tabular" /> <param name="infile2" value="regression_train.tabular" ftype="tabular" /> - <param name="selected_column_selector_option" value="all_but_by_index_number" /> - <param name="col1" value="6" /> + <param name="col1" value="1,2,3,4,5" /> <param name="col2" value="6" /> <param name="selected_task" value="train" /> <param name="selected_algorithm" value="SGDRegressor" /> @@ -213,7 +211,7 @@ <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model01" ftype="zip" /> + <param name="infile_model" value="glm_model01" ftype="h5mlm" /> <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result01" lines_diff="4" /> @@ -229,7 +227,7 @@ <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model02" ftype="zip" /> + <param name="infile_model" value="glm_model02" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result02" /> @@ -245,7 +243,7 @@ <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model03" ftype="zip" /> + <param name="infile_model" value="glm_model03" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result03" /> @@ -260,7 +258,7 @@ <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model04" ftype="zip" /> + <param name="infile_model" value="glm_model04" ftype="h5mlm" /> <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result04" lines_diff="8" /> @@ -276,7 +274,7 @@ <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model05" ftype="zip" /> + <param name="infile_model" value="glm_model05" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result05" /> @@ -287,12 +285,14 @@ <param name="col1" value="1,2,3,4" /> <param name="col2" value="5" /> <param name="selected_task" value="train" /> - <param name="selected_algorithm" value="LogisticRegressionCV" /> + <param name="selected_algorithm" value="LogisticRegression" /> <param name="random_state" value="10" /> + <param name="penalty" value="none" /> + <param name="solver" value="lbfgs" /> <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model06" ftype="zip" /> + <param name="infile_model" value="glm_model06" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result06" /> @@ -308,27 +308,27 @@ <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model07" ftype="zip" /> + <param name="infile_model" value="glm_model07" ftype="h5mlm" /> <param name="infile_data" value="regression_test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict"> <assert_contents> <has_n_columns n="6" /> - <has_text text="86.9702122735000" /> - <has_text text="-1.0173960197" /> - <has_text text="0.64184687433" /> - <has_text text="-0.621522971207000" /> - <has_text text="0.39001218449" /> - <has_text text="0.596382816494397" /> + <has_text text="86.9702122735" /> + <has_text text="-1.01739601979" /> + <has_text text="0.641846874331" /> + <has_text text="-0.621522971207" /> + <has_text text="0.390012184498" /> + <has_text text="0.5963828164943976" /> <has_text text="-47.4101632272" /> - <has_text text="-0.732777468453000" /> - <has_text text="-1.0610977011" /> - <has_text text="-1.099948005770000" /> - <has_text text="0.58565796301" /> - <has_text text="0.262144044202223" /> - <has_text text="-206.99829512" /> - <has_text text="0.7057412304" /> - <has_text text="-1.332209237379999" /> + <has_text text="-0.732777468453" /> + <has_text text="-1.06109770116" /> + <has_text text="-1.09994800577" /> + <has_text text="0.585657963012" /> + <has_text text="0.26214404420222365" /> + <has_text text="-206.998295124" /> + <has_text text="0.70574123041" /> + <has_text text="-1.33220923738" /> </assert_contents> </output> </test> @@ -343,7 +343,7 @@ <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" /> </test> <test> - <param name="infile_model" value="glm_model08" ftype="zip" /> + <param name="infile_model" value="glm_model08" ftype="h5mlm" /> <param name="infile_data" value="test.tabular" ftype="tabular" /> <param name="selected_task" value="load" /> <output name="outfile_predict" file="glm_result08" />