sklearn_generalized_linear: generalized_linear.xml comparison

comparison generalized_linear.xml @ 41:fe181d613429 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb

author	bgruening
date	Wed, 09 Aug 2023 12:18:02 +0000
parents	602edec75e1d
children

comparison

Legend: equal, deleted, inserted, replaced

-:dc4b5fd604a6
+:fe181d613429
-<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="20.05">
+<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="@PROFILE@">
 <description>for classification and regression</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
 <expand macro="python_requirements" />
 import sys
 import json
 import numpy as np
 import sklearn.linear_model
 import pandas
-import pickle
 from scipy.io import mmread
-from galaxy_ml.utils import load_model, get_X_y
+from galaxy_ml.model_persist import dump_model_to_h5, load_model_from_h5
+from galaxy_ml.utils import clean_params, get_X_y
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
 params = json.load(param_handler)
 options = params["selected_tasks"]["selected_algorithms"]["options"]
 my_class = getattr(sklearn.linear_model, algorithm)
 estimator = my_class(**options)
 estimator.fit(X,y)
-with open("$outfile_fit", 'wb') as out_handler:
+dump_model_to_h5(estimator, "$outfile_fit")
-pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
 #else:
-with open("$selected_tasks.infile_model", 'rb') as model_handler:
+classifier_object = load_model_from_h5("$selected_tasks.infile_model")
-classifier_object = load_model(model_handler)
+classifier_object = clean_params(classifier_object)
 header = 'infer' if params["selected_tasks"]["header"] else None
 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None)
 prediction = classifier_object.predict(data)
 prediction_df = pandas.DataFrame(prediction, columns=["predicted"])
 res = pandas.concat([data, prediction_df], axis=1)
 ]]>
 </configfile>
 </configfiles>
 <inputs>
-<expand macro="sl_Conditional" model="zip">
+<expand macro="sl_Conditional" model="h5mlm">
 <param name="selected_algorithm" type="select" label="Select a linear model:">
 <option value="SGDClassifier" selected="true">Stochastic Gradient Descent (SGD) classifier</option>
 <option value="SGDRegressor">Stochastic Gradient Descent (SGD) regressor</option>
 <option value="LinearRegression">Linear Regression model</option>
 <option value="RidgeClassifier">Ridge classifier</option>
 <expand macro="output" />
 <tests>
 <test>
 <param name="infile1" value="regression_train.tabular" ftype="tabular" />
 <param name="infile2" value="regression_train.tabular" ftype="tabular" />
-<param name="selected_column_selector_option" value="all_but_by_index_number" />
+<param name="col1" value="1,2,3,4,5" />
-<param name="col1" value="6" />
 <param name="col2" value="6" />
 <param name="selected_task" value="train" />
 <param name="selected_algorithm" value="SGDRegressor" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model01" ftype="zip" />
+<param name="infile_model" value="glm_model01" ftype="h5mlm" />
 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result01" lines_diff="4" />
 </test>
 <test>
 <param name="selected_algorithm" value="SGDClassifier" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model02" ftype="zip" />
+<param name="infile_model" value="glm_model02" ftype="h5mlm" />
 <param name="infile_data" value="test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result02" />
 </test>
 <test>
 <param name="selected_algorithm" value="RidgeClassifier" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model03" ftype="zip" />
+<param name="infile_model" value="glm_model03" ftype="h5mlm" />
 <param name="infile_data" value="test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result03" />
 </test>
 <test>
 <param name="selected_task" value="train" />
 <param name="selected_algorithm" value="LinearRegression" />
 <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model04" ftype="zip" />
+<param name="infile_model" value="glm_model04" ftype="h5mlm" />
 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result04" lines_diff="8" />
 </test>
 <test>
 <param name="selected_algorithm" value="LogisticRegression" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model05" ftype="zip" />
+<param name="infile_model" value="glm_model05" ftype="h5mlm" />
 <param name="infile_data" value="test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result05" />
 </test>
 <test>
 <param name="infile1" value="train.tabular" ftype="tabular" />
 <param name="infile2" value="train.tabular" ftype="tabular" />
 <param name="col1" value="1,2,3,4" />
 <param name="col2" value="5" />
 <param name="selected_task" value="train" />
-<param name="selected_algorithm" value="LogisticRegressionCV" />
+<param name="selected_algorithm" value="LogisticRegression" />
 <param name="random_state" value="10" />
+<param name="penalty" value="none" />
+<param name="solver" value="lbfgs" />
 <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model06" ftype="zip" />
+<param name="infile_model" value="glm_model06" ftype="h5mlm" />
 <param name="infile_data" value="test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result06" />
 </test>
 <test>
 <param name="selected_algorithm" value="Ridge" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model07" ftype="zip" />
+<param name="infile_model" value="glm_model07" ftype="h5mlm" />
 <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict">
 <assert_contents>
 <has_n_columns n="6" />
-<has_text text="86.9702122735000" />
+<has_text text="86.9702122735" />
-<has_text text="-1.0173960197" />
+<has_text text="-1.01739601979" />
-<has_text text="0.64184687433" />
+<has_text text="0.641846874331" />
-<has_text text="-0.621522971207000" />
+<has_text text="-0.621522971207" />
-<has_text text="0.39001218449" />
+<has_text text="0.390012184498" />
-<has_text text="0.596382816494397" />
+<has_text text="0.5963828164943976" />
 <has_text text="-47.4101632272" />
-<has_text text="-0.732777468453000" />
+<has_text text="-0.732777468453" />
-<has_text text="-1.0610977011" />
+<has_text text="-1.06109770116" />
-<has_text text="-1.099948005770000" />
+<has_text text="-1.09994800577" />
-<has_text text="0.58565796301" />
+<has_text text="0.585657963012" />
-<has_text text="0.262144044202223" />
+<has_text text="0.26214404420222365" />
-<has_text text="-206.99829512" />
+<has_text text="-206.998295124" />
-<has_text text="0.7057412304" />
+<has_text text="0.70574123041" />
-<has_text text="-1.332209237379999" />
+<has_text text="-1.33220923738" />
 </assert_contents>
 </output>
 </test>
 <test>
 <param name="infile1" value="train.tabular" ftype="tabular" />
 <param name="selected_algorithm" value="Perceptron" />
 <param name="random_state" value="10" />
 <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" />
 </test>
 <test>
-<param name="infile_model" value="glm_model08" ftype="zip" />
+<param name="infile_model" value="glm_model08" ftype="h5mlm" />
 <param name="infile_data" value="test.tabular" ftype="tabular" />
 <param name="selected_task" value="load" />
 <output name="outfile_predict" file="glm_result08" />
 </test>
 </tests>

Mercurial > repos > bgruening > sklearn_generalized_linear

comparison generalized_linear.xml @ 41:fe181d613429 draft