comparison svm.xml @ 5:1c5989b930e3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author bgruening
date Sat, 29 Sep 2018 07:26:04 -0400
parents 41d0edb7d1fc
children 1a9d5a8fff12
comparison
equal deleted inserted replaced
4:41d0edb7d1fc 5:1c5989b930e3
18 import sys 18 import sys
19 import json 19 import json
20 import sklearn.svm 20 import sklearn.svm
21 import pandas 21 import pandas
22 22
23 execfile("$__tool_directory__/sk_whitelist.py") 23 with open("$__tool_directory__/sk_whitelist.json", "r") as f:
24 execfile("$__tool_directory__/utils.py", globals()) 24 sk_whitelist = json.load(f)
25 exec(open("$__tool_directory__/utils.py").read(), globals())
25 26
26 input_json_path = sys.argv[1] 27 input_json_path = sys.argv[1]
27 with open(input_json_path, "r") as param_handler: 28 with open(input_json_path, "r") as param_handler:
28 params = json.load(param_handler) 29 params = json.load(param_handler)
29 30
30 #if $selected_tasks.selected_task == "load": 31 #if $selected_tasks.selected_task == "load":
31 32
32 with open("$infile_model", 'rb') as model_handler:
33 classifier_object = SafePickler.load(model_handler)
34
35 header = 'infer' if params["selected_tasks"]["header"] else None 33 header = 'infer' if params["selected_tasks"]["header"] else None
36 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) 34 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
35
36 with open("$infile_model", 'rb') as model_handler:
37 classifier_object = load_model(model_handler)
38
37 prediction = classifier_object.predict(data) 39 prediction = classifier_object.predict(data)
38 prediction_df = pandas.DataFrame(prediction) 40 prediction_df = pandas.DataFrame(prediction)
39 res = pandas.concat([data, prediction_df], axis=1) 41 res = pandas.concat([data, prediction_df], axis=1)
40 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) 42 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
41 43
53 my_class = getattr(sklearn.svm, selected_algorithm) 55 my_class = getattr(sklearn.svm, selected_algorithm)
54 classifier_object = my_class(**options) 56 classifier_object = my_class(**options)
55 classifier_object.fit(X, y) 57 classifier_object.fit(X, y)
56 58
57 with open("$outfile_fit", 'wb') as out_handler: 59 with open("$outfile_fit", 'wb') as out_handler:
58 pickle.dump(classifier_object, out_handler, pickle.HIGHEST_PROTOCOL) 60 pickle.dump(classifier_object, out_handler)
59 61
60 #end if 62 #end if
61 63
62 ]]> 64 ]]>
63 </configfile> 65 </configfile>
106 <param argument="intercept_scaling" type="float" optional="true" value="1" label="Add synthetic feature to the instance vector" help=" "/> 108 <param argument="intercept_scaling" type="float" optional="true" value="1" label="Add synthetic feature to the instance vector" help=" "/>
107 </section> 109 </section>
108 </when> 110 </when>
109 </expand> 111 </expand>
110 </inputs> 112 </inputs>
111
112 <expand macro="output"/> 113 <expand macro="output"/>
113
114 <tests> 114 <tests>
115 <test> 115 <test>
116 <param name="infile1" value="train_set.tabular" ftype="tabular"/> 116 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
117 <param name="infile2" value="train_set.tabular" ftype="tabular"/> 117 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
118 <param name="header1" value="True"/> 118 <param name="header1" value="True"/>
120 <param name="col1" value="1,2,3,4"/> 120 <param name="col1" value="1,2,3,4"/>
121 <param name="col2" value="5"/> 121 <param name="col2" value="5"/>
122 <param name="selected_task" value="train"/> 122 <param name="selected_task" value="train"/>
123 <param name="selected_algorithm" value="SVC"/> 123 <param name="selected_algorithm" value="SVC"/>
124 <param name="random_state" value="5"/> 124 <param name="random_state" value="5"/>
125 <output name="outfile_fit" file="svc_model01.txt" compare="sim_size" delta="1"/> 125 <output name="outfile_fit" file="svc_model01" compare="sim_size"/>
126 </test> 126 </test>
127 <test> 127 <test>
128 <param name="infile1" value="train_set.tabular" ftype="tabular"/> 128 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
129 <param name="infile2" value="train_set.tabular" ftype="tabular"/> 129 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
130 <param name="header1" value="True"/> 130 <param name="header1" value="True"/>
132 <param name="col1" value="1,2,3,4"/> 132 <param name="col1" value="1,2,3,4"/>
133 <param name="col2" value="5"/> 133 <param name="col2" value="5"/>
134 <param name="selected_task" value="train"/> 134 <param name="selected_task" value="train"/>
135 <param name="selected_algorithm" value="NuSVC"/> 135 <param name="selected_algorithm" value="NuSVC"/>
136 <param name="random_state" value="5"/> 136 <param name="random_state" value="5"/>
137 <output name="outfile_fit" file="svc_model02.txt" compare="sim_size" delta="1"/> 137 <output name="outfile_fit" file="svc_model02" compare="sim_size"/>
138 </test> 138 </test>
139 <test> 139 <test>
140 <param name="infile1" value="train_set.tabular" ftype="tabular"/> 140 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
141 <param name="infile2" value="train_set.tabular" ftype="tabular"/> 141 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
142 <param name="header1" value="True"/> 142 <param name="header1" value="True"/>
144 <param name="col1" value="1,2,3,4"/> 144 <param name="col1" value="1,2,3,4"/>
145 <param name="col2" value="5"/> 145 <param name="col2" value="5"/>
146 <param name="selected_task" value="train"/> 146 <param name="selected_task" value="train"/>
147 <param name="selected_algorithm" value="LinearSVC"/> 147 <param name="selected_algorithm" value="LinearSVC"/>
148 <param name="random_state" value="5"/> 148 <param name="random_state" value="5"/>
149 <output name="outfile_fit" file="svc_model03.txt" compare="sim_size" delta="1"/> 149 <output name="outfile_fit" file="svc_model03" compare="sim_size"/>
150 </test> 150 </test>
151 <test> 151 <test>
152 <param name="infile_model" value="svc_model01.txt" ftype="zip"/> 152 <param name="infile_model" value="svc_model01" ftype="zip"/>
153 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 153 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
154 <param name="header" value="True"/> 154 <param name="header" value="True"/>
155 <param name="selected_task" value="load"/> 155 <param name="selected_task" value="load"/>
156 <output name="outfile_predict" file="svc_prediction_result01.tabular"/> 156 <output name="outfile_predict" file="svc_prediction_result01.tabular"/>
157 </test> 157 </test>
158 <test> 158 <test>
159 <param name="infile_model" value="svc_model02.txt" ftype="zip"/> 159 <param name="infile_model" value="svc_model02" ftype="zip"/>
160 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 160 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
161 <param name="header" value="True"/> 161 <param name="header" value="True"/>
162 <param name="selected_task" value="load"/> 162 <param name="selected_task" value="load"/>
163 <output name="outfile_predict" file="svc_prediction_result02.tabular"/> 163 <output name="outfile_predict" file="svc_prediction_result02.tabular"/>
164 </test> 164 </test>
165 <test> 165 <test>
166 <param name="infile_model" value="svc_model03.txt" ftype="zip"/> 166 <param name="infile_model" value="svc_model03" ftype="zip"/>
167 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 167 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
168 <param name="header" value="True"/> 168 <param name="header" value="True"/>
169 <param name="selected_task" value="load"/> 169 <param name="selected_task" value="load"/>
170 <output name="outfile_predict" file="svc_prediction_result03.tabular"/> 170 <output name="outfile_predict" file="svc_prediction_result03.tabular"/>
171 </test>
172 <!-- The following test is expected to fail: it tests the whitelist/blacklist filtering
173 by loading a pickle with malicious content that we do not accept. -->
174 <test expect_failure="true">
175 <param name="infile_model" value="pickle_blacklist" ftype="zip"/>
176 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
177 <param name="header" value="True"/>
178 <param name="selected_task" value="load"/>
171 </test> 179 </test>
172 </tests> 180 </tests>
173 <help><![CDATA[ 181 <help><![CDATA[
174 **What it does** 182 **What it does**
175 This module implements the Support Vector Machine (SVM) classification algorithms. 183 This module implements the Support Vector Machine (SVM) classification algorithms.