Mercurial > repos > bgruening > sklearn_svm_classifier
comparison svm.xml @ 5:1c5989b930e3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author | bgruening |
---|---|
date | Sat, 29 Sep 2018 07:26:04 -0400 |
parents | 41d0edb7d1fc |
children | 1a9d5a8fff12 |
comparison
equal
deleted
inserted
replaced
4:41d0edb7d1fc | 5:1c5989b930e3 |
---|---|
18 import sys | 18 import sys |
19 import json | 19 import json |
20 import sklearn.svm | 20 import sklearn.svm |
21 import pandas | 21 import pandas |
22 | 22 |
23 execfile("$__tool_directory__/sk_whitelist.py") | 23 with open("$__tool_directory__/sk_whitelist.json", "r") as f: |
24 execfile("$__tool_directory__/utils.py", globals()) | 24 sk_whitelist = json.load(f) |
25 exec(open("$__tool_directory__/utils.py").read(), globals()) | |
25 | 26 |
26 input_json_path = sys.argv[1] | 27 input_json_path = sys.argv[1] |
27 with open(input_json_path, "r") as param_handler: | 28 with open(input_json_path, "r") as param_handler: |
28 params = json.load(param_handler) | 29 params = json.load(param_handler) |
29 | 30 |
30 #if $selected_tasks.selected_task == "load": | 31 #if $selected_tasks.selected_task == "load": |
31 | 32 |
32 with open("$infile_model", 'rb') as model_handler: | |
33 classifier_object = SafePickler.load(model_handler) | |
34 | |
35 header = 'infer' if params["selected_tasks"]["header"] else None | 33 header = 'infer' if params["selected_tasks"]["header"] else None |
36 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) | 34 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) |
35 | |
36 with open("$infile_model", 'rb') as model_handler: | |
37 classifier_object = load_model(model_handler) | |
38 | |
37 prediction = classifier_object.predict(data) | 39 prediction = classifier_object.predict(data) |
38 prediction_df = pandas.DataFrame(prediction) | 40 prediction_df = pandas.DataFrame(prediction) |
39 res = pandas.concat([data, prediction_df], axis=1) | 41 res = pandas.concat([data, prediction_df], axis=1) |
40 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) | 42 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) |
41 | 43 |
53 my_class = getattr(sklearn.svm, selected_algorithm) | 55 my_class = getattr(sklearn.svm, selected_algorithm) |
54 classifier_object = my_class(**options) | 56 classifier_object = my_class(**options) |
55 classifier_object.fit(X, y) | 57 classifier_object.fit(X, y) |
56 | 58 |
57 with open("$outfile_fit", 'wb') as out_handler: | 59 with open("$outfile_fit", 'wb') as out_handler: |
58 pickle.dump(classifier_object, out_handler, pickle.HIGHEST_PROTOCOL) | 60 pickle.dump(classifier_object, out_handler) |
59 | 61 |
60 #end if | 62 #end if |
61 | 63 |
62 ]]> | 64 ]]> |
63 </configfile> | 65 </configfile> |
106 <param argument="intercept_scaling" type="float" optional="true" value="1" label="Add synthetic feature to the instance vector" help=" "/> | 108 <param argument="intercept_scaling" type="float" optional="true" value="1" label="Add synthetic feature to the instance vector" help=" "/> |
107 </section> | 109 </section> |
108 </when> | 110 </when> |
109 </expand> | 111 </expand> |
110 </inputs> | 112 </inputs> |
111 | |
112 <expand macro="output"/> | 113 <expand macro="output"/> |
113 | |
114 <tests> | 114 <tests> |
115 <test> | 115 <test> |
116 <param name="infile1" value="train_set.tabular" ftype="tabular"/> | 116 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
117 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | 117 <param name="infile2" value="train_set.tabular" ftype="tabular"/> |
118 <param name="header1" value="True"/> | 118 <param name="header1" value="True"/> |
120 <param name="col1" value="1,2,3,4"/> | 120 <param name="col1" value="1,2,3,4"/> |
121 <param name="col2" value="5"/> | 121 <param name="col2" value="5"/> |
122 <param name="selected_task" value="train"/> | 122 <param name="selected_task" value="train"/> |
123 <param name="selected_algorithm" value="SVC"/> | 123 <param name="selected_algorithm" value="SVC"/> |
124 <param name="random_state" value="5"/> | 124 <param name="random_state" value="5"/> |
125 <output name="outfile_fit" file="svc_model01.txt" compare="sim_size" delta="1"/> | 125 <output name="outfile_fit" file="svc_model01" compare="sim_size"/> |
126 </test> | 126 </test> |
127 <test> | 127 <test> |
128 <param name="infile1" value="train_set.tabular" ftype="tabular"/> | 128 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
129 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | 129 <param name="infile2" value="train_set.tabular" ftype="tabular"/> |
130 <param name="header1" value="True"/> | 130 <param name="header1" value="True"/> |
132 <param name="col1" value="1,2,3,4"/> | 132 <param name="col1" value="1,2,3,4"/> |
133 <param name="col2" value="5"/> | 133 <param name="col2" value="5"/> |
134 <param name="selected_task" value="train"/> | 134 <param name="selected_task" value="train"/> |
135 <param name="selected_algorithm" value="NuSVC"/> | 135 <param name="selected_algorithm" value="NuSVC"/> |
136 <param name="random_state" value="5"/> | 136 <param name="random_state" value="5"/> |
137 <output name="outfile_fit" file="svc_model02.txt" compare="sim_size" delta="1"/> | 137 <output name="outfile_fit" file="svc_model02" compare="sim_size"/> |
138 </test> | 138 </test> |
139 <test> | 139 <test> |
140 <param name="infile1" value="train_set.tabular" ftype="tabular"/> | 140 <param name="infile1" value="train_set.tabular" ftype="tabular"/> |
141 <param name="infile2" value="train_set.tabular" ftype="tabular"/> | 141 <param name="infile2" value="train_set.tabular" ftype="tabular"/> |
142 <param name="header1" value="True"/> | 142 <param name="header1" value="True"/> |
144 <param name="col1" value="1,2,3,4"/> | 144 <param name="col1" value="1,2,3,4"/> |
145 <param name="col2" value="5"/> | 145 <param name="col2" value="5"/> |
146 <param name="selected_task" value="train"/> | 146 <param name="selected_task" value="train"/> |
147 <param name="selected_algorithm" value="LinearSVC"/> | 147 <param name="selected_algorithm" value="LinearSVC"/> |
148 <param name="random_state" value="5"/> | 148 <param name="random_state" value="5"/> |
149 <output name="outfile_fit" file="svc_model03.txt" compare="sim_size" delta="1"/> | 149 <output name="outfile_fit" file="svc_model03" compare="sim_size"/> |
150 </test> | 150 </test> |
151 <test> | 151 <test> |
152 <param name="infile_model" value="svc_model01.txt" ftype="zip"/> | 152 <param name="infile_model" value="svc_model01" ftype="zip"/> |
153 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 153 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
154 <param name="header" value="True"/> | 154 <param name="header" value="True"/> |
155 <param name="selected_task" value="load"/> | 155 <param name="selected_task" value="load"/> |
156 <output name="outfile_predict" file="svc_prediction_result01.tabular"/> | 156 <output name="outfile_predict" file="svc_prediction_result01.tabular"/> |
157 </test> | 157 </test> |
158 <test> | 158 <test> |
159 <param name="infile_model" value="svc_model02.txt" ftype="zip"/> | 159 <param name="infile_model" value="svc_model02" ftype="zip"/> |
160 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 160 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
161 <param name="header" value="True"/> | 161 <param name="header" value="True"/> |
162 <param name="selected_task" value="load"/> | 162 <param name="selected_task" value="load"/> |
163 <output name="outfile_predict" file="svc_prediction_result02.tabular"/> | 163 <output name="outfile_predict" file="svc_prediction_result02.tabular"/> |
164 </test> | 164 </test> |
165 <test> | 165 <test> |
166 <param name="infile_model" value="svc_model03.txt" ftype="zip"/> | 166 <param name="infile_model" value="svc_model03" ftype="zip"/> |
167 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | 167 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> |
168 <param name="header" value="True"/> | 168 <param name="header" value="True"/> |
169 <param name="selected_task" value="load"/> | 169 <param name="selected_task" value="load"/> |
170 <output name="outfile_predict" file="svc_prediction_result03.tabular"/> | 170 <output name="outfile_predict" file="svc_prediction_result03.tabular"/> |
171 </test> | |
172 <!-- The following test is expected to fail, it is testing the whitelist/blacklist filtering. | |
173 It loads a pickle with malicious content that we do not accept. --> | |
174 <test expect_failure="true"> | |
175 <param name="infile_model" value="pickle_blacklist" ftype="zip"/> | |
176 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> | |
177 <param name="header" value="True"/> | |
178 <param name="selected_task" value="load"/> | |
171 </test> | 179 </test> |
172 </tests> | 180 </tests> |
173 <help><![CDATA[ | 181 <help><![CDATA[ |
174 **What it does** | 182 **What it does** |
175 This module implements the Support Vector Machine (SVM) classification algorithms. | 183 This module implements the Support Vector Machine (SVM) classification algorithms. |