Mercurial > repos > bgruening > svm_classifier
comparison main_macros.xml @ 0:9a9396e5d153 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author | bgruening |
---|---|
date | Fri, 16 Feb 2018 09:16:30 -0500 |
parents | |
children | be9b169a1e05 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9a9396e5d153 |
---|---|
1 <macros> | |
2 <token name="@VERSION@">0.9</token> | |
3 | |
4 <token name="@COLUMNS_FUNCTION@"> | |
def columns(f, c):
    """Return selected columns of a tab-separated table as an array.

    f: path or file-like object passed straight to pandas.read_csv; it is
       read with a tab separator and without a header row.
    c: comma-separated string of 1-based column numbers, e.g. '1,3'.

    Returns the ``.values`` of the selected columns, in the order given in c.
    The deprecated ``tupleize_cols`` keyword is no longer passed: with
    header=None it had no effect, and later pandas releases reject it.
    """
    data = pandas.read_csv(f, sep='\t', header=None, index_col=None, parse_dates=True, encoding=None)
    cols = [int(x) - 1 for x in c.split(',')]
    return data.iloc[:, cols].values
12 </token> | |
13 | |
<!-- python_requirements: shared requirements block pinning python, scikit-learn and pandas.
     The yield lets the expanding tool append further requirement elements. -->
<xml name="python_requirements">
    <requirements>
        <requirement version="2.7" type="package">python</requirement>
        <requirement version="0.19.1" type="package">scikit-learn</requirement>
        <requirement version="0.22.0" type="package">pandas</requirement>
        <yield/>
    </requirements>
</xml>
22 | |
<!-- macro_stdio: any exit code in the range "1:" is reported as a fatal error. -->
<xml name="macro_stdio">
    <stdio>
        <exit_code level="fatal"
                   range="1:"
                   description="Error occurred. Please check Tool Standard Error"/>
    </stdio>
</xml>
28 | |
29 | |
30 <!--Generic interface--> | |
<!-- train_loadConditional: outer "train" / "load" switch.
     load  : asks for a model file (format token MODEL) and a dataset (format token DATA),
             plus a nested choice between plain and advanced prediction.
     train : asks for a training dataset (format token TRAIN); the expanding tool supplies
             the content of the "selected_algorithms" conditional through the yield. -->
<xml name="train_loadConditional" token_train="tabular" token_data="tabular" token_model="txt">
    <conditional name="selected_tasks">
        <param name="selected_task"
               type="select"
               label="Select a Classification Task">
            <option selected="true" value="train">Train a model</option>
            <option value="load">Load a model and predict</option>
        </param>
        <when value="load">
            <param name="infile_model"
                   type="data" format="@MODEL@"
                   label="Models" help="Select a model file."/>
            <param name="infile_data"
                   type="data" format="@DATA@"
                   label="Data (tabular)" help="Select the dataset you want to classify."/>
            <conditional name="prediction_options">
                <param name="prediction_option"
                       type="select"
                       label="Select the type of prediction">
                    <option value="predict">Predict class labels</option>
                    <option value="advanced">Include advanced options</option>
                </param>
                <when value="predict"/>
                <when value="advanced"/>
            </conditional>
        </when>
        <when value="train">
            <param name="infile_train"
                   type="data" format="@TRAIN@"
                   label="Training samples (tabular)"/>
            <conditional name="selected_algorithms">
                <yield/>
            </conditional>
        </when>
    </conditional>
</xml>
59 | |
<!-- sl_Conditional: "train" / "load" switch whose train branch only wraps the yielded
     content in the "selected_algorithms" conditional (it declares no training-file param).
     The TRAIN token is declared on the macro but not referenced in its body. -->
<xml name="sl_Conditional" token_train="tabular" token_data="tabular" token_model="txt">
    <conditional name="selected_tasks">
        <param name="selected_task"
               type="select"
               label="Select a Classification Task">
            <option selected="true" value="train">Train a model</option>
            <option value="load">Load a model and predict</option>
        </param>
        <when value="load">
            <param name="infile_model"
                   type="data" format="@MODEL@"
                   label="Models" help="Select a model file."/>
            <param name="infile_data"
                   type="data" format="@DATA@"
                   label="Data (tabular)" help="Select the dataset you want to classify."/>
            <conditional name="prediction_options">
                <param name="prediction_option"
                       type="select"
                       label="Select the type of prediction">
                    <option value="predict">Predict class labels</option>
                    <option value="advanced">Include advanced options</option>
                </param>
                <when value="predict"/>
                <when value="advanced"/>
            </conditional>
        </when>
        <when value="train">
            <conditional name="selected_algorithms">
                <yield/>
            </conditional>
        </when>
    </conditional>
</xml>
87 | |
<!-- advanced_section: collapsed "Advanced Options" section; its content comes from the yield. -->
<xml name="advanced_section">
    <section name="options"
             title="Advanced Options"
             expanded="False">
        <yield/>
    </section>
</xml>
93 | |
94 | |
95 <!--Generalized Linear Models--> | |
<!-- loss: select for the "loss" argument. Token SELECT controls whether "squared_loss"
     is preselected; extra options can be appended through the yield. -->
<xml name="loss" token_help=" " token_select="false">
    <param argument="loss"
           type="select"
           label="Loss function" help="@HELP@">
        <option selected="@SELECT@" value="squared_loss">squared loss</option>
        <option value="huber">huber</option>
        <option value="epsilon_insensitive">epsilon insensitive</option>
        <option value="squared_epsilon_insensitive">squared epsilon insensitive</option>
        <yield/>
    </param>
</xml>
105 | |
<!-- penalty: select for the "penalty" argument with "l2" preselected;
     extra options can be appended through the yield. -->
<xml name="penalty" token_help=" ">
    <param argument="penalty"
           type="select"
           label="Penalty (regularization term)" help="@HELP@">
        <option selected="true" value="l2">l2</option>
        <option value="l1">l1</option>
        <option value="elasticnet">elastic net</option>
        <option value="none">none</option>
        <yield/>
    </param>
</xml>
115 | |
<!-- l1_ratio: float param for the "l1_ratio" argument; default value from token DEFAULT_VALUE (0.15). -->
<xml name="l1_ratio" token_default_value="0.15" token_help=" ">
    <param argument="l1_ratio"
           type="float" value="@DEFAULT_VALUE@"
           label="Elastic Net mixing parameter" help="@HELP@"/>
</xml>
119 | |
<!-- epsilon: float param for the "epsilon" argument; default value from token DEFAULT_VALUE (0.1). -->
<xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. ">
    <param argument="epsilon"
           type="float" value="@DEFAULT_VALUE@"
           label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@"/>
</xml>
123 | |
<!-- learning_rate_s: optional select for the "learning_rate" schedule. Tokens SELECTED1 and
     SELECTED2 preselect "optimal" and "invscaling" respectively; yield appends options. -->
<xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false">
    <param argument="learning_rate"
           type="select" optional="true"
           label="Learning rate schedule" help="@HELP@">
        <option selected="@SELECTED1@" value="optimal">optimal</option>
        <option value="constant">constant</option>
        <option selected="@SELECTED2@" value="invscaling">inverse scaling</option>
        <yield/>
    </param>
</xml>
132 | |
<!-- eta0: float param for the "eta0" argument; default value from token DEFAULT_VALUE (0.0). -->
<xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. ">
    <param argument="eta0"
           type="float" value="@DEFAULT_VALUE@"
           label="Initial learning rate" help="@HELP@"/>
</xml>
136 | |
<!-- power_t: float param for the "power_t" argument; default value from token DEFAULT_VALUE (0.5). -->
<xml name="power_t" token_default_value="0.5" token_help=" ">
    <param argument="power_t"
           type="float" value="@DEFAULT_VALUE@"
           label="Exponent for inverse scaling learning rate" help="@HELP@"/>
</xml>
140 | |
<!-- normalize: optional boolean for the "normalize" argument (booltrue / boolfalse).
     Token CHECKED sets the initial state. The help text now comes from token HELP,
     which was declared but previously ignored; its default " " keeps the old rendering. -->
<xml name="normalize" token_checked="false" token_help=" ">
    <param argument="normalize"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Normalize samples before training" help="@HELP@"/>
</xml>
144 | |
<!-- copy_X: optional boolean for the "copy_X" argument (booltrue / boolfalse).
     Token CHECKED sets the initial state. The former hard-coded help text is now the
     default of token HELP, so an expanding tool can override it; without an override
     the rendered help is unchanged. -->
<xml name="copy_X" token_checked="true" token_help="If false, samples would be overwritten. ">
    <param argument="copy_X"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Use a copy of samples" help="@HELP@"/>
</xml>
148 | |
<!-- ridge_params: parameter bundle built from the shared macros (normalize, alpha,
     fit_intercept, max_iter, tol, copy_X, random_state) plus a local "solver" select. -->
<xml name="ridge_params">
    <expand macro="normalize"/>
    <expand macro="alpha" default_value="1.0"/>
    <expand macro="fit_intercept"/>
    <expand macro="max_iter" default_value=""/>
    <expand macro="tol"
            default_value="0.001"
            help_text="Precision of the solution. "/>
    <!--class_weight-->
    <expand macro="copy_X"/>
    <param argument="solver"
           type="select" value=""
           label="Solver to use in the computational routines" help=" ">
        <option selected="true" value="auto">auto</option>
        <option value="svd">svd</option>
        <option value="cholesky">cholesky</option>
        <option value="lsqr">lsqr</option>
        <option value="sparse_cg">sparse_cg</option>
        <option value="sag">sag</option>
    </param>
    <expand macro="random_state"/>
</xml>
167 | |
168 <!--Ensemble methods--> | |
<!-- n_estimators: optional integer param; default value from token DEFAULT_VALUE (10). -->
<xml name="n_estimators" token_default_value="10" token_help=" ">
    <param argument="n_estimators"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of trees in the forest" help="@HELP@"/>
</xml>
172 | |
<!-- max_depth: optional integer param; token DEFAULT_VALUE is empty unless overridden. -->
<xml name="max_depth" token_default_value="" token_help=" ">
    <param argument="max_depth"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Maximum depth of the tree" help="@HELP@"/>
</xml>
176 | |
<!-- min_samples_split: optional integer param; default value from token DEFAULT_VALUE (2).
     The label previously repeated the max_depth label ("Maximum depth of the tree");
     it now describes this argument. -->
<xml name="min_samples_split" token_default_value="2" token_help=" ">
    <param argument="min_samples_split"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Minimum number of samples required to split an internal node" help="@HELP@"/>
</xml>
180 | |
<!-- min_samples_leaf: optional integer param; default value from token DEFAULT_VALUE (1). -->
<xml name="min_samples_leaf" token_default_value="1" token_help=" ">
    <param argument="min_samples_leaf"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Minimum number of samples in newly created leaves" help="@HELP@"/>
</xml>
184 | |
<!-- min_weight_fraction_leaf: optional float param; default value from token DEFAULT_VALUE (0.0). -->
<xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" ">
    <param argument="min_weight_fraction_leaf"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@"/>
</xml>
188 | |
<!-- max_leaf_nodes: optional integer param; token DEFAULT_VALUE is empty unless overridden. -->
<xml name="max_leaf_nodes" token_default_value="" token_help=" ">
    <param argument="max_leaf_nodes"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Maximum number of leaf nodes in best-first method" help="@HELP@"/>
</xml>
192 | |
<!-- bootstrap: optional boolean for the "bootstrap" argument; token CHECKED sets the
     initial state. The false value is "boolfalse", matching every other boolean macro
     in this file (it was misspelled "boolflase"). -->
<xml name="bootstrap" token_checked="true" token_help=" ">
    <param argument="bootstrap"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Use bootstrap samples for building trees." help="@HELP@"/>
</xml>
196 | |
<!-- criterion: select for the "criterion" argument with "gini" preselected; yield appends
     options. The help text now comes from token HELP, which was declared but previously
     ignored; its default " " keeps the old rendering. -->
<xml name="criterion" token_help=" ">
    <param argument="criterion"
           type="select"
           label="Function to measure the quality of a split" help="@HELP@">
        <option value="gini" selected="true">Gini impurity</option>
        <option value="entropy">Information gain</option>
        <yield/>
    </param>
</xml>
204 | |
<!-- oob_score: optional boolean (booltrue / boolfalse); token CHECKED sets the initial state. -->
<xml name="oob_score" token_checked="false" token_help=" ">
    <param argument="oob_score"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/>
</xml>
208 | |
<!-- max_features: optional free-text param; default value from token DEFAULT_VALUE ("auto"). -->
<xml name="max_features" token_default_value="auto" token_help="This could be an integer, float, string, or None. For more information please refer to help. ">
    <param argument="max_features"
           type="text" optional="true" value="@DEFAULT_VALUE@"
           label="Number of features for finding the best split" help="@HELP@"/>
</xml>
212 | |
<!-- learning_rate: optional float param; default value from token DEFAULT_VALUE (1.0). -->
<xml name="learning_rate" token_default_value="1.0" token_help=" ">
    <param argument="learning_rate"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Learning rate" help="@HELP@"/>
</xml>
216 | |
217 | |
218 <!--Parameters--> | |
<!-- tol: optional float param; default value from token DEFAULT_VALUE (0.0),
     help text from token HELP_TEXT. -->
<xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection.">
    <param argument="tol"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Tolerance" help="@HELP_TEXT@"/>
</xml>
222 | |
<!-- n_clusters: optional integer param; default value from token DEFAULT_VALUE (8). -->
<xml name="n_clusters" token_default_value="8">
    <param argument="n_clusters"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of clusters" help=" "/>
</xml>
226 | |
<!-- fit_intercept: optional boolean (booltrue / boolfalse); token CHECKED sets the initial state. -->
<xml name="fit_intercept" token_checked="true">
    <param argument="fit_intercept"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
</xml>
230 | |
<!-- n_iter: optional integer param; default value from token DEFAULT_VALUE (5). -->
<xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). ">
    <param argument="n_iter"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of iterations" help="@HELP_TEXT@"/>
</xml>
234 | |
<!-- shuffle: optional boolean (booltrue / boolfalse); label, help and initial state
     all come from tokens (LABEL, HELP_TEXT, CHECKED). -->
<xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
    <param argument="shuffle"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="@LABEL@" help="@HELP_TEXT@"/>
</xml>
238 | |
<!-- random_state: optional integer seed; token DEFAULT_VALUE is empty unless overridden. -->
<xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results.">
    <param argument="random_state"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Random seed number" help="@HELP_TEXT@"/>
</xml>
242 | |
<!-- warm_start: optional boolean (booltrue / boolfalse); token CHECKED sets the initial
     state. The default help text gained the missing space after "initialization,". -->
<xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.">
    <param argument="warm_start"
           type="boolean" optional="true"
           truevalue="booltrue" falsevalue="boolfalse"
           checked="@CHECKED@"
           label="Perform warm start" help="@HELP_TEXT@"/>
</xml>
246 | |
<!-- C: optional float param for the "C" argument; default value from token DEFAULT_VALUE (1.0). -->
<xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term.">
    <param argument="C"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Penalty parameter" help="@HELP_TEXT@"/>
</xml>
250 | |
251 <!--xml name="class_weight" token_default_value="" token_help_text=""> | |
252 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> | |
253 </xml--> | |
254 | |
<!-- alpha: optional float param; default value from token DEFAULT_VALUE (0.0001). -->
<xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. ">
    <param argument="alpha"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Regularization coefficient" help="@HELP_TEXT@"/>
</xml>
258 | |
<!-- n_samples: optional integer param; default value from token DEFAULT_VALUE (100). -->
<xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters.">
    <param argument="n_samples"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of samples" help="@HELP_TEXT@"/>
</xml>
262 | |
<!-- n_features: optional integer param; default value from token DEFAULT_VALUE (2). -->
<xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample.">
    <param argument="n_features"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of features" help="@HELP_TEXT@"/>
</xml>
266 | |
<!-- noise: optional float param; default value from token DEFAULT_VALUE (0.0). -->
<xml name="noise" token_default_value="0.0" token_help_text="Floating point number. ">
    <param argument="noise"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@"/>
</xml>
270 | |
<!-- C: optional float param for the "C" argument; default value from token DEFAULT_VALUE (1.0).
     NOTE(review): a macro named "C" is already defined earlier in this file with the same
     param (only the trailing space in the help text differs); confirm which definition
     the macro loader keeps and consider dropping one. -->
<xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term. ">
    <param argument="C"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="Penalty parameter" help="@HELP_TEXT@"/>
</xml>
274 | |
<!-- max_iter: optional integer param; default value (300), label and help all come from tokens. -->
<xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" ">
    <param argument="max_iter"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="@LABEL@" help="@HELP_TEXT@"/>
</xml>
278 | |
<!-- n_init: optional integer param; default value from token DEFAULT_VALUE (10). -->
<xml name="n_init" token_default_value="10">
    <param argument="n_init"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Number of runs with different centroid seeds" help=" "/>
</xml>
282 | |
<!-- init: select for the "init" argument offering "k-means++" and "random";
     no option is marked as selected. -->
<xml name="init">
    <param argument="init"
           type="select"
           label="Centroid initialization method"
           help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids.">
        <option value="k-means++">k-means++</option>
        <option value="random">random</option>
    </param>
</xml>
289 | |
<!-- gamma: optional float param; default value (1.0), label and help all come from tokens. -->
<xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" ">
    <param argument="gamma"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="@LABEL@" help="@HELP_TEXT@"/>
</xml>
293 | |
<!-- degree: optional integer param; default value (3), label and help all come from tokens. -->
<xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" ">
    <param argument="degree"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="@LABEL@" help="@HELP_TEXT@"/>
</xml>
297 | |
<!-- coef0: optional param for the "coef0" argument; default value (1), label and help
     come from tokens. Typed as float rather than integer: scikit-learn documents coef0
     as a float, and svc_advanced_options in this file already declares it as float.
     Integer inputs such as the default "1" remain valid. -->
<xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" ">
    <param argument="coef0"
           type="float" optional="true" value="@DEFAULT_VALUE@"
           label="@LABEL@" help="@HELP_TEXT@"/>
</xml>
301 | |
<!-- pos_label: optional integer param; token DEFAULT_VALUE is empty unless overridden. -->
<xml name="pos_label" token_default_value="">
    <param argument="pos_label"
           type="integer" optional="true" value="@DEFAULT_VALUE@"
           label="Label of the positive class" help=" "/>
</xml>
305 | |
<!-- average: optional select for the "average" argument (micro, samples, macro, weighted,
     None); no option is preselected and extra options can be appended through the yield. -->
<xml name="average">
    <param argument="average"
           type="select" optional="true"
           label="Averaging type" help=" ">
        <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option>
        <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option>
        <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option>
        <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option>
        <option value="None">None</option>
        <yield/>
    </param>
</xml>
316 | |
<!-- beta: float param for the "beta" argument with a fixed default of 1.0 (no tokens). -->
<xml name="beta">
    <param argument="beta"
           type="float" value="1.0"
           label="The strength of recall versus precision in the F-score" help=" "/>
</xml>
320 | |
321 | |
322 <!--Data interface--> | |
<!-- tabular_input: one tabular dataset plus optional start/end column selectors
     that both refer to that dataset. -->
<xml name="tabular_input">
    <param name="infile"
           type="data" format="tabular"
           label="Data file with numeric values"/>
    <param name="start_column"
           type="data_column" data_ref="infile" optional="True"
           label="Select a subset of data. Start column:"/>
    <param name="end_column"
           type="data_column" data_ref="infile" optional="True"
           label="End column:"/>
</xml>
328 | |
<!-- sample_cols: two dataset + column-selector pairs (infile1/col1, infile2/col2).
     Formats, labels, help texts and the "multiple" flags are all token-driven;
     further params can be appended through the yield. -->
<xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2="">
    <param name="infile1"
           type="data" format="@FORMAT1@"
           label="@LABEL1@" help="@HELP1@"/>
    <param name="col1"
           type="data_column" data_ref="infile1" multiple="@MULTIPLE1@"
           label="Select target column(s):"/>
    <param name="infile2"
           type="data" format="@FORMAT2@"
           label="@LABEL2@" help="@HELP2@"/>
    <param name="col2"
           type="data_column" data_ref="infile2" multiple="@MULTIPLE2@"
           label="Select target column(s):"/>
    <yield/>
</xml>
336 | |
<!-- samples_tabular: tabular training samples (infile1/col1) and tabular class labels
     (infile2/col2); tokens MULTIPLE1 and MULTIPLE2 set the "multiple" flags.
     Further params can be appended through the yield. -->
<xml name="samples_tabular" token_multiple1="False" token_multiple2="False">
    <param name="infile1"
           type="data" format="tabular"
           label="Training samples dataset:"/>
    <param name="col1"
           type="data_column" data_ref="infile1" multiple="@MULTIPLE1@"
           label="Select target column(s):"/>
    <param name="infile2"
           type="data" format="tabular"
           label="Dataset containing class labels:"/>
    <param name="col2"
           type="data_column" data_ref="infile2" multiple="@MULTIPLE2@"
           label="Select target column(s):"/>
    <yield/>
</xml>
344 | |
<!-- clf_inputs_extended: two conditionals, one for the true-labels dataset (infile1,
     label token LABEL1) and one for the predicted-labels dataset (infile2, label token
     LABEL2). Each offers a tabular branch (dataset + column selector) and a sparse
     branch (txt dataset only).
     Fix: the sparse branch of the predicted-labels conditional used LABEL1; it now uses
     LABEL2 like the tabular branch of the same conditional. -->
<xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
    <conditional name="true_columns">
        <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
            <option value="tabular" selected="true">Tabular</option>
            <option value="sparse">Sparse</option>
        </param>
        <when value="tabular">
            <param name="infile1" type="data" label="@LABEL1@"/>
            <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:"/>
        </when>
        <when value="sparse">
            <param name="infile1" type="data" format="txt" label="@LABEL1@"/>
        </when>
    </conditional>
    <conditional name="predicted_columns">
        <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:">
            <option value="tabular" selected="true">Tabular</option>
            <option value="sparse">Sparse</option>
        </param>
        <when value="tabular">
            <param name="infile2" type="data" label="@LABEL2@"/>
            <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
        </when>
        <when value="sparse">
            <param name="infile2" type="data" format="txt" label="@LABEL2@"/>
        </when>
    </conditional>
</xml>
373 | |
<!-- clf_inputs: tabular true-labels dataset (infile1/col1) and tabular predicted-values
     dataset (infile2/col2). Token MULTIPLE1 drives col1 and token MULTIPLE drives col2. -->
<xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False">
    <param name="infile1"
           type="data" format="tabular"
           label="@LABEL1@"/>
    <param name="col1"
           type="data_column" data_ref="infile1" multiple="@MULTIPLE1@"
           label="Select the target column:"/>
    <param name="infile2"
           type="data" format="tabular"
           label="@LABEL2@"/>
    <param name="col2"
           type="data_column" data_ref="infile2" multiple="@MULTIPLE@"
           label="Select target column(s):"/>
</xml>
380 | |
<!-- multiple_input: repeat block (at least 1, at most MAX_NUM entries) of dataset inputs;
     repeat name, format, label and help are token-driven. -->
<xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format.">
    <repeat name="@NAME@"
            min="1" max="@MAX_NUM@"
            title="Select input file(s):">
        <param name="input"
               type="data" format="@FORMAT@"
               label="@LABEL@" help="@HELP_TEXT@"/>
    </repeat>
</xml>
386 | |
<!-- sparse_target: a sparse-matrix dataset (infile1) plus a labels dataset with a column
     selector (infile2/col2); formats, labels, help and "multiple" are token-driven. -->
<xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2="">
    <param name="infile1"
           type="data" format="@FORMAT1@"
           label="@LABEL1@" help="@HELP1@"/>
    <param name="infile2"
           type="data" format="@FORMAT2@"
           label="@LABEL2@" help="@HELP2@"/>
    <param name="col2"
           type="data_column" data_ref="infile2" multiple="@MULTIPLE@"
           label="Select target column(s):"/>
</xml>
392 | |
<!-- sl_mixed_input: input-type switch. "tabular" expands samples_tabular with
     multiple1="true"; "sparse" expands sparse_target with its defaults. -->
<xml name="sl_mixed_input">
    <conditional name="input_options">
        <param name="selected_input"
               type="select"
               label="Select input type:">
            <option selected="true" value="tabular">tabular data</option>
            <option value="sparse">sparse matrix</option>
        </param>
        <when value="tabular">
            <expand macro="samples_tabular" multiple1="true"/>
        </when>
        <when value="sparse">
            <expand macro="sparse_target"/>
        </when>
    </conditional>
</xml>
407 | |
<!-- multitype_input: single dataset param "infile_transform"; format and help are
     token-driven. The default help text had "supported" misspelled. -->
<xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supported.">
    <param name="infile_transform"
           type="data" format="@FORMAT@"
           label="Select a dataset to transform:" help="@HELP@"/>
</xml>
411 | |
412 | |
413 <!--Advanced options--> | |
<!-- nn_advanced_options: collapsed "Advanced Options" section. Caller content from the
     yield comes first, followed by the weights, algorithm and leaf_size params. -->
<xml name="nn_advanced_options">
    <section name="options"
             title="Advanced Options"
             expanded="False">
        <yield/>
        <param argument="weights"
               type="select"
               label="Weight function" help="Used in prediction.">
            <option selected="true" value="uniform">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option>
            <option value="distance">Weight points by the inverse of their distance. (Distance)</option>
        </param>
        <param argument="algorithm"
               type="select"
               label="Neighbor selection algorithm" help=" ">
            <option selected="true" value="auto">Auto</option>
            <option value="ball_tree">BallTree</option>
            <option value="kd_tree">KDTree</option>
            <option value="brute">Brute-force</option>
        </param>
        <param argument="leaf_size"
               type="integer" value="30"
               label="Leaf size"
               help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/>
        <!--param name="metric"-->
        <!--param name="p"-->
        <!--param name="metric_params"-->
    </section>
</xml>
433 | |
<!-- svc_advanced_options: collapsed "Advanced Options" section. Caller content from the
     yield comes first, followed by kernel, degree, coef0, shrinking, probability and the
     shared tol / max_iter / random_state macros with overridden defaults and help.
     Fix: "dafault" corrected to "default" in the degree and coef0 help texts. -->
<xml name="svc_advanced_options">
    <section name="options" title="Advanced Options" expanded="False">
        <yield/>
        <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used.">
            <option value="rbf" selected="true">rbf</option>
            <option value="linear">linear</option>
            <option value="poly">poly</option>
            <option value="sigmoid">sigmoid</option>
            <option value="precomputed">precomputed</option>
        </param>
        <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
        <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
        <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)"
               help="Independent term in kernel function. default: 0.0 "/>
        <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
               label="Use the shrinking heuristic" help=" "/>
        <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
               label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/>
        <!-- param argument="cache_size"-->
        <!--expand macro="class_weight"/-->
        <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/>
        <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/>
        <!--param argument="decision_function_shape"-->
        <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/>
    </section>
</xml>
460 | |
<!-- spectral_clustering_advanced_options: collapsed "Advanced Options" section combining
     the shared n_clusters / random_state / n_init macros with local params (eigen_solver,
     gamma, affinity, n_neighbors, assign_labels, degree, coef0).
     Fixes: option text "Nearset neighbors" and "dafault" in two help texts corrected;
     coef0 is typed float (scikit-learn documents it as a float) while keeping default 1. -->
<xml name="spectral_clustering_advanced_options">
    <section name="options" title="Advanced Options" expanded="False">
        <expand macro="n_clusters"/>
        <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use.">
            <option value="arpack" selected="true">arpack</option>
            <option value="lobpcg">lobpcg</option>
            <option value="amg">amg</option>
            <!--None-->
        </param>
        <expand macro="random_state"/>
        <expand macro="n_init"/>
        <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/>
        <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. ">
            <option value="rbf" selected="true">RBF</option>
            <option value="precomputed">precomputed</option>
            <option value="nearest_neighbors">Nearest neighbors</option>
        </param>
        <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/>
        <!--param argument="eigen_tol"-->
        <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space.">
            <option value="kmeans" selected="true">kmeans</option>
            <option value="discretize">discretize</option>
        </param>
        <param argument="degree" type="integer" optional="true" value="3"
               label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
        <param argument="coef0" type="float" optional="true" value="1"
               label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/>
        <!--param argument="kernel_params"-->
    </section>
</xml>
491 | |
<!-- minibatch_kmeans_advanced_options: collapsed "Advanced Options" section combining the
     shared n_clusters / init / n_init / max_iter / tol / random_state macros (some with
     overridden defaults) with local batch_size, max_no_improvement, init_size and
     reassignment_ratio params.
     Fix: "doe not yield" corrected to "do not yield"; the help attribute of
     max_no_improvement is kept on one line instead of spanning three source lines. -->
<xml name="minibatch_kmeans_advanced_options">
    <section name="options" title="Advanced Options" expanded="False">
        <expand macro="n_clusters"/>
        <expand macro="init"/>
        <expand macro="n_init" default_value="3"/>
        <expand macro="max_iter" default_value="100"/>
        <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/>
        <expand macro="random_state"/>
        <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/>
        <!--param argument="compute_labels"-->
        <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts"
               help="Convergence detection based on inertia (the consecutive number of mini batches that do not yield an improvement on the smoothed inertia). To disable, set max_no_improvement to None. "/>
        <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )"/>
        <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/>
    </section>
</xml>
509 | |
<!-- kmeans_advanced_options: collapsed "Advanced Options" section combining the shared
     n_clusters / init / n_init / max_iter / tol / random_state macros with a local
     copy_x boolean. Fix: "Mofifying" corrected to "Modifying" in the copy_x help. -->
<xml name="kmeans_advanced_options">
    <section name="options" title="Advanced Options" expanded="False">
        <expand macro="n_clusters"/>
        <expand macro="init"/>
        <expand macro="n_init"/>
        <expand macro="max_iter"/>
        <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/>
        <!--param argument="precompute_distances"/-->
        <expand macro="random_state"/>
        <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
    </section>
</xml>
522 | |
<!-- birch_advanced_options: collapsed "Advanced Options" section with threshold and
     branching_factor params plus the shared n_clusters macro (default overridden to 3). -->
<xml name="birch_advanced_options">
    <section name="options"
             title="Advanced Options"
             expanded="False">
        <param argument="threshold"
               type="float" optional="true" value="0.5"
               label="Subcluster radius threshold"
               help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/>
        <param argument="branching_factor"
               type="integer" optional="true" value="50"
               label="Maximum number of subclusters per branch"
               help="Maximum number of CF subclusters in each node."/>
        <expand macro="n_clusters" default_value="3"/>
        <!--param argument="compute_labels"/-->
    </section>
</xml>
531 | |
<!-- Advanced options section for the DBSCAN algorithm: neighborhood distance (eps),
     core point density (min_samples), distance metric, neighbor search algorithm and leaf size. -->
<xml name="dbscan_advanced_options">
  <section name="options" title="Advanced Options" expanded="False">
    <param argument="eps" type="float" optional="true" value="0.5"
           label="Maximum neighborhood distance"
           help="The maximum distance between two samples for them to be considered as in the same neighborhood."/>
    <param argument="min_samples" type="integer" optional="true" value="5"
           label="Minimal core point density"
           help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/>
    <!-- Free-text metric name; this macro does not validate the value. -->
    <param argument="metric" type="text" optional="true" value="euclidean"
           label="Metric"
           help="The metric to use when calculating distance between instances in a feature array."/>
    <param argument="algorithm" type="select"
           label="Pointwise distance computation algorithm"
           help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors.">
      <option value="auto" selected="true">auto</option>
      <option value="ball_tree">ball_tree</option>
      <option value="kd_tree">kd_tree</option>
      <option value="brute">brute</option>
    </param>
    <!-- Help text typos fixed ("efficieny" and "constrution"). -->
    <param argument="leaf_size" type="integer" optional="true" value="30"
           label="Leaf size"
           help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/>
  </section>
</xml>
546 | |
<!-- Conditional that lets the user pick a clustering algorithm and then shows the
     advanced-options macro belonging to the selected algorithm.
     The <when> cases are listed in the same order as the <option> entries. -->
<xml name="clustering_algorithms_options">
  <conditional name="algorithm_options">
    <param label="Clustering Algorithm" name="selected_algorithm" type="select">
      <option selected="true" value="KMeans">KMeans</option>
      <option value="SpectralClustering">Spectral Clustering</option>
      <option value="MiniBatchKMeans">Mini Batch KMeans</option>
      <option value="DBSCAN">DBSCAN</option>
      <option value="Birch">Birch</option>
    </param>
    <when value="KMeans">
      <expand macro="kmeans_advanced_options"/>
    </when>
    <when value="SpectralClustering">
      <expand macro="spectral_clustering_advanced_options"/>
    </when>
    <when value="MiniBatchKMeans">
      <expand macro="minibatch_kmeans_advanced_options"/>
    </when>
    <when value="DBSCAN">
      <expand macro="dbscan_advanced_options"/>
    </when>
    <when value="Birch">
      <expand macro="birch_advanced_options"/>
    </when>
  </conditional>
</xml>
573 | |
<!-- Select parameter offering the base set of distance metrics.
     The yield slot lets an expanding caller append further <option> elements. -->
<xml name="distance_metrics">
  <param
      argument="metric"
      label="Distance metric"
      type="select"
      help=" ">
    <option selected="true" value="euclidean">euclidean</option>
    <option value="cityblock">cityblock</option>
    <option value="cosine">cosine</option>
    <option value="l1">l1</option>
    <option value="l2">l2</option>
    <option value="manhattan">manhattan</option>
    <yield/>
  </param>
</xml>
585 | |
<!-- Additional metric <option> entries with no enclosing <param>;
     presumably meant to be injected into a select such as distance_metrics
     for non-sparse input (TODO confirm against the calling tools). -->
<xml name="distance_nonsparse_metrics">
  <option value="braycurtis">braycurtis</option>
  <option value="canberra">canberra</option>
  <option value="chebyshev">chebyshev</option>
  <option value="correlation">correlation</option>
  <option value="dice">dice</option>
  <option value="hamming">hamming</option>
  <option value="jaccard">jaccard</option>
  <option value="kulsinski">kulsinski</option>
  <option value="mahalanobis">mahalanobis</option>
  <option value="matching">matching</option>
  <option value="minkowski">minkowski</option>
  <option value="rogerstanimoto">rogerstanimoto</option>
  <option value="russellrao">russellrao</option>
  <option value="seuclidean">seuclidean</option>
  <option value="sokalmichener">sokalmichener</option>
  <option value="sokalsneath">sokalsneath</option>
  <option value="sqeuclidean">sqeuclidean</option>
  <option value="yule">yule</option>
</xml>
606 | |
<!-- Select parameter for the kernel used as pairwise metric.
     Exactly one option is flagged as selected: previously both "rbf" and "linear"
     carried selected="true", which is ambiguous for a single-choice select.
     "rbf" is kept as the default because it was the first flagged option.
     NOTE(review): confirm that "rbf" rather than "linear" is the intended default.
     The label typo ("Pirwise") is fixed as well. -->
<xml name="pairwise_kernel_metrics">
  <param argument="metric" type="select" label="Pairwise Kernel metric" help=" ">
    <option value="rbf" selected="true">rbf</option>
    <option value="sigmoid">sigmoid</option>
    <option value="polynomial">polynomial</option>
    <option value="linear">linear</option>
    <option value="chi2">chi2</option>
    <option value="additive_chi2">additive_chi2</option>
  </param>
</xml>
617 | |
<!-- Select parameter choosing which pairwise metric function to compute.
     The yield slot lets an expanding caller append further <option> elements. -->
<xml name="sparse_pairwise_metric_functions">
  <param
      name="selected_metric_function"
      label="Select the pairwise metric you want to compute:"
      type="select">
    <option selected="true" value="euclidean_distances">Euclidean distance matrix</option>
    <option value="pairwise_distances">Distance matrix</option>
    <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option>
    <yield/>
  </param>
</xml>
626 | |
<!-- Additional pairwise metric function <option> entries with no enclosing <param>;
     presumably injected into the selected_metric_function select (TODO confirm against callers). -->
<xml name="pairwise_metric_functions">
  <option value="additive_chi2_kernel">Additive chi-squared kernel</option>
  <option value="chi2_kernel">Exponential chi-squared kernel</option>
  <option value="linear_kernel">Linear kernel</option>
  <option value="manhattan_distances">L1 distances</option>
  <option value="pairwise_kernels">Kernel</option>
  <option value="polynomial_kernel">Polynomial kernel</option>
  <option value="rbf_kernel">Gaussian (rbf) kernel</option>
  <option value="laplacian_kernel">Laplacian kernel</option>
</xml>
637 | |
<!-- <when> cases for the pairwise_distances and euclidean_distances metric functions.
     The pairwise_distances case forwards its own yield into the distance_metrics macro,
     so callers can extend the list of metric options. -->
<xml name="sparse_pairwise_condition">
  <when value="pairwise_distances">
    <section expanded="False" name="options" title="Advanced Options">
      <expand macro="distance_metrics">
        <yield/>
      </expand>
    </section>
  </when>
  <when value="euclidean_distances">
    <section expanded="False" name="options" title="Advanced Options">
      <param
          argument="squared"
          label="Return squared Euclidean distances"
          type="boolean"
          checked="false"
          truevalue="booltrue"
          falsevalue="boolfalse"
          optional="true"
          help=" "/>
    </section>
  </when>
</xml>
653 | |
<!-- <when> case for the pairwise_distances_argmin metric function: axis, distance metric
     (extensible through the forwarded yield) and batch size. -->
<xml name="argmin_distance_condition">
  <when value="pairwise_distances_argmin">
    <section expanded="False" name="options" title="Advanced Options">
      <param
          argument="axis"
          label="Axis"
          type="integer"
          value="1"
          optional="true"
          help="Axis along which the argmin and distances are to be computed."/>
      <expand macro="distance_metrics">
        <yield/>
      </expand>
      <param
          argument="batch_size"
          label="Batch size"
          type="integer"
          value="500"
          optional="true"
          help="Number of rows to be processed in each batch run."/>
    </section>
  </when>
</xml>
665 | |
<!-- Select parameter choosing a preprocessor.
     The yield slot lets an expanding caller append further <option> elements. -->
<xml name="sparse_preprocessors">
  <param
      name="selected_pre_processor"
      label="Select a preprocessor:"
      type="select">
    <option selected="true" value="StandardScaler">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
    <option value="Binarizer">Binarizer (Binarizes data)</option>
    <option value="Imputer">Imputer (Completes missing values)</option>
    <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
    <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
    <yield/>
  </param>
</xml>
676 | |
<!-- <when> cases holding the input and advanced options for each preprocessor offered by
     the sparse_preprocessors select. The trailing yield lets callers append further cases.
     User-facing typos are fixed: "supporetd" became "supported", "fasle" became "false",
     and the missing word "values" was added to the most_frequent option label. -->
<xml name="sparse_preprocessor_options">
  <when value="Binarizer">
    <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
    <section name="options" title="Advanced Options" expanded="False">
      <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Use a copy of data for precomputing binarization" help=" "/>
      <param argument="threshold" type="float" optional="true" value="0.0"
             label="Threshold"
             help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
    </section>
  </when>
  <when value="Imputer">
    <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
    <section name="options" title="Advanced Options" expanded="False">
      <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Use a copy of data for precomputing imputation" help=" "/>
      <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
        <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
        <option value="median">Replace missing values using the median along the axis</option>
        <option value="most_frequent">Replace missing values using the most frequent value along the axis</option>
      </param>
      <param argument="missing_values" type="text" optional="true" value="NaN"
             label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
      <!-- The axis is exposed as a boolean that maps directly to the values 1 and 0. -->
      <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
             label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/>
      <!-- Alternative select-based axis parameter, kept disabled. -->
      <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
        <option value="0" selected="true">Impute along columns</option>
        <option value="1">Impute along rows</option>
      </param-->
    </section>
  </when>
  <when value="StandardScaler">
    <expand macro="multitype_input"/>
    <section name="options" title="Advanced Options" expanded="False">
      <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Use a copy of data for performing inplace scaling" help=" "/>
      <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Center the data before scaling" help=" "/>
      <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
    </section>
  </when>
  <when value="MaxAbsScaler">
    <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
    <section name="options" title="Advanced Options" expanded="False">
      <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Use a copy of data for precomputing scaling" help=" "/>
    </section>
  </when>
  <when value="Normalizer">
    <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
    <section name="options" title="Advanced Options" expanded="False">
      <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
        <option value="l1" selected="true">l1</option>
        <option value="l2">l2</option>
        <option value="max">max</option>
      </param>
      <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
             label="Use a copy of data for precomputing row normalization" help=" "/>
    </section>
  </when>
  <yield/>
</xml>
740 | |
741 <!-- Outputs --> | |
742 | |
<!-- Output datasets, each gated by the task chosen in the selected_tasks conditional:
     a tabular prediction file for the "load" task and a zip file for the "train" task. -->
<xml name="output">
  <outputs>
    <data name="outfile_predict" format="tabular">
      <filter>selected_tasks['selected_task'] == 'load'</filter>
    </data>
    <data name="outfile_fit" format="zip">
      <filter>selected_tasks['selected_task'] == 'train'</filter>
    </data>
  </outputs>
</xml>
753 | |
754 | |
755 <!--Citations--> | |
<!-- BibTeX citation for EDeN (Zenodo record).
     The stray second closing brace that followed the entry has been removed,
     so the braces of the entry are balanced. -->
<xml name="eden_citation">
  <citations>
    <citation type="bibtex">
      @misc{fabrizio_costa_2015_15094,
        author = {Fabrizio Costa and
                  Björn Grüning and
                  gigolo},
        title = {EDeN: EDeN - Graph Vectorizer},
        month = feb,
        year = 2015,
        doi = {10.5281/zenodo.15094},
        url = {http://dx.doi.org/10.5281/zenodo.15094}
      }
    </citation>
  </citations>
</xml>
773 | |
<!-- BibTeX citation for scikit-learn.
     The field separator that was missing between the year and url fields has been added;
     without it the entry is not well-formed BibTeX. -->
<xml name="sklearn_citation">
  <citations>
    <citation type="bibtex">
      @article{scikit-learn,
        title={Scikit-learn: Machine Learning in {P}ython},
        author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
                and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
                and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
                Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
        journal={Journal of Machine Learning Research},
        volume={12},
        pages={2825--2830},
        year={2011},
        url = {https://github.com/scikit-learn/scikit-learn}
      }
    </citation>
  </citations>
</xml>
792 | |
<!-- BibTeX citation for SciPy.
     The entry previously had an empty citation key; it is now keyed "scipy" so it can be
     referenced and is accepted by parsers that require a key. The url field uses braces
     like the other fields of this entry. -->
<xml name="scipy_citation">
  <citations>
    <citation type="bibtex">
      @Misc{scipy,
        author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
        title = {{SciPy}: Open source scientific tools for {Python}},
        year = {2001--},
        url = {http://www.scipy.org/},
        note = {[Online; accessed 2016-04-09]}
      }
    </citation>
  </citations>
</xml>
806 | |
807 </macros> |