comparison main_macros.xml @ 0:333507faecab draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
author bgruening
date Sat, 28 Apr 2018 18:10:26 -0400
parents
children 02eadaaa4bf7
comparison
equal deleted inserted replaced
-1:000000000000 0:333507faecab
1 <macros>
2 <token name="@VERSION@">0.9</token> <!-- Text substituted for the VERSION token: 0.9. -->
3
4 <token name="@COLUMNS_FUNCTION@">
5 def read_columns(f, c, **args):
6     """Read f with pandas.read_csv (extra keyword arguments are passed through) and return the values of the columns listed in c, a comma-separated string of 1-based column numbers."""
7     frame = pandas.read_csv(f, **args)
8     one_based = [int(field) for field in c.split(',')]
9     zero_based = [number - 1 for number in one_based]
10     selected = frame.iloc[:, zero_based].values
11     return selected
12 </token>
13
14 <xml name="python_requirements"> <!-- Shared requirements list: python 2.7, scikit-learn 0.19.1 and pandas 0.22.0; the yield slot lets a caller append further requirements. -->
15 <requirements>
16 <requirement version="2.7" type="package">python</requirement>
17 <requirement version="0.19.1" type="package">scikit-learn</requirement>
18 <requirement version="0.22.0" type="package">pandas</requirement>
19 <yield/>
20 </requirements>
21 </xml>
22
23 <xml name="macro_stdio"> <!-- Shared stdio rule: any exit code of 1 or above is reported at level fatal. -->
24 <stdio>
25 <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/>
26 </stdio>
27 </xml>
28
29
30 <!--Generic interface-->
31 <xml name="train_loadConditional" token_train="tabular" token_data="tabular" token_model="txt"> <!-- Train/load switch. The load branch asks for a model file and a dataset plus a prediction type; the train branch asks for training samples and yields into the selected_algorithms conditional. Dataset formats come from the train, data and model tokens. -->
32 <conditional name="selected_tasks">
33 <param type="select" name="selected_task" label="Select a Classification Task">
34 <option selected="true" value="train">Train a model</option>
35 <option value="load">Load a model and predict</option>
36 </param>
37 <when value="load">
38 <param type="data" name="infile_model" format="@MODEL@" label="Models" help="Select a model file."/>
39 <param type="data" name="infile_data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/>
40 <conditional name="prediction_options">
41 <param type="select" name="prediction_option" label="Select the type of prediction">
42 <option value="predict">Predict class labels</option>
43 <option value="advanced">Include advanced options</option>
44 </param>
45 <when value="predict">
46 </when>
47 <when value="advanced">
48 </when>
49 </conditional>
50 </when>
51 <when value="train">
52 <param type="data" name="infile_train" format="@TRAIN@" label="Training samples (tabular)"/>
53 <conditional name="selected_algorithms">
54 <yield/>
55 </conditional>
56 </when>
57 </conditional>
58 </xml>
59
60 <xml name="sl_Conditional" token_train="tabular" token_data="tabular" token_model="txt"> <!-- Train/load switch variant: the load branch additionally asks whether the dataset has a header, and the train branch only yields into the selected_algorithms conditional (no training file parameter here). -->
61 <conditional name="selected_tasks">
62 <param type="select" name="selected_task" label="Select a Classification Task">
63 <option selected="true" value="train">Train a model</option>
64 <option value="load">Load a model and predict</option>
65 </param>
66 <when value="load">
67 <param type="data" name="infile_model" format="@MODEL@" label="Models" help="Select a model file."/>
68 <param type="data" name="infile_data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/>
69 <param type="boolean" name="header" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
70 <conditional name="prediction_options">
71 <param type="select" name="prediction_option" label="Select the type of prediction">
72 <option value="predict">Predict class labels</option>
73 <option value="advanced">Include advanced options</option>
74 </param>
75 <when value="predict">
76 </when>
77 <when value="advanced">
78 </when>
79 </conditional>
80 </when>
81 <when value="train">
82 <conditional name="selected_algorithms">
83 <yield/>
84 </conditional>
85 </when>
86 </conditional>
87 </xml>
88
89 <xml name="advanced_section"> <!-- Collapsed section named options, titled Advanced Options; its content is supplied by the caller through yield. -->
90 <section title="Advanced Options" name="options" expanded="False">
91 <yield/>
92 </section>
93 </xml>
94
95
96 <!--Generalized Linear Models-->
97 <xml name="loss" token_help=" " token_select="false"> <!-- Select box for the loss argument. token_select decides whether squared_loss is preselected; callers can append options through yield. -->
98 <param type="select" argument="loss" label="Loss function" help="@HELP@">
99 <option selected="@SELECT@" value="squared_loss">squared loss</option>
100 <option value="huber">huber</option>
101 <option value="epsilon_insensitive">epsilon insensitive</option>
102 <option value="squared_epsilon_insensitive">squared epsilon insensitive</option>
103 <yield />
104 </param>
105 </xml>
106
107 <xml name="penalty" token_help=" "> <!-- Select box for the penalty argument with l2 preselected; callers can append options through yield. -->
108 <param type="select" argument="penalty" label="Penalty (regularization term)" help="@HELP@">
109 <option selected="true" value="l2">l2</option>
110 <option value="l1">l1</option>
111 <option value="elasticnet">elastic net</option>
112 <option value="none">none</option>
113 <yield />
114 </param>
115 </xml>
116
117 <xml name="l1_ratio" token_default_value="0.15" token_help=" "> <!-- Float input for l1_ratio; value and help text come from the tokens (0.15 and blank by default). -->
118 <param type="float" argument="l1_ratio" value="@DEFAULT_VALUE@" label="Elastic Net mixing parameter" help="@HELP@"/>
119 </xml>
120
121 <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. "> <!-- Float input for epsilon; default 0.1 unless the caller overrides the token. -->
122 <param type="float" argument="epsilon" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@"/>
123 </xml>
124
125 <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false"> <!-- Optional select box for the learning_rate schedule. token_selected1 preselects optimal, token_selected2 preselects invscaling; callers can append options through yield. -->
126 <param type="select" argument="learning_rate" optional="true" label="Learning rate schedule" help="@HELP@">
127 <option selected="@SELECTED1@" value="optimal">optimal</option>
128 <option value="constant">constant</option>
129 <option selected="@SELECTED2@" value="invscaling">inverse scaling</option>
130 <yield />
131 </param>
132 </xml>
133
134 <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. "> <!-- Float input for eta0; default 0.0 unless the caller overrides the token. -->
135 <param type="float" argument="eta0" value="@DEFAULT_VALUE@" label="Initial learning rate" help="@HELP@"/>
136 </xml>
137
138 <xml name="power_t" token_default_value="0.5" token_help=" "> <!-- Float input for power_t; default 0.5 unless the caller overrides the token. -->
139 <param type="float" argument="power_t" value="@DEFAULT_VALUE@" label="Exponent for inverse scaling learning rate" help="@HELP@"/>
140 </xml>
141
142 <xml name="normalize" token_checked="false" token_help=" "> <!-- Optional boolean for the normalize argument. token_checked sets the initial state; token_help supplies the help text (it was declared but previously ignored by the param). -->
143 <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help="@HELP@"/>
144 </xml>
145
146 <xml name="copy_X" token_checked="true" token_help="If false, samples would be overwritten. "> <!-- Optional boolean for the copy_X argument. token_checked sets the initial state; token_help now carries the former hard-coded help text as its default, so callers can override it. -->
147 <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="@HELP@"/>
148 </xml>
149
150 <xml name="ridge_params"> <!-- Parameter bundle built from the normalize, alpha, fit_intercept, max_iter, tol, copy_X and random_state macros plus a solver select box (auto preselected). -->
151 <expand macro="normalize" />
152 <expand default_value="1.0" macro="alpha"/>
153 <expand macro="fit_intercept" />
154 <expand default_value="" macro="max_iter"/>
155 <expand help_text="Precision of the solution. " default_value="0.001" macro="tol"/>
156 <!--class_weight-->
157 <expand macro="copy_X" />
158 <param type="select" argument="solver" value="" label="Solver to use in the computational routines" help=" ">
159 <option selected="true" value="auto">auto</option>
160 <option value="svd">svd</option>
161 <option value="cholesky">cholesky</option>
162 <option value="lsqr">lsqr</option>
163 <option value="sparse_cg">sparse_cg</option>
164 <option value="sag">sag</option>
165 </param>
166 <expand macro="random_state" />
167 </xml>
168
169 <!--Ensemble methods-->
170 <xml name="n_estimators" token_default_value="10" token_help=" "> <!-- Optional integer input for n_estimators; default 10 unless the caller overrides the token. -->
171 <param type="integer" argument="n_estimators" optional="true" value="@DEFAULT_VALUE@" label="Number of trees in the forest" help="@HELP@"/>
172 </xml>
173
174 <xml name="max_depth" token_default_value="" token_help=" "> <!-- Optional integer input for max_depth; empty by default. -->
175 <param type="integer" argument="max_depth" optional="true" value="@DEFAULT_VALUE@" label="Maximum depth of the tree" help="@HELP@"/>
176 </xml>
177
178 <xml name="min_samples_split" token_type="integer" token_default_value="2" token_help=" "> <!-- Optional input for min_samples_split; the param type itself is a token (integer by default), default value 2. -->
179 <param type="@TYPE@" argument="min_samples_split" optional="true" value="@DEFAULT_VALUE@" label="Minimum number of samples required to split an internal node" help="@HELP@"/>
180 </xml>
181
182 <xml name="min_samples_leaf" token_type="integer" token_default_value="1" token_label="Minimum number of samples in newly created leaves" token_help=" "> <!-- Optional input for min_samples_leaf; type, default value (1), label and help are all tokens. -->
183 <param type="@TYPE@" argument="min_samples_leaf" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP@"/>
184 </xml>
185
186 <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" "> <!-- Optional float input for min_weight_fraction_leaf; default 0.0. -->
187 <param type="float" argument="min_weight_fraction_leaf" optional="true" value="@DEFAULT_VALUE@" label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@"/>
188 </xml>
189
190 <xml name="max_leaf_nodes" token_default_value="" token_help=" "> <!-- Optional integer input for max_leaf_nodes; empty by default. -->
191 <param type="integer" argument="max_leaf_nodes" optional="true" value="@DEFAULT_VALUE@" label="Maximum number of leaf nodes in best-first method" help="@HELP@"/>
192 </xml>
193
194 <xml name="min_impurity_decrease" token_default_value="0" token_help=" "> <!-- Optional float input for min_impurity_decrease; default 0. -->
195 <param type="float" argument="min_impurity_decrease" value="@DEFAULT_VALUE@" optional="true" label="The threshold value of impurity for stopping node splitting" help="@HELP@"/>
196 </xml>
197
198 <xml name="bootstrap" token_checked="true" token_help=" "> <!-- Optional boolean for the bootstrap argument. The unchecked state emits boolfalse, the same marker used by every other boolean in this file (it was misspelled boolflase). -->
199 <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@"/>
200 </xml>
201
202 <xml name="criterion" token_help=" "> <!-- Select box for the criterion argument with gini preselected; token_help supplies the help text (it was declared but previously ignored), and callers can append options through yield. -->
203 <param argument="criterion" type="select" label="Function to measure the quality of a split" help="@HELP@">
204 <option value="gini" selected="true">Gini impurity</option>
205 <option value="entropy">Information gain</option>
206 <yield/>
207 </param>
208 </xml>
209
210 <xml name="criterion2" token_help=""> <!-- Select box for the criterion argument offering mse and mae; token_help (empty by default) is now wired to the help attribute, and callers can append options through yield. -->
211 <param argument="criterion" type="select" label="Function to measure the quality of a split" help="@HELP@">
212 <option value="mse">mse - mean squared error</option>
213 <option value="mae">mae - mean absolute error</option>
214 <yield/>
215 </param>
216 </xml>
217
218 <xml name="oob_score" token_checked="false" token_help=" "> <!-- Optional boolean for the oob_score argument; unchecked by default. -->
219 <param type="boolean" argument="oob_score" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/>
220 </xml>
221
222 <xml name="max_features"> <!-- Conditional for max_features: auto (preselected), sqrt and log2 need no further input; number_input reveals an optional float field named num_max_features. -->
223 <conditional name="select_max_features">
224 <param type="select" argument="max_features" label="max_features">
225 <option selected="true" value="auto">auto - max_features=n_features</option>
226 <option value="sqrt">sqrt - max_features=sqrt(n_features)</option>
227 <option value="log2">log2 - max_features=log2(n_features)</option>
228 <option value="number_input">I want to type the number in or input None type</option>
229 </param>
230 <when value="auto">
231 </when>
232 <when value="sqrt">
233 </when>
234 <when value="log2">
235 </when>
236 <when value="number_input">
237 <param type="float" name="num_max_features" value="" optional="true" label="Input max_features number:" help="If int, consider the number of features at each split; If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split."/>
238 </when>
239 </conditional>
240 </xml>
241
242 <xml name="verbose" token_default_value="0" token_help="If 1 then it prints progress and performance once in a while. If greater than 1 then it prints progress and performance for every tree."> <!-- Optional integer input for verbose; default 0. -->
243 <param type="integer" argument="verbose" value="@DEFAULT_VALUE@" optional="true" label="Enable verbose output" help="@HELP@"/>
244 </xml>
245
246 <xml name="learning_rate" token_default_value="1.0" token_help=" "> <!-- Optional float input for learning_rate; default 1.0. -->
247 <param type="float" argument="learning_rate" optional="true" value="@DEFAULT_VALUE@" label="Learning rate" help="@HELP@"/>
248 </xml>
249
250 <xml name="subsample" token_help=" "> <!-- Optional float input for subsample with a fixed value of 1.0; only the help text is a token. -->
251 <param type="float" argument="subsample" value="1.0" optional="true" label="The fraction of samples to be used for fitting the individual base learners" help="@HELP@"/>
252 </xml>
253
254 <xml name="presort"> <!-- Select box for the presort argument: auto (preselected), true or false. -->
255 <param type="select" argument="presort" label="Whether to presort the data to speed up the finding of best splits in fitting" >
256 <option selected="true" value="auto">auto</option>
257 <option value="true">true</option>
258 <option value="false">false</option>
259 </param>
260 </xml>
261
262 <!--Parameters-->
263 <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection."> <!-- Optional float input for tol; default 0.0. Note that this macro takes its help from token_help_text. -->
264 <param type="float" argument="tol" optional="true" value="@DEFAULT_VALUE@" label="Tolerance" help="@HELP_TEXT@"/>
265 </xml>
266
267 <xml name="n_clusters" token_default_value="8"> <!-- Optional integer input for n_clusters; default 8. -->
268 <param type="integer" argument="n_clusters" optional="true" value="@DEFAULT_VALUE@" label="Number of clusters" help=" "/>
269 </xml>
270
271 <xml name="fit_intercept" token_checked="true"> <!-- Optional boolean for fit_intercept; checked by default. -->
272 <param type="boolean" argument="fit_intercept" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
273 </xml>
274
275 <xml name="n_jobs" token_default_value="1" token_label="The number of jobs to run in parallel for both fit and predict"> <!-- Optional integer input for n_jobs; default 1, label overridable through token_label. -->
276 <param type="integer" argument="n_jobs" value="@DEFAULT_VALUE@" optional="true" label="@LABEL@" help="If -1, then the number of jobs is set to the number of cores"/>
277 </xml>
278
279 <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> <!-- Optional integer input for n_iter; default 5. -->
280 <param type="integer" argument="n_iter" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/>
281 </xml>
282
283 <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> <!-- Optional boolean for shuffle; checked by default, with label and help supplied by tokens. -->
284 <param type="boolean" argument="shuffle" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/>
285 </xml>
286
287 <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> <!-- Optional integer input for random_state; empty by default. -->
288 <param type="integer" argument="random_state" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/>
289 </xml>
290
291 <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution."> <!-- Optional boolean for warm_start; checked by default. -->
292 <param type="boolean" argument="warm_start" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@"/>
293 </xml>
294
295 <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term."> <!-- Optional float input for C; default 1.0. -->
296 <param type="float" argument="C" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@"/>
297 </xml>
298
299 <!--xml name="class_weight" token_default_value="" token_help_text="">
300 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/>
301 </xml-->
302
303 <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. "> <!-- Optional float input for alpha; default 0.0001. -->
304 <param type="float" argument="alpha" optional="true" value="@DEFAULT_VALUE@" label="Regularization coefficient" help="@HELP_TEXT@"/>
305 </xml>
306
307 <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters."> <!-- Optional integer input for n_samples; default 100. -->
308 <param type="integer" argument="n_samples" optional="true" value="@DEFAULT_VALUE@" label="Number of samples" help="@HELP_TEXT@"/>
309 </xml>
310
311 <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample."> <!-- Optional integer input for n_features; default 2. -->
312 <param type="integer" argument="n_features" optional="true" value="@DEFAULT_VALUE@" label="Number of features" help="@HELP_TEXT@"/>
313 </xml>
314
315 <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. "> <!-- Optional float input for noise; default 0.0. -->
316 <param type="float" argument="noise" optional="true" value="@DEFAULT_VALUE@" label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@"/>
317 </xml>
318
319 <!-- A macro named "C" is already defined earlier in this file with the same float parameter, default (1.0) and label. -->
320 <!-- The second copy that stood here differed only by a trailing space in its help text, so it was removed to leave a single definition. -->
321
322
323 <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" "> <!-- Optional integer input for max_iter; default 300, with label and help supplied by tokens. -->
324 <param type="integer" argument="max_iter" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/>
325 </xml>
326
327 <xml name="n_init" token_default_value="10" > <!-- Optional integer input for n_init; default 10. -->
328 <param type="integer" argument="n_init" optional="true" value="@DEFAULT_VALUE@" label="Number of runs with different centroid seeds" help=" "/>
329 </xml>
330
331 <xml name="init"> <!-- Select box for the init argument offering k-means++ and random; neither option is explicitly preselected. -->
332 <param type="select" argument="init" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids.">
333 <option value="k-means++">k-means++</option>
334 <option value="random">random</option>
335 </param>
336 </xml>
337
338 <xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" "> <!-- Optional float input for gamma; default 1.0, with label and help supplied by tokens. -->
339 <param type="float" argument="gamma" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/>
340 </xml>
341
342 <xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" "> <!-- Optional integer input for degree; default 3, with label and help supplied by tokens. -->
343 <param type="integer" argument="degree" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/>
344 </xml>
345
346 <xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" "> <!-- Optional integer input for coef0; default 1, with label and help supplied by tokens. -->
347 <param type="integer" argument="coef0" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/>
348 </xml>
349
350 <xml name="pos_label" token_default_value=""> <!-- Optional integer input for pos_label; empty by default. -->
351 <param type="integer" argument="pos_label" optional="true" value="@DEFAULT_VALUE@" label="Label of the positive class" help=" "/>
352 </xml>
353
354 <xml name="average"> <!-- Optional select box for the average argument (micro, samples, macro, weighted, None); callers can append options through yield. -->
355 <param type="select" argument="average" optional="true" label="Averaging type" help=" ">
356 <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option>
357 <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option>
358 <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option>
359 <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option>
360 <option value="None">None</option>
361 <yield />
362 </param>
363 </xml>
364
365 <xml name="beta"> <!-- Float input for beta with a fixed value of 1.0. -->
366 <param type="float" argument="beta" value="1.0" label="The strength of recall versus precision in the F-score" help=" "/>
367 </xml>
368
369
370 <!--Data interface-->
371 <xml name="tabular_input"> <!-- One tabular dataset input plus optional start and end column selectors that both reference it. -->
372 <param type="data" name="infile" format="tabular" label="Data file with numeric values"/>
373 <param type="data_column" name="start_column" data_ref="infile" optional="True" label="Select a subset of data. Start column:" />
374 <param type="data_column" name="end_column" data_ref="infile" optional="True" label="End column:" />
375 </xml>
376
377 <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2=""> <!-- Two dataset inputs, each followed by a column selector; label, format, help and multi-select behaviour are tokens per input. Callers can append parameters through yield. -->
378 <param type="data" name="infile1" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
379 <param type="data_column" name="col1" multiple="@MULTIPLE1@" data_ref="infile1" label="Select target column(s):"/>
380 <param type="data" name="infile2" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>
381 <param type="data_column" name="col2" multiple="@MULTIPLE2@" data_ref="infile2" label="Select target column(s):"/>
382 <yield />
383 </xml>
384
385 <xml name="samples_tabular" token_multiple1="False" token_multiple2="False"> <!-- Two tabular dataset inputs (training samples and class labels), each with a header checkbox and a column selector; multi-select behaviour is a token per selector. Callers can append parameters through yield. -->
386 <param type="data" name="infile1" format="tabular" label="Training samples dataset:"/>
387 <param type="boolean" name="header1" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
388 <param type="data_column" name="col1" multiple="@MULTIPLE1@" data_ref="infile1" label="Select target column(s):"/>
389 <param type="data" name="infile2" format="tabular" label="Dataset containing class labels:"/>
390 <param type="boolean" name="header2" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
391 <param type="data_column" name="col2" multiple="@MULTIPLE2@" data_ref="infile2" label="Select target column(s):"/>
392 <yield />
393 </xml>
394
395 <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False"> <!-- Two conditionals, one for the true-labels dataset (labelled by token_label1) and one for the predicted-labels dataset (labelled by token_label2); each offers a tabular branch with a column selector and a sparse branch. The sparse predicted-labels input previously used the label of the first dataset. -->
396 <conditional name="true_columns">
397 <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
398 <option value="tabular" selected="true">Tabular</option>
399 <option value="sparse">Sparse</option>
400 </param>
401 <when value="tabular">
402 <param name="infile1" type="data" label="@LABEL1@"/>
403 <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:"/>
404 </when>
405 <when value="sparse">
406 <param name="infile1" type="data" format="txt" label="@LABEL1@"/>
407 </when>
408 </conditional>
409 <conditional name="predicted_columns">
410 <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:">
411 <option value="tabular" selected="true">Tabular</option>
412 <option value="sparse">Sparse</option>
413 </param>
414 <when value="tabular">
415 <param name="infile2" type="data" label="@LABEL2@"/>
416 <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
417 </when>
418 <when value="sparse">
419 <param name="infile2" type="data" format="txt" label="@LABEL2@"/>
420 </when>
421 </conditional>
422 </xml>
423
424 <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False"> <!-- Two tabular dataset inputs, each with a header checkbox and a column selector. Note the token names: token_multiple1 governs col1 while token_multiple governs col2. -->
425 <param type="data" name="infile1" format="tabular" label="@LABEL1@"/>
426 <param type="boolean" name="header1" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
427 <param type="data_column" name="col1" multiple="@MULTIPLE1@" data_ref="infile1" label="Select the target column:"/>
428 <param type="data" name="infile2" format="tabular" label="@LABEL2@"/>
429 <param type="boolean" name="header2" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
430 <param type="data_column" name="col2" multiple="@MULTIPLE@" data_ref="infile2" label="Select target column(s):"/>
431 </xml>
432
433 <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format."> <!-- Repeat block holding one dataset input per entry; at least 1 entry, at most token_max_num (10 by default). -->
434 <repeat title="Select input file(s):" name="@NAME@" min="1" max="@MAX_NUM@">
435 <param type="data" name="input" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@"/>
436 </repeat>
437 </xml>
438
439 <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> <!-- Two dataset inputs (txt and tabular by default) plus a column selector that references the second one. -->
440 <param type="data" name="infile1" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
441 <param type="data" name="infile2" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>
442 <param type="data_column" name="col2" multiple="@MULTIPLE@" data_ref="infile2" label="Select target column(s):"/>
443 </xml>
444
445 <xml name="sl_mixed_input"> <!-- Input-type switch: the tabular branch (preselected) expands samples_tabular with multiple1 set to true; the sparse branch expands sparse_target. -->
446 <conditional name="input_options">
447 <param type="select" name="selected_input" label="Select input type:">
448 <option selected="true" value="tabular">tabular data</option>
449 <option value="sparse">sparse matrix</option>
450 </param>
451 <when value="tabular">
452 <expand multiple1="true" macro="samples_tabular"/>
453 </when>
454 <when value="sparse">
455 <expand macro="sparse_target" />
456 </when>
457 </conditional>
458 </xml>
459
460 <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supported."> <!-- Single dataset input named infile_transform; format and help are tokens. The default help text had a spelling error (supporetd). -->
461 <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/>
462 </xml>
463
464
465 <!--Advanced options-->
466 <xml name="nn_advanced_options"> <!-- Collapsed Advanced Options section: caller content via yield first, then the weights select (uniform preselected), the algorithm select (auto preselected) and an integer leaf_size of 30. -->
467 <section title="Advanced Options" name="options" expanded="False">
468 <yield />
469 <param type="select" argument="weights" label="Weight function" help="Used in prediction.">
470 <option selected="true" value="uniform">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option>
471 <option value="distance">Weight points by the inverse of their distance. (Distance)</option>
472 </param>
473 <param type="select" argument="algorithm" label="Neighbor selection algorithm" help=" ">
474 <option selected="true" value="auto">Auto</option>
475 <option value="ball_tree">BallTree</option>
476 <option value="kd_tree">KDTree</option>
477 <option value="brute">Brute-force</option>
478 </param>
479 <param type="integer" argument="leaf_size" value="30" label="Leaf size" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/>
480 <!--param name="metric"-->
481 <!--param name="p"-->
482 <!--param name="metric_params"-->
483 </section>
484 </xml>
485
486 <xml name="svc_advanced_options"> <!-- Collapsed Advanced Options section: caller content via yield, then kernel, degree, coef0, shrinking and probability parameters, followed by the tol, max_iter and random_state macros with their own defaults. The help texts of degree and coef0 had the word default misspelled. -->
487 <section name="options" title="Advanced Options" expanded="False">
488 <yield/>
489 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used.">
490 <option value="rbf" selected="true">rbf</option>
491 <option value="linear">linear</option>
492 <option value="poly">poly</option>
493 <option value="sigmoid">sigmoid</option>
494 <option value="precomputed">precomputed</option>
495 </param>
496 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
497 <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
498 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)"
499 help="Independent term in kernel function. default: 0.0 "/>
500 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
501 label="Use the shrinking heuristic" help=" "/>
502 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
503 label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/>
504 <!-- param argument="cache_size"-->
505 <!--expand macro="class_weight"/-->
506 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/>
507 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/>
508 <!--param argument="decision_function_shape"-->
509 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/>
510 </section>
511 </xml>
512
513 <xml name="spectral_clustering_advanced_options"> <!-- Collapsed Advanced Options section combining the n_clusters, random_state and n_init macros with eigen_solver, gamma, affinity, n_neighbors, assign_labels, degree and coef0 parameters. Spelling fixed in the nearest_neighbors option label and in the degree and coef0 help texts. -->
514 <section name="options" title="Advanced Options" expanded="False">
515 <expand macro="n_clusters"/>
516 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use.">
517 <option value="arpack" selected="true">arpack</option>
518 <option value="lobpcg">lobpcg</option>
519 <option value="amg">amg</option>
520 <!--None-->
521 </param>
522 <expand macro="random_state"/>
523 <expand macro="n_init"/>
524 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/>
525 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. ">
526 <option value="rbf" selected="true">RBF</option>
527 <option value="precomputed">precomputed</option>
528 <option value="nearest_neighbors">Nearest neighbors</option>
529 </param>
530 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/>
531 <!--param argument="eigen_tol"-->
532 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space.">
533 <option value="kmeans" selected="true">kmeans</option>
534 <option value="discretize">discretize</option>
535 </param>
536 <param argument="degree" type="integer" optional="true" value="3"
537 label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
538 <param argument="coef0" type="integer" optional="true" value="1"
539 label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/>
540 <!--param argument="kernel_params"-->
541 </section>
542 </xml>
543
544 <xml name="minibatch_kmeans_advanced_options"> <!-- Collapsed Advanced Options section: n_clusters, init, n_init (3), max_iter (100), tol and random_state macros plus batch_size, max_no_improvement, init_size and reassignment_ratio parameters. Grammar fixed in the max_no_improvement help text (doe not). -->
545 <section name="options" title="Advanced Options" expanded="False">
546 <expand macro="n_clusters"/>
547 <expand macro="init"/>
548 <expand macro="n_init" default_value="3"/>
549 <expand macro="max_iter" default_value="100"/>
550 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/>
551 <expand macro="random_state"/>
552 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/>
553 <!--param argument="compute_labels"-->
554 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help="
555 Convergence detection based on inertia (the consecutive number of mini batches that does not yield an improvement on the smoothed inertia).
556 To disable, set max_no_improvement to None. "/>
557 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )"/>
558 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/>
559 </section>
560 </xml>
561
<!-- Advanced options shown for the KMeans choice of clustering_algorithms_options.
Built from the shared n_clusters, init, n_init, max_iter, tol and random_state macros
(tol is given default_value 0.0001) plus the copy_x switch. precompute_distances is
commented out, so it is not exposed. -->
562 <xml name="kmeans_advanced_options">
563 <section name="options" title="Advanced Options" expanded="False">
564 <expand macro="n_clusters"/>
565 <expand macro="init"/>
566 <expand macro="n_init"/>
567 <expand macro="max_iter"/>
568 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/>
569 <!--param argument="precompute_distances"/-->
570 <expand macro="random_state"/>
571 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
572 </section>
573 </xml>
574
<!-- Advanced options shown for the Birch choice of clustering_algorithms_options:
the subcluster radius threshold, the branching factor, and the shared n_clusters
macro expanded with default_value 3. compute_labels is commented out, so it is
not exposed. -->
575 <xml name="birch_advanced_options">
576 <section title="Advanced Options" name="options" expanded="False">
577 <param argument="threshold" label="Subcluster radius threshold" type="float" value="0.5" optional="true" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/>
578 <param argument="branching_factor" label="Maximum number of subclusters per branch" type="integer" value="50" optional="true" help="Maximum number of CF subclusters in each node."/>
579 <expand default_value="3" macro="n_clusters" />
580 <!--param argument="compute_labels"/-->
581 </section>
582 </xml>
583
<!-- Advanced options shown for the DBSCAN choice of clustering_algorithms_options:
neighborhood distance (eps), core point density (min_samples), a free-text metric,
the nearest-neighbour algorithm selector (auto is preselected) and the tree leaf size. -->
584 <xml name="dbscan_advanced_options">
585 <section name="options" title="Advanced Options" expanded="False">
586 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/>
587 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/>
588 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/>
589 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors.">
590 <option value="auto" selected="true">auto</option>
591 <option value="ball_tree">ball_tree</option>
592 <option value="kd_tree">kd_tree</option>
593 <option value="brute">brute</option>
594 </param>
595 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/>
596 </section>
597 </xml>
598
<!-- Conditional that lets the user pick a clustering algorithm (KMeans is preselected)
and, for each choice, expands the matching *_advanced_options macro. Every option
value of selected_algorithm has exactly one when branch below. -->
599 <xml name="clustering_algorithms_options">
600 <conditional name="algorithm_options">
601 <param name="selected_algorithm" type="select" label="Clustering Algorithm">
602 <option value="KMeans" selected="true">KMeans</option>
603 <option value="SpectralClustering">Spectral Clustering</option>
604 <option value="MiniBatchKMeans">Mini Batch KMeans</option>
605 <option value="DBSCAN">DBSCAN</option>
606 <option value="Birch">Birch</option>
607 </param>
608 <when value="KMeans">
609 <expand macro="kmeans_advanced_options"/>
610 </when>
611 <when value="DBSCAN">
612 <expand macro="dbscan_advanced_options"/>
613 </when>
614 <when value="Birch">
615 <expand macro="birch_advanced_options"/>
616 </when>
617 <when value="SpectralClustering">
618 <expand macro="spectral_clustering_advanced_options"/>
619 </when>
620 <when value="MiniBatchKMeans">
621 <expand macro="minibatch_kmeans_advanced_options"/>
622 </when>
623 </conditional>
624 </xml>
625
<!-- Select parameter "metric" listing six distance metrics, with euclidean preselected.
The yield inside the param lets the expanding caller append further option elements.
NOTE(review): presumably distance_nonsparse_metrics is what callers inject here;
confirm in the tool files that expand this macro. -->
626 <xml name="distance_metrics">
627 <param argument="metric" type="select" label="Distance metric" help=" ">
628 <option value="euclidean" selected="true">euclidean</option>
629 <option value="cityblock">cityblock</option>
630 <option value="cosine">cosine</option>
631 <option value="l1">l1</option>
632 <option value="l2">l2</option>
633 <option value="manhattan">manhattan</option>
634 <yield/>
635 </param>
636 </xml>
637
<!-- Bare list of additional metric options (no enclosing param), so it only makes
sense when expanded inside a select parameter. None of the options is preselected.
NOTE(review): the name suggests these metrics are meant for dense (non-sparse)
input only; that restriction is not enforced in this macro. -->
638 <xml name="distance_nonsparse_metrics">
639 <option value="braycurtis">braycurtis</option>
640 <option value="canberra">canberra</option>
641 <option value="chebyshev">chebyshev</option>
642 <option value="correlation">correlation</option>
643 <option value="dice">dice</option>
644 <option value="hamming">hamming</option>
645 <option value="jaccard">jaccard</option>
646 <option value="kulsinski">kulsinski</option>
647 <option value="mahalanobis">mahalanobis</option>
648 <option value="matching">matching</option>
649 <option value="minkowski">minkowski</option>
650 <option value="rogerstanimoto">rogerstanimoto</option>
651 <option value="russellrao">russellrao</option>
652 <option value="seuclidean">seuclidean</option>
653 <option value="sokalmichener">sokalmichener</option>
654 <option value="sokalsneath">sokalsneath</option>
655 <option value="sqeuclidean">sqeuclidean</option>
656 <option value="yule">yule</option>
657 </xml>
658
<!-- Single-choice select for the kernel passed as "metric" to the pairwise kernel
computation. Exactly one option may carry selected="true" on a single-choice select;
linear is kept as the preselected value because it is the documented default metric
of sklearn.metrics.pairwise.pairwise_kernels. -->
659 <xml name="pairwise_kernel_metrics">
660 <param argument="metric" type="select" label="Pairwise Kernel metric" help=" ">
661 <option value="rbf">rbf</option>
662 <option value="sigmoid">sigmoid</option>
663 <option value="polynomial">polynomial</option>
664 <option value="linear" selected="true">linear</option>
665 <option value="chi2">chi2</option>
666 <option value="additive_chi2">additive_chi2</option>
667 </param>
668 </xml>
669
<!-- Select parameter selected_metric_function with three pairwise functions
(euclidean_distances is preselected). The yield inside the param lets the expanding
caller append more option elements.
NOTE(review): presumably pairwise_metric_functions is what gets injected here for
dense input; confirm in the calling tools. -->
670 <xml name="sparse_pairwise_metric_functions">
671 <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:">
672 <option value="euclidean_distances" selected="true">Euclidean distance matrix</option>
673 <option value="pairwise_distances">Distance matrix</option>
674 <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option>
675 <yield/>
676 </param>
677 </xml>
678
<!-- Bare list of additional pairwise function options (kernels and L1 distances)
without an enclosing param, so it must be expanded inside a select parameter.
None of the options is preselected. -->
679 <xml name="pairwise_metric_functions">
680 <option value="additive_chi2_kernel" >Additive chi-squared kernel</option>
681 <option value="chi2_kernel">Exponential chi-squared kernel</option>
682 <option value="linear_kernel">Linear kernel</option>
683 <option value="manhattan_distances">L1 distances</option>
684 <option value="pairwise_kernels">Kernel</option>
685 <option value="polynomial_kernel">Polynomial kernel</option>
686 <option value="rbf_kernel">Gaussian (rbf) kernel</option>
687 <option value="laplacian_kernel">Laplacian kernel</option>
688 </xml>
689
<!-- Two when branches meant to sit inside a conditional keyed on a pairwise
function name. pairwise_distances offers the distance_metrics selector and forwards
this macro's own yield into it, so callers can add metric options; euclidean_distances
offers only the "squared" switch, unchecked by default. -->
690 <xml name="sparse_pairwise_condition">
691 <when value="pairwise_distances">
692 <section title="Advanced Options" name="options" expanded="False">
693 <expand macro="distance_metrics">
694 <yield />
695 </expand>
696 </section>
697 </when>
698 <when value="euclidean_distances">
699 <section title="Advanced Options" name="options" expanded="False">
700 <param argument="squared" label="Return squared Euclidean distances" type="boolean" checked="false"
701 truevalue="booltrue" falsevalue="boolfalse" optional="true" help=" "/>
702 </section>
703 </when>
704 </xml>
705
<!-- when branch for the pairwise_distances_argmin function: exposes the axis
(default 1), the distance_metrics selector (this macro's yield is forwarded into it
so callers can add metric options) and the batch size (default 500). -->
706 <xml name="argmin_distance_condition">
707 <when value="pairwise_distances_argmin">
708 <section title="Advanced Options" name="options" expanded="False">
709 <param argument="axis" label="Axis" type="integer" value="1" optional="true" help="Axis along which the argmin and distances are to be computed."/>
710 <expand macro="distance_metrics">
711 <yield />
712 </expand>
713 <param argument="batch_size" label="Batch size" type="integer" value="500" optional="true" help="Number of rows to be processed in each batch run."/>
714 </section>
715 </when>
716 </xml>
717
<!-- Select parameter selected_pre_processor listing five preprocessors, with
StandardScaler preselected. The yield inside the param lets the expanding caller
append further preprocessor options. -->
718 <xml name="sparse_preprocessors">
719 <param name="selected_pre_processor" type="select" label="Select a preprocessor:">
720 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
721 <option value="Binarizer">Binarizer (Binarizes data)</option>
722 <option value="Imputer">Imputer (Completes missing values)</option>
723 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
724 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
725 <yield/>
726 </param>
727 </xml>
728
<!-- when branches for the preprocessors listed in sparse_preprocessors (Binarizer,
Imputer, StandardScaler, MaxAbsScaler, Normalizer). Each branch expands the
multitype_input macro for the dataset and then offers an "Advanced Options" section
with that preprocessor's parameters. Boolean parameters emit the strings
booltrue / boolfalse, except the Imputer axis switch, which emits 1 / 0 and has no
checked attribute. The trailing yield lets callers append further when branches. -->
729 <xml name="sparse_preprocessor_options">
730 <when value="Binarizer">
731 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
732 <section name="options" title="Advanced Options" expanded="False">
733 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
734 label="Use a copy of data for precomputing binarization" help=" "/>
735 <param argument="threshold" type="float" optional="true" value="0.0"
736 label="Threshold"
737 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
738 </section>
739 </when>
740 <when value="Imputer">
741 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
742 <section name="options" title="Advanced Options" expanded="False">
743 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
744 label="Use a copy of data for precomputing imputation" help=" "/>
745 <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
746 <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
747 <option value="median">Replace missing values using the median along the axis</option>
748 <option value="most_frequent">Replace missing values using the most frequent value along the axis</option>
749 </param>
750 <param argument="missing_values" type="text" optional="true" value="NaN"
751 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
752 <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
753 label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/>
754 <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
755 <option value="0" selected="true">Impute along columns</option>
756 <option value="1">Impute along rows</option>
757 </param-->
758 </section>
759 </when>
760 <when value="StandardScaler">
761 <expand macro="multitype_input"/>
762 <section name="options" title="Advanced Options" expanded="False">
763 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
764 label="Use a copy of data for performing inplace scaling" help=" "/>
765 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
766 label="Center the data before scaling" help=" "/>
767 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
768 label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
769 </section>
770 </when>
771 <when value="MaxAbsScaler">
772 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
773 <section name="options" title="Advanced Options" expanded="False">
774 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
775 label="Use a copy of data for precomputing scaling" help=" "/>
776 </section>
777 </when>
778 <when value="Normalizer">
779 <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
780 <section name="options" title="Advanced Options" expanded="False">
781 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
782 <option value="l1" selected="true">l1</option>
783 <option value="l2">l2</option>
784 <option value="max">max</option>
785 </param>
786 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
787 label="Use a copy of data for precomputing row normalization" help=" "/>
788 </section>
789 </when>
790 <yield/>
791 </xml>
<!-- Select parameter score_func listing five scoring functions for feature
selection. No option carries selected="true". -->
792 <xml name="feature_selection_score_function">
793 <param argument="score_func" type="select" label="Select a score function">
794 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
795 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
796 <option value="f_regression">f_regression - Univariate linear regression tests</option>
797 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
798 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
799 </param>
800 </xml>
<!-- Select parameter estimator offering five predefined base estimators. Each option
value is the same text as its label: a Python-style constructor expression with the
quotes XML-escaped. No option carries selected="true".
NOTE(review): how the consuming tool turns these strings into estimator objects is
not visible in this file; if it evaluates them, the values must stay trusted. -->
801 <xml name="feature_selection_estimator">
802 <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built.">
803 <option value="svm.SVR(kernel=&quot;linear&quot;)">svm.SVR(kernel=&quot;linear&quot;)</option>
804 <option value="svm.SVC(kernel=&quot;linear&quot;)">svm.SVC(kernel=&quot;linear&quot;)</option>
805 <option value="svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)">svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)</option>
806 <option value="linear_model.LassoCV()">linear_model.LassoCV()</option>
807 <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option>
808 </param>
809 </xml>
<!-- Yes/no select (has_estimator) asking whether the wanted estimator is among the
predefined ones. The option values yes / no are what the when branches of
feature_selection_estimator_choices match on; only the user-facing wording is
corrected here. The yield lets callers append further options. -->
810 <xml name="feature_selection_extra_estimator">
811 <param name="has_estimator" type="select" label="Is your estimator on the list above?">
812 <option value="yes">Yes, my estimator is on the list</option>
813 <option value="no">No, I need to make a new estimator</option>
814 <yield/>
815 </param>
816 </xml>
<!-- when branches matching the yes / no values of has_estimator: "yes" adds no
parameters, "no" exposes the free-text parameter new_estimator (empty by default).
The trailing yield lets callers append further when branches. -->
817 <xml name="feature_selection_estimator_choices">
818 <when value="yes">
819 </when>
820 <when value="no">
821 <param name="new_estimator" type="text" value="" label="Make a new estimator" />
822 </when>
823 <yield/>
824 </xml>
<!-- Conditional select_methods choosing the operation to run on the feature
selector. fit_transform has no parameters (fit_params is only mentioned in a
comment); get_support exposes the boolean "indices", unchecked by default, which
emits booltrue / boolfalse. No option of selected_method carries selected="true". -->
825 <xml name="feature_selection_methods">
826 <conditional name="select_methods">
827 <param name="selected_method" type="select" label="Select an operation">
828 <option value="fit_transform">fit_transform - Fit to data, then transform it</option>
829 <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option>
830 </param>
831 <when value="fit_transform">
832 <!--**fit_params-->
833 </when>
834 <when value="get_support">
835 <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/>
836 </when>
837 </conditional>
838 </xml>
839
<!-- Options shared by the model validation tools: an optional integer cv (empty by
default), followed by the n_jobs and verbose macros. The trailing yield lets callers
append their own parameters. -->
840 <xml name="model_validation_common_options">
841 <param argument="cv" label="cv" type="integer" optional="true" value="" help="The number of folds in a (Stratified)KFold"/>
842 <expand macro="n_jobs" />
843 <expand macro="verbose" />
844 <yield />
845 </xml>
846
<!-- Optional free-text parameter "scoring"; empty by default. -->
847 <xml name="scoring">
848 <param argument="scoring" label="scoring" type="text" optional="true" value="" help="A metric used to evaluate the estimator" />
849 </xml>
850
<!-- Optional free-text parameter "pre_dispatch"; defaults to the string all. -->
851 <xml name="pre_dispatch">
852 <param argument="pre_dispatch" label="pre_dispatch" type="text" optional="true" value="all" help="Number of predispatched jobs for parallel execution" />
853 </xml>
854
855 <!-- Outputs -->
856
<!-- Output datasets, filtered on the selected_task of the selected_tasks
conditional: outfile_predict (tabular) is produced for the "load" task and
outfile_fit (zip) for the "train" task. -->
857 <xml name="output">
858 <outputs>
859 <data name="outfile_predict" format="tabular">
860 <filter>selected_tasks['selected_task'] == 'load'</filter>
861 </data>
862 <data name="outfile_fit" format="zip">
863 <filter>selected_tasks['selected_task'] == 'train'</filter>
864 </data>
865 </outputs>
866 </xml>
867
868
869 <!--Citations-->
<!-- Citations block holding a single DOI reference (a Zenodo record). -->
870 <xml name="eden_citation">
871 <citations>
872 <citation type="doi">10.5281/zenodo.15094</citation>
873 </citations>
874 </xml>
875
<!-- Citations block with the BibTeX entry for the scikit-learn JMLR article.
BibTeX fields must be separated by commas, so year is followed by a comma before
the url field. -->
876 <xml name="sklearn_citation">
877 <citations>
878 <citation type="bibtex">
879 @article{scikit-learn,
880 title={Scikit-learn: Machine Learning in {P}ython},
881 author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
882 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
883 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
884 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
885 journal={Journal of Machine Learning Research},
886 volume={12},
887 pages={2825--2830},
888 year={2011},
889 url = {https://github.com/scikit-learn/scikit-learn}
890 }
891 </citation>
892 </citations>
893 </xml>
894
<!-- Citations block with a BibTeX @Misc entry for SciPy.
NOTE(review): the entry has an empty citation key (nothing between the opening
brace and the first comma); confirm that the BibTeX parser used by the consuming
application accepts this. -->
895 <xml name="scipy_citation">
896 <citations>
897 <citation type="bibtex">
898 @Misc{,
899 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
900 title = {{SciPy}: Open source scientific tools for {Python}},
901 year = {2001--},
902 url = "http://www.scipy.org/",
903 note = {[Online; accessed 2016-04-09]}
904 }
905 </citation>
906 </citations>
907 </xml>
908
909 </macros>