comparison main_macros.xml @ 40:06d772036a62 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:11:48 +0000
parents 7dd3fb35904f
children 838ca001438d
comparison
equal deleted inserted replaced
39:7dd3fb35904f 40:06d772036a62
1 <macros> 1 <macros>
2 <token name="@VERSION@">1.0.8.4</token> 2 <token name="@VERSION@">1.0.10.0</token>
3 <token name="@PROFILE@">21.05</token>
3 4
4 <xml name="python_requirements"> 5 <xml name="python_requirements">
5 <requirements> 6 <requirements>
6 <requirement type="package" version="0.8.3">Galaxy-ML</requirement> 7 <requirement type="package" version="3.9">python</requirement>
8 <requirement type="package" version="0.10.0">galaxy-ml</requirement>
7 <yield /> 9 <yield />
8 </requirements> 10 </requirements>
9 </xml> 11 </xml>
10 12
11 <xml name="macro_stdio"> 13 <xml name="macro_stdio">
12 <stdio> 14 <stdio>
13 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> 15 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" />
14 </stdio> 16 <exit_code range="137" level="fatal_oom" description="Out of Memory" />
15 </xml> 17 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" />
16 18 </stdio>
17 19 </xml>
20
18 <!--Generic interface--> 21 <!--Generic interface-->
19 22
20 <xml name="sl_Conditional" token_train="tabular" token_data="tabular" token_model="txt"> 23 <xml name="sl_Conditional" token_train="tabular" token_data="tabular" token_model="txt">
21 <conditional name="selected_tasks"> 24 <conditional name="selected_tasks">
22 <param name="selected_task" type="select" label="Select a Classification Task"> 25 <param name="selected_task" type="select" label="Select a Classification Task">
23 <option value="train" selected="true">Train a model</option> 26 <option value="train" selected="true">Train a model</option>
24 <option value="load">Load a model and predict</option> 27 <option value="load">Load a model and predict</option>
25 </param> 28 </param>
26 <when value="load"> 29 <when value="load">
27 <param name="infile_model" type="data" format="@MODEL@" label="Models" help="Select a model file." /> 30 <param name="infile_model" type="data" format="@MODEL@" label="Models" help="Select a model file." />
28 <param name="infile_data" type="data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify." /> 31 <param name="infile_data" type="data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify." />
29 <param name="header" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 32 <param name="header" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
30 <conditional name="prediction_options"> 33 <conditional name="prediction_options">
31 <param name="prediction_option" type="select" label="Select the type of prediction"> 34 <param name="prediction_option" type="select" label="Select the type of prediction">
32 <option value="predict">Predict class labels</option> 35 <option value="predict">Predict class labels</option>
33 <option value="advanced">Include advanced options</option> 36 <option value="advanced">Include advanced options</option>
34 </param> 37 </param>
35 <when value="predict"> 38 <when value="predict">
36 </when> 39 </when>
37 <when value="advanced"> 40 <when value="advanced">
38 </when> 41 </when>
39 </conditional>
40 </when>
41 <when value="train">
42 <conditional name="selected_algorithms">
43 <yield />
44 </conditional>
45 </when>
46 </conditional> 42 </conditional>
47 </xml> 43 </when>
48 44 <when value="train">
49 <xml name="advanced_section"> 45 <conditional name="selected_algorithms">
50 <section name="options" title="Advanced Options" expanded="False"> 46 <yield />
51 <yield /> 47 </conditional>
52 </section> 48 </when>
53 </xml> 49 </conditional>
54 50 </xml>
55 51
56 <!--Generalized Linear Models--> 52 <xml name="advanced_section">
57 <xml name="loss" token_help=" " token_select="false"> 53 <section name="options" title="Advanced Options" expanded="False">
58 <param argument="loss" type="select" label="Loss function" help="@HELP@"> 54 <yield />
59 <option value="squared_loss" selected="@SELECT@">squared loss</option> 55 </section>
60 <option value="huber">huber</option> 56 </xml>
61 <option value="epsilon_insensitive">epsilon insensitive</option> 57
62 <option value="squared_epsilon_insensitive">squared epsilon insensitive</option> 58
63 <yield /> 59 <!--Generalized Linear Models-->
64 </param> 60 <xml name="loss" token_help=" " token_select="false">
65 </xml> 61 <param argument="loss" type="select" label="Loss function" help="@HELP@">
66 62 <option value="squared_loss" selected="@SELECT@">squared loss</option>
67 <xml name="penalty" token_help=" "> 63 <option value="huber">huber</option>
68 <param argument="penalty" type="select" label="Penalty (regularization term)" help="@HELP@"> 64 <option value="epsilon_insensitive">epsilon insensitive</option>
69 <option value="l2" selected="true">l2</option> 65 <option value="squared_epsilon_insensitive">squared epsilon insensitive</option>
70 <option value="l1">l1</option> 66 <yield />
71 <option value="elasticnet">elastic net</option> 67 </param>
72 <option value="none">none</option> 68 </xml>
73 <yield /> 69
74 </param> 70 <xml name="penalty" token_help=" ">
75 </xml> 71 <param argument="penalty" type="select" label="Penalty (regularization term)" help="@HELP@">
76 72 <option value="l2" selected="true">l2</option>
77 <xml name="l1_ratio" token_default_value="0.15" token_help=" "> 73 <option value="l1">l1</option>
78 <param argument="l1_ratio" type="float" value="@DEFAULT_VALUE@" label="Elastic Net mixing parameter" help="@HELP@" /> 74 <option value="elasticnet">elastic net</option>
79 </xml> 75 <option value="none">none</option>
80 76 <yield />
81 <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. "> 77 </param>
82 <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@" /> 78 </xml>
83 </xml> 79
84 80 <xml name="l1_ratio" token_default_value="0.15" token_help=" ">
85 <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false"> 81 <param argument="l1_ratio" type="float" value="@DEFAULT_VALUE@" label="Elastic Net mixing parameter" help="@HELP@" />
86 <param argument="learning_rate" type="select" optional="true" label="Learning rate schedule" help="@HELP@"> 82 </xml>
87 <option value="optimal" selected="@SELECTED1@">optimal</option> 83
88 <option value="constant">constant</option> 84 <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. ">
89 <option value="invscaling" selected="@SELECTED2@">inverse scaling</option> 85 <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-sensitive loss functions only)" help="@HELP@" />
90 <yield /> 86 </xml>
91 </param> 87
92 </xml> 88 <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false">
93 89 <param argument="learning_rate" type="select" optional="true" label="Learning rate schedule" help="@HELP@">
94 <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. "> 90 <option value="optimal" selected="@SELECTED1@">optimal</option>
95 <param argument="eta0" type="float" value="@DEFAULT_VALUE@" label="Initial learning rate" help="@HELP@" /> 91 <option value="constant">constant</option>
96 </xml> 92 <option value="invscaling" selected="@SELECTED2@">inverse scaling</option>
97 93 <yield />
98 <xml name="power_t" token_default_value="0.5" token_help=" "> 94 </param>
99 <param argument="power_t" type="float" value="@DEFAULT_VALUE@" label="Exponent for inverse scaling learning rate" help="@HELP@" /> 95 </xml>
100 </xml> 96
101 97 <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. ">
102 <xml name="normalize" token_checked="false" token_help=" "> 98 <param argument="eta0" type="float" value="@DEFAULT_VALUE@" label="Initial learning rate" help="@HELP@" />
103 <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help=" " /> 99 </xml>
104 </xml> 100
105 101 <xml name="power_t" token_default_value="0.5" token_help=" ">
106 <xml name="copy_X" token_checked="true" token_help=" "> 102 <param argument="power_t" type="float" value="@DEFAULT_VALUE@" label="Exponent for inverse scaling learning rate" help="@HELP@" />
107 <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="If false, samples would be overwritten. " /> 103 </xml>
108 </xml> 104
109 105 <xml name="normalize" token_checked="false" token_help=" ">
110 <xml name="ridge_params"> 106 <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help=" " />
111 <expand macro="normalize" /> 107 </xml>
112 <expand macro="alpha" default_value="1.0" /> 108
113 <expand macro="fit_intercept" /> 109 <xml name="copy_X" token_checked="true" token_help=" ">
114 <expand macro="max_iter" default_value="" /> 110 <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="If false, samples would be overwritten. " />
115 <expand macro="tol" default_value="0.001" help_text="Precision of the solution. " /> 111 </xml>
116 <!--class_weight--> 112
117 <expand macro="copy_X" /> 113 <xml name="ridge_params">
118 <param argument="solver" type="select" value="" label="Solver to use in the computational routines" help=" "> 114 <expand macro="normalize" />
119 <option value="auto" selected="true">auto</option> 115 <expand macro="alpha" default_value="1.0" />
120 <option value="svd">svd</option> 116 <expand macro="fit_intercept" />
121 <option value="cholesky">cholesky</option> 117 <expand macro="max_iter" default_value="" />
122 <option value="lsqr">lsqr</option> 118 <expand macro="tol" default_value="0.001" help_text="Precision of the solution. " />
123 <option value="sparse_cg">sparse_cg</option> 119 <!--class_weight-->
124 <option value="sag">sag</option> 120 <expand macro="copy_X" />
125 </param> 121 <param argument="solver" type="select" value="" label="Solver to use in the computational routines" help=" ">
126 <expand macro="random_state" /> 122 <option value="auto" selected="true">auto</option>
127 </xml> 123 <option value="svd">svd</option>
128 124 <option value="cholesky">cholesky</option>
129 <!--Ensemble methods--> 125 <option value="lsqr">lsqr</option>
130 <xml name="n_estimators" token_default_value="10" token_help=" "> 126 <option value="sparse_cg">sparse_cg</option>
131 <param argument="n_estimators" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of trees in the forest" help="@HELP@" /> 127 <option value="sag">sag</option>
132 </xml> 128 </param>
133 129 <expand macro="random_state" />
134 <xml name="max_depth" token_default_value="" token_help=" "> 130 </xml>
135 <param argument="max_depth" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum depth of the tree" help="@HELP@" /> 131
136 </xml> 132 <!--Ensemble methods-->
137 133 <xml name="n_estimators" token_default_value="10" token_help=" ">
138 <xml name="min_samples_split" token_type="integer" token_default_value="2" token_help=" "> 134 <param argument="n_estimators" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of trees in the forest" help="@HELP@" />
139 <param argument="min_samples_split" type="@TYPE@" optional="true" value="@DEFAULT_VALUE@" label="Minimum number of samples required to split an internal node" help="@HELP@" /> 135 </xml>
140 </xml> 136
141 137 <xml name="max_depth" token_default_value="" token_help=" ">
142 <xml name="min_samples_leaf" token_type="integer" token_default_value="1" token_label="Minimum number of samples in newly created leaves" token_help=" "> 138 <param argument="max_depth" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum depth of the tree" help="@HELP@" />
143 <param argument="min_samples_leaf" type="@TYPE@" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP@" /> 139 </xml>
144 </xml> 140
145 141 <xml name="min_samples_split" token_type="integer" token_default_value="2" token_help=" ">
146 <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" "> 142 <param argument="min_samples_split" type="@TYPE@" optional="true" value="@DEFAULT_VALUE@" label="Minimum number of samples required to split an internal node" help="@HELP@" />
147 <param argument="min_weight_fraction_leaf" type="float" optional="true" value="@DEFAULT_VALUE@" label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@" /> 143 </xml>
148 </xml> 144
149 145 <xml name="min_samples_leaf" token_type="integer" token_default_value="1" token_label="Minimum number of samples in newly created leaves" token_help=" ">
150 <xml name="max_leaf_nodes" token_default_value="" token_help=" "> 146 <param argument="min_samples_leaf" type="@TYPE@" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP@" />
151 <param argument="max_leaf_nodes" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum number of leaf nodes in best-first method" help="@HELP@" /> 147 </xml>
152 </xml> 148
153 149 <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" ">
154 <xml name="min_impurity_decrease" token_default_value="0" token_help=" "> 150 <param argument="min_weight_fraction_leaf" type="float" optional="true" value="@DEFAULT_VALUE@" label="Minimum weighted fraction of the input samples required to be at a leaf node" help="@HELP@" />
155 <param argument="min_impurity_decrease" type="float" value="@DEFAULT_VALUE@" optional="true" label="The threshold value of impurity for stopping node splitting" help="@HELP@" /> 151 </xml>
156 </xml> 152
157 153 <xml name="max_leaf_nodes" token_default_value="" token_help=" ">
158 <xml name="bootstrap" token_checked="true" token_help=" "> 154 <param argument="max_leaf_nodes" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Maximum number of leaf nodes in best-first method" help="@HELP@" />
159 <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@" /> 155 </xml>
160 </xml> 156
161 157 <xml name="min_impurity_decrease" token_default_value="0" token_help=" ">
162 <xml name="criterion" token_help=" "> 158 <param argument="min_impurity_decrease" type="float" value="@DEFAULT_VALUE@" optional="true" label="The threshold value of impurity for stopping node splitting" help="@HELP@" />
163 <param argument="criterion" type="select" label="Function to measure the quality of a split" help=" "> 159 </xml>
164 <option value="gini" selected="true">Gini impurity</option> 160
165 <option value="entropy">Information gain</option> 161 <xml name="bootstrap" token_checked="true" token_help=" ">
166 <yield /> 162 <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@" />
167 </param> 163 </xml>
168 </xml> 164
169 165 <xml name="criterion" token_help=" ">
170 <xml name="criterion2" token_help=""> 166 <param argument="criterion" type="select" label="Function to measure the quality of a split" help=" ">
171 <param argument="criterion" type="select" label="Function to measure the quality of a split"> 167 <option value="gini" selected="true">Gini impurity</option>
172 <option value="mse">mse - mean squared error</option> 168 <option value="entropy">Information gain</option>
173 <option value="mae">mae - mean absolute error</option> 169 <yield />
174 <yield /> 170 </param>
175 </param> 171 </xml>
176 </xml> 172
177 173 <xml name="criterion2" token_help="">
178 <xml name="oob_score" token_checked="false" token_help=" "> 174 <param argument="criterion" type="select" label="Function to measure the quality of a split" >
179 <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@" /> 175 <option value="mse">mse - mean squared error</option>
180 </xml> 176 <option value="mae">mae - mean absolute error</option>
181 177 <yield />
182 <xml name="max_features"> 178 </param>
183 <conditional name="select_max_features"> 179 </xml>
184 <param argument="max_features" type="select" label="max_features"> 180
185 <option value="auto" selected="true">auto - max_features=n_features</option> 181 <xml name="oob_score" token_checked="false" token_help=" ">
186 <option value="sqrt">sqrt - max_features=sqrt(n_features)</option> 182 <param argument="oob_score" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@" />
187 <option value="log2">log2 - max_features=log2(n_features)</option> 183 </xml>
188 <option value="number_input">I want to type the number in or input None type</option> 184
189 </param> 185 <xml name="max_features">
190 <when value="auto"> 186 <conditional name="select_max_features">
191 </when> 187 <param argument="max_features" type="select" label="max_features">
192 <when value="sqrt"> 188 <option value="auto" selected="true">auto - max_features=n_features</option>
193 </when> 189 <option value="sqrt">sqrt - max_features=sqrt(n_features)</option>
194 <when value="log2"> 190 <option value="log2">log2 - max_features=log2(n_features)</option>
195 </when> 191 <option value="number_input">I want to type the number in or input None type</option>
196 <when value="number_input"> 192 </param>
197 <param name="num_max_features" type="float" value="" optional="true" label="Input max_features number:" help="If int, consider the number of features at each split; If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split." /> 193 <when value="auto">
198 </when> 194 </when>
199 </conditional> 195 <when value="sqrt">
200 </xml> 196 </when>
201 197 <when value="log2">
202 <xml name="verbose" token_default_value="0" token_help="If 1 then it prints progress and performance once in a while. If greater than 1 then it prints progress and performance for every tree."> 198 </when>
203 <param argument="verbose" type="integer" value="@DEFAULT_VALUE@" optional="true" label="Enable verbose output" help="@HELP@" /> 199 <when value="number_input">
204 </xml> 200 <param name="num_max_features" type="float" value="" optional="true" label="Input max_features number:" help="If int, consider the number of features at each split; If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split." />
205 201 </when>
206 <xml name="learning_rate" token_default_value="1.0" token_help=" "> 202 </conditional>
207 <param argument="learning_rate" type="float" optional="true" value="@DEFAULT_VALUE@" label="Learning rate" help="@HELP@" /> 203 </xml>
208 </xml> 204
209 205 <xml name="verbose" token_default_value="0" token_help="If 1 then it prints progress and performance once in a while. If greater than 1 then it prints progress and performance for every tree.">
210 <xml name="subsample" token_help=" "> 206 <param argument="verbose" type="integer" value="@DEFAULT_VALUE@" optional="true" label="Enable verbose output" help="@HELP@" />
211 <param argument="subsample" type="float" value="1.0" optional="true" label="The fraction of samples to be used for fitting the individual base learners" help="@HELP@" /> 207 </xml>
212 </xml> 208
213 209 <xml name="learning_rate" token_default_value="1.0" token_help=" ">
214 <xml name="presort"> 210 <param argument="learning_rate" type="float" optional="true" value="@DEFAULT_VALUE@" label="Learning rate" help="@HELP@" />
215 <param argument="presort" type="select" label="Whether to presort the data to speed up the finding of best splits in fitting"> 211 </xml>
216 <option value="auto" selected="true">auto</option> 212
217 <option value="true">true</option> 213 <xml name="subsample" token_help=" ">
218 <option value="false">false</option> 214 <param argument="subsample" type="float" value="1.0" optional="true" label="The fraction of samples to be used for fitting the individual base learners" help="@HELP@" />
219 </param> 215 </xml>
220 </xml> 216
221 217 <xml name="presort">
222 <!-- LightGBM --> 218 <param argument="presort" type="select" label="Whether to presort the data to speed up the finding of best splits in fitting" >
219 <option value="auto" selected="true">auto</option>
220 <option value="true">true</option>
221 <option value="false">false</option>
222 </param>
223 </xml>
224
225 <!-- LightGBM -->
223 <xml name="feature_fraction" token_help="LightGBM will randomly select part of the features for each iteration (tree) if feature_fraction is smaller than 1.0. For example, if you set it to 0.8, LightGBM will select 80% of features before training each tree."> 226 <xml name="feature_fraction" token_help="LightGBM will randomly select part of the features for each iteration (tree) if feature_fraction is smaller than 1.0. For example, if you set it to 0.8, LightGBM will select 80% of features before training each tree.">
224 <param argument="feature_fraction" type="float" value="1.0" label="Proportion of features to train each tree" help="@HELP@" /> 227 <param argument="feature_fraction" type="float" value="1.0" label="Proportion of features to train each tree" help="@HELP@" />
225 </xml> 228 </xml>
226 229
227 <xml name="lambda_l1" token_help=" "> 230 <xml name="lambda_l1" token_help=" ">
238 241
239 <xml name="min_child_weight" token_help="Minimal sum hessian in one leaf. It can be used to deal with over-fitting."> 242 <xml name="min_child_weight" token_help="Minimal sum hessian in one leaf. It can be used to deal with over-fitting.">
240 <param argument="min_child_weight" type="float" value="0.0" label="Minimal sum hessian in one leaf" help="@HELP@" /> 243 <param argument="min_child_weight" type="float" value="0.0" label="Minimal sum hessian in one leaf" help="@HELP@" />
241 </xml> 244 </xml>
242 245
243 246 <!--Parameters-->
244 <!--Parameters--> 247 <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection.">
245 <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection.">
246 <param argument="tol" type="float" optional="true" value="@DEFAULT_VALUE@" label="Tolerance" help="@HELP_TEXT@" /> 248 <param argument="tol" type="float" optional="true" value="@DEFAULT_VALUE@" label="Tolerance" help="@HELP_TEXT@" />
247 </xml> 249 </xml>
248 250
249 <xml name="n_clusters" token_default_value="8"> 251 <xml name="n_clusters" token_default_value="8">
250 <param argument="n_clusters" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of clusters" help=" " /> 252 <param argument="n_clusters" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of clusters" help=" " />
251 </xml> 253 </xml>
252 254
253 <xml name="fit_intercept" token_checked="true"> 255 <xml name="fit_intercept" token_checked="true">
254 <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered." /> 256 <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered." />
255 </xml> 257 </xml>
256 258
257 <xml name="n_iter_no_change" token_default_value="5" token_help_text="Number of iterations with no improvement to wait before early stopping. "> 259 <xml name="n_iter_no_change" token_default_value="5" token_help_text="Number of iterations with no improvement to wait before early stopping. ">
258 <param argument="n_iter_no_change" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@" /> 260 <param argument="n_iter_no_change" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@" />
259 </xml> 261 </xml>
260 262
261 <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> 263 <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
262 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@" /> 264 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@" />
263 </xml> 265 </xml>
264 266
265 <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results. default=None."> 267 <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results. default=None.">
266 <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@" /> 268 <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@" />
267 </xml> 269 </xml>
268 270
269 <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution."> 271 <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution.">
270 <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@" /> 272 <param argument="warm_start" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Perform warm start" help="@HELP_TEXT@" />
271 </xml> 273 </xml>
272 274
273 <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term."> 275 <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term.">
274 <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@" /> 276 <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@" />
275 </xml> 277 </xml>
276 278
277 <!--xml name="class_weight" token_default_value="" token_help_text=""> 279 <!--xml name="class_weight" token_default_value="" token_help_text="">
278 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/> 280 <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@" />
279 </xml--> 281 </xml-->
280 282
281 <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. "> 283 <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. ">
282 <param argument="alpha" type="float" optional="true" value="@DEFAULT_VALUE@" label="Regularization coefficient" help="@HELP_TEXT@" /> 284 <param argument="alpha" type="float" optional="true" value="@DEFAULT_VALUE@" label="Regularization coefficient" help="@HELP_TEXT@" />
283 </xml> 285 </xml>
284 286
285 <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters."> 287 <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters.">
286 <param argument="n_samples" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of samples" help="@HELP_TEXT@" /> 288 <param argument="n_samples" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of samples" help="@HELP_TEXT@" />
287 </xml> 289 </xml>
288 290
289 <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample."> 291 <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample.">
290 <param argument="n_features" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of features" help="@HELP_TEXT@" /> 292 <param argument="n_features" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of features" help="@HELP_TEXT@" />
291 </xml> 293 </xml>
292 294
293 <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. "> 295 <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. ">
294 <param argument="noise" type="float" optional="true" value="@DEFAULT_VALUE@" label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@" /> 296 <param argument="noise" type="float" optional="true" value="@DEFAULT_VALUE@" label="Standard deviation of the Gaussian noise added to the data" help="@HELP_TEXT@" />
295 </xml> 297 </xml>
296 298
297 <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term. "> 299 <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term. ">
298 <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@" /> 300 <param argument="C" type="float" optional="true" value="@DEFAULT_VALUE@" label="Penalty parameter" help="@HELP_TEXT@" />
299 </xml> 301 </xml>
300 302
301 <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" "> 303 <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" ">
302 <param argument="max_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" /> 304 <param argument="max_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" />
303 </xml> 305 </xml>
304 306
305 <xml name="n_init" token_default_value="10"> 307 <xml name="n_init" token_default_value="10" >
306 <param argument="n_init" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of runs with different centroid seeds" help=" " /> 308 <param argument="n_init" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of runs with different centroid seeds" help=" " />
307 </xml> 309 </xml>
308 310
309 <xml name="init"> 311 <xml name="init">
310 <param argument="init" type="select" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids."> 312 <param argument="init" type="select" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids.">
311 <option value="k-means++">k-means++</option> 313 <option value="k-means++">k-means++</option>
312 <option value="random">random</option> 314 <option value="random">random</option>
315 </param>
316 </xml>
317
<!-- Macro "gamma": one optional float parameter "gamma". Its value, label and help text are filled from the DEFAULT_VALUE (1.0), LABEL and HELP_TEXT tokens. -->
318 <xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" ">
319 <param argument="gamma" type="float" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" />
320 </xml>
321
<!-- Macro "degree": one optional integer parameter "degree". Value, label and help text come from the DEFAULT_VALUE (3), LABEL and HELP_TEXT tokens. -->
322 <xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" ">
323 <param argument="degree" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" />
324 </xml>
325
<!-- Macro "coef0": one optional integer parameter "coef0". Value, label and help text come from the DEFAULT_VALUE (1), LABEL and HELP_TEXT tokens. -->
326 <xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" ">
327 <param argument="coef0" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" />
328 </xml>
329
<!-- Macro "pos_label": one optional integer parameter "pos_label". The DEFAULT_VALUE token is empty, so the field starts blank unless the expanding tool overrides it. -->
330 <xml name="pos_label" token_default_value="">
331 <param argument="pos_label" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Label of the positive class" help=" " />
332 </xml>
333
<!-- Macro "average": optional select parameter "average" offering micro, samples, macro, weighted and the literal string value "None". The yield inside the param lets an expanding tool append further options. -->
334 <xml name="average">
335 <param argument="average" type="select" optional="true" label="Averaging type" help=" ">
336 <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option>
337 <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option>
338 <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option>
339 <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option>
340 <option value="None">None</option>
341 <yield />
342 </param>
343 </xml>
344
<!-- Macro "beta": one float parameter "beta" with a fixed initial value of 1.0; it takes no tokens. -->
345 <xml name="beta">
346 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" " />
347 </xml>
348
349
350 <!--Data interface-->
351
<!-- Macro "samples_tabular": two tabular dataset inputs (infile1 for samples, infile2 for labels/targets). Each gets a header checkbox and a column-selector conditional built from "samples_column_selector_options". MULTIPLE1 and MULTIPLE2 are forwarded as the "multiple" setting of the first and second selector; LABEL1 labels infile1. The trailing yield lets the caller append more inputs. -->
352 <xml name="samples_tabular" token_label1="Training samples dataset:" token_multiple1="false" token_multiple2="false">
353 <param name="infile1" type="data" format="tabular" label="@LABEL1@" />
354 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
355 <conditional name="column_selector_options_1">
356 <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@" />
357 </conditional>
358 <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:" />
359 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
360 <conditional name="column_selector_options_2">
361 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE2@" infile="infile2" />
362 </conditional>
363 <yield />
364 </xml>
365
<!-- Macro "samples_column_selector_options": the contents of a column-selection conditional (the enclosing conditional element is supplied by the caller). It emits a select named by COLUMN_OPTION with five modes and one "when" branch per mode. The two index-based modes use a data_column parameter bound to the dataset named by INFILE, with MULTIPLE controlling multi-selection. The two header-name modes use a free-text field. "all_columns" needs no further input. The column parameter is named by COL_NAME in every branch. -->
366 <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
367 <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
368 <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
369 <option value="all_but_by_index_number">All columns EXCLUDING some by column index number(s)</option>
370 <option value="by_header_name">Select columns by column header name(s)</option>
371 <option value="all_but_by_header_name">All columns EXCLUDING some by column header name(s)</option>
372 <option value="all_columns">All columns</option>
373 </param>
374 <when value="by_index_number">
375 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):" />
376 </when>
377 <when value="all_but_by_index_number">
378 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):" />
379 </when>
380 <when value="by_header_name">
381 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2" />
382 </when>
383 <when value="all_but_by_header_name">
384 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2" />
385 </when>
386 <when value="all_columns">
387 </when>
388 </xml>
389
<!-- Macro "clf_inputs_extended": two conditionals, "true_columns" and "predicted_columns", each letting the user pick a tabular or sparse dataset. The tabular branch adds a data_column selector (MULTIPLE applies to the predicted one only); the sparse branch takes a txt dataset. LABEL1 labels the true-labels dataset (infile1) and LABEL2 labels the predicted-labels dataset (infile2) in both branches. -->
390 <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
391 <conditional name="true_columns">
392 <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
393 <option value="tabular" selected="true">Tabular</option>
394 <option value="sparse">Sparse</option>
395 </param>
396 <when value="tabular">
397 <param name="infile1" type="data" label="@LABEL1@" />
398 <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:" />
399 </when>
400 <when value="sparse">
401 <param name="infile1" type="data" format="txt" label="@LABEL1@" />
402 </when>
403 </conditional>
404 <conditional name="predicted_columns">
405 <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:">
406 <option value="tabular" selected="true">Tabular</option>
407 <option value="sparse">Sparse</option>
408 </param>
409 <when value="tabular">
410 <param name="infile2" type="data" label="@LABEL2@" />
411 <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):" />
412 </when>
413 <when value="sparse">
414 <param name="infile2" type="data" format="txt" label="@LABEL2@" />
415 </when>
416 </conditional>
417 </xml>
418
<!-- Macro "clf_inputs": two tabular dataset inputs labelled by LABEL1 (true labels) and LABEL2 (predicted values). Each has a header checkbox and a column-selector conditional expanded from "samples_column_selector_options". MULTIPLE1 is forwarded to the first selector and MULTIPLE to the second. -->
419 <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False">
420 <param name="infile1" type="data" format="tabular" label="@LABEL1@" />
421 <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
422 <conditional name="column_selector_options_1">
423 <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@" />
424 </conditional>
425 <param name="infile2" type="data" format="tabular" label="@LABEL2@" />
426 <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
427 <conditional name="column_selector_options_2">
428 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE@" infile="infile2" />
429 </conditional>
430 </xml>
431
<!-- Macro "multiple_input": a repeat block named by NAME holding one data parameter "input" per entry. At least one entry is required and at most MAX_NUM (default 10) are allowed. FORMAT, LABEL and HELP_TEXT configure the dataset parameter. -->
432 <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format.">
433 <repeat name="@NAME@" min="1" max="@MAX_NUM@" title="Select input file(s):">
434 <param name="input" type="data" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@" />
435 </repeat>
436 </xml>
437
<!-- Macro "sparse_target": a sparse-matrix dataset input (infile1) followed by the target inputs from "input_tabular_target". Only the FORMAT1, LABEL1 and HELP1 tokens are referenced in this body; the remaining declared tokens (label2, multiple, format2, help2) are not used here. -->
438 <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2="">
439 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@" />
440 <expand macro="input_tabular_target" />
441 </xml>
442
<!-- Macro "sl_mixed_input": conditional "input_options" assembled from the input-type select ("data_input_options") and its matching branches ("data_input_whens"), with no extra options added. -->
443 <xml name="sl_mixed_input">
444 <conditional name="input_options">
445 <expand macro="data_input_options" />
446 <expand macro="data_input_whens" />
447 </conditional>
448 </xml>
449
<!-- Macro "sl_mixed_input_plus_sequence": same "input_options" conditional as "sl_mixed_input", extended with two sequence-based input types. The extra options are passed into "data_input_options" and the matching "when" branches into "data_input_whens", so each added option value has a branch with the same value. -->
450 <xml name="sl_mixed_input_plus_sequence">
451 <conditional name="input_options">
452 <expand macro="data_input_options">
453 <option value="seq_fasta">sequences in a fasta file</option>
454 <option value="refseq_and_interval">reference genome and intervals</option>
455 </expand>
456 <expand macro="data_input_whens">
457 <when value="seq_fasta">
458 <expand macro="inputs_seq_fasta" />
459 </when>
460 <when value="refseq_and_interval">
461 <expand macro="inputs_refseq_and_interval" />
462 </when>
463 </expand>
464 </conditional>
465 </xml>
466
<!-- Macro "data_input_options": select "selected_input" with "tabular" (pre-selected) and "sparse"; the yield inside the param lets callers add further input types. -->
467 <xml name="data_input_options">
468 <param name="selected_input" type="select" label="Select input type:">
469 <option value="tabular" selected="true">tabular data</option>
470 <option value="sparse">sparse matrix</option>
471 <yield />
472 </param>
473 </xml>
474
<!-- Macro "data_input_whens": the "when" branches paired with "data_input_options". "tabular" expands "samples_tabular" (multi-column samples, single-column target) and "sparse" expands "sparse_target". The trailing yield receives branches for any caller-added input types. -->
475 <xml name="data_input_whens">
476 <when value="tabular">
477 <expand macro="samples_tabular" multiple1="true" multiple2="false" />
478 </when>
479 <when value="sparse">
480 <expand macro="sparse_target" />
481 </when>
482 <yield />
483 </xml>
484
<!-- Macro "input_tabular_target": tabular target dataset (infile2) with a header checkbox and a column-selector conditional configured for a single column (multiple="false") stored as "col2". -->
485 <xml name="input_tabular_target">
486 <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:" />
487 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
488 <conditional name="column_selector_options_2">
489 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="false" infile="infile2" />
490 </conditional>
491 </xml>
492
<!-- Macro "inputs_seq_fasta": a fasta dataset input ("fasta_path") followed by the tabular target inputs from "input_tabular_target". -->
493 <xml name="inputs_seq_fasta">
494 <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays." />
495 <expand macro="input_tabular_target" />
496 </xml>
497
<!-- Macro "inputs_refseq_and_interval": inputs for interval-based training. It takes a reference genome (fasta), an interval dataset, a bed dataset of target positions/features, and a tabular feature list (infile2) with a header checkbox and a multi-column selector stored as "col2". -->
498 <xml name="inputs_refseq_and_interval">
499 <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence" />
500 <param name="interval_file" type="data" format="interval" label="Dataset containing sequence intervals for training" help="interval. Sequences will be retrieved from the reference genome and one-hot encoded to training arrays." />
501 <param name="target_file" type="data" format="bed" label="Dataset containing positions and features for target values." help="bed. The file will be compressed with `bgzip` and then indexed using `tabix`." />
502 <param name="infile2" type="data" format="tabular" label="Dataset containing the feature list for prediction" />
503 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
504 <conditional name="column_selector_options_2">
505 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="true" infile="infile2" />
506 </conditional>
507 </xml>
508
509 <!--Advanced options-->
<!-- Macro "nn_advanced_options": collapsed "Advanced Options" section. Caller-supplied content is inserted first (yield), followed by the weight function, the neighbor selection algorithm and the leaf size. The metric, p and metric_params parameters are only placeholders in comments. -->
510 <xml name="nn_advanced_options">
511 <section name="options" title="Advanced Options" expanded="False">
512 <yield />
513 <param argument="weights" type="select" label="Weight function" help="Used in prediction.">
514 <option value="uniform" selected="true">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option>
515 <option value="distance">Weight points by the inverse of their distance. (Distance)</option>
516 </param>
517 <param argument="algorithm" type="select" label="Neighbor selection algorithm" help=" ">
518 <option value="auto" selected="true">Auto</option>
519 <option value="ball_tree">BallTree</option>
520 <option value="kd_tree">KDTree</option>
521 <option value="brute">Brute-force</option>
522 </param>
523 <param argument="leaf_size" type="integer" value="30" label="Leaf size" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree." />
524 <!--param name="metric"-->
525 <!--param name="p"-->
526 <!--param name="metric_params"-->
527 </section>
528 </xml>
529
<!-- Macro "svc_advanced_options": collapsed "Advanced Options" section for support vector classifiers. Caller-supplied content comes first (yield), then kernel, degree, coef0, shrinking and probability, followed by the shared "tol", "max_iter" (default -1, meaning no limit) and "random_state" macros. gamma, cache_size, class_weight and decision_function_shape are not exposed yet (see the placeholder comments). -->
530 <xml name="svc_advanced_options">
531 <section name="options" title="Advanced Options" expanded="False">
532 <yield />
533 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used.">
534 <option value="rbf" selected="true">rbf</option>
535 <option value="linear">linear</option>
536 <option value="poly">poly</option>
537 <option value="sigmoid">sigmoid</option>
538 <option value="precomputed">precomputed</option>
539 </param>
540 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 " />
541 <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
542 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)"
543 help="Independent term in kernel function. default: 0.0 " />
544 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
545 label="Use the shrinking heuristic" help=" " />
546 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
547 label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method." />
548 <!-- param argument="cache_size"-->
549 <!--expand macro="class_weight"/-->
550 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. " />
551 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit." />
552 <!--param argument="decision_function_shape"-->
553 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results." />
554 </section>
555 </xml>
556
<!-- Macro "spectral_clustering_advanced_options": collapsed "Advanced Options" section for spectral clustering. It combines the shared "n_clusters", "random_state" and "n_init" macros with the eigen solver, kernel scaling factor (gamma), affinity, number of neighbors, label assignment strategy, polynomial degree and coef0. eigen_tol and kernel_params are not exposed (placeholder comments only). -->
557 <xml name="spectral_clustering_advanced_options">
558 <section name="options" title="Advanced Options" expanded="False">
559 <expand macro="n_clusters" />
560 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use.">
561 <option value="arpack" selected="true">arpack</option>
562 <option value="lobpcg">lobpcg</option>
563 <option value="amg">amg</option>
564 <!--None-->
565 </param>
566 <expand macro="random_state" />
567 <expand macro="n_init" />
568 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''." />
569 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. ">
570 <option value="rbf" selected="true">RBF</option>
571 <option value="precomputed">precomputed</option>
572 <option value="nearest_neighbors">Nearest neighbors</option>
573 </param>
574 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''" />
575 <!--param argument="eigen_tol"-->
576 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space.">
577 <option value="kmeans" selected="true">kmeans</option>
578 <option value="discretize">discretize</option>
579 </param>
580 <param argument="degree" type="integer" optional="true" value="3"
581 label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 " />
582 <param argument="coef0" type="integer" optional="true" value="1"
583 label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 " />
584 <!--param argument="kernel_params"-->
585 </section>
586 </xml>
587
<!-- Macro "minibatch_kmeans_advanced_options": collapsed "Advanced Options" section for mini-batch k-means. It reuses the shared "n_clusters", "init", "n_init" (default 3), "max_iter" (default 100), "tol" and "random_state" macros and adds batch size, max_no_improvement, init_size (blank by default) and reassignment ratio. compute_labels is not exposed. -->
588 <xml name="minibatch_kmeans_advanced_options">
589 <section name="options" title="Advanced Options" expanded="False">
590 <expand macro="n_clusters" />
591 <expand macro="init" />
592 <expand macro="n_init" default_value="3" />
593 <expand macro="max_iter" default_value="100" />
594 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ." />
595 <expand macro="random_state" />
596 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches." />
597 <!--param argument="compute_labels"-->
598 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help="
599 Convergence detection based on inertia (the consecutive number of mini batches that does not yield an improvement on the smoothed inertia).
600 To disable, set max_no_improvement to None. " />
601 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )" />
602 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results." />
603 </section>
604 </xml>
605
<!-- Macro "kmeans_advanced_options": collapsed "Advanced Options" section for k-means. It is built from the shared "n_clusters", "init", "n_init", "max_iter", "tol" (default 0.0001), "random_state" and "kmeans_algorithm" macros plus a copy_x checkbox. precompute_distances is not exposed. -->
606 <xml name="kmeans_advanced_options">
607 <section name="options" title="Advanced Options" expanded="False">
608 <expand macro="n_clusters" />
609 <expand macro="init" />
610 <expand macro="n_init" />
611 <expand macro="max_iter" />
612 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence." />
613 <!--param argument="precompute_distances"/-->
614 <expand macro="random_state" />
615 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean." />
616 <expand macro="kmeans_algorithm" />
617 </section>
618 </xml>
619
<!-- Macro "kmeans_algorithm": select parameter "algorithm" with the values auto (pre-selected), full and elkan. -->
620 <xml name="kmeans_algorithm">
621 <param argument="algorithm" type="select" label="K-means algorithm to use:">
622 <option value="auto" selected="true">auto</option>
623 <option value="full">full</option>
624 <option value="elkan">elkan</option>
625 </param>
626 </xml>
627
<!-- Macro "birch_advanced_options": collapsed "Advanced Options" section for Birch with the subcluster radius threshold (0.5), the branching factor (50) and the shared "n_clusters" macro using a default of 3. compute_labels is not exposed. -->
628 <xml name="birch_advanced_options">
629 <section name="options" title="Advanced Options" expanded="False">
630 <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster." />
631 <param argument="branching_factor" type="integer" optional="true" value="50" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node." />
632 <expand macro="n_clusters" default_value="3" />
633 <!--param argument="compute_labels"/-->
634 </section>
635 </xml>
636
<!-- Macro "dbscan_advanced_options": collapsed "Advanced Options" section for DBSCAN with eps (0.5), min_samples (5), a free-text metric (euclidean), the neighbor search algorithm (auto pre-selected) and the leaf size (30). -->
637 <xml name="dbscan_advanced_options">
638 <section name="options" title="Advanced Options" expanded="False">
639 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood." />
640 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point." />
641 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array." />
642 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors.">
643 <option value="auto" selected="true">auto</option>
644 <option value="ball_tree">ball_tree</option>
645 <option value="kd_tree">kd_tree</option>
646 <option value="brute">brute</option>
647 </param>
648 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying." />
649 </section>
650 </xml>
651
<!-- Macro "clustering_algorithms_options": conditional "algorithm_options" that lets the user choose a clustering algorithm (KMeans pre-selected) and expands the matching "*_advanced_options" macro in the branch for each choice. -->
652 <xml name="clustering_algorithms_options">
653 <conditional name="algorithm_options">
654 <param name="selected_algorithm" type="select" label="Clustering Algorithm">
655 <option value="KMeans" selected="true">KMeans</option>
656 <option value="SpectralClustering">Spectral Clustering</option>
657 <option value="MiniBatchKMeans">Mini Batch KMeans</option>
658 <option value="DBSCAN">DBSCAN</option>
659 <option value="Birch">Birch</option>
660 </param>
661 <when value="KMeans">
662 <expand macro="kmeans_advanced_options" />
663 </when>
664 <when value="DBSCAN">
665 <expand macro="dbscan_advanced_options" />
666 </when>
667 <when value="Birch">
668 <expand macro="birch_advanced_options" />
669 </when>
670 <when value="SpectralClustering">
671 <expand macro="spectral_clustering_advanced_options" />
672 </when>
673 <when value="MiniBatchKMeans">
674 <expand macro="minibatch_kmeans_advanced_options" />
675 </when>
676 </conditional>
677 </xml>
678
<!-- Macro "distance_metrics": select parameter "metric" with six distance metrics (euclidean pre-selected). The yield inside the param lets callers append further metric options. -->
679 <xml name="distance_metrics">
680 <param argument="metric" type="select" label="Distance metric" help=" ">
681 <option value="euclidean" selected="true">euclidean</option>
682 <option value="cityblock">cityblock</option>
683 <option value="cosine">cosine</option>
684 <option value="l1">l1</option>
685 <option value="l2">l2</option>
686 <option value="manhattan">manhattan</option>
687 <yield />
688 </param>
689 </xml>
690
<!-- Macro "distance_nonsparse_metrics": a bare list of metric options with no enclosing param, so it has to be expanded inside a select. Presumably it is injected through the yield of "distance_metrics"; the call sites are not visible in this part of the file. -->
691 <xml name="distance_nonsparse_metrics">
692 <option value="braycurtis">braycurtis</option>
693 <option value="canberra">canberra</option>
694 <option value="chebyshev">chebyshev</option>
695 <option value="correlation">correlation</option>
696 <option value="dice">dice</option>
697 <option value="hamming">hamming</option>
698 <option value="jaccard">jaccard</option>
699 <option value="kulsinski">kulsinski</option>
700 <option value="mahalanobis">mahalanobis</option>
701 <option value="matching">matching</option>
702 <option value="minkowski">minkowski</option>
703 <option value="rogerstanimoto">rogerstanimoto</option>
704 <option value="russellrao">russellrao</option>
705 <option value="seuclidean">seuclidean</option>
706 <option value="sokalmichener">sokalmichener</option>
707 <option value="sokalsneath">sokalsneath</option>
708 <option value="sqeuclidean">sqeuclidean</option>
709 <option value="yule">yule</option>
710 </xml>
711
<!-- Macro "pairwise_kernel_metrics": single-valued select parameter "metric" listing the available pairwise kernels. Exactly one option is pre-selected: rbf, the first of the two options that were previously both marked as selected. -->
712 <xml name="pairwise_kernel_metrics">
713 <param argument="metric" type="select" label="Pairwise kernel metric" help=" ">
714 <option value="rbf" selected="true">rbf</option>
715 <option value="sigmoid">sigmoid</option>
716 <option value="polynomial">polynomial</option>
717 <option value="linear">linear</option>
718 <option value="chi2">chi2</option>
719 <option value="additive_chi2">additive_chi2</option>
720 </param>
721 </xml>
722
<!-- Macro "sparse_pairwise_metric_functions": select "selected_metric_function" with three pairwise functions (euclidean_distances pre-selected). The yield inside the param lets callers add more functions. -->
723 <xml name="sparse_pairwise_metric_functions">
724 <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:">
725 <option value="euclidean_distances" selected="true">Euclidean distance matrix</option>
726 <option value="pairwise_distances">Distance matrix</option>
727 <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option>
728 <yield />
729 </param>
730 </xml>
731
<!-- Macro "pairwise_metric_functions": a bare list of kernel/distance function options with no enclosing param, so it has to be expanded inside a select. Presumably it is injected through the yield of "sparse_pairwise_metric_functions"; the call sites are not visible in this part of the file. -->
732 <xml name="pairwise_metric_functions">
733 <option value="additive_chi2_kernel" >Additive chi-squared kernel</option>
734 <option value="chi2_kernel">Exponential chi-squared kernel</option>
735 <option value="linear_kernel">Linear kernel</option>
736 <option value="manhattan_distances">L1 distances</option>
737 <option value="pairwise_kernels">Kernel</option>
738 <option value="polynomial_kernel">Polynomial kernel</option>
739 <option value="rbf_kernel">Gaussian (rbf) kernel</option>
740 <option value="laplacian_kernel">Laplacian kernel</option>
741 </xml>
742
<!-- Macro "sparse_pairwise_condition": "when" branches for two pairwise functions. "pairwise_distances" shows the "distance_metrics" select and forwards this macro's own yield into it, so callers can add metrics. "euclidean_distances" offers a checkbox for returning squared distances. -->
743 <xml name="sparse_pairwise_condition">
744 <when value="pairwise_distances">
745 <section name="options" title="Advanced Options" expanded="False">
746 <expand macro="distance_metrics">
747 <yield />
748 </expand>
749 </section>
750 </when>
751 <when value="euclidean_distances">
752 <section name="options" title="Advanced Options" expanded="False">
753 <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
754 label="Return squared Euclidean distances" help=" " />
755 </section>
756 </when>
757 </xml>
758
<!-- Macro "argmin_distance_condition": "when" branch for "pairwise_distances_argmin" with an axis (default 1), the "distance_metrics" select (this macro's yield is forwarded into it) and a batch size (default 500). -->
759 <xml name="argmin_distance_condition">
760 <when value="pairwise_distances_argmin">
761 <section name="options" title="Advanced Options" expanded="False">
762 <param argument="axis" type="integer" optional="true" value="1" label="Axis" help="Axis along which the argmin and distances are to be computed." />
763 <expand macro="distance_metrics">
764 <yield />
765 </expand>
766 <param argument="batch_size" type="integer" optional="true" value="500" label="Batch size" help="Number of rows to be processed in each batch run." />
767 </section>
768 </when>
769 </xml>
770
<!-- Macro "sparse_preprocessors": select "selected_pre_processor" with four preprocessors (StandardScaler pre-selected). The yield inside the param lets callers add more preprocessors. -->
771 <xml name="sparse_preprocessors">
772 <param name="selected_pre_processor" type="select" label="Select a preprocessor:">
773 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
774 <option value="Binarizer">Binarizer (Binarizes data)</option>
775 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
776 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
777 <yield />
778 </param>
779 </xml>
780
<!-- Macro "sparse_preprocessors_ext": the "sparse_preprocessors" select extended with seven additional preprocessor options, passed in through its yield. -->
781 <xml name="sparse_preprocessors_ext">
782 <expand macro="sparse_preprocessors">
783 <option value="KernelCenterer">Kernel Centerer (Centers a kernel matrix)</option>
784 <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option>
785 <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option>
786 <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option>
787 <option value="QuantileTransformer">QuantileTransformer (Transform features using quantiles information)</option>
788 <option value="PowerTransformer">PowerTransformer (Apply a power transform featurewise to make data more Gaussian-like)</option>
789 <option value="KBinsDiscretizer">KBinsDiscretizer (Bin continuous data into intervals.)</option>
790 </expand>
791 </xml>
792
793 <xml name="sparse_preprocessor_options">
794 <when value="Binarizer">
795 <section name="options" title="Advanced Options" expanded="False">
796 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
797 label="Use a copy of data for precomputing binarization" help=" " />
798 <param argument="threshold" type="float" optional="true" value="0.0"
799 label="Threshold"
800 help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. " />
801 </section>
802 </when>
803 <when value="StandardScaler">
804 <section name="options" title="Advanced Options" expanded="False">
805 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
806 label="Use a copy of data for performing inplace scaling" help=" " />
807 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
808 label="Center the data before scaling" help=" " />
809 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
810 label="Scale the data to unit variance (or unit standard deviation)" help=" " />
811 </section>
812 </when>
813 <when value="MaxAbsScaler">
814 <section name="options" title="Advanced Options" expanded="False">
815 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
816 label="Use a copy of data for precomputing scaling" help=" " />
817 </section>
818 </when>
819 <when value="Normalizer">
820 <section name="options" title="Advanced Options" expanded="False">
821 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
822 <option value="l1" selected="true">l1</option>
823 <option value="l2">l2</option>
824 <option value="max">max</option>
313 </param> 825 </param>
314 </xml> 826 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
315 827 label="Use a copy of data for precomputing row normalization" help=" " />
316 <xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" "> 828 </section>
317 <param argument="gamma" type="float" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" /> 829 </when>
318 </xml> 830 <yield />
319 831 </xml>
320 <xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" "> 832
321 <param argument="degree" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" /> 833 <xml name="sparse_preprocessor_options_ext">
322 </xml> 834 <expand macro="sparse_preprocessor_options">
323 835 <when value="KernelCenterer">
324 <xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" "> 836 <section name="options" title="Advanced Options" expanded="False">
325 <param argument="coef0" type="integer" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@" /> 837 </section>
326 </xml> 838 </when>
327 839 <when value="MinMaxScaler">
328 <xml name="pos_label" token_default_value=""> 840 <section name="options" title="Advanced Options" expanded="False">
329 <param argument="pos_label" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Label of the positive class" help=" " /> 841 <param argument="feature_range" type="text" value="(0, 1)" optional="true" help="Desired range of transformed data. None or tuple (min, max). None equals to (0, 1)" />
330 </xml> 842 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolflase" checked="true"
331 843 label="Use a copy of data for precomputing normalization" help=" " />
332 <xml name="average"> 844 </section>
333 <param argument="average" type="select" optional="true" label="Averaging type" help=" "> 845 </when>
334 <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option> 846 <when value="PolynomialFeatures"> <!-- PolynomialFeatures options: polynomial degree, interaction-only switch and bias column switch; unchecked booleans emit boolfalse. -->
335 <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option> 847 <section name="options" title="Advanced Options" expanded="False">
336 <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option> 848 <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help="" />
337 <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option> 849 <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) " />
338 <option value="None">None</option> 850 <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero " />
339 <yield /> 851 </section>
852 </when>
853 <when value="RobustScaler">
854 <section name="options" title="Advanced Options" expanded="False">
855 <!-- All three switches are checked by default: with_centering=True, with_scaling=True, copy=True. Unchecked values emit boolfalse. -->
856 <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
857 label="Center the data before scaling" help=" " />
858 <param argument="with_scaling" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
859 label="Scale the data to interquartile range" help=" " />
860 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
861 label="Use a copy of data for inplace scaling" help=" " />
862 </section>
863 </when>
864 <when value="QuantileTransformer"> <!-- QuantileTransformer options: quantile count, output distribution, sparse-zero handling, subsample size and the shared random_state macro. -->
865 <section name="options" title="Advanced Options" expanded="False">
866 <param name="n_quantiles" type="integer" value="1000" min="0" label="Number of quantiles to be computed" />
867 <param name="output_distribution" type="select" label="Marginal distribution for the transformed data">
868 <option value="uniform" selected="true">uniform</option>
869 <option value="normal">normal</option>
870 </param>
871 <param name="ignore_implicit_zeros" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to discard sparse entries" help="Only applies to sparse matrices. If False, sparse entries are treated as zeros" />
872 <param name="subsample" type="integer" value="100000" label="Maximum number of samples used to estimate the quantiles for computational efficiency" help="Note that the subsampling procedure may differ for value-identical sparse and dense matrices." />
873 <expand macro="random_state" help_text="This is used by subsampling and smoothing noise" />
874 </section>
875 </when>
876 <when value="PowerTransformer"> <!-- PowerTransformer options: transform method (yeo-johnson preselected) and a standardize switch that is checked by default. -->
877 <section name="options" title="Advanced Options" expanded="False">
878 <param name="method" type="select" label="The power transform method">
879 <option value="yeo-johnson" selected="true">yeo-johnson (works with positive and negative values)</option>
880 <option value="box-cox">box-cox (might perform better, but only works with strictly positive values)</option>
881 </param>
882 <param name="standardize" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Whether to apply zero-mean, unit-variance normalization to the transformed output." />
883 </section>
884 </when>
885 <when value="KBinsDiscretizer"> <!-- KBinsDiscretizer options: bin count (at least 2), output encoding (onehot preselected) and binning strategy (quantile preselected). -->
886 <section name="options" title="Advanced Options" expanded="False">
887 <param name="n_bins" type="integer" value="5" min="2" label="The number of bins to produce" />
888 <param name="encode" type="select" label="Method used to encode the transformed result">
889 <option value="onehot" selected="true">onehot (encode the transformed result with one-hot encoding and return a sparse matrix)</option>
890 <option value="onehot-dense">onehot-dense (encode the transformed result with one-hot encoding and return a dense array)</option>
891 <option value="ordinal">ordinal (return the bin identifier encoded as an integer value)</option>
892 </param>
893 <param name="strategy" type="select" label="Strategy used to define the widths of the bins">
894 <option value="uniform">uniform (all bins in each feature have identical widths)</option>
895 <option value="quantile" selected="true">quantile (all bins in each feature have the same number of points)</option>
896 <option value="kmeans">kmeans (values in each bin have the same nearest center of a 1D k-means cluster)</option>
897 </param>
898 </section>
899 </when>
900 </expand>
901 </xml>
902
903 <xml name="cv_splitter"> <!-- Option list of cross-validation splitters for a select param; "default" is preselected and callers may append further options through the yield. -->
904 <option value="default" selected="true">default splitter</option>
905 <option value="KFold">KFold</option>
906 <option value="StratifiedKFold">StratifiedKFold</option>
907 <option value="LeaveOneOut">LeaveOneOut</option>
908 <option value="LeavePOut">LeavePOut</option>
909 <option value="RepeatedKFold">RepeatedKFold</option>
910 <option value="RepeatedStratifiedKFold">RepeatedStratifiedKFold</option>
911 <option value="ShuffleSplit">ShuffleSplit</option>
912 <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
913 <option value="TimeSeriesSplit">TimeSeriesSplit</option>
914 <option value="PredefinedSplit">PredefinedSplit</option>
915 <option value="OrderedKFold">OrderedKFold</option>
916 <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option>
917 <yield />
918 </xml>
919
920 <xml name="cv_splitter_options"> <!-- One when-branch per splitter offered by cv_splitter, holding that splitter's parameters; callers add branches for extra splitters through the yield. -->
921 <when value="default">
922 <expand macro="cv_n_splits" />
923 </when>
924 <when value="KFold">
925 <expand macro="cv_n_splits" />
926 <expand macro="cv_shuffle" />
927 <expand macro="random_state" />
928 </when>
929 <when value="StratifiedKFold">
930 <expand macro="cv_n_splits" />
931 <expand macro="cv_shuffle" />
932 <expand macro="random_state" />
933 </when>
934 <when value="LeaveOneOut">
935 </when>
936 <when value="LeavePOut">
937 <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets." />
938 </when>
939 <when value="RepeatedKFold">
940 <expand macro="cv_n_splits" value="5" />
941 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
942 <expand macro="random_state" />
943 </when>
944 <when value="RepeatedStratifiedKFold">
945 <expand macro="cv_n_splits" value="5" />
946 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
947 <expand macro="random_state" />
948 </when>
949 <when value="ShuffleSplit">
950 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." />
951 <expand macro="cv_test_size" value="0.1" />
952 <expand macro="random_state" />
953 </when>
954 <when value="StratifiedShuffleSplit">
955 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." />
956 <expand macro="cv_test_size" value="0.1" />
957 <expand macro="random_state" />
958 </when>
959 <when value="TimeSeriesSplit">
960 <expand macro="cv_n_splits" />
961 <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." />
962 </when>
963 <when value="PredefinedSplit">
964 <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'." />
965 </when>
966 <when value="OrderedKFold">
967 <expand macro="cv_n_splits" />
968 <expand macro="cv_shuffle" />
969 <expand macro="random_state" />
970 <expand macro="cv_n_stratification_bins" />
971 </when>
972 <when value="RepeatedOrderedKFold">
973 <expand macro="cv_n_splits" />
974 <param argument="n_repeats" type="integer" value="5" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
975 <expand macro="random_state" />
976 <expand macro="cv_n_stratification_bins" />
977 </when>
978 <yield />
979 </xml>
980
981 <xml name="cv"> <!-- Full cross-validation selector: the cv_splitter options plus four group-based splitters, each with a matching when-branch injected into cv_splitter_options. -->
982 <conditional name="cv_selector">
983 <param name="selected_cv" type="select" label="Select the cv splitter:">
984 <expand macro="cv_splitter">
985 <option value="GroupKFold">GroupKFold</option>
986 <option value="GroupShuffleSplit">GroupShuffleSplit</option>
987 <option value="LeaveOneGroupOut">LeaveOneGroupOut</option>
988 <option value="LeavePGroupsOut">LeavePGroupsOut</option>
989 </expand>
990 </param>
991 <expand macro="cv_splitter_options">
992 <when value="GroupKFold">
993 <expand macro="cv_n_splits" />
994 <expand macro="cv_groups" />
995 </when>
996 <when value="GroupShuffleSplit">
997 <expand macro="cv_n_splits" value="5" />
998 <expand macro="cv_test_size" />
999 <expand macro="random_state" />
1000 <expand macro="cv_groups" />
1001 </when>
1002 <when value="LeaveOneGroupOut">
1003 <expand macro="cv_groups" />
1004 </when>
1005 <when value="LeavePGroupsOut">
1006 <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." />
1007 <expand macro="cv_groups" />
1008 </when>
1009 </expand>
1010 </conditional>
1011 </xml>
1012
1013 <xml name="cv_reduced" token_label="Select the cv splitter"> <!-- Cross-validation selector limited to the splitters listed in cv_splitter (no extra options or when-branches are injected); the select label comes from token_label. -->
1014 <conditional name="cv_selector">
1015 <param name="selected_cv" type="select" label="@LABEL@">
1016 <expand macro="cv_splitter" />
1017 </param>
1018 <expand macro="cv_splitter_options" />
1019 </conditional>
1020 </xml>
1021
1022 <xml name="cv_n_splits" token_value="5" token_help="Number of folds. Must be at least 2."> <!-- Integer n_splits param; default value and help text are overridable through token_value and token_help. -->
1023 <!-- NOTE(review): min is 1 although the default help text says "at least 2". Presumably kept permissive because the shuffle-split branches expand this macro with their own help text and may accept a single split. Confirm before tightening. -->
1024 <param argument="n_splits" type="integer" value="@VALUE@" min="1" label="n_splits" help="@HELP@" />
1025 </xml>
1026
1027 <xml name="cv_shuffle"> <!-- Optional boolean shuffle flag, unchecked by default; emits booltrue or boolfalse. -->
1028 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" />
1029 </xml>
1030
1031 <xml name="cv_n_stratification_bins"> <!-- Optional integer n_stratification_bins param with an empty default. -->
1032 <param argument="n_stratification_bins" type="integer" value="" optional="true" help="Integer. The number of stratification bins. Only relevant when shuffle is True. Valid in [2, `n_samples // n_splits`]. Default value is None, which is same as `n_samples // n_splits`. The higher the value is, the distribution of target values is more approximately the same across all split folds." />
1033 </xml>
1034
1035 <xml name="cv_test_size" token_value="0.2"> <!-- Float test_size param with a lower bound of 0.0; the default comes from token_value (0.2 unless the caller overrides it). -->
1036 <param argument="test_size" type="float" value="@VALUE@" min="0.0" label="Portion or number of the test set" help="0.0-1.0, proportion of the dataset to include in the test split; >1, integer only, the absolute number of test samples " />
1037 </xml>
1038
1039 <xml name="cv_groups" > <!-- Section for picking the groups input: a tabular dataset (infile_g), a header flag and a single-column selector built from samples_column_selector_options. -->
1040 <section name="groups_selector" title="Groups column selector" expanded="true">
1041 <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:" />
1042 <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
1043 <conditional name="column_selector_options_g">
1044 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g" />
1045 </conditional>
1046 </section>
1047 </xml>
1048
1049 <xml name="train_test_split_params"> <!-- Train/test split configuration: a conditional over four splitting methods (ShuffleSplit preselected). Every branch takes a holdout size; all but "None" add random_state; the group branch adds optional group names and a yield for caller content. -->
1050 <conditional name="split_algos">
1051 <param name="shuffle" type="select" label="Select the splitting method">
1052 <option value="None">No shuffle</option>
1053 <option value="simple" selected="true">ShuffleSplit</option>
1054 <option value="stratified">StratifiedShuffleSplit -- target values serve as class labels</option>
1055 <option value="group">GroupShuffleSplit or split by group names</option>
1056 </param>
1057 <when value="None">
1058 <expand macro="train_test_split_test_size" />
1059 </when>
1060 <when value="simple">
1061 <expand macro="train_test_split_test_size" />
1062 <expand macro="random_state" />
1063 </when>
1064 <when value="stratified">
1065 <expand macro="train_test_split_test_size" />
1066 <expand macro="random_state" />
1067 </when>
1068 <when value="group">
1069 <expand macro="train_test_split_test_size" optional="true" />
1070 <expand macro="random_state" />
1071 <param argument="group_names" type="text" value="" optional="true" label="Type in group names instead"
1072 help="For example: chr6, chr7. This parameter is optional. If used, it will override the holdout size and random seed." />
1073 <yield />
1074 </when>
1075 </conditional>
1076 <!--param argument="train_size" type="float" optional="True" value="" label="Train size:" />-->
1077 </xml>
1078
1079 <xml name="train_test_split_test_size" token_optional="false"> <!-- Float holdout-size param (default 0.2); whether it is optional is controlled by token_optional. -->
1080 <param name="test_size" type="float" value="0.2" optional="@OPTIONAL@" label="Holdout size" help="Less than 1, for proportion; greater than 1 (integer), for number of samples." />
1081 </xml>
1082
1083 <xml name="feature_selection_algorithms"> <!-- Option list of feature selection algorithms for a select param; SelectKBest is preselected and callers may append options through the yield. -->
1084 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
1085 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
1086 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
1087 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
1088 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
1089 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
1090 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
1091 <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
1092 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
1093 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
1094 <yield />
1095 </xml>
1096
1097 <xml name="feature_selection_algorithm_details">
1098 <when value="GenericUnivariateSelect">
1099 <expand macro="feature_selection_score_function" />
1100 <section name="options" title="Advanced Options" expanded="False">
1101 <param argument="mode" type="select" label="Feature selection mode">
1102 <option value="percentile">percentile</option>
1103 <option value="k_best">k_best</option>
1104 <option value="fpr">fpr</option>
1105 <option value="fdr">fdr</option>
1106 <option value="fwe">fwe</option>
340 </param> 1107 </param>
341 </xml> 1108 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
342 1109 </section>
343 <xml name="beta"> 1110 </when>
344 <param argument="beta" type="float" value="1.0" label="The strength of recall versus precision in the F-score" help=" " /> 1111 <when value="SelectPercentile">
345 </xml> 1112 <expand macro="feature_selection_score_function" />
346 1113 <section name="options" title="Advanced Options" expanded="False">
347 1114 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
348 <!--Data interface--> 1115 </section>
349 1116 </when>
350 <xml name="samples_tabular" token_label1="Training samples dataset:" token_multiple1="false" token_multiple2="false"> 1117 <when value="SelectKBest">
351 <param name="infile1" type="data" format="tabular" label="@LABEL1@" /> 1118 <expand macro="feature_selection_score_function" />
352 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 1119 <section name="options" title="Advanced Options" expanded="False">
353 <conditional name="column_selector_options_1"> 1120 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
354 <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@" /> 1121 </section>
355 </conditional> 1122 </when>
356 <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:" /> 1123 <when value="SelectFpr">
357 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 1124 <expand macro="feature_selection_score_function" />
358 <conditional name="column_selector_options_2"> 1125 <section name="options" title="Advanced Options" expanded="False">
359 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE2@" infile="infile2" /> 1126 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept." />
360 </conditional> 1127 </section>
361 <yield /> 1128 </when>
362 </xml> 1129 <when value="SelectFdr">
363 1130 <expand macro="feature_selection_score_function" />
364 <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1"> 1131 <section name="options" title="Advanced Options" expanded="False">
365 <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:"> 1132 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep." />
366 <option value="by_index_number" selected="true">Select columns by column index number(s)</option> 1133 </section>
367 <option value="all_but_by_index_number">All columns EXCLUDING some by column index number(s)</option> 1134 </when>
368 <option value="by_header_name">Select columns by column header name(s)</option> 1135 <when value="SelectFwe">
369 <option value="all_but_by_header_name">All columns EXCLUDING some by column header name(s)</option> 1136 <expand macro="feature_selection_score_function" />
370 <option value="all_columns">All columns</option> 1137 <section name="options" title="Advanced Options" expanded="False">
1138 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep." />
1139 </section>
1140 </when>
1141 <when value="VarianceThreshold">
1142 <section name="options" title="Options" expanded="False">
1143 <param argument="threshold" type="float" value="0.0" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed." />
1144 </section>
1145 </when>
1146 </xml>
1147
1148 <xml name="feature_selection_SelectFromModel">
1149 <when value="SelectFromModel">
1150 <conditional name="model_inputter">
1151 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
1152 <option value="new" selected="true">Yes</option>
1153 <option value="prefitted">No. Load a prefitted estimator</option>
371 </param> 1154 </param>
372 <when value="by_index_number"> 1155 <when value="new">
373 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):" /> 1156 <expand macro="estimator_selector_fs" />
374 </when> 1157 </when>
375 <when value="all_but_by_index_number"> 1158 <when value="prefitted">
376 <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" use_header_names="true" data_ref="@INFILE@" label="Select target column(s):" /> 1159 <param name="fitted_estimator" type="data" format='h5mlm' label="Load a prefitted estimator" />
377 </when> 1160 </when>
378 <when value="by_header_name"> 1161 </conditional>
379 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2" /> 1162 <expand macro="feature_selection_SelectFromModel_options" />
1163 </when>
1164 </xml>
1165
1166 <xml name="feature_selection_SelectFromModel_no_prefitted">
1167 <when value="SelectFromModel">
1168 <conditional name="model_inputter">
1169 <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
1170 <option value="new" selected="true">Yes</option>
1171 </param>
1172 <when value="new">
1173 <expand macro="estimator_selector_all" />
380 </when> 1174 </when>
381 <when value="all_but_by_header_name"> 1175 </conditional>
382 <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2" /> 1176 <expand macro="feature_selection_SelectFromModel_options" />
1177 </when>
1178 </xml>
1179
1180 <xml name="feature_selection_SelectFromModel_options"> <!-- Advanced options section for SelectFromModel: free-text threshold, norm_order and an optional max_features cap. -->
1181 <section name="options" title="Advanced Options" expanded="False">
1182 <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
1183 <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
1184 <param argument="max_features" type="integer" value="" optional="true" label="The maximum number of features selected scoring above threshold" help="To disable threshold and only select based on max_features, set threshold=-np.inf." />
1185 </section>
1186 </xml>
1187
1188 <xml name="feature_selection_RFE"> <!-- when-branch for RFE: caller content is placed first through the yield, followed by the advanced options section. -->
1189 <when value="RFE">
1190 <yield />
1191 <section name="options" title="Advanced Options" expanded="False">
1192 <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
1193 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
1194 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1195 </section>
1196 </when>
1197 </xml>
1198
1199 <xml name="feature_selection_RFECV_fs"> <!-- when-branch for RFECV using the full cv macro (including group-based splitters); caller content is placed first through the yield. -->
1200 <when value="RFECV">
1201 <yield />
1202 <section name="options" title="Advanced Options" expanded="False">
1203 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
1204 <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected" />
1205 <expand macro="cv" />
1206 <expand macro="scoring_selection" />
1207 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1208 </section>
1209 </when>
1210 </xml>
1211
1212 <xml name="feature_selection_RFECV_pipeline"> <!-- when-branch for RFECV using cv_reduced, so no group-based splitters are offered here; caller content is placed first through the yield. -->
1213 <when value="RFECV">
1214 <yield />
1215 <section name="options" title="Advanced Options" expanded="False">
1216 <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
1217 <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected" />
1218 <expand macro="cv_reduced" />
1219 <!-- TODO: group splitter support-->
1220 <expand macro="scoring_selection" />
1221 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1222 </section>
1223 </when>
1224 </xml>
1225
1226 <xml name="feature_selection_DyRFECV_fs"> <!-- when-branch for DyRFECV: same layout as the RFECV branch, except that step is a text param whose sanitizer additionally allows square brackets so a list can be typed. -->
1227 <when value="DyRFECV">
1228 <yield />
1229 <section name="options" title="Advanced Options" expanded="False">
1230 <param argument="step" type="text" size="30" value="1" label="step" optional="true" help="Default = 1. Support float, int and list." >
1231 <sanitizer>
1232 <valid initial="default">
1233 <add value="[" />
1234 <add value="]" />
1235 </valid>
1236 </sanitizer>
1237 </param>
1238 <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected" />
1239 <expand macro="cv" />
1240 <expand macro="scoring_selection" />
1241 <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
1242 </section>
1243 </when>
1244 </xml>
1245
1246 <xml name="feature_selection_pipeline"> <!-- Feature selection conditional for pipeline building: base algorithm list, SelectFromModel without the prefitted choice, and RFE / RFECV fed with estimator_selector_all. -->
1247 <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no custom estimator for RFE and RFECV-->
1248 <conditional name="fs_algorithm_selector">
1249 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
1250 <expand macro="feature_selection_algorithms" />
1251 </param>
1252 <expand macro="feature_selection_algorithm_details" />
1253 <expand macro="feature_selection_SelectFromModel_no_prefitted" />
1254 <expand macro="feature_selection_RFE">
1255 <expand macro="estimator_selector_all" />
1256 </expand>
1257 <expand macro="feature_selection_RFECV_pipeline">
1258 <expand macro="estimator_selector_all" />
1259 </expand>
1260 <!-- TODO: add DyRFECV to pipeline-->
1261 </conditional>
1262 </xml>
1263
1264 <xml name="feature_selection_fs"> <!-- Feature selection conditional for the standalone tool: base algorithm list plus DyRFECV, SelectFromModel with the prefitted choice, and RFE / RFECV / DyRFECV fed with estimator_selector_fs. -->
1265 <conditional name="fs_algorithm_selector">
1266 <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
1267 <expand macro="feature_selection_algorithms">
1268 <option value="DyRFECV">DyRFECV - Extended RFECV with changeable steps</option>
1269 </expand>
1270 </param>
1271 <expand macro="feature_selection_algorithm_details" />
1272 <expand macro="feature_selection_SelectFromModel" />
1273 <expand macro="feature_selection_RFE">
1274 <expand macro="estimator_selector_fs" />
1275 </expand>
1276 <expand macro="feature_selection_RFECV_fs">
1277 <expand macro="estimator_selector_fs" />
1278 </expand>
1279 <expand macro="feature_selection_DyRFECV_fs">
1280 <expand macro="estimator_selector_fs" />
1281 </expand>
1282 </conditional>
1283 </xml>
1284
1285 <xml name="feature_selection_score_function"> <!-- Select param score_func offering five univariate score functions; no option is marked as selected. -->
1286 <param argument="score_func" type="select" label="Select a score function">
1287 <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
1288 <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
1289 <option value="f_regression">f_regression - Univariate linear regression tests</option>
1290 <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
1291 <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
1292 </param>
1293 </xml>
1294
1295 <xml name="model_validation_common_options"> <!-- Shared model validation inputs: the cv macro, then the verbose macro, then any caller content through the yield. -->
1296 <expand macro="cv" />
1297 <expand macro="verbose" />
1298 <yield />
1299 </xml>
1300
1301 <xml name="scoring_selection" token_help="Metric to refit the best estimator."> <!-- Primary metric conditional: every option value from scoring_selection_options needs a when-branch of the same value, which expands the matching secondary scoring macro. -->
1302 <conditional name="scoring">
1303 <param name="primary_scoring" type="select" multiple="false" label="Select the primary metric (scoring):" help="@HELP@">
1304 <option value="default" selected="true">default with estimator</option>
1305 <expand macro="scoring_selection_options" />
1306 </param>
1307 <when value="default" />
1308 <when value="accuracy"><expand macro="secondary_scoring_selection_classification" /></when>
1309 <when value="balanced_accuracy"><expand macro="secondary_scoring_selection_classification" /></when>
1310 <when value="average_precision"><expand macro="secondary_scoring_selection_classification" /></when>
1311 <when value="f1"><expand macro="secondary_scoring_selection_classification" /></when>
1312 <when value="f1_micro"><expand macro="secondary_scoring_selection_classification" /></when>
1313 <when value="f1_macro"><expand macro="secondary_scoring_selection_classification" /></when>
1314 <when value="f1_weighted"><expand macro="secondary_scoring_selection_classification" /></when>
1315 <when value="f1_samples"><expand macro="secondary_scoring_selection_classification" /></when>
1316 <when value="neg_log_loss"><expand macro="secondary_scoring_selection_classification" /></when>
1317 <when value="precision"><expand macro="secondary_scoring_selection_classification" /></when>
1318 <when value="precision_micro"><expand macro="secondary_scoring_selection_classification" /></when>
1319 <when value="precision_macro"><expand macro="secondary_scoring_selection_classification" /></when>
1320 <when value="precision_weighted"><expand macro="secondary_scoring_selection_classification" /></when>
1321 <when value="precision_samples"><expand macro="secondary_scoring_selection_classification" /></when>
1322 <when value="recall"><expand macro="secondary_scoring_selection_classification" /></when>
1323 <when value="recall_micro"><expand macro="secondary_scoring_selection_classification" /></when>
1324 <when value="recall_macro"><expand macro="secondary_scoring_selection_classification" /></when>
1325 <when value="recall_weighted"><expand macro="secondary_scoring_selection_classification" /></when>
1326 <when value="recall_samples"><expand macro="secondary_scoring_selection_classification" /></when>
1327 <when value="roc_auc"><expand macro="secondary_scoring_selection_classification" /></when>
1328 <when value="explained_variance"><expand macro="secondary_scoring_selection_regression" /></when>
1329 <when value="neg_mean_absolute_error"><expand macro="secondary_scoring_selection_regression" /></when>
1330 <when value="neg_mean_squared_error"><expand macro="secondary_scoring_selection_regression" /></when>
1331 <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression" /></when>
1332 <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression" /></when>
1333 <when value="r2"><expand macro="secondary_scoring_selection_regression" /></when>
1334 <when value="max_error"><expand macro="secondary_scoring_selection_regression" /></when>
1335 <when value="spearman_correlation"><expand macro="secondary_scoring_selection_regression" /></when>
1336 <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly" /></when>
1337 <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly" /></when>
1338 </conditional>
1339 </xml>
1340
1341 <xml name="scoring_selection_options"> <!-- Primary metric options grouped as classification, regression and anomaly detection; values must stay in sync with the when-branches of scoring_selection. -->
1342 <option value="accuracy">Classification -- 'accuracy'</option>
1343 <option value="balanced_accuracy">Classification -- 'balanced_accuracy'</option>
1344 <option value="average_precision">Classification -- 'average_precision'</option>
1345 <option value="f1">Classification -- 'f1'</option>
1346 <option value="f1_micro">Classification -- 'f1_micro'</option>
1347 <option value="f1_macro">Classification -- 'f1_macro'</option>
1348 <option value="f1_weighted">Classification -- 'f1_weighted'</option>
1349 <option value="f1_samples">Classification -- 'f1_samples'</option>
1350 <option value="neg_log_loss">Classification -- 'neg_log_loss'</option>
1351 <option value="precision">Classification -- 'precision'</option>
1352 <option value="precision_micro">Classification -- 'precision_micro'</option>
1353 <option value="precision_macro">Classification -- 'precision_macro'</option>
1354 <option value="precision_weighted">Classification -- 'precision_weighted'</option>
1355 <option value="precision_samples">Classification -- 'precision_samples'</option>
1356 <option value="recall">Classification -- 'recall'</option>
1357 <option value="recall_micro">Classification -- 'recall_micro'</option>
1358 <option value="recall_macro">Classification -- 'recall_macro'</option>
1359 <option value="recall_weighted">Classification -- 'recall_weighted'</option>
1360 <option value="recall_samples">Classification -- 'recall_samples'</option>
1361 <option value="roc_auc">Classification -- 'roc_auc'</option>
1362 <option value="explained_variance">Regression -- 'explained_variance'</option>
1363 <option value="neg_mean_absolute_error">Regression -- 'neg_mean_absolute_error'</option>
1364 <option value="neg_mean_squared_error">Regression -- 'neg_mean_squared_error'</option>
1365 <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
1366 <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
1367 <option value="r2">Regression -- 'r2'</option>
1368 <option value="max_error">Regression -- 'max_error'</option>
1369 <option value="spearman_correlation">Regression -- Spearman's rank correlation coefficient</option>
1370 <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
1371 <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
1372 </xml>
1373
<!-- Multi-select of additional classification scorers for multi-metric evaluation. Option values are passed on as scorer names, so they must match scikit-learn's predefined scorer strings exactly. -->
<!-- The weighted precision/recall entries use the scikit-learn spellings 'precision_weighted' and 'recall_weighted'; the former 'precision_wighted' / 'recall_wighted' values are not valid scorer names. -->
1374 <xml name="secondary_scoring_selection_classification">
1375 <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored.">
1376 <option value="accuracy">Classification -- 'accuracy'</option>
1377 <option value="balanced_accuracy">Classification -- 'balanced_accuracy'</option>
1378 <option value="average_precision">Classification -- 'average_precision'</option>
1379 <option value="f1">Classification -- 'f1'</option>
1380 <option value="f1_micro">Classification -- 'f1_micro'</option>
1381 <option value="f1_macro">Classification -- 'f1_macro'</option>
1382 <option value="f1_weighted">Classification -- 'f1_weighted'</option>
1383 <option value="f1_samples">Classification -- 'f1_samples'</option>
1384 <option value="neg_log_loss">Classification -- 'neg_log_loss'</option>
1385 <option value="precision">Classification -- 'precision'</option>
1386 <option value="precision_micro">Classification -- 'precision_micro'</option>
1387 <option value="precision_macro">Classification -- 'precision_macro'</option>
1388 <option value="precision_weighted">Classification -- 'precision_weighted'</option>
1389 <option value="precision_samples">Classification -- 'precision_samples'</option>
1390 <option value="recall">Classification -- 'recall'</option>
1391 <option value="recall_micro">Classification -- 'recall_micro'</option>
1392 <option value="recall_macro">Classification -- 'recall_macro'</option>
1393 <option value="recall_weighted">Classification -- 'recall_weighted'</option>
1394 <option value="recall_samples">Classification -- 'recall_samples'</option>
1395 <option value="roc_auc">Classification -- 'roc_auc'</option>
1396 </param>
1397 </xml>
1398
<!-- Multi-select of additional regression scorers for multi-metric evaluation; lists the same regression entries as scoring_selection_options. -->
1399 <xml name="secondary_scoring_selection_regression">
1400 <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored.">
1401 <option value="explained_variance">Regression -- 'explained_variance'</option>
1402 <option value="neg_mean_absolute_error">Regression -- 'neg_mean_absolute_error'</option>
1403 <option value="neg_mean_squared_error">Regression -- 'neg_mean_squared_error'</option>
1404 <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
1405 <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
1406 <option value="r2">Regression -- 'r2'</option>
1407 <option value="max_error">Regression -- 'max_error'</option>
1408 <option value="spearman_correlation">Regression -- Spearman's rank correlation coefficient</option>
1409 </param>
1410 </xml>
1411
<!-- Multi-select of additional scorers offered when an anomaly-detection scorer is the primary metric. Unlike the classification and regression variants it expands the complete scoring_selection_options list, so every metric is selectable. -->
<!-- NOTE(review): 'anormaly' in the macro name is presumably a misspelling of 'anomaly'; the name is referenced by expand elements, so it is kept as is. -->
1412 <xml name="secondary_scoring_selection_anormaly">
1413 <param name="secondary_scoring" type="select" multiple="true" label="Additional scoring used in multi-metric mode:" help="If the same metric with the primary is chosen, the metric will be ignored.">
1414 <expand macro="scoring_selection_options" />
1415 </param>
1416 </xml>
1417
<!-- Single optional 'pre_dispatch' parameter. Its type, value and help text come from the TYPE, DEFAULT_VALUE and HELP tokens, which default to a hidden parameter with value 'all'. -->
1418 <xml name="pre_dispatch" token_type="hidden" token_default_value="all" token_help="Number of predispatched jobs for parallel execution">
1419 <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@" />
1420 </xml>
1421
<!-- Estimator input plus optional hyperparameter overrides. -->
<!-- infile_estimator: an h5mlm dataset holding the pipeline/estimator object. -->
<!-- hyperparams_swapping: a collapsed section with 1 to 30 repeats, each pairing a parameter name (sp_name) with a new value (sp_value). -->
<!-- sp_name options are built from the 'hyper_params' metadata file of infile_estimator, with column 2 as display name and column 1 as value, de-duplicated on column 1. NOTE(review): startswith="@" presumably restricts the options to rows beginning with '@'; confirm against the Galaxy options documentation. -->
<!-- sp_value keeps the default sanitizer whitelist and additionally allows single and double quote characters. -->
1422 <xml name="estimator_and_hyperparameter">
1423 <param name="infile_estimator" type="data" format="h5mlm" label="Choose the dataset containing pipeline/estimator object" />
1424 <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false">
1425 <repeat name="param_set" min="1" max="30" title="New hyperparameter setting">
1426 <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)">
1427 <options from_dataset="infile_estimator" meta_file_key="hyper_params" startswith="@">
1428 <column name="name" index="2" />
1429 <column name="value" index="1" />
1430 <filter type="unique_value" name="unique_param" column="1" />
1431 </options>
1432 </param>
1433 <param name="sp_value" type="text" value="" optional="true" label="New value" help="Supports int, float, boolean, single quoted string, and selected object constructor. Similar to the `Parameter settings for search` section in `searchcv` tool except that only single value is expected here.">
1434 <sanitizer>
1435 <valid initial="default">
1436 <add value="&apos;" />
1437 <add value="&quot;" />
1438 </valid>
1439 </sanitizer>
1440 </param>
1441 </repeat>
1442 </section>
1443 </xml>
1444
<!-- Options block for the search CV tools: expands the scoring_selection and model_validation_common_options macros, then adds the boolean error_score (checked by default) and return_train_score (unchecked by default) parameters. -->
<!-- The pre_dispatch, iid and refit entries below are commented out and therefore not offered. -->
1445 <xml name="search_cv_options">
1446 <expand macro="scoring_selection" />
1447 <expand macro="model_validation_common_options" />
1448 <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/-->
1449 <!--param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds" />-->
1450 <!--param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!" /> -->
1451 <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised." />
1452 <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help="" />
1453 </xml>
1454
<!-- Options for the estimator module select; 'svm' is preselected. The trailing yield lets an expanding macro append further options, as estimator_selector_fs does with 'custom_estimator'. -->
<!-- Each value here needs a matching when-branch in estimator_suboptions. -->
1455 <xml name="estimator_module_options">
1456 <option value="svm" selected="true">sklearn.svm</option>
1457 <option value="linear_model">sklearn.linear_model</option>
1458 <option value="ensemble">sklearn.ensemble</option>
1459 <option value="naive_bayes">sklearn.naive_bayes</option>
1460 <option value="tree">sklearn.tree</option>
1461 <option value="neighbors">sklearn.neighbors</option>
1462 <option value="xgboost">xgboost</option>
1463 <yield />
1464 </xml>
1465
<!-- One when-branch per module value from estimator_module_options. Every branch offers a 'selected_estimator' select listing that module's estimator classes, followed by the estimator_params_text macro for free-text parameter settings. -->
<!-- The trailing yield lets an expanding macro add further when-branches, for example for a custom estimator. -->
1466 <xml name="estimator_suboptions">
1467 <when value="svm">
1468 <param name="selected_estimator" type="select" label="Choose estimator class:">
1469 <option value="LinearSVC" selected="true">LinearSVC</option>
1470 <option value="LinearSVR">LinearSVR</option>
1471 <option value="NuSVC">NuSVC</option>
1472 <option value="NuSVR">NuSVR</option>
1473 <option value="OneClassSVM">OneClassSVM</option>
1474 <option value="SVC">SVC</option>
1475 <option value="SVR">SVR</option>
1476 </param>
1477 <expand macro="estimator_params_text" />
1478 </when>
1479 <when value="linear_model">
1480 <param name="selected_estimator" type="select" label="Choose estimator class:">
1481 <option value="ARDRegression" selected="true">ARDRegression</option>
1482 <option value="BayesianRidge">BayesianRidge</option>
1483 <option value="ElasticNet">ElasticNet</option>
1484 <option value="ElasticNetCV">ElasticNetCV</option>
1485 <option value="HuberRegressor">HuberRegressor</option>
1486 <option value="Lars">Lars</option>
1487 <option value="LarsCV">LarsCV</option>
1488 <option value="Lasso">Lasso</option>
1489 <option value="LassoCV">LassoCV</option>
1490 <option value="LassoLars">LassoLars</option>
1491 <option value="LassoLarsCV">LassoLarsCV</option>
1492 <option value="LassoLarsIC">LassoLarsIC</option>
1493 <option value="LinearRegression">LinearRegression</option>
1494 <option value="LogisticRegression">LogisticRegression</option>
1495 <option value="LogisticRegressionCV">LogisticRegressionCV</option>
1496 <option value="MultiTaskLasso">MultiTaskLasso</option>
1497 <option value="MultiTaskElasticNet">MultiTaskElasticNet</option>
1498 <option value="MultiTaskLassoCV">MultiTaskLassoCV</option>
1499 <option value="MultiTaskElasticNetCV">MultiTaskElasticNetCV</option>
1500 <option value="OrthogonalMatchingPursuit">OrthogonalMatchingPursuit</option>
1501 <option value="OrthogonalMatchingPursuitCV">OrthogonalMatchingPursuitCV</option>
1502 <option value="PassiveAggressiveClassifier">PassiveAggressiveClassifier</option>
1503 <option value="PassiveAggressiveRegressor">PassiveAggressiveRegressor</option>
1504 <option value="Perceptron">Perceptron</option>
1505 <option value="RANSACRegressor">RANSACRegressor</option>
1506 <option value="Ridge">Ridge</option>
1507 <option value="RidgeClassifier">RidgeClassifier</option>
1508 <option value="RidgeClassifierCV">RidgeClassifierCV</option>
1509 <option value="RidgeCV">RidgeCV</option>
1510 <option value="SGDClassifier">SGDClassifier</option>
1511 <option value="SGDRegressor">SGDRegressor</option>
1512 <option value="TheilSenRegressor">TheilSenRegressor</option>
1513 </param>
1514 <expand macro="estimator_params_text" />
1515 </when>
1516 <when value="ensemble">
1517 <param name="selected_estimator" type="select" label="Choose estimator class:">
1518 <option value="AdaBoostClassifier" selected="true">AdaBoostClassifier</option>
1519 <option value="AdaBoostRegressor">AdaBoostRegressor</option>
1520 <option value="BaggingClassifier">BaggingClassifier</option>
1521 <option value="BaggingRegressor">BaggingRegressor</option>
1522 <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
1523 <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
1524 <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
1525 <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
1526 <option value="IsolationForest">IsolationForest</option>
1527 <option value="HistGradientBoostingClassifier">HistGradientBoostingClassifier</option>
1528 <option value="HistGradientBoostingRegressor">HistGradientBoostingRegressor</option>
1529 <option value="RandomForestClassifier">RandomForestClassifier</option>
1530 <option value="RandomForestRegressor">RandomForestRegressor</option>
1531 <option value="RandomTreesEmbedding">RandomTreesEmbedding</option>
1532 <!--option value="VotingClassifier">VotingClassifier</option-->
1533 </param>
1534 <expand macro="estimator_params_text" />
1535 </when>
1536 <when value="naive_bayes">
1537 <param name="selected_estimator" type="select" label="Choose estimator class:">
1538 <option value="BernoulliNB" selected="true">BernoulliNB</option>
1539 <option value="GaussianNB">GaussianNB</option>
1540 <option value="MultinomialNB">MultinomialNB</option>
1541 </param>
1542 <expand macro="estimator_params_text" />
1543 </when>
1544 <when value="tree">
1545 <param name="selected_estimator" type="select" label="Choose estimator class:">
1546 <option value="DecisionTreeClassifier" selected="true">DecisionTreeClassifier</option>
1547 <option value="DecisionTreeRegressor">DecisionTreeRegressor</option>
1548 <option value="ExtraTreeClassifier">ExtraTreeClassifier</option>
1549 <option value="ExtraTreeRegressor">ExtraTreeRegressor</option>
1550 </param>
1551 <expand macro="estimator_params_text" />
1552 </when>
1553 <when value="neighbors">
1554 <param name="selected_estimator" type="select" label="Choose estimator class:">
1555 <option value="KNeighborsClassifier" selected="true">KNeighborsClassifier</option>
1556 <option value="KNeighborsRegressor">KNeighborsRegressor</option>
1557 <!--option value="BallTree">BallTree</option-->
1558 <!--option value="KDTree">KDTree</option-->
1559 <option value="KernelDensity">KernelDensity</option>
1560 <option value="LocalOutlierFactor">LocalOutlierFactor</option>
1561 <option value="RadiusNeighborsClassifier">RadiusNeighborsClassifier</option>
1562 <option value="RadiusNeighborsRegressor">RadiusNeighborsRegressor</option>
1563 <option value="NearestCentroid">NearestCentroid</option>
1564 <option value="NearestNeighbors">NearestNeighbors</option>
1565 </param>
1566 <expand macro="estimator_params_text" />
1567 </when>
1568 <when value="xgboost">
1569 <param name="selected_estimator" type="select" label="Choose estimator class:">
1570 <option value="XGBRegressor" selected="true">XGBRegressor</option>
1571 <option value="XGBClassifier">XGBClassifier</option>
1572 </param>
1573 <expand macro="estimator_params_text" />
1574 </when>
1575 <yield />
1576 </xml>
1577
<!-- Complete estimator chooser: an 'estimator_selector' conditional whose 'selected_module' select is filled from estimator_module_options and whose branches come from estimator_suboptions. Neither macro is extended here, so only the built-in modules are offered. -->
1578 <xml name="estimator_selector_all">
1579 <conditional name="estimator_selector">
1580 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
1581 <expand macro="estimator_module_options" />
1582 </param>
1583 <expand macro="estimator_suboptions" />
1584 </conditional>
1585 </xml>
1586
1587 <xml name="estimator_selector_fs">
1588 <conditional name="estimator_selector">
1589 <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
1590 <expand macro="estimator_module_options">
1591 <option value="custom_estimator">Load a custom estimator</option>
1592 </expand>
1593 </param>
1594 <expand macro="estimator_suboptions">
1595 <when value="custom_estimator">
1596 <param name="c_estimator" type="data" format="h5mlm" label="Choose the dataset containing the custom estimator or pipeline:" />
383 </when> 1597 </when>
384 <when value="all_columns"> 1598 </expand>
385 </when> 1599 </conditional>
386 </xml> 1600 </xml>
387 1601
388 <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False"> 1602 <xml name="estimator_params_text" token_label="Type in parameter settings if different from default:" token_default_value=''
389 <conditional name="true_columns"> 1603 token_help="Dictionary-capable, e.g., C=1, kernel='linear'. No double quotes. Leave this box blank for default estimator.">
390 <param name="selected_input1" type="select" label="Select the input type of true labels dataset:"> 1604 <param name="text_params" type="text" value="@DEFAULT_VALUE@" optional="true" label="@LABEL@" help="@HELP@">
391 <option value="tabular" selected="true">Tabular</option> 1605 <sanitizer>
392 <option value="sparse">Sparse</option> 1606 <valid initial="default">
393 </param> 1607 <add value="&apos;" />
394 <when value="tabular"> 1608 </valid>
395 <param name="infile1" type="data" label="@LABEL1@" /> 1609 </sanitizer>
396 <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:" /> 1610 </param>
397 </when> 1611 </xml>
398 <when value="sparse"> 1612
399 <param name="infile1" type="data" format="txt" label="@LABEL1@" /> 1613 <xml name="kernel_approximation_all">
400 </when> 1614 <conditional name="kernel_approximation_selector">
401 </conditional> 1615 <param name="select_algorithm" type="select" label="Choose a kernel approximation algorithm:">
402 <conditional name="predicted_columns"> 1616 <option value="Nystroem" selected="true">Nystroem</option>
403 <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:"> 1617 <option value="RBFSampler">RBFSampler</option>
404 <option value="tabular" selected="true">Tabular</option> 1618 <option value="AdditiveChi2Sampler">AdditiveChi2Sampler</option>
405 <option value="sparse">Sparse</option> 1619 <option value="SkewedChi2Sampler">SkewedChi2Sampler</option>
406 </param> 1620 </param>
407 <when value="tabular"> 1621 <when value="Nystroem">
408 <param name="infile2" type="data" label="@LABEL2@" /> 1622 <expand macro="estimator_params_text"
409 <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):" /> 1623 help="Default(=blank): coef0=None, degree=None, gamma=None, kernel='rbf', kernel_params=None, n_components=100, random_state=None. No double quotes" />
410 </when> 1624 </when>
411 <when value="sparse"> 1625 <when value="RBFSampler">
412 <param name="infile2" type="data" format="txt" label="@LABEL1@" /> 1626 <expand macro="estimator_params_text"
413 </when> 1627 help="Default(=blank): gamma=1.0, n_components=100, random_state=None." />
414 </conditional> 1628 </when>
415 </xml> 1629 <when value="AdditiveChi2Sampler">
416 1630 <expand macro="estimator_params_text"
417 <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False"> 1631 help="Default(=blank): sample_interval=None, sample_steps=2." />
418 <param name="infile1" type="data" format="tabular" label="@LABEL1@" /> 1632 </when>
419 <param name="header1" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 1633 <when value="SkewedChi2Sampler">
420 <conditional name="column_selector_options_1"> 1634 <expand macro="estimator_params_text"
421 <expand macro="samples_column_selector_options" multiple="@MULTIPLE1@" /> 1635 help="Default(=blank): n_components=100, random_state=None, skewedness=1.0." />
422 </conditional> 1636 </when>
423 <param name="infile2" type="data" format="tabular" label="@LABEL2@" /> 1637 </conditional>
424 <param name="header2" type="boolean" optional="True" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" /> 1638 </xml>
425 <conditional name="column_selector_options_2"> 1639
426 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="@MULTIPLE@" infile="infile2" /> 1640 <xml name="matrix_decomposition_all">
427 </conditional> 1641 <conditional name="matrix_decomposition_selector">
428 </xml> 1642 <param name="select_algorithm" type="select" label="Choose a matrix decomposition algorithm:">
429 1643 <option value="DictionaryLearning" selected="true">DictionaryLearning</option>
430 <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format."> 1644 <option value="FactorAnalysis">FactorAnalysis</option>
431 <repeat name="@NAME@" min="1" max="@MAX_NUM@" title="Select input file(s):"> 1645 <option value="FastICA">FastICA</option>
432 <param name="input" type="data" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@" /> 1646 <option value="IncrementalPCA">IncrementalPCA</option>
433 </repeat> 1647 <option value="KernelPCA">KernelPCA</option>
434 </xml> 1648 <option value="LatentDirichletAllocation">LatentDirichletAllocation</option>
435 1649 <option value="MiniBatchDictionaryLearning">MiniBatchDictionaryLearning</option>
436 <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2=""> 1650 <option value="MiniBatchSparsePCA">MiniBatchSparsePCA</option>
437 <param name="infile1" type="data" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@" /> 1651 <option value="NMF">NMF</option>
438 <expand macro="input_tabular_target" /> 1652 <option value="PCA">PCA</option>
439 </xml> 1653 <option value="SparsePCA">SparsePCA</option>
440 1654 <!--option value="SparseCoder">SparseCoder</option-->
441 <xml name="sl_mixed_input"> 1655 <option value="TruncatedSVD">TruncatedSVD</option>
442 <conditional name="input_options"> 1656 </param>
443 <expand macro="data_input_options" /> 1657 <when value="DictionaryLearning">
444 <expand macro="data_input_whens" /> 1658 <expand macro="estimator_params_text"
445 </conditional> 1659 help="Default(=blank): alpha=1, code_init=None, dict_init=None, fit_algorithm='lars', max_iter=1000, n_components=None, random_state=None, split_sign=False, tol=1e-08, transform_algorithm='omp', transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False." />
446 </xml> 1660 </when>
447 1661 <when value="FactorAnalysis">
448 <xml name="sl_mixed_input_plus_sequence"> 1662 <expand macro="estimator_params_text"
449 <conditional name="input_options"> 1663 help="Default(=blank): copy=True, iterated_power=3, max_iter=1000, n_components=None, noise_variance_init=None, random_state=0, svd_method='randomized', tol=0.01." />
450 <expand macro="data_input_options"> 1664 </when>
451 <option value="seq_fasta">sequnences in a fasta file</option> 1665 <when value="FastICA">
452 <option value="refseq_and_interval">reference genome and intervals</option> 1666 <expand macro="estimator_params_text"
453 </expand> 1667 help="Default(=blank): algorithm='parallel', fun='logcosh', fun_args=None, max_iter=200, n_components=None, random_state=None, tol=0.0001, w_init=None, whiten=True. No double quotes." />
454 <expand macro="data_input_whens"> 1668 </when>
455 <when value="seq_fasta"> 1669 <when value="IncrementalPCA">
456 <expand macro="inputs_seq_fasta" /> 1670 <expand macro="estimator_params_text"
457 </when> 1671 help="Default(=blank): batch_size=None, copy=True, n_components=None, whiten=False." />
458 <when value="refseq_and_interval"> 1672 </when>
459 <expand macro="inputs_refseq_and_interval" /> 1673 <when value="KernelPCA">
460 </when> 1674 <expand macro="estimator_params_text"
461 </expand> 1675 help="Default(=blank): alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', fit_inverse_transform=False, gamma=None, kernel='linear', kernel_params=None, max_iter=None, n_components=None, random_state=None, remove_zero_eig=False, tol=0. No double quotes." />
462 </conditional> 1676 </when>
463 </xml> 1677 <when value="LatentDirichletAllocation">
464 1678 <expand macro="estimator_params_text"
465 <xml name="data_input_options"> 1679 help="Default(=blank): batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, learning_method=None, learning_offset=10.0, max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001, n_components=10, n_topics=None, perp_tol=0.1, random_state=None, topic_word_prior=None, total_samples=1000000.0, verbose=0." />
466 <param name="selected_input" type="select" label="Select input type:"> 1680 </when>
467 <option value="tabular" selected="true">tabular data</option> 1681 <when value="MiniBatchDictionaryLearning">
468 <option value="sparse">sparse matrix</option> 1682 <expand macro="estimator_params_text"
469 <yield /> 1683 help="Default(=blank): alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', n_components=None, n_iter=1000, random_state=None, shuffle=True, split_sign=False, transform_algorithm='omp', transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False." />
470 </param> 1684 </when>
471 </xml> 1685 <when value="MiniBatchSparsePCA">
472 1686 <expand macro="estimator_params_text"
473 <xml name="data_input_whens"> 1687 help="Default(=blank): alpha=1, batch_size=3, callback=None, method='lars', n_components=None, n_iter=100, random_state=None, ridge_alpha=0.01, shuffle=True, verbose=False." />
474 <when value="tabular"> 1688 </when>
475 <expand macro="samples_tabular" multiple1="true" multiple2="false" /> 1689 <when value="NMF">
476 </when> 1690 <expand macro="estimator_params_text"
477 <when value="sparse"> 1691 help="Default(=blank): alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, n_components=None, random_state=None, shuffle=False, solver='cd', tol=0.0001, verbose=0." />
478 <expand macro="sparse_target" /> 1692 </when>
479 </when> 1693 <when value="PCA">
480 <yield /> 1694 <expand macro="estimator_params_text"
481 </xml> 1695 help="Default(=blank): copy=True, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False." />
482 1696 </when>
483 <xml name="input_tabular_target"> 1697 <when value="SparsePCA">
484 <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:" /> 1698 <expand macro="estimator_params_text"
485 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> 1699 help="Default(=blank): U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', n_components=None, random_state=None, ridge_alpha=0.01, tol=1e-08, verbose=False." />
486 <conditional name="column_selector_options_2"> 1700 </when>
487 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="false" infile="infile2" /> 1701 <when value="TruncatedSVD">
488 </conditional> 1702 <expand macro="estimator_params_text"
489 </xml> 1703 help="Default(=blank): algorithm='randomized', n_components=2, n_iter=5, random_state=None, tol=0.0." />
490 1704 </when>
491 <xml name="inputs_seq_fasta"> 1705 </conditional>
492 <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays." /> 1706 </xml>
493 <expand macro="input_tabular_target" /> 1707
494 </xml> 1708 <xml name="FeatureAgglomeration">
495 1709 <conditional name="FeatureAgglomeration_selector">
496 <xml name="inputs_refseq_and_interval"> 1710 <param name="select_algorithm" type="select" label="Choose the algorithm:">
497 <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence" /> 1711 <option value="FeatureAgglomeration" selected="true">FeatureAgglomeration</option>
498 <param name="interval_file" type="data" format="interval" label="Dataset containing sequence intervals for training" help="interval. Sequences will be retrieved from the reference genome and one-hot encoded to training arrays." /> 1712 </param>
499 <param name="target_file" type="data" format="bed" label="Dataset containing positions and features for target values." help="bed. The file will be compressed with `bgzip` and then indexed using `tabix`." /> 1713 <when value="FeatureAgglomeration">
500 <param name="infile2" type="data" format="tabular" label="Dataset containing the feature list for prediction" /> 1714 <expand macro="estimator_params_text"
501 <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> 1715 help="Default(=blank): affinity='euclidean', compute_full_tree='auto', connectivity=None, linkage='ward', memory=None, n_clusters=2, pooling_func=np.mean." />
502 <conditional name="column_selector_options_2"> 1716 </when>
503 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option2" col_name="col2" multiple="true" infile="infile2" /> 1717 </conditional>
504 </conditional> 1718 </xml>
505 </xml> 1719
506 1720 <xml name="skrebate">
507 <!--Advanced options--> 1721 <conditional name="skrebate_selector">
508 <xml name="nn_advanced_options"> 1722 <param name="select_algorithm" type="select" label="Choose the algorithm:">
509 <section name="options" title="Advanced Options" expanded="False"> 1723 <option value="ReliefF">ReliefF</option>
510 <yield /> 1724 <option value="SURF">SURF</option>
511 <param argument="weights" type="select" label="Weight function" help="Used in prediction."> 1725 <option value="SURFstar">SURFstar</option>
512 <option value="uniform" selected="true">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option> 1726 <option value="MultiSURF">MultiSURF</option>
513 <option value="distance">Weight points by the inverse of their distance. (Distance)</option> 1727 <option value="MultiSURFstar">MultiSURFstar</option>
514 </param> 1728 <!--option value="TuRF">TuRF</option> -->
515 <param argument="algorithm" type="select" label="Neighbor selection algorithm" help=" "> 1729 </param>
516 <option value="auto" selected="true">Auto</option> 1730 <when value="ReliefF">
517 <option value="ball_tree">BallTree</option> 1731 <expand macro="estimator_params_text"
518 <option value="kd_tree">KDTree</option> 1732 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False." />
519 <option value="brute">Brute-force</option> 1733 </when>
520 </param> 1734 <when value="SURF">
521 <param argument="leaf_size" type="integer" value="30" label="Leaf size" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree." /> 1735 <expand macro="estimator_params_text"
522 <!--param name="metric"--> 1736 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
523 <!--param name="p"--> 1737 </when>
524 <!--param name="metric_params"--> 1738 <when value="SURFstar">
525 </section> 1739 <expand macro="estimator_params_text"
526 </xml> 1740 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
527 1741 </when>
<!--svc_advanced_options: "Advanced Options" section that first yields to the caller, then declares kernel, degree, coef0, shrinking and probability params and expands the shared tol, max_iter and random_state macros. gamma, cache_size, class_weight and decision_function_shape are left commented out.-->
528 <xml name="svc_advanced_options"> 1742 <when value="MultiSURF">
529 <section name="options" title="Advanced Options" expanded="False"> 1743 <expand macro="estimator_params_text"
530 <yield /> 1744 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
531 <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used."> 1745 </when>
532 <option value="rbf" selected="true">rbf</option> 1746 <when value="MultiSURFstar">
533 <option value="linear">linear</option> 1747 <expand macro="estimator_params_text"
534 <option value="poly">poly</option> 1748 help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
535 <option value="sigmoid">sigmoid</option> 1749 </when>
536 <option value="precomputed">precomputed</option> 1750 <!--when value="TuRF">
537 </param> 1751 <expand macro="estimator_params_text"
538 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. dafault : 3 " /> 1752 help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False." />
539 <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
540 <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Independent term in kernel function. dafault: 0.0 " />
541 <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use the shrinking heuristic" help=" " />
542 <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method." />
543 <!-- param argument="cache_size"-->
544 <!--expand macro="class_weight"/-->
545 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. " />
546 <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit." />
547 <!--param argument="decision_function_shape"-->
548 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results." />
549 </section>
550 </xml>
551
<!--spectral_clustering_advanced_options: "Advanced Options" section that expands the shared n_clusters, random_state and n_init macros and declares eigen_solver, gamma, affinity, n_neighbors, assign_labels, degree and coef0 params. eigen_tol and kernel_params are left commented out.-->
<!--NOTE(review): coef0 is typed integer here while svc_advanced_options declares it as float; confirm whether fractional values should be accepted.-->
552 <xml name="spectral_clustering_advanced_options">
553 <section name="options" title="Advanced Options" expanded="False">
554 <expand macro="n_clusters" />
555 <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use.">
556 <option value="arpack" selected="true">arpack</option>
557 <option value="lobpcg">lobpcg</option>
558 <option value="amg">amg</option>
559 <!--None-->
560 </param>
561 <expand macro="random_state" />
562 <expand macro="n_init" />
563 <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''." />
564 <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. ">
565 <option value="rbf" selected="true">RBF</option>
566 <option value="precomputed">precomputed</option>
567 <option value="nearest_neighbors">Nearest neighbors</option>
568 </param>
569 <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''" />
570 <!--param argument="eigen_tol"-->
571 <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space.">
572 <option value="kmeans" selected="true">kmeans</option>
573 <option value="discretize">discretize</option>
574 </param>
575 <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 " />
576 <param argument="coef0" type="integer" optional="true" value="1" label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 " />
577 <!--param argument="kernel_params"-->
578 </section>
579 </xml>
580
<!--minibatch_kmeans_advanced_options: "Advanced Options" section that expands the shared n_clusters, init, n_init (default 3), max_iter (default 100), tol and random_state macros and declares batch_size, max_no_improvement, init_size and reassignment_ratio params. compute_labels is left commented out.-->
581 <xml name="minibatch_kmeans_advanced_options">
582 <section name="options" title="Advanced Options" expanded="False">
583 <expand macro="n_clusters" />
584 <expand macro="init" />
585 <expand macro="n_init" default_value="3" />
586 <expand macro="max_iter" default_value="100" />
587 <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ." />
588 <expand macro="random_state" />
589 <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches." />
590 <!--param argument="compute_labels"-->
591 <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help="
592 Convergence detection based on inertia (the consecutive number of mini batches that do not yield an improvement on the smoothed inertia).
593 To disable, set max_no_improvement to None. " />
594 <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )" />
595 <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results." />
596 </section>
597 </xml>
598
<!--kmeans_advanced_options: "Advanced Options" section that expands the shared n_clusters, init, n_init, max_iter, tol (default 0.0001), random_state and kmeans_algorithm macros and declares the copy_x boolean. precompute_distances is left commented out.-->
599 <xml name="kmeans_advanced_options">
600 <section name="options" title="Advanced Options" expanded="False">
601 <expand macro="n_clusters" />
602 <expand macro="init" />
603 <expand macro="n_init" />
604 <expand macro="max_iter" />
605 <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence." />
606 <!--param argument="precompute_distances"/-->
607 <expand macro="random_state" />
608 <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean." />
609 <expand macro="kmeans_algorithm" />
610 </section>
611 </xml>
612
<!--kmeans_algorithm: select param for the "algorithm" argument with options auto (selected by default), full and elkan.-->
613 <xml name="kmeans_algorithm">
614 <param argument="algorithm" type="select" label="K-means algorithm to use:">
615 <option value="auto" selected="true">auto</option>
616 <option value="full">full</option>
617 <option value="elkan">elkan</option>
618 </param>
619 </xml>
620
<!--birch_advanced_options: "Advanced Options" section declaring threshold (float, 0.5) and branching_factor (integer, 50) and expanding the shared n_clusters macro with default_value 3. compute_labels is left commented out.-->
621 <xml name="birch_advanced_options">
622 <section name="options" title="Advanced Options" expanded="False">
623 <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster." />
624 <param argument="branching_factor" type="integer" optional="true" value="50" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node." />
625 <expand macro="n_clusters" default_value="3" />
626 <!--param argument="compute_labels"/-->
627 </section>
628 </xml>
629
<!--dbscan_advanced_options: "Advanced Options" section declaring eps, min_samples, metric (free text, default euclidean), algorithm (select, default auto) and leaf_size params.-->
630 <xml name="dbscan_advanced_options">
631 <section name="options" title="Advanced Options" expanded="False">
632 <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood." />
633 <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point." />
634 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array." />
635 <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors.">
636 <option value="auto" selected="true">auto</option>
637 <option value="ball_tree">ball_tree</option>
638 <option value="kd_tree">kd_tree</option>
639 <option value="brute">brute</option>
640 </param>
641 <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying." />
642 </section>
643 </xml>
644
<!--clustering_algorithms_options: conditional "algorithm_options" keyed on the selected_algorithm select (KMeans selected by default); each branch expands the matching *_advanced_options macro.-->
645 <xml name="clustering_algorithms_options">
646 <conditional name="algorithm_options">
647 <param name="selected_algorithm" type="select" label="Clustering Algorithm">
648 <option value="KMeans" selected="true">KMeans</option>
649 <option value="SpectralClustering">Spectral Clustering</option>
650 <option value="MiniBatchKMeans">Mini Batch KMeans</option>
651 <option value="DBSCAN">DBSCAN</option>
652 <option value="Birch">Birch</option>
653 </param>
654 <when value="KMeans">
655 <expand macro="kmeans_advanced_options" />
656 </when>
657 <when value="DBSCAN">
658 <expand macro="dbscan_advanced_options" />
659 </when>
660 <when value="Birch">
661 <expand macro="birch_advanced_options" />
662 </when>
663 <when value="SpectralClustering">
664 <expand macro="spectral_clustering_advanced_options" />
665 </when>
666 <when value="MiniBatchKMeans">
667 <expand macro="minibatch_kmeans_advanced_options" />
668 </when>
669 </conditional>
670 </xml>
671
<!--distance_metrics: select param for the "metric" argument with six built-in options (euclidean selected by default); the yield lets the caller append further option elements inside the select.-->
672 <xml name="distance_metrics">
673 <param argument="metric" type="select" label="Distance metric" help=" ">
674 <option value="euclidean" selected="true">euclidean</option>
675 <option value="cityblock">cityblock</option>
676 <option value="cosine">cosine</option>
677 <option value="l1">l1</option>
678 <option value="l2">l2</option>
679 <option value="manhattan">manhattan</option>
680 <yield />
681 </param>
682 </xml>
683
<!--distance_nonsparse_metrics: bare list of additional metric option elements (none marked selected), meant to be expanded inside a select param.-->
684 <xml name="distance_nonsparse_metrics">
685 <option value="braycurtis">braycurtis</option>
686 <option value="canberra">canberra</option>
687 <option value="chebyshev">chebyshev</option>
688 <option value="correlation">correlation</option>
689 <option value="dice">dice</option>
690 <option value="hamming">hamming</option>
691 <option value="jaccard">jaccard</option>
692 <option value="kulsinski">kulsinski</option>
693 <option value="mahalanobis">mahalanobis</option>
694 <option value="matching">matching</option>
695 <option value="minkowski">minkowski</option>
696 <option value="rogerstanimoto">rogerstanimoto</option>
697 <option value="russellrao">russellrao</option>
698 <option value="seuclidean">seuclidean</option>
699 <option value="sokalmichener">sokalmichener</option>
700 <option value="sokalsneath">sokalsneath</option>
701 <option value="sqeuclidean">sqeuclidean</option>
702 <option value="yule">yule</option>
703 </xml>
704
<!--pairwise_kernel_metrics: single-choice select param for the "metric" argument of a pairwise kernel computation.-->
<!--Exactly one option is marked selected: rbf, the first of the two options that were both flagged before, so the pre-selected entry is unambiguous. NOTE(review): confirm rbf (rather than linear) is the intended default.-->
705 <xml name="pairwise_kernel_metrics">
706 <param argument="metric" type="select" label="Pairwise Kernel metric" help=" ">
707 <option value="rbf" selected="true">rbf</option>
708 <option value="sigmoid">sigmoid</option>
709 <option value="polynomial">polynomial</option>
710 <option value="linear">linear</option>
711 <option value="chi2">chi2</option>
712 <option value="additive_chi2">additive_chi2</option>
713 </param>
714 </xml>
715
<!--sparse_pairwise_metric_functions: select param "selected_metric_function" with three built-in choices (euclidean_distances selected by default); the yield lets the caller append further option elements.-->
716 <xml name="sparse_pairwise_metric_functions">
717 <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:">
718 <option value="euclidean_distances" selected="true">Euclidean distance matrix</option>
719 <option value="pairwise_distances">Distance matrix</option>
720 <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option>
721 <yield />
722 </param>
723 </xml>
724
<!--pairwise_metric_functions: bare list of additional pairwise metric/kernel option elements (none marked selected), meant to be expanded inside a select param.-->
725 <xml name="pairwise_metric_functions">
726 <option value="additive_chi2_kernel">Additive chi-squared kernel</option>
727 <option value="chi2_kernel">Exponential chi-squared kernel</option>
728 <option value="linear_kernel">Linear kernel</option>
729 <option value="manhattan_distances">L1 distances</option>
730 <option value="pairwise_kernels">Kernel</option>
731 <option value="polynomial_kernel">Polynomial kernel</option>
732 <option value="rbf_kernel">Gaussian (rbf) kernel</option>
733 <option value="laplacian_kernel">Laplacian kernel</option>
734 </xml>
735
<!--sparse_pairwise_condition: two "when" branches for an enclosing conditional. pairwise_distances expands distance_metrics and forwards the caller's yielded content into it; euclidean_distances declares the "squared" boolean (unchecked by default).-->
736 <xml name="sparse_pairwise_condition">
737 <when value="pairwise_distances">
738 <section name="options" title="Advanced Options" expanded="False">
739 <expand macro="distance_metrics">
740 <yield />
741 </expand>
742 </section>
743 </when>
744 <when value="euclidean_distances">
745 <section name="options" title="Advanced Options" expanded="False">
746 <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Return squared Euclidean distances" help=" " />
747 </section>
748 </when>
749 </xml>
750
<!--argmin_distance_condition: "when" branch for pairwise_distances_argmin; declares axis (integer, 1) and batch_size (integer, 500) and expands distance_metrics, forwarding the caller's yielded content into it.-->
751 <xml name="argmin_distance_condition">
752 <when value="pairwise_distances_argmin">
753 <section name="options" title="Advanced Options" expanded="False">
754 <param argument="axis" type="integer" optional="true" value="1" label="Axis" help="Axis along which the argmin and distances are to be computed." />
755 <expand macro="distance_metrics">
756 <yield />
757 </expand>
758 <param argument="batch_size" type="integer" optional="true" value="500" label="Batch size" help="Number of rows to be processed in each batch run." />
759 </section>
760 </when>
761 </xml>
762
<!--sparse_preprocessors: select param "selected_pre_processor" with four built-in preprocessors (StandardScaler selected by default); the yield lets the caller append further option elements.-->
763 <xml name="sparse_preprocessors">
764 <param name="selected_pre_processor" type="select" label="Select a preprocessor:">
765 <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
766 <option value="Binarizer">Binarizer (Binarizes data)</option>
767 <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
768 <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
769 <yield />
770 </param>
771 </xml>
772
<!--sparse_preprocessors_ext: expands sparse_preprocessors and passes seven extra option elements (KernelCenterer through KBinsDiscretizer) into its yield.-->
773 <xml name="sparse_preprocessors_ext">
774 <expand macro="sparse_preprocessors">
775 <option value="KernelCenterer">Kernel Centerer (Centers a kernel matrix)</option>
776 <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option>
777 <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option>
778 <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option>
779 <option value="QuantileTransformer">QuantileTransformer (Transform features using quantiles information)</option>
780 <option value="PowerTransformer">PowerTransformer (Apply a power transform featurewise to make data more Gaussian-like)</option>
781 <option value="KBinsDiscretizer">KBinsDiscretizer (Bin continuous data into intervals.)</option>
782 </expand>
783 </xml>
784
<!--sparse_preprocessor_options: "when" branches with an "Advanced Options" section for Binarizer, StandardScaler, MaxAbsScaler and Normalizer; the trailing yield lets the caller append further "when" branches.-->
785 <xml name="sparse_preprocessor_options">
786 <when value="Binarizer">
787 <section name="options" title="Advanced Options" expanded="False">
788 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing binarization" help=" " />
789 <param argument="threshold" type="float" optional="true" value="0.0" label="Threshold" help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. " />
790 </section>
791 </when>
792 <when value="StandardScaler">
793 <section name="options" title="Advanced Options" expanded="False">
794 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for performing inplace scaling" help=" " />
795 <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Center the data before scaling" help=" " />
796 <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Scale the data to unit variance (or unit standard deviation)" help=" " />
797 </section>
798 </when>
799 <when value="MaxAbsScaler">
800 <section name="options" title="Advanced Options" expanded="False">
801 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing scaling" help=" " />
802 </section>
803 </when>
804 <when value="Normalizer">
805 <section name="options" title="Advanced Options" expanded="False">
806 <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
807 <option value="l1" selected="true">l1</option>
808 <option value="l2">l2</option>
809 <option value="max">max</option>
810 </param>
811 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing row normalization" help=" " />
812 </section>
813 </when>
814 <yield />
815 </xml>
816
<!--sparse_preprocessor_options_ext: expands sparse_preprocessor_options and passes extra "when" branches for KernelCenterer (empty section), MinMaxScaler, PolynomialFeatures, RobustScaler, QuantileTransformer, PowerTransformer and KBinsDiscretizer into its yield.-->
<!--Boolean params here use falsevalue="boolfalse", the same spelling used by the boolean params in sparse_preprocessor_options.-->
817 <xml name="sparse_preprocessor_options_ext">
818 <expand macro="sparse_preprocessor_options">
819 <when value="KernelCenterer">
820 <section name="options" title="Advanced Options" expanded="False">
821 </section>
822 </when>
823 <when value="MinMaxScaler">
824 <section name="options" title="Advanced Options" expanded="False">
825 <param argument="feature_range" type="text" value="(0, 1)" optional="true" help="Desired range of transformed data. None or tuple (min, max). None equals to (0, 1)" />
826 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing normalization" help=" " />
827 </section>
828 </when>
829 <when value="PolynomialFeatures">
830 <section name="options" title="Advanced Options" expanded="False">
831 <param argument="degree" type="integer" optional="true" value="2" label="The degree of the polynomial features " help="" />
832 <param argument="interaction_only" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Produce interaction features only" help="(Features that are products of at most degree distinct input features) " />
833 <param argument="include_bias" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Include a bias column" help="Feature in which all polynomial powers are zero " />
834 </section>
835 </when>
836 <when value="RobustScaler">
837 <section name="options" title="Advanced Options" expanded="False">
838 <!--=True, =True, copy=True-->
839 <param argument="with_centering" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Center the data before scaling" help=" " />
840 <param argument="with_scaling" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Scale the data to interquartile range" help=" " />
841 <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for inplace scaling" help=" " />
842 </section>
843 </when>
844 <when value="QuantileTransformer">
845 <section name="options" title="Advanced Options" expanded="False">
846 <param name="n_quantiles" type="integer" value="1000" min="0" label="Number of quantiles to be computed" />
847 <param name="output_distribution" type="select" label="Marginal distribution for the transformed data">
848 <option value="uniform" selected="true">uniform</option>
849 <option value="normal">normal</option>
850 </param>
851 <param name="ignore_implicit_zeros" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to discard sparse entries" help="Only applies to sparse matrices. If False, sparse entries are treated as zeros" />
852 <param name="subsample" type="integer" value="100000" label="Maximum number of samples used to estimate the quantiles for computational efficiency" help="Note that the subsampling procedure may differ for value-identical sparse and dense matrices." />
853 <expand macro="random_state" help_text="This is used by subsampling and smoothing noise" />
854 </section>
855 </when>
856 <when value="PowerTransformer">
857 <section name="options" title="Advanced Options" expanded="False">
858 <param name="method" type="select" label="The power transform method">
859 <option value="yeo-johnson" selected="true">yeo-johnson (works with positive and negative values)</option>
860 <option value="box-cox">box-cox (might perform better, but only works with strictly positive values)</option>
861 </param>
862 <param name="standardize" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Whether to apply zero-mean, unit-variance normalization to the transformed output." />
863 </section>
864 </when>
865 <when value="KBinsDiscretizer">
866 <section name="options" title="Advanced Options" expanded="False">
867 <param name="n_bins" type="integer" value="5" min="2" label="The number of bins to produce" />
868 <param name="encode" type="select" label="Method used to encode the transformed result">
869 <option value="onehot" selected="true">onehot (encode the transformed result with one-hot encoding and return a sparse matrix)</option>
870 <option value="onehot-dense">onehot-dense (encode the transformed result with one-hot encoding and return a dense array)</option>
871 <option value="ordinal">ordinal (return the bin identifier encoded as an integer value)</option>
872 </param>
873 <param name="strategy" type="select" label="Strategy used to define the widths of the bins">
874 <option value="uniform">uniform (all bins in each feature have identical widths)</option>
875 <option value="quantile" selected="true">quantile (all bins in each feature have the same number of points)</option>
876 <option value="kmeans">kmeans (values in each bin have the same nearest center of a 1D k-means cluster)</option>
877 </param>
878 </section>
879 </when>
880 </expand>
881 </xml>
882
<!--cv_splitter: bare list of cross-validation splitter option elements ("default" selected by default), meant to be expanded inside a select param; the yield lets the caller append further options.-->
883 <xml name="cv_splitter">
884 <option value="default" selected="true">default splitter</option>
885 <option value="KFold">KFold</option>
886 <option value="StratifiedKFold">StratifiedKFold</option>
887 <option value="LeaveOneOut">LeaveOneOut</option>
888 <option value="LeavePOut">LeavePOut</option>
889 <option value="RepeatedKFold">RepeatedKFold</option>
890 <option value="RepeatedStratifiedKFold">RepeatedStratifiedKFold</option>
891 <option value="ShuffleSplit">ShuffleSplit</option>
892 <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
893 <option value="TimeSeriesSplit">TimeSeriesSplit</option>
894 <option value="PredefinedSplit">PredefinedSplit</option>
895 <option value="OrderedKFold">OrderedKFold</option>
896 <option value="RepeatedOrderedKFold">RepeatedOrderedKFold</option>
897 <yield />
898 </xml>
899
<!--cv_splitter_options: one "when" branch per splitter value listed in cv_splitter. Branches are assembled from the cv_n_splits, cv_shuffle, cv_test_size and random_state macros plus splitter-specific params (p, n_repeats, max_train_size, test_fold); LeaveOneOut has no params. The trailing yield lets the caller append further "when" branches.-->
900 <xml name="cv_splitter_options">
901 <when value="default">
902 <expand macro="cv_n_splits" />
903 </when>
904 <when value="KFold">
905 <expand macro="cv_n_splits" />
906 <expand macro="cv_shuffle" />
907 <expand macro="random_state" />
908 </when>
909 <when value="StratifiedKFold">
910 <expand macro="cv_n_splits" />
911 <expand macro="cv_shuffle" />
912 <expand macro="random_state" />
913 </when>
914 <when value="LeaveOneOut">
915 </when>
916 <when value="LeavePOut">
917 <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets." />
918 </when>
919 <when value="RepeatedKFold">
920 <expand macro="cv_n_splits" value="5" />
921 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
922 <expand macro="random_state" />
923 </when>
924 <when value="RepeatedStratifiedKFold">
925 <expand macro="cv_n_splits" value="5" />
926 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
927 <expand macro="random_state" />
928 </when>
929 <when value="ShuffleSplit">
930 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." />
931 <expand macro="cv_test_size" value="0.1" />
932 <expand macro="random_state" />
933 </when>
934 <when value="StratifiedShuffleSplit">
935 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." />
936 <expand macro="cv_test_size" value="0.1" />
937 <expand macro="random_state" />
938 </when>
939 <when value="TimeSeriesSplit">
940 <expand macro="cv_n_splits" />
941 <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." />
942 </when>
943 <when value="PredefinedSplit">
944 <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'." />
945 </when>
946 <when value="OrderedKFold">
947 <expand macro="cv_n_splits" />
948 <expand macro="cv_shuffle" />
949 <expand macro="random_state" />
950 </when>
951 <when value="RepeatedOrderedKFold">
952 <expand macro="cv_n_splits" />
953 <param argument="n_repeats" type="integer" value="5" />
954 <expand macro="random_state" />
955 </when>
956 <yield />
957 </xml>
958
<!--cv: conditional "cv_selector" keyed on the selected_cv select. It expands cv_splitter and cv_splitter_options and adds four group-based splitters (GroupKFold, GroupShuffleSplit, LeaveOneGroupOut, LeavePGroupsOut), each of whose branches expands cv_groups.-->
959 <xml name="cv">
960 <conditional name="cv_selector">
961 <param name="selected_cv" type="select" label="Select the cv splitter:">
962 <expand macro="cv_splitter">
963 <option value="GroupKFold">GroupKFold</option>
964 <option value="GroupShuffleSplit">GroupShuffleSplit</option>
965 <option value="LeaveOneGroupOut">LeaveOneGroupOut</option>
966 <option value="LeavePGroupsOut">LeavePGroupsOut</option>
967 </expand>
968 </param>
969 <expand macro="cv_splitter_options">
970 <when value="GroupKFold">
971 <expand macro="cv_n_splits" />
972 <expand macro="cv_groups" />
973 </when>
974 <when value="GroupShuffleSplit">
975 <expand macro="cv_n_splits" value="5" />
976 <expand macro="cv_test_size" />
977 <expand macro="random_state" />
978 <expand macro="cv_groups" />
979 </when>
980 <when value="LeaveOneGroupOut">
981 <expand macro="cv_groups" />
982 </when>
983 <when value="LeavePGroupsOut">
984 <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." />
985 <expand macro="cv_groups" />
986 </when>
987 </expand>
988 </conditional>
989 </xml>
990
<!--cv_reduced: same "cv_selector" conditional as the cv macro but without the group-based splitters; expands cv_splitter and cv_splitter_options unchanged. Token: label (default "Select the cv splitter").-->
991 <xml name="cv_reduced" token_label="Select the cv splitter">
992 <conditional name="cv_selector">
993 <param name="selected_cv" type="select" label="@LABEL@">
994 <expand macro="cv_splitter" />
995 </param>
996 <expand macro="cv_splitter_options" />
997 </conditional>
998 </xml>
999
<!--cv_n_splits: integer param for the "n_splits" argument. Tokens: value (default 3) and help (default "Number of folds. Must be at least 2.").-->
<!--NOTE(review): the default help text says at least 2 while the param declares min="1"; some callers override the help for shuffle-based splitters, so confirm the intended lower bound before tightening it.-->
1000 <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2.">
1001 <param argument="n_splits" type="integer" value="@VALUE@" min="1" label="n_splits" help="@HELP@" />
1002 </xml>
1003
<!--cv_shuffle: optional boolean param for the "shuffle" argument, unchecked by default.-->
1004 <xml name="cv_shuffle">
1005 <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" />
1006 </xml>
1007
<!--cv_test_size: float param for the "test_size" argument with min 0.0. Token: value (default 0.2).-->
1008 <xml name="cv_test_size" token_value="0.2">
1009 <param argument="test_size" type="float" value="@VALUE@" min="0.0" label="Portion or number of the test set" help="0.0-1.0, proportion of the dataset to include in the test split; >1, integer only, the absolute number of test samples " />
1010 </xml>
1011
<!--cv_groups: expanded section "groups_selector" with a tabular dataset input (infile_g), a header flag (header_g) and a column-selector conditional built from the samples_column_selector_options macro with multiple="False".-->
1012 <xml name="cv_groups">
1013 <section name="groups_selector" title="Groups column selector" expanded="true">
1014 <param name="infile_g" type="data" format="tabular" label="Choose dataset containing groups info:" />
1015 <param name="header_g" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="False" label="Does the dataset contain header:" />
1016 <conditional name="column_selector_options_g">
1017 <expand macro="samples_column_selector_options" column_option="selected_column_selector_option_g" col_name="col_g" multiple="False" infile="infile_g" />
1018 </conditional>
1019 </section>
1020 </xml>
1021
<!--train_test_split_params: conditional "split_algos" keyed on the "shuffle" select (None, simple, stratified, group; simple selected by default). Every branch expands train_test_split_test_size; all but None also expand random_state. The group branch makes the holdout size optional, adds the optional group_names text param and yields to the caller. train_size is left commented out.-->
1022 <xml name="train_test_split_params">
1023 <conditional name="split_algos">
1024 <param name="shuffle" type="select" label="Select the splitting method">
1025 <option value="None">No shuffle</option>
1026 <option value="simple" selected="true">ShuffleSplit</option>
1027 <option value="stratified">StratifiedShuffleSplit -- target values serve as class labels</option>
1028 <option value="group">GroupShuffleSplit or split by group names</option>
1029 </param>
1030 <when value="None">
1031 <expand macro="train_test_split_test_size" />
1032 </when>
1033 <when value="simple">
1034 <expand macro="train_test_split_test_size" />
1035 <expand macro="random_state" />
1036 </when>
1037 <when value="stratified">
1038 <expand macro="train_test_split_test_size" />
1039 <expand macro="random_state" />
1040 </when>
1041 <when value="group">
1042 <expand macro="train_test_split_test_size" optional="true" />
1043 <expand macro="random_state" />
1044 <param argument="group_names" type="text" value="" optional="true" label="Type in group names instead" help="For example: chr6, chr7. This parameter is optional. If used, it will override the holdout size and random seed." />
1045 <yield />
1046 </when>
1047 </conditional>
1048 <!--param argument="train_size" type="float" optional="True" value="" label="Train size:"/>-->
1049 </xml>
1050
<!--train_test_split_test_size: float param "test_size" (default 0.2) for the holdout size. Token: optional (default false) controls whether the value may be left empty.-->
1051 <xml name="train_test_split_test_size" token_optional="false">
1052 <param name="test_size" type="float" value="0.2" optional="@OPTIONAL@" label="Holdout size" help="Less than 1, for proportion; greater than 1 (integer), for number of samples." />
1053 </xml>
1054
<!--feature_selection_algorithms: bare list of feature-selection algorithm option elements (SelectKBest selected by default), meant to be expanded inside a select param; the yield lets the caller append further options.-->
1055 <xml name="feature_selection_algorithms">
1056 <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
1057 <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
1058 <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
1059 <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
1060 <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
1061 <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
1062 <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
1063 <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
1064 <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
1065 <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
1066 <yield />
1067 </xml>
1068
<!--feature_selection_algorithm_details: "when" branches for the univariate selectors and VarianceThreshold. Every branch except VarianceThreshold expands feature_selection_score_function and then declares its own option (mode/param, percentile, k or alpha) in an "Advanced Options" section; VarianceThreshold only declares a threshold in an "Options" section. No branches are defined here for SelectFromModel, RFE or RFECV.-->
1069 <xml name="feature_selection_algorithm_details">
1070 <when value="GenericUnivariateSelect">
1071 <expand macro="feature_selection_score_function" />
1072 <section name="options" title="Advanced Options" expanded="False">
1073 <param argument="mode" type="select" label="Feature selection mode">
1074 <option value="percentile">percentile</option>
1075 <option value="k_best">k_best</option>
1076 <option value="fpr">fpr</option>
1077 <option value="fdr">fdr</option>
1078 <option value="fwe">fwe</option>
1079 </param>
1080 <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
1081 </section>
1082 </when>
1083 <when value="SelectPercentile">
1084 <expand macro="feature_selection_score_function" />
1085 <section name="options" title="Advanced Options" expanded="False">
1086 <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
1087 </section>
1088 </when>
1089 <when value="SelectKBest">
1090 <expand macro="feature_selection_score_function" />
1091 <section name="options" title="Advanced Options" expanded="False">
1092 <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
1093 </section>
1094 </when>
1095 <when value="SelectFpr">
1096 <expand macro="feature_selection_score_function" />
1097 <section name="options" title="Advanced Options" expanded="False">
1098 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept." />
1099 </section>
1100 </when>
1101 <when value="SelectFdr">
1102 <expand macro="feature_selection_score_function" />
1103 <section name="options" title="Advanced Options" expanded="False">
1104 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep." />
1105 </section>
1106 </when>
1107 <when value="SelectFwe">
1108 <expand macro="feature_selection_score_function" />
1109 <section name="options" title="Advanced Options" expanded="False">
1110 <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep." />
1111 </section>
1112 </when>
1113 <when value="VarianceThreshold">
1114 <section name="options" title="Options" expanded="False">
1115 <param argument="threshold" type="float" value="0.0" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed." />
1116 </section>
1117 </when>
1118 </xml>
1119
<!-- SelectFromModel branch that lets the user either build a new estimator
     (estimator_selector_fs) or load a prefitted one from a zip dataset. -->
<xml name="feature_selection_SelectFromModel">
    <when value="SelectFromModel">
        <conditional name="model_inputter">
            <param name="input_mode"
                   type="select"
                   label="Construct a new estimator from a selection list?">
                <option value="new" selected="true">Yes</option>
                <option value="prefitted">No. Load a prefitted estimator</option>
            </param>
            <!-- Build the estimator from the module/class pickers -->
            <when value="new">
                <expand macro="estimator_selector_fs" />
            </when>
            <!-- Reuse an estimator stored in an existing dataset -->
            <when value="prefitted">
                <param name="fitted_estimator"
                       type="data"
                       format="zip"
                       label="Load a prefitted estimator" />
            </when>
        </conditional>
        <expand macro="feature_selection_SelectFromModel_options" />
    </when>
</xml>
1137
<!-- SelectFromModel branch without the prefitted choice: the only input mode
     is "new", and the estimator comes from estimator_selector_all. -->
<xml name="feature_selection_SelectFromModel_no_prefitted">
    <when value="SelectFromModel">
        <conditional name="model_inputter">
            <param name="input_mode"
                   type="select"
                   label="Construct a new estimator from a selection list?">
                <option value="new" selected="true">Yes</option>
            </param>
            <when value="new">
                <expand macro="estimator_selector_all" />
            </when>
        </conditional>
        <expand macro="feature_selection_SelectFromModel_options" />
    </when>
</xml>
1151
<!-- Advanced options section shared by both SelectFromModel macros:
     threshold, norm_order and max_features. -->
<xml name="feature_selection_SelectFromModel_options">
    <section name="options" title="Advanced Options" expanded="False">
        <!-- Free text because the threshold may be a keyword or an expression -->
        <param argument="threshold"
               type="text"
               value=""
               optional="true"
               label="threshold"
               help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
        <param argument="norm_order"
               type="integer"
               value="1"
               label="norm_order"
               help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
        <param argument="max_features"
               type="integer"
               value=""
               optional="true"
               label="The maximum number of features selected scoring above threshold"
               help="To disable threshold and only select based on max_features, set threshold=-np.inf." />
    </section>
</xml>
1159
<!-- RFE branch. The caller supplies the estimator picker through <yield />;
     the advanced section exposes n_features_to_select, step and verbose. -->
<xml name="feature_selection_RFE">
    <when value="RFE">
        <!-- Estimator selector injected by the expanding macro -->
        <yield />
        <section name="options" title="Advanced Options" expanded="False">
            <param argument="n_features_to_select"
                   type="integer"
                   value=""
                   optional="true"
                   label="n_features_to_select"
                   help="The number of features to select. If None, half of the features are selected." />
            <param argument="step"
                   type="float"
                   value="1"
                   label="step"
                   optional="true"
                   help="Default = 1. " />
            <param argument="verbose"
                   type="integer"
                   value="0"
                   label="verbose"
                   help="Controls verbosity of output." />
        </section>
    </when>
</xml>
1170
<!-- RFECV branch used by feature_selection_fs. The estimator picker arrives
     through <yield />; cross-validation options come from the "cv" macro. -->
<xml name="feature_selection_RFECV_fs">
    <when value="RFECV">
        <!-- Estimator selector injected by the expanding macro -->
        <yield />
        <section name="options" title="Advanced Options" expanded="False">
            <param argument="step"
                   type="float"
                   value="1"
                   label="step"
                   optional="true"
                   help="Default = 1. " />
            <param argument="min_features_to_select"
                   type="integer"
                   value="1"
                   optional="true"
                   label="The minimum number of features to be selected" />
            <expand macro="cv" />
            <expand macro="scoring_selection" />
            <param argument="verbose"
                   type="integer"
                   value="0"
                   label="verbose"
                   help="Controls verbosity of output." />
        </section>
    </when>
</xml>
1183
<!-- RFECV branch used by feature_selection_pipeline. Same layout as
     feature_selection_RFECV_fs, but expands "cv_reduced" instead of "cv". -->
<xml name="feature_selection_RFECV_pipeline">
    <when value="RFECV">
        <!-- Estimator selector injected by the expanding macro -->
        <yield />
        <section name="options" title="Advanced Options" expanded="False">
            <param argument="step"
                   type="float"
                   value="1"
                   label="step"
                   optional="true"
                   help="Default = 1. " />
            <param argument="min_features_to_select"
                   type="integer"
                   value="1"
                   optional="true"
                   label="The minimum number of features to be selected" />
            <expand macro="cv_reduced" />
            <!-- TODO: group splitter support -->
            <expand macro="scoring_selection" />
            <param argument="verbose"
                   type="integer"
                   value="0"
                   label="verbose"
                   help="Controls verbosity of output." />
        </section>
    </when>
</xml>
1197
<!-- DyRFECV branch. Differs from the RFECV macros in that "step" is a text
     field whose sanitizer additionally allows square brackets, so a list can
     be typed in. -->
<xml name="feature_selection_DyRFECV_fs">
    <when value="DyRFECV">
        <!-- Estimator selector injected by the expanding macro -->
        <yield />
        <section name="options" title="Advanced Options" expanded="False">
            <param argument="step"
                   type="text"
                   size="30"
                   value="1"
                   label="step"
                   optional="true"
                   help="Default = 1. Support float, int and list.">
                <sanitizer>
                    <!-- Default character set plus "[" and "]" for list literals -->
                    <valid initial="default">
                        <add value="[" />
                        <add value="]" />
                    </valid>
                </sanitizer>
            </param>
            <param argument="min_features_to_select"
                   type="integer"
                   value="1"
                   optional="true"
                   label="The minimum number of features to be selected" />
            <expand macro="cv" />
            <expand macro="scoring_selection" />
            <param argument="verbose"
                   type="integer"
                   value="0"
                   label="verbose"
                   help="Controls verbosity of output." />
        </section>
    </when>
</xml>
1217
<!-- Feature selection conditional for pipeline building. Compared with
     feature_selection_fs it offers no prefitted estimator for SelectFromModel,
     no custom estimator for RFE and RFECV, and no DyRFECV entry. -->
<xml name="feature_selection_pipeline">
    <conditional name="fs_algorithm_selector">
        <param name="selected_algorithm"
               type="select"
               label="Select a feature selection algorithm">
            <expand macro="feature_selection_algorithms" />
        </param>

        <!-- One <when> branch per selectable algorithm -->
        <expand macro="feature_selection_algorithm_details" />
        <expand macro="feature_selection_SelectFromModel_no_prefitted" />
        <expand macro="feature_selection_RFE">
            <expand macro="estimator_selector_all" />
        </expand>
        <expand macro="feature_selection_RFECV_pipeline">
            <expand macro="estimator_selector_all" />
        </expand>
        <!-- TODO: add DyRFECV to pipeline -->
    </conditional>
</xml>
1235
<!-- Full feature selection conditional: the shared algorithm list plus
     DyRFECV, with estimator_selector_fs supplying the estimator for the
     RFE, RFECV and DyRFECV branches. -->
<xml name="feature_selection_fs">
    <conditional name="fs_algorithm_selector">
        <param name="selected_algorithm"
               type="select"
               label="Select a feature selection algorithm">
            <!-- Extra option appended through the yield of the shared list -->
            <expand macro="feature_selection_algorithms">
                <option value="DyRFECV">DyRFECV - Extended RFECV with changeable steps</option>
            </expand>
        </param>

        <!-- One <when> branch per selectable algorithm -->
        <expand macro="feature_selection_algorithm_details" />
        <expand macro="feature_selection_SelectFromModel" />
        <expand macro="feature_selection_RFE">
            <expand macro="estimator_selector_fs" />
        </expand>
        <expand macro="feature_selection_RFECV_fs">
            <expand macro="estimator_selector_fs" />
        </expand>
        <expand macro="feature_selection_DyRFECV_fs">
            <expand macro="estimator_selector_fs" />
        </expand>
    </conditional>
</xml>
1256
<!-- Select box for the score_func argument used by the univariate selectors
     in feature_selection_algorithm_details. -->
<xml name="feature_selection_score_function">
    <param argument="score_func"
           type="select"
           label="Select a score function">
        <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
        <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
        <option value="f_regression">f_regression - Univariate linear regression tests</option>
        <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
        <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
    </param>
</xml>
1266
<!-- Options common to the model validation tools: the "cv" and "verbose"
     macros, followed by whatever the expanding macro yields. -->
<xml name="model_validation_common_options">
    <expand macro="cv" />
    <expand macro="verbose" />
    <!-- Caller-specific additions are appended here -->
    <yield />
</xml>
1272
<!-- Primary scoring metric selector.
     The chosen primary metric decides which secondary (multi-metric) list is
     offered: classification, regression or anomaly detection. Choosing
     "default" shows no secondary list.
     The option values use the scikit-learn scorer names "precision_weighted"
     and "recall_weighted"; the earlier "..._wighted" spellings were typos and
     are not valid scikit-learn scoring strings. -->
<xml name="scoring_selection">
    <conditional name="scoring">
        <param name="primary_scoring"
               type="select"
               multiple="false"
               label="Select the primary metric (scoring):"
               help="Metric to refit the best estimator.">
            <option value="default" selected="true">default with estimator</option>
            <!-- Classification metrics -->
            <option value="accuracy">Classification -- 'accuracy'</option>
            <option value="balanced_accuracy">Classification -- 'balanced_accuracy'</option>
            <option value="average_precision">Classification -- 'average_precision'</option>
            <option value="f1">Classification -- 'f1'</option>
            <option value="f1_micro">Classification -- 'f1_micro'</option>
            <option value="f1_macro">Classification -- 'f1_macro'</option>
            <option value="f1_weighted">Classification -- 'f1_weighted'</option>
            <option value="f1_samples">Classification -- 'f1_samples'</option>
            <option value="neg_log_loss">Classification -- 'neg_log_loss'</option>
            <option value="precision">Classification -- 'precision'</option>
            <option value="precision_micro">Classification -- 'precision_micro'</option>
            <option value="precision_macro">Classification -- 'precision_macro'</option>
            <option value="precision_weighted">Classification -- 'precision_weighted'</option>
            <option value="precision_samples">Classification -- 'precision_samples'</option>
            <option value="recall">Classification -- 'recall'</option>
            <option value="recall_micro">Classification -- 'recall_micro'</option>
            <option value="recall_macro">Classification -- 'recall_macro'</option>
            <option value="recall_weighted">Classification -- 'recall_weighted'</option>
            <option value="recall_samples">Classification -- 'recall_samples'</option>
            <option value="roc_auc">Classification -- 'roc_auc'</option>
            <!-- Regression metrics -->
            <option value="explained_variance">Regression -- 'explained_variance'</option>
            <option value="neg_mean_absolute_error">Regression -- 'neg_mean_absolute_error'</option>
            <option value="neg_mean_squared_error">Regression -- 'neg_mean_squared_error'</option>
            <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
            <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
            <option value="r2">Regression -- 'r2'</option>
            <option value="max_error">Regression -- 'max_error'</option>
            <!-- Anomaly detection metrics -->
            <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
            <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
        </param>

        <!-- No secondary metrics when the estimator default is used -->
        <when value="default" />

        <!-- Classification primaries offer the classification secondary list -->
        <when value="accuracy">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="balanced_accuracy">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="average_precision">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="f1">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="f1_micro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="f1_macro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="f1_weighted">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="f1_samples">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="neg_log_loss">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="precision">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="precision_micro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="precision_macro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="precision_weighted">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="precision_samples">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="recall">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="recall_micro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="recall_macro">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="recall_weighted">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="recall_samples">
            <expand macro="secondary_scoring_selection_classification" />
        </when>
        <when value="roc_auc">
            <expand macro="secondary_scoring_selection_classification" />
        </when>

        <!-- Regression primaries offer the regression secondary list -->
        <when value="explained_variance">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="neg_mean_absolute_error">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="neg_mean_squared_error">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="neg_mean_squared_log_error">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="neg_median_absolute_error">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="r2">
            <expand macro="secondary_scoring_selection_regression" />
        </when>
        <when value="max_error">
            <expand macro="secondary_scoring_selection_regression" />
        </when>

        <!-- Anomaly detection primaries offer the anomaly secondary list -->
        <when value="binarize_auc_scorer">
            <expand macro="secondary_scoring_selection_anormaly" />
        </when>
        <when value="binarize_average_precision_scorer">
            <expand macro="secondary_scoring_selection_anormaly" />
        </when>
    </conditional>
</xml>
1397
<!-- Multi-select list of additional classification metrics, shown when a
     classification metric is chosen as primary scoring.
     The option values use the scikit-learn scorer names "precision_weighted"
     and "recall_weighted"; the earlier "..._wighted" spellings were typos and
     are not valid scikit-learn scoring strings. -->
<xml name="secondary_scoring_selection_classification">
    <param name="secondary_scoring"
           type="select"
           multiple="true"
           label="Additional scoring used in multi-metric mode:"
           help="If the same metric with the primary is chosen, the metric will be ignored.">
        <option value="accuracy">Classification -- 'accuracy'</option>
        <option value="balanced_accuracy">Classification -- 'balanced_accuracy'</option>
        <option value="average_precision">Classification -- 'average_precision'</option>
        <option value="f1">Classification -- 'f1'</option>
        <option value="f1_micro">Classification -- 'f1_micro'</option>
        <option value="f1_macro">Classification -- 'f1_macro'</option>
        <option value="f1_weighted">Classification -- 'f1_weighted'</option>
        <option value="f1_samples">Classification -- 'f1_samples'</option>
        <option value="neg_log_loss">Classification -- 'neg_log_loss'</option>
        <option value="precision">Classification -- 'precision'</option>
        <option value="precision_micro">Classification -- 'precision_micro'</option>
        <option value="precision_macro">Classification -- 'precision_macro'</option>
        <option value="precision_weighted">Classification -- 'precision_weighted'</option>
        <option value="precision_samples">Classification -- 'precision_samples'</option>
        <option value="recall">Classification -- 'recall'</option>
        <option value="recall_micro">Classification -- 'recall_micro'</option>
        <option value="recall_macro">Classification -- 'recall_macro'</option>
        <option value="recall_weighted">Classification -- 'recall_weighted'</option>
        <option value="recall_samples">Classification -- 'recall_samples'</option>
        <option value="roc_auc">Classification -- 'roc_auc'</option>
    </param>
</xml>
1422
<!-- Multi-select list of additional regression metrics, shown when a
     regression metric is chosen as primary scoring. -->
<xml name="secondary_scoring_selection_regression">
    <param name="secondary_scoring"
           type="select"
           multiple="true"
           label="Additional scoring used in multi-metric mode:"
           help="If the same metric with the primary is chosen, the metric will be ignored.">
        <option value="explained_variance">Regression -- 'explained_variance'</option>
        <option value="neg_mean_absolute_error">Regression -- 'neg_mean_absolute_error'</option>
        <option value="neg_mean_squared_error">Regression -- 'neg_mean_squared_error'</option>
        <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
        <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
        <option value="r2">Regression -- 'r2'</option>
        <option value="max_error">Regression -- 'max_error'</option>
    </param>
</xml>
1434
<!-- Multi-select list of additional anomaly detection metrics.
     NOTE(review): the macro name is spelled "anormaly"; it is kept as is
     because scoring_selection expands it under that exact name. -->
<xml name="secondary_scoring_selection_anormaly">
    <param name="secondary_scoring"
           type="select"
           multiple="true"
           label="Additional scoring used in multi-metric mode:"
           help="If the same metric with the primary is chosen, the metric will be ignored.">
        <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
        <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
    </param>
</xml>
1441
<!-- Tokenized pre_dispatch parameter. Callers may override the type
     (default "hidden"), the default value (default "all") and the help text. -->
<xml name="pre_dispatch"
     token_type="hidden"
     token_default_value="all"
     token_help="Number of predispatched jobs for parallel execution">
    <param argument="pre_dispatch"
           type="@TYPE@"
           value="@DEFAULT_VALUE@"
           optional="true"
           label="pre_dispatch"
           help="@HELP@" />
</xml>
1445
<!-- Estimator input plus an optional "Hyperparameter Swapping" section.
     The section takes a tabular dataset of parameters and 1 to 30
     name/value pairs that override them. -->
<xml name="estimator_and_hyperparameter">
    <param name="infile_estimator"
           type="data"
           format="zip"
           label="Choose the dataset containing pipeline/estimator object" />
    <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false">
        <param name="infile_params"
               type="data"
               format="tabular"
               optional="true"
               label="Choose the dataset containing hyperparameters for the pipeline/estimator above"
               help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool." />
        <repeat name="param_set" min="1" max="30" title="New hyperparameter setting">
            <!-- Choices are read from infile_params, restricted to rows starting with "@" -->
            <param name="sp_name"
                   type="select"
                   optional="true"
                   label="Choose a parameter name (with current value)">
                <options from_dataset="infile_params" startswith="@">
                    <column name="name" index="2" />
                    <column name="value" index="1" />
                    <filter type="unique_value" name="unique_param" column="1" />
                </options>
            </param>
            <!-- Sanitizer additionally lets single and double quotes through -->
            <param name="sp_value"
                   type="text"
                   value=""
                   optional="true"
                   label="New value"
                   help="Supports int, float, boolean, single quoted string, and selected object constructor. Similar to the `Parameter settings for search` section in `searchcv` tool except that only single value is expected here.">
                <sanitizer>
                    <valid initial="default">
                        <add value="&apos;" />
                        <add value="&quot;" />
                    </valid>
                </sanitizer>
            </param>
        </repeat>
    </section>
</xml>
1469
<!-- Options for the hyperparameter search tools: scoring, the common model
     validation options, and the iid / error_score / return_train_score
     switches. Boolean switches emit "booltrue" or "boolfalse". -->
<xml name="search_cv_options">
    <expand macro="scoring_selection" />
    <expand macro="model_validation_common_options" />
    <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/-->
    <param argument="iid"
           type="boolean"
           truevalue="booltrue"
           falsevalue="boolfalse"
           checked="true"
           label="iid"
           help="If True, data is identically distributed across the folds" />
    <!--param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/> -->
    <param argument="error_score"
           type="boolean"
           truevalue="booltrue"
           falsevalue="boolfalse"
           checked="true"
           label="Raise fit error:"
           help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised." />
    <param argument="return_train_score"
           type="boolean"
           truevalue="booltrue"
           falsevalue="boolfalse"
           checked="false"
           label="return_train_score"
           help="" />
</xml>
1479
<!-- Module choices for the estimator selector. Each value has a matching
     <when> branch in estimator_suboptions; callers can append further
     options through <yield />. -->
<xml name="estimator_module_options">
    <option value="svm" selected="true">sklearn.svm</option>
    <option value="linear_model">sklearn.linear_model</option>
    <option value="ensemble">sklearn.ensemble</option>
    <option value="naive_bayes">sklearn.naive_bayes</option>
    <option value="tree">sklearn.tree</option>
    <option value="neighbors">sklearn.neighbors</option>
    <option value="xgboost">xgboost</option>
    <!-- Extra module options supplied by the expanding macro -->
    <yield />
</xml>
1490
<!-- One <when> branch per module listed in estimator_module_options.
     Every branch holds a "selected_estimator" class picker followed by the
     free-text parameter box from estimator_params_text. Callers can add
     further branches through <yield />. -->
<xml name="estimator_suboptions">
    <!-- sklearn.svm -->
    <when value="svm">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="LinearSVC" selected="true">LinearSVC</option>
            <option value="LinearSVR">LinearSVR</option>
            <option value="NuSVC">NuSVC</option>
            <option value="NuSVR">NuSVR</option>
            <option value="OneClassSVM">OneClassSVM</option>
            <option value="SVC">SVC</option>
            <option value="SVR">SVR</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- sklearn.linear_model -->
    <when value="linear_model">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="ARDRegression" selected="true">ARDRegression</option>
            <option value="BayesianRidge">BayesianRidge</option>
            <option value="ElasticNet">ElasticNet</option>
            <option value="ElasticNetCV">ElasticNetCV</option>
            <option value="HuberRegressor">HuberRegressor</option>
            <option value="Lars">Lars</option>
            <option value="LarsCV">LarsCV</option>
            <option value="Lasso">Lasso</option>
            <option value="LassoCV">LassoCV</option>
            <option value="LassoLars">LassoLars</option>
            <option value="LassoLarsCV">LassoLarsCV</option>
            <option value="LassoLarsIC">LassoLarsIC</option>
            <option value="LinearRegression">LinearRegression</option>
            <option value="LogisticRegression">LogisticRegression</option>
            <option value="LogisticRegressionCV">LogisticRegressionCV</option>
            <option value="MultiTaskLasso">MultiTaskLasso</option>
            <option value="MultiTaskElasticNet">MultiTaskElasticNet</option>
            <option value="MultiTaskLassoCV">MultiTaskLassoCV</option>
            <option value="MultiTaskElasticNetCV">MultiTaskElasticNetCV</option>
            <option value="OrthogonalMatchingPursuit">OrthogonalMatchingPursuit</option>
            <option value="OrthogonalMatchingPursuitCV">OrthogonalMatchingPursuitCV</option>
            <option value="PassiveAggressiveClassifier">PassiveAggressiveClassifier</option>
            <option value="PassiveAggressiveRegressor">PassiveAggressiveRegressor</option>
            <option value="Perceptron">Perceptron</option>
            <option value="RANSACRegressor">RANSACRegressor</option>
            <option value="Ridge">Ridge</option>
            <option value="RidgeClassifier">RidgeClassifier</option>
            <option value="RidgeClassifierCV">RidgeClassifierCV</option>
            <option value="RidgeCV">RidgeCV</option>
            <option value="SGDClassifier">SGDClassifier</option>
            <option value="SGDRegressor">SGDRegressor</option>
            <option value="TheilSenRegressor">TheilSenRegressor</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- sklearn.ensemble -->
    <when value="ensemble">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="AdaBoostClassifier" selected="true">AdaBoostClassifier</option>
            <option value="AdaBoostRegressor">AdaBoostRegressor</option>
            <option value="BaggingClassifier">BaggingClassifier</option>
            <option value="BaggingRegressor">BaggingRegressor</option>
            <option value="ExtraTreesClassifier">ExtraTreesClassifier</option>
            <option value="ExtraTreesRegressor">ExtraTreesRegressor</option>
            <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
            <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
            <option value="IsolationForest">IsolationForest</option>
            <option value="HistGradientBoostingClassifier">HistGradientBoostingClassifier</option>
            <option value="HistGradientBoostingRegressor">HistGradientBoostingRegressor</option>
            <option value="RandomForestClassifier">RandomForestClassifier</option>
            <option value="RandomForestRegressor">RandomForestRegressor</option>
            <option value="RandomTreesEmbedding">RandomTreesEmbedding</option>
            <!--option value="VotingClassifier">VotingClassifier</option-->
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- sklearn.naive_bayes -->
    <when value="naive_bayes">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="BernoulliNB" selected="true">BernoulliNB</option>
            <option value="GaussianNB">GaussianNB</option>
            <option value="MultinomialNB">MultinomialNB</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- sklearn.tree -->
    <when value="tree">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="DecisionTreeClassifier" selected="true">DecisionTreeClassifier</option>
            <option value="DecisionTreeRegressor">DecisionTreeRegressor</option>
            <option value="ExtraTreeClassifier">ExtraTreeClassifier</option>
            <option value="ExtraTreeRegressor">ExtraTreeRegressor</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- sklearn.neighbors -->
    <when value="neighbors">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="KNeighborsClassifier" selected="true">KNeighborsClassifier</option>
            <option value="KNeighborsRegressor">KNeighborsRegressor</option>
            <!--option value="BallTree">BallTree</option-->
            <!--option value="KDTree">KDTree</option-->
            <option value="KernelDensity">KernelDensity</option>
            <option value="LocalOutlierFactor">LocalOutlierFactor</option>
            <option value="RadiusNeighborsClassifier">RadiusNeighborsClassifier</option>
            <option value="RadiusNeighborsRegressor">RadiusNeighborsRegressor</option>
            <option value="NearestCentroid">NearestCentroid</option>
            <option value="NearestNeighbors">NearestNeighbors</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- xgboost -->
    <when value="xgboost">
        <param name="selected_estimator" type="select" label="Choose estimator class:">
            <option value="XGBRegressor" selected="true">XGBRegressor</option>
            <option value="XGBClassifier">XGBClassifier</option>
        </param>
        <expand macro="estimator_params_text" />
    </when>

    <!-- Additional branches supplied by the expanding macro -->
    <yield />
</xml>
1602
<!-- Estimator selector limited to the built-in modules: module picker plus
     the per-module class branches, with nothing added through the yields. -->
<xml name="estimator_selector_all">
    <conditional name="estimator_selector">
        <param name="selected_module"
               type="select"
               label="Choose the module that contains target estimator:">
            <expand macro="estimator_module_options" />
        </param>
        <expand macro="estimator_suboptions" />
    </conditional>
</xml>
1611
<!-- Estimator selector with an extra "custom_estimator" choice: the option
     and its <when> branch are passed into the shared macros via their yields. -->
<xml name="estimator_selector_fs">
    <conditional name="estimator_selector">
        <param name="selected_module"
               type="select"
               label="Choose the module that contains target estimator:">
            <expand macro="estimator_module_options">
                <option value="custom_estimator">Load a custom estimator</option>
            </expand>
        </param>
        <expand macro="estimator_suboptions">
            <!-- Custom estimators are read from a zip dataset -->
            <when value="custom_estimator">
                <param name="c_estimator"
                       type="data"
                       format="zip"
                       label="Choose the dataset containing the custom estimator or pipeline:" />
            </when>
        </expand>
    </conditional>
</xml>
1626
<!-- Free-text box for estimator keyword arguments. Label, default value and
     help are tokens that callers may override; the sanitizer additionally
     lets the single quote character through. -->
<xml name="estimator_params_text"
     token_label="Type in parameter settings if different from default:"
     token_default_value=""
     token_help="Dictionary-capable, e.g., C=1, kernel='linear'. No double quotes. Leave this box blank for default estimator.">
    <param name="text_params"
           type="text"
           value="@DEFAULT_VALUE@"
           optional="true"
           label="@LABEL@"
           help="@HELP@">
        <sanitizer>
            <valid initial="default">
                <add value="&apos;" />
            </valid>
        </sanitizer>
    </param>
</xml>
1636
<!-- Kernel approximation selector. Each algorithm branch reuses
     estimator_params_text with a help string listing that algorithm's
     default parameters. -->
<xml name="kernel_approximation_all">
    <conditional name="kernel_approximation_selector">
        <param name="select_algorithm"
               type="select"
               label="Choose a kernel approximation algorithm:">
            <option value="Nystroem" selected="true">Nystroem</option>
            <option value="RBFSampler">RBFSampler</option>
            <option value="AdditiveChi2Sampler">AdditiveChi2Sampler</option>
            <option value="SkewedChi2Sampler">SkewedChi2Sampler</option>
        </param>
        <when value="Nystroem">
            <expand macro="estimator_params_text"
                    help="Default(=blank): coef0=None, degree=None, gamma=None, kernel='rbf', kernel_params=None, n_components=100, random_state=None. No double quotes" />
        </when>
        <when value="RBFSampler">
            <expand macro="estimator_params_text"
                    help="Default(=blank): gamma=1.0, n_components=100, random_state=None." />
        </when>
        <when value="AdditiveChi2Sampler">
            <expand macro="estimator_params_text"
                    help="Default(=blank): sample_interval=None, sample_steps=2." />
        </when>
        <when value="SkewedChi2Sampler">
            <expand macro="estimator_params_text"
                    help="Default(=blank): n_components=100, random_state=None, skewedness=1.0." />
        </when>
    </conditional>
</xml>
1659
1660 <xml name="matrix_decomposition_all">
1661 <conditional name="matrix_decomposition_selector">
1662 <param name="select_algorithm" type="select" label="Choose a matrix decomposition algorithm:">
1663 <option value="DictionaryLearning" selected="true">DictionaryLearning</option>
1664 <option value="FactorAnalysis">FactorAnalysis</option>
1665 <option value="FastICA">FastICA</option>
1666 <option value="IncrementalPCA">IncrementalPCA</option>
1667 <option value="KernelPCA">KernelPCA</option>
1668 <option value="LatentDirichletAllocation">LatentDirichletAllocation</option>
1669 <option value="MiniBatchDictionaryLearning">MiniBatchDictionaryLearning</option>
1670 <option value="MiniBatchSparsePCA">MiniBatchSparsePCA</option>
1671 <option value="NMF">NMF</option>
1672 <option value="PCA">PCA</option>
1673 <option value="SparsePCA">SparsePCA</option>
1674 <!--option value="SparseCoder">SparseCoder</option-->
1675 <option value="TruncatedSVD">TruncatedSVD</option>
1676 </param>
1677 <when value="DictionaryLearning">
1678 <expand macro="estimator_params_text" help="Default(=blank): alpha=1, code_init=None, dict_init=None, fit_algorithm='lars', max_iter=1000, n_components=None, random_state=None, split_sign=False, tol=1e-08, transform_algorithm='omp', transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False." />
1679 </when>
1680 <when value="FactorAnalysis">
1681 <expand macro="estimator_params_text" help="Default(=blank): copy=True, iterated_power=3, max_iter=1000, n_components=None, noise_variance_init=None, random_state=0, svd_method='randomized', tol=0.01." />
1682 </when>
1683 <when value="FastICA">
1684 <expand macro="estimator_params_text" help="Default(=blank): algorithm='parallel', fun='logcosh', fun_args=None, max_iter=200, n_components=None, random_state=None, tol=0.0001, w_init=None, whiten=True. No double quotes." />
1685 </when>
1686 <when value="IncrementalPCA">
1687 <expand macro="estimator_params_text" help="Default(=blank): batch_size=None, copy=True, n_components=None, whiten=False." />
1688 </when>
1689 <when value="KernelPCA">
1690 <expand macro="estimator_params_text" help="Default(=blank): alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto', fit_inverse_transform=False, gamma=None, kernel='linear', kernel_params=None, max_iter=None, n_components=None, random_state=None, remove_zero_eig=False, tol=0. No double quotes." />
1691 </when>
1692 <when value="LatentDirichletAllocation">
<!-- The help text lists only keywords the estimator still accepts: n_topics was removed from scikit-learn (n_components replaces it) and learning_method defaults to 'batch', not None. -->
1693 <expand macro="estimator_params_text" help="Default(=blank): batch_size=128, doc_topic_prior=None, evaluate_every=-1, learning_decay=0.7, learning_method='batch', learning_offset=10.0, max_doc_update_iter=100, max_iter=10, mean_change_tol=0.001, n_components=10, perp_tol=0.1, random_state=None, topic_word_prior=None, total_samples=1000000.0, verbose=0." />
1694 </when>
1695 <when value="MiniBatchDictionaryLearning">
1696 <expand macro="estimator_params_text" help="Default(=blank): alpha=1, batch_size=3, dict_init=None, fit_algorithm='lars', n_components=None, n_iter=1000, random_state=None, shuffle=True, split_sign=False, transform_algorithm='omp', transform_alpha=None, transform_n_nonzero_coefs=None, verbose=False." />
1697 </when>
1698 <when value="MiniBatchSparsePCA">
1699 <expand macro="estimator_params_text" help="Default(=blank): alpha=1, batch_size=3, callback=None, method='lars', n_components=None, n_iter=100, random_state=None, ridge_alpha=0.01, shuffle=True, verbose=False." />
1700 </when>
1701 <when value="NMF">
1702 <expand macro="estimator_params_text" help="Default(=blank): alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=200, n_components=None, random_state=None, shuffle=False, solver='cd', tol=0.0001, verbose=0." />
1703 </when>
1704 <when value="PCA">
1705 <expand macro="estimator_params_text" help="Default(=blank): copy=True, iterated_power='auto', n_components=None, random_state=None, svd_solver='auto', tol=0.0, whiten=False." />
1706 </when>
1707 <when value="SparsePCA">
1708 <expand macro="estimator_params_text" help="Default(=blank): U_init=None, V_init=None, alpha=1, max_iter=1000, method='lars', n_components=None, random_state=None, ridge_alpha=0.01, tol=1e-08, verbose=False." />
1709 </when>
1710 <when value="TruncatedSVD">
1711 <expand macro="estimator_params_text" help="Default(=blank): algorithm='randomized', n_components=2, n_iter=5, random_state=None, tol=0.0." />
1712 </when>
1713 </conditional>
1714 </xml>
1715
<!-- Macro "FeatureAgglomeration": a conditional (FeatureAgglomeration_selector) whose select
     offers a single, pre-selected option. Its only branch expands the estimator_params_text
     macro, passing the estimator's default keyword arguments as the help text. -->
1716 <xml name="FeatureAgglomeration">
1717 <conditional name="FeatureAgglomeration_selector">
1718 <param name="select_algorithm" type="select" label="Choose the algorithm:">
1719 <option value="FeatureAgglomeration" selected="true">FeatureAgglomeration</option>
1720 </param>
1721 <when value="FeatureAgglomeration">
1722 <expand macro="estimator_params_text" help="Default(=blank): affinity='euclidean', compute_full_tree='auto', connectivity=None, linkage='ward', memory=None, n_clusters=2, pooling_func=np.mean." />
1723 </when>
1724 </conditional>
1725 </xml>
1726
<!-- Macro "skrebate": a conditional (skrebate_selector) whose select offers five algorithms:
     ReliefF, SURF, SURFstar, MultiSURF and MultiSURFstar. Every branch expands the
     estimator_params_text macro and differs only in the defaults quoted in its help text;
     ReliefF is the only one that lists n_neighbors. The TuRF option and its branch are
     present but commented out. -->
1727 <xml name="skrebate">
1728 <conditional name="skrebate_selector">
1729 <param name="select_algorithm" type="select" label="Choose the algorithm:">
1730 <option value="ReliefF">ReliefF</option>
1731 <option value="SURF">SURF</option>
1732 <option value="SURFstar">SURFstar</option>
1733 <option value="MultiSURF">MultiSURF</option>
1734 <option value="MultiSURFstar">MultiSURFstar</option>
1735 <!--option value="TuRF">TuRF</option> -->
1736 </param>
1737 <when value="ReliefF">
1738 <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, n_neighbors=100, verbose=False." />
1739 </when>
1740 <when value="SURF">
1741 <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
1742 </when>
1743 <when value="SURFstar">
1744 <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
1745 </when>
1746 <when value="MultiSURF">
1747 <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
1748 </when>
1749 <when value="MultiSURFstar">
1750 <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False." />
1751 </when>
1752 <!--when value="TuRF">
1753 <expand macro="estimator_params_text" help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/>
1754 </when> --> 1753 </when> -->
1755 </conditional> 1754 </conditional>
1756 </xml> 1755 </xml>
1757 1756
1758 <xml name="imbalanced_learn_sampling"> 1757 <xml name="imbalanced_learn_sampling">
1759 <conditional name="imblearn_selector"> 1758 <conditional name="imblearn_selector">
1760 <param name="select_algorithm" type="select" label="Choose the algorithm:"> 1759 <param name="select_algorithm" type="select" label="Choose the algorithm:">
1761 <option value="under_sampling.ClusterCentroids" selected="true">under_sampling.ClusterCentroids</option> 1760 <option value="under_sampling.ClusterCentroids" selected="true">under_sampling.ClusterCentroids</option>
1762 <option value="under_sampling.CondensedNearestNeighbour">under_sampling.CondensedNearestNeighbour</option> 1761 <option value="under_sampling.CondensedNearestNeighbour">under_sampling.CondensedNearestNeighbour</option>
1763 <option value="under_sampling.EditedNearestNeighbours">under_sampling.EditedNearestNeighbours</option> 1762 <option value="under_sampling.EditedNearestNeighbours">under_sampling.EditedNearestNeighbours</option>
1764 <option value="under_sampling.RepeatedEditedNearestNeighbours">under_sampling.RepeatedEditedNearestNeighbours</option> 1763 <option value="under_sampling.RepeatedEditedNearestNeighbours">under_sampling.RepeatedEditedNearestNeighbours</option>
1765 <option value="under_sampling.AllKNN">under_sampling.AllKNN</option> 1764 <option value="under_sampling.AllKNN">under_sampling.AllKNN</option>
1766 <option value="under_sampling.InstanceHardnessThreshold">under_sampling.InstanceHardnessThreshold</option> 1765 <option value="under_sampling.InstanceHardnessThreshold">under_sampling.InstanceHardnessThreshold</option>
1767 <option value="under_sampling.NearMiss">under_sampling.NearMiss</option> 1766 <option value="under_sampling.NearMiss">under_sampling.NearMiss</option>
1768 <option value="under_sampling.NeighbourhoodCleaningRule">under_sampling.NeighbourhoodCleaningRule</option> 1767 <option value="under_sampling.NeighbourhoodCleaningRule">under_sampling.NeighbourhoodCleaningRule</option>
1769 <option value="under_sampling.OneSidedSelection">under_sampling.OneSidedSelection</option> 1768 <option value="under_sampling.OneSidedSelection">under_sampling.OneSidedSelection</option>
1770 <option value="under_sampling.RandomUnderSampler">under_sampling.RandomUnderSampler</option> 1769 <option value="under_sampling.RandomUnderSampler">under_sampling.RandomUnderSampler</option>
1771 <option value="under_sampling.TomekLinks">under_sampling.TomekLinks</option> 1770 <option value="under_sampling.TomekLinks">under_sampling.TomekLinks</option>
1772 <option value="over_sampling.ADASYN">over_sampling.ADASYN</option> 1771 <option value="over_sampling.ADASYN">over_sampling.ADASYN</option>
1773 <option value="over_sampling.RandomOverSampler">over_sampling.RandomOverSampler</option> 1772 <option value="over_sampling.RandomOverSampler">over_sampling.RandomOverSampler</option>
1774 <option value="over_sampling.SMOTE">over_sampling.SMOTE</option> 1773 <option value="over_sampling.SMOTE">over_sampling.SMOTE</option>
1775 <option value="over_sampling.SVMSMOTE">over_sampling.SVMSMOTE</option> 1774 <option value="over_sampling.SVMSMOTE">over_sampling.SVMSMOTE</option>
1776 <option value="over_sampling.BorderlineSMOTE">over_sampling.BorderlineSMOTE</option> 1775 <option value="over_sampling.BorderlineSMOTE">over_sampling.BorderlineSMOTE</option>
1777 <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option> 1776 <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>
1778 <option value="combine.SMOTEENN">combine.SMOTEENN</option> 1777 <option value="combine.SMOTEENN">combine.SMOTEENN</option>
1779 <option value="combine.SMOTETomek">combine.SMOTETomek</option> 1778 <option value="combine.SMOTETomek">combine.SMOTETomek</option>
1780 <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option> 1779 <option value="Z_RandomOverSampler">Z_RandomOverSampler - for regression</option>
1781 </param> 1780 </param>
1782 <when value="under_sampling.ClusterCentroids"> 1781 <when value="under_sampling.ClusterCentroids">
1783 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, estimator=None, voting='auto'." /> 1782 <expand macro="estimator_params_text"
1784 </when> 1783 help="Default(=blank): sampling_strategy='auto', random_state=None, estimator=None, voting='auto'." />
1785 <when value="under_sampling.CondensedNearestNeighbour"> 1784 </when>
1786 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1." /> 1785 <when value="under_sampling.CondensedNearestNeighbour">
1787 </when> 1786 <expand macro="estimator_params_text"
1788 <when value="under_sampling.EditedNearestNeighbours"> 1787 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1." />
1789 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'." /> 1788 </when>
1790 </when> 1789 <when value="under_sampling.EditedNearestNeighbours">
1791 <when value="under_sampling.RepeatedEditedNearestNeighbours"> 1790 <expand macro="estimator_params_text"
1792 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'." /> 1791 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'." />
1793 </when> 1792 </when>
1794 <when value="under_sampling.AllKNN"> 1793 <when value="under_sampling.RepeatedEditedNearestNeighbours">
1795 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', allow_minority=False." /> 1794 <expand macro="estimator_params_text"
1796 </when> 1795 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'." />
1797 <when value="under_sampling.InstanceHardnessThreshold"> 1796 </when>
1798 <expand macro="estimator_params_text" help="Default(=blank): estimator=None, sampling_strategy='auto', random_state=None, cv=5." /> 1797 <when value="under_sampling.AllKNN">
1799 </when> 1798 <expand macro="estimator_params_text"
1800 <when value="under_sampling.NearMiss"> 1799 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', allow_minority=False." />
1801 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, version=1, n_neighbors=3, n_neighbors_ver3=3." /> 1800 </when>
1802 </when> 1801 <when value="under_sampling.InstanceHardnessThreshold">
1803 <when value="under_sampling.NeighbourhoodCleaningRule"> 1802 <expand macro="estimator_params_text"
1804 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', threshold_cleaning=0.5." /> 1803 help="Default(=blank): estimator=None, sampling_strategy='auto', random_state=None, cv=5." />
1805 </when> 1804 </when>
1806 <when value="under_sampling.OneSidedSelection"> 1805 <when value="under_sampling.NearMiss">
1807 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1." /> 1806 <expand macro="estimator_params_text"
1808 </when> 1807 help="Default(=blank): sampling_strategy='auto', random_state=None, version=1, n_neighbors=3, n_neighbors_ver3=3." />
1809 <when value="under_sampling.RandomUnderSampler"> 1808 </when>
1810 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, replacement=False." /> 1809 <when value="under_sampling.NeighbourhoodCleaningRule">
1811 </when> 1810 <expand macro="estimator_params_text"
1812 <when value="under_sampling.TomekLinks"> 1811 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', threshold_cleaning=0.5." />
1813 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None." /> 1812 </when>
1814 </when> 1813 <when value="under_sampling.OneSidedSelection">
1815 <when value="over_sampling.ADASYN"> 1814 <expand macro="estimator_params_text"
1816 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=5." /> 1815 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1." />
1817 </when> 1816 </when>
1818 <when value="over_sampling.RandomOverSampler"> 1817 <when value="under_sampling.RandomUnderSampler">
1819 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None." /> 1818 <expand macro="estimator_params_text"
1820 </when> 1819 help="Default(=blank): sampling_strategy='auto', random_state=None, replacement=False." />
1821 <when value="over_sampling.SMOTE"> 1820 </when>
1822 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, k_neighbors=5." /> 1821 <when value="under_sampling.TomekLinks">
1823 </when> 1822 <expand macro="estimator_params_text"
1824 <when value="over_sampling.SVMSMOTE"> 1823 help="Default(=blank): sampling_strategy='auto', random_state=None." />
1825 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', k_neighbors=5, m_neighbors=10, out_step=0.5, random_state=None, svm_estimator=None." /> 1824 </when>
1826 </when> 1825 <when value="over_sampling.ADASYN">
1827 <when value="over_sampling.BorderlineSMOTE"> 1826 <expand macro="estimator_params_text"
1828 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', k_neighbors=5, kind='borderline-1', m_neighbors=10, random_state=None." /> 1827 help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=5." />
1829 </when> 1828 </when>
1830 <when value="over_sampling.SMOTENC"> 1829 <when value="over_sampling.RandomOverSampler">
1831 <expand macro="estimator_params_text" help="Default: categorical_features=[], sampling_strategy='auto', random_state=None, k_neighbors=5." /> 1830 <expand macro="estimator_params_text"
1832 </when> 1831 help="Default(=blank): sampling_strategy='auto', random_state=None." />
1833 <when value="combine.SMOTEENN"> 1832 </when>
1834 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, enn=None." /> 1833 <when value="over_sampling.SMOTE">
1835 </when> 1834 <expand macro="estimator_params_text"
1836 <when value="combine.SMOTETomek"> 1835 help="Default(=blank): sampling_strategy='auto', random_state=None, k_neighbors=5." />
1837 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None." /> 1836 </when>
1838 </when> 1837 <when value="over_sampling.SVMSMOTE">
1839 <when value="Z_RandomOverSampler"> 1838 <expand macro="estimator_params_text"
1840 <expand macro="estimator_params_text" help="Default(=blank): sampling_strategy='auto', random_state=None, negative_thres=0, positive_thres=-1." /> 1839 help="Default(=blank): sampling_strategy='auto', k_neighbors=5, m_neighbors=10, out_step=0.5, random_state=None, svm_estimator=None." />
1841 </when> 1840 </when>
1842 </conditional> 1841 <when value="over_sampling.BorderlineSMOTE">
1843 </xml> 1842 <expand macro="estimator_params_text"
1844 1843 help="Default(=blank): sampling_strategy='auto', k_neighbors=5, kind='borderline-1', m_neighbors=10, random_state=None." />
1845 <xml name="stacking_ensemble_inputs"> 1844 </when>
1846 <section name="options" title="Advanced Options" expanded="false"> 1845 <when value="over_sampling.SMOTENC">
1847 <yield /> 1846 <expand macro="estimator_params_text"
1848 <param argument="use_features_in_secondary" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> 1847 help="Default: categorical_features=[], sampling_strategy='auto', random_state=None, k_neighbors=5." />
1849 <param argument="store_train_meta_features" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" /> 1848 </when>
1850 </section> 1849 <when value="combine.SMOTEENN">
1851 </xml> 1850 <expand macro="estimator_params_text"
1852 1851 help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, enn=None." />
1853 <xml name="stacking_base_estimator"> 1852 </when>
1854 <conditional name="estimator_selector"> 1853 <when value="combine.SMOTETomek">
1855 <param name="selected_module" type="select" label="Choose the module that contains target estimator:"> 1854 <expand macro="estimator_params_text"
1856 <expand macro="estimator_module_options"> 1855 help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None." />
1857 <option value="custom_estimator">Load a custom estimator</option> 1856 </when>
1858 </expand> 1857 <when value="Z_RandomOverSampler">
1859 </param> 1858 <expand macro="estimator_params_text"
1860 <expand macro="estimator_suboptions"> 1859 help="Default(=blank): sampling_strategy='auto', random_state=None, negative_thres=0, positive_thres=-1." />
1861 <when value="custom_estimator"> 1860 </when>
1862 <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline" /> 1861 </conditional>
1863 </when> 1862 </xml>
1864 </expand> 1863
1865 </conditional> 1864 <xml name="preprocessors_sequence_encoders">
1866 </xml> 1865 <conditional name="encoder_selection">
1867 1866 <param name="encoder_type" type="select" label="Choose the sequence encoder class">
1868 <xml name="stacking_voting_weights"> 1867 <option value="GenomeOneHotEncoder">GenomeOneHotEncoder</option>
1869 <section name="options" title="Advanced Options" expanded="false"> 1868 <option value="ProteinOneHotEncoder">ProteinOneHotEncoder</option>
1870 <param argument="weights" type="text" value="[]" optional="true" help="Sequence of weights (float or int). Uses uniform weights if None (`[]`)."> 1869 </param>
1871 <sanitizer> 1870 <when value="GenomeOneHotEncoder">
1872 <valid initial="default"> 1871 <expand macro="preprocessors_sequence_encoder_arguments" />
1873 <add value="[" /> 1872 </when>
1874 <add value="]" /> 1873 <when value="ProteinOneHotEncoder">
1875 </valid> 1874 <expand macro="preprocessors_sequence_encoder_arguments" />
1876 </sanitizer> 1875 </when>
1877 </param> 1876 </conditional>
1878 <yield /> 1877 </xml>
1879 </section> 1878
1880 </xml> 1879 <xml name="preprocessors_sequence_encoder_arguments">
1881 1880 <param argument="seq_length" type="integer" value="" min="0" optional="true" help="Integer. Sequence length" />
1882 <xml name="preprocessors_sequence_encoders"> 1881 <param argument="padding" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Whether to pad or truncate sequence to meet the sequence length." />
1883 <conditional name="encoder_selection"> 1882 </xml>
1884 <param name="encoder_type" type="select" label="Choose the sequence encoder class"> 1883
1885 <option value="GenomeOneHotEncoder">GenomeOneHotEncoder</option> 1884 <!-- Outputs -->
1886 <option value="ProteinOneHotEncoder">ProteinOneHotEncoder</option> 1885
1887 </param> 1886 <xml name="output">
1888 <when value="GenomeOneHotEncoder"> 1887 <outputs>
1889 <expand macro="preprocessors_sequence_encoder_arguments" /> 1888 <data format="tabular" name="outfile_predict">
1890 </when> 1889 <filter>selected_tasks['selected_task'] == 'load'</filter>
1891 <when value="ProteinOneHotEncoder"> 1890 </data>
1892 <expand macro="preprocessors_sequence_encoder_arguments" /> 1891 <data format="h5mlm" name="outfile_fit" label="${tool.name}.${selected_tasks.selected_algorithms.selected_algorithm}">
1893 </when> 1892 <filter>selected_tasks['selected_task'] == 'train'</filter>
1894 </conditional> 1893 </data>
1895 </xml> 1894 </outputs>
1896 1895 </xml>
1897 <xml name="preprocessors_sequence_encoder_arguments"> 1896
1898 <param argument="seq_length" type="integer" value="" min="0" optional="true" help="Integer. Sequence length" /> 1897 <!--Citations-->
1899 <param argument="padding" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" help="Whether to pad or truncate sequence to meet the sequence length." /> 1898 <xml name="eden_citation">
1900 </xml> 1899 <citations>
1901 1900 <citation type="doi">10.5281/zenodo.15094</citation>
1902 <!-- Outputs --> 1901 </citations>
1903 1902 </xml>
1904 <xml name="output"> 1903
1905 <outputs> 1904 <xml name="sklearn_citation">
1906 <data format="tabular" name="outfile_predict"> 1905 <citations>
1907 <filter>selected_tasks['selected_task'] == 'load'</filter> 1906 <citation type="bibtex">
1908 </data> 1907 @article{scikit-learn,
1909 <data format="zip" name="outfile_fit" label="${tool.name}.${selected_tasks.selected_algorithms.selected_algorithm}"> 1908 title={Scikit-learn: Machine Learning in {P}ython},
1910 <filter>selected_tasks['selected_task'] == 'train'</filter> 1909 author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
1911 </data> 1910 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
1912 </outputs> 1911 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
1913 </xml> 1912 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
1914 1913 journal={Journal of Machine Learning Research},
1915 <!--Citations--> 1914 volume={12},
1916 <xml name="eden_citation"> 1915 pages={2825--2830},
1917 <citations> 1916 year={2011}
1918 <citation type="doi">10.5281/zenodo.15094</citation>
1919 </citations>
1920 </xml>
1921
1922 <xml name="sklearn_citation">
1923 <citations>
1924 <citation type="doi">10.1371/journal.pcbi.1009014</citation>
1925 <citation type="bibtex">
1926 @article{JMLR:v12:pedregosa11a,
1927 title = {Scikit-learn: Machine Learning in {P}ython},
1928 author = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
1929 and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
1930 and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
1931 Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
1932 journal = {Journal of Machine Learning Research},
1933 volume = {12},
1934 pages = {2825--2830},
1935 year = {2011}, 1916 year={2011}
1936 url = {http://jmlr.org/papers/v12/pedregosa11a.html}
1937 }
1938 </citation>
1939 <yield />
1940 </citations>
1941 </xml>
1942
1943 <xml name="scipy_citation">
1944 <citations>
1945 <citation type="bibtex">
1946 @Misc{,
1947 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
1948 title = {{SciPy}: Open source scientific tools for {Python}},
1949 year = {2001--},
1950 url = {http://www.scipy.org/},
1951 note = {[Online; accessed 2016-04-09]}
1952 } 1917 }
1953 </citation> 1918 </citation>
1954 </citations> 1919 <yield />
1955 </xml> 1920 </citations>
1956 1921 </xml>
1957 <xml name="skrebate_citation"> 1922
1958 <citation type="bibtex"> 1923 <xml name="scipy_citation">
1924 <citations>
1925 <citation type="bibtex">
1926 @Misc{,
1927 author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
1928 title = {{SciPy}: Open source scientific tools for {Python}},
1929 year = {2001--},
1930 url = "http://www.scipy.org/",
1931 note = {[Online; accessed 2016-04-09]}
1932 }
1933 </citation>
1934 </citations>
1935 </xml>
1936
1937 <xml name="skrebate_citation">
1938 <citation type="bibtex">
1959 @article{DBLP:journals/corr/abs-1711-08477, 1939 @article{DBLP:journals/corr/abs-1711-08477,
1960 author = {Ryan J. Urbanowicz and 1940 author = {Ryan J. Urbanowicz and
1961 Randal S. Olson and 1941 Randal S. Olson and
1962 Peter Schmitt and 1942 Peter Schmitt and
1963 Melissa Meeker and 1943 Melissa Meeker and
1971 eprint = {1711.08477}, 1951 eprint = {1711.08477},
1972 timestamp = {Mon, 13 Aug 2018 16:46:04 +0200}, 1952 timestamp = {Mon, 13 Aug 2018 16:46:04 +0200},
1973 biburl = {https://dblp.org/rec/bib/journals/corr/abs-1711-08477}, 1953 biburl = {https://dblp.org/rec/bib/journals/corr/abs-1711-08477},
1974 bibsource = {dblp computer science bibliography, https://dblp.org} 1954 bibsource = {dblp computer science bibliography, https://dblp.org}
1975 } 1955 }
1976 </citation> 1956 </citation>
1977 </xml> 1957 </xml>
1978 1958
1979 <xml name="xgboost_citation"> 1959 <xml name="xgboost_citation">
1980 <citation type="bibtex"> 1960 <citation type="bibtex">
1981 @inproceedings{Chen:2016:XST:2939672.2939785, 1961 @inproceedings{Chen:2016:XST:2939672.2939785,
1982 author = {Chen, Tianqi and Guestrin, Carlos}, 1962 author = {Chen, Tianqi and Guestrin, Carlos},
1983 title = {{XGBoost}: A Scalable Tree Boosting System}, 1963 title = {{XGBoost}: A Scalable Tree Boosting System},
1984 booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, 1964 booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
1985 series = {KDD '16}, 1965 series = {KDD '16},
1986 year = {2016}, 1966 year = {2016},
1987 isbn = {978-1-4503-4232-2}, 1967 isbn = {978-1-4503-4232-2},
1988 location = {San Francisco, California, USA}, 1968 location = {San Francisco, California, USA},
1989 pages = {785--794}, 1969 pages = {785--794},
1990 numpages = {10}, 1970 numpages = {10},
1991 url = {http://doi.acm.org/10.1145/2939672.2939785}, 1971 url = {http://doi.acm.org/10.1145/2939672.2939785},
1992 doi = {10.1145/2939672.2939785}, 1972 doi = {10.1145/2939672.2939785},
1993 acmid = {2939785}, 1973 acmid = {2939785},
1994 publisher = {ACM}, 1974 publisher = {ACM},
1995 address = {New York, NY, USA}, 1975 address = {New York, NY, USA},
1996 keywords = {large-scale machine learning}, 1976 keywords = {large-scale machine learning},
1997 } 1977 }
1998 </citation> 1978 </citation>
1999 </xml> 1979 </xml>
2000 1980
2001 <xml name="imblearn_citation"> 1981 <xml name="imblearn_citation">
2002 <citation type="bibtex"> 1982 <citation type="bibtex">
2003 @article{JMLR:v18:16-365, 1983 @article{JMLR:v18:16-365,
2004 author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas}, 1984 author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
2005 title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning}, 1985 title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
2006 journal = {Journal of Machine Learning Research}, 1986 journal = {Journal of Machine Learning Research},
2007 year = {2017}, 1987 year = {2017},
2008 volume = {18}, 1988 volume = {18},
2009 number = {17}, 1989 number = {17},
2010 pages = {1-5}, 1990 pages = {1-5},
2011 url = {http://jmlr.org/papers/v18/16-365.html} 1991 url = {http://jmlr.org/papers/v18/16-365.html}
2012 } 1992 }
2013 </citation> 1993 </citation>
2014 </xml> 1994 </xml>
2015 1995
2016 <xml name="selene_citation"> 1996 <xml name="selene_citation">
2017 <citation type="doi">10.1038/s41592-019-0360-8</citation> 1997 <citation type="bibtex">
2018 </xml> 1998 @article{chen2019selene,
1999 title={Selene: a PyTorch-based deep learning library for sequence data},
2000 author={Chen, Kathleen M and Cofer, Evan M and Zhou, Jian and Troyanskaya, Olga G},
2001 journal={Nature methods},
2002 volume={16},
2003 number={4},
2004 pages={315},
2005 year={2019},
2006 publisher={Nature Publishing Group}
2007 }
2008 </citation>
2009 </xml>
2019 2010
2020 </macros> 2011 </macros>