Mercurial > repos > bgruening > sklearn_train_test_split
comparison train_test_split.xml @ 11:5da2217cd788 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 13:33:25 +0000 |
parents | ce2fd1edbc6e |
children |
comparison
equal
deleted
inserted
replaced
10:6e25381dad5c | 11:5da2217cd788 |
---|---|
1 <tool id="sklearn_train_test_split" name="Split Dataset" version="@VERSION@"> | 1 <tool id="sklearn_train_test_split" name="Split Dataset" version="@VERSION@" profile="@PROFILE@"> |
2 <description>into training and test subsets</description> | 2 <description>into training and test subsets</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 <macro name="label_input" token_label="Select the dataset containing labels"> | 5 <macro name="label_input" token_label="Select the dataset containing labels"> |
6 <param name="labels" type="data" format="tabular" label="@LABEL@"/> | 6 <param name="labels" type="data" format="tabular" label="@LABEL@" /> |
7 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Does the dataset contain header?" /> | 7 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Does the dataset contain header?" /> |
8 <param name="col" type="data_column" data_ref="labels" label="Select target column"/> | 8 <param name="col" type="data_column" data_ref="labels" label="Select target column" /> |
9 </macro> | 9 </macro> |
10 </macros> | 10 </macros> |
11 <expand macro="python_requirements"/> | 11 <expand macro="python_requirements" /> |
12 <expand macro="macro_stdio"/> | 12 <expand macro="macro_stdio" /> |
13 <version_command>echo "@VERSION@"</version_command> | 13 <version_command>echo "@VERSION@"</version_command> |
14 <command detect_errors="exit_code"><![CDATA[ | 14 <command detect_errors="exit_code"><![CDATA[ |
15 python '$__tool_directory__/train_test_split.py' | 15 python '$__tool_directory__/train_test_split.py' |
16 --inputs '$inputs' | 16 --inputs '$inputs' |
17 --infile_array '$infile_array' | 17 --infile_array '$infile_array' |
27 --outfile_train '$out_train' | 27 --outfile_train '$out_train' |
28 --outfile_test '$out_test' | 28 --outfile_test '$out_test' |
29 ]]> | 29 ]]> |
30 </command> | 30 </command> |
31 <configfiles> | 31 <configfiles> |
32 <inputs name="inputs"/> | 32 <inputs name="inputs" /> |
33 </configfiles> | 33 </configfiles> |
34 <inputs> | 34 <inputs> |
35 <param name="infile_array" type="data" format="tabular" label="Select the dataset containing array to split" help="This tool only supports to split one array at each tool run. If X, y are in separate files, the splitting task could be done by invoking this tool twice in which this input dataset is swapped while all other parameters are kept the same."/> | 35 <param name="infile_array" type="data" format="tabular" label="Select the dataset containing array to split" help="This tool only supports to split one array at each tool run. If X, y are in separate files, the splitting task could be done by invoking this tool twice in which this input dataset is swapped while all other parameters are kept the same." /> |
36 <param name="header0" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Does the dataset contain header?" /> | 36 <param name="header0" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Does the dataset contain header?" /> |
37 <conditional name="mode_selection"> | 37 <conditional name="mode_selection"> |
38 <param name="selected_mode" type="select" label="Select the splitting mode"> | 38 <param name="selected_mode" type="select" label="Select the splitting mode"> |
39 <option value="train_test_split" selected="true">Train Test Split</option> | 39 <option value="train_test_split" selected="true">Train Test Split</option> |
40 <option value="cv_splitter">Cross-Validation Splitter</option> | 40 <option value="cv_splitter">Cross-Validation Splitter</option> |
41 </param> | 41 </param> |
42 <when value="train_test_split"> | 42 <when value="train_test_split"> |
43 <section name="options" title="Options" expanded="true"> | 43 <section name="options" title="Options" expanded="true"> |
44 <param argument="test_size" type="float" min="0" optional="false" value="0.25" label="Test size:" | 44 <param argument="test_size" type="float" min="0" optional="false" value="0.25" label="Test size:" |
45 help="A float number, 0.0 - 1.0, represents the proportion of the dataset to be included in the test split."/> | 45 help="A float number, 0.0 - 1.0, represents the proportion of the dataset to be included in the test split." /> |
46 <param argument="random_state" type="integer" optional="true" value="" label="Random seed number:"/> | 46 <param argument="random_state" type="integer" optional="true" value="" label="Random seed number:" /> |
47 <conditional name="shuffle_selection"> | 47 <conditional name="shuffle_selection"> |
48 <param name="shuffle" type="select" label="Shuffle strategy"> | 48 <param name="shuffle" type="select" label="Shuffle strategy"> |
49 <option value="None">None - No shuffle</option> | 49 <option value="None">None - No shuffle</option> |
50 <option value="simple">Shuffle</option> | 50 <option value="simple">Shuffle</option> |
51 <option value="stratified">Stratified Shuffle</option> | 51 <option value="stratified">Stratified Shuffle</option> |
52 <option value="group">Group Shuffle</option> | 52 <option value="group">Group Shuffle</option> |
53 </param> | 53 </param> |
54 <when value="None"/> | 54 <when value="None" /> |
55 <when value="simple"/> | 55 <when value="simple" /> |
56 <when value="stratified"> | 56 <when value="stratified"> |
57 <expand macro="label_input"/> | 57 <expand macro="label_input" /> |
58 </when> | 58 </when> |
59 <when value="group"> | 59 <when value="group"> |
60 <expand macro="label_input" label="Select the dataset containing groups"/> | 60 <expand macro="label_input" label="Select the dataset containing groups" /> |
61 </when> | 61 </when> |
62 </conditional> | 62 </conditional> |
63 </section> | 63 </section> |
64 </when> | 64 </when> |
65 <when value="cv_splitter"> | 65 <when value="cv_splitter"> |
81 <option value="GroupShuffleSplit">GroupShuffleSplit</option> | 81 <option value="GroupShuffleSplit">GroupShuffleSplit</option> |
82 <option value="LeaveOneGroupOut">LeaveOneGroupOut</option> | 82 <option value="LeaveOneGroupOut">LeaveOneGroupOut</option> |
83 <option value="LeavePGroupsOut">LeavePGroupsOut</option> | 83 <option value="LeavePGroupsOut">LeavePGroupsOut</option> |
84 </param> | 84 </param> |
85 <when value="KFold"> | 85 <when value="KFold"> |
86 <expand macro="cv_n_splits"/> | 86 <expand macro="cv_n_splits" /> |
87 <expand macro="cv_shuffle"/> | 87 <expand macro="cv_shuffle" /> |
88 <expand macro="random_state"/> | 88 <expand macro="random_state" /> |
89 </when> | 89 </when> |
90 <when value="RepeatedKFold"> | 90 <when value="RepeatedKFold"> |
91 <expand macro="cv_n_splits" value="5"/> | 91 <expand macro="cv_n_splits" value="5" /> |
92 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> | 92 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> |
93 <expand macro="random_state" /> | 93 <expand macro="random_state" /> |
94 </when> | 94 </when> |
95 <when value="StratifiedKFold"> | 95 <when value="StratifiedKFold"> |
96 <expand macro="cv_n_splits"/> | 96 <expand macro="cv_n_splits" /> |
97 <expand macro="cv_shuffle"/> | 97 <expand macro="cv_shuffle" /> |
98 <expand macro="random_state"/> | 98 <expand macro="random_state" /> |
99 <section name="target_input" title="Target values" expanded="true"> | 99 <section name="target_input" title="Target values" expanded="true"> |
100 <expand macro="label_input"/> | 100 <expand macro="label_input" /> |
101 </section> | 101 </section> |
102 </when> | 102 </when> |
103 <when value="RepeatedStratifiedKFold"> | 103 <when value="RepeatedStratifiedKFold"> |
104 <expand macro="cv_n_splits" value="5"/> | 104 <expand macro="cv_n_splits" value="5" /> |
105 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> | 105 <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> |
106 <expand macro="random_state" /> | 106 <expand macro="random_state" /> |
107 <section name="target_input" title="Target values" expanded="true"> | 107 <section name="target_input" title="Target values" expanded="true"> |
108 <expand macro="label_input"/> | 108 <expand macro="label_input" /> |
109 </section> | 109 </section> |
110 </when> | 110 </when> |
111 <when value="LeaveOneOut"> | 111 <when value="LeaveOneOut"> |
112 </when> | 112 </when> |
113 <when value="LeavePOut"> | 113 <when value="LeavePOut"> |
114 <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets."/> | 114 <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets." /> |
115 </when> | 115 </when> |
116 <when value="ShuffleSplit"> | 116 <when value="ShuffleSplit"> |
117 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/> | 117 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." /> |
118 <expand macro="cv_test_size" value="0.1" /> | 118 <expand macro="cv_test_size" value="0.1" /> |
119 <expand macro="random_state"/> | 119 <expand macro="random_state" /> |
120 </when> | 120 </when> |
121 <when value="StratifiedShuffleSplit"> | 121 <when value="StratifiedShuffleSplit"> |
122 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/> | 122 <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations." /> |
123 <expand macro="cv_test_size" value="0.1" /> | 123 <expand macro="cv_test_size" value="0.1" /> |
124 <expand macro="random_state"/> | 124 <expand macro="random_state" /> |
125 <section name="target_input" title="Target values" expanded="true"> | 125 <section name="target_input" title="Target values" expanded="true"> |
126 <expand macro="label_input"/> | 126 <expand macro="label_input" /> |
127 </section> | 127 </section> |
128 </when> | 128 </when> |
129 <when value="TimeSeriesSplit"> | 129 <when value="TimeSeriesSplit"> |
130 <expand macro="cv_n_splits"/> | 130 <expand macro="cv_n_splits" /> |
131 <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." /> | 131 <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." /> |
132 </when> | 132 </when> |
133 <when value="PredefinedSplit"> | 133 <when value="PredefinedSplit"> |
134 <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/> | 134 <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'." /> |
135 </when> | 135 </when> |
136 <when value="OrderedKFold"> | 136 <when value="OrderedKFold"> |
137 <expand macro="cv_n_splits"/> | 137 <expand macro="cv_n_splits" /> |
138 <expand macro="cv_shuffle"/> | 138 <expand macro="cv_shuffle" /> |
139 <expand macro="random_state"/> | 139 <expand macro="random_state" /> |
140 <section name="target_input" title="Target values" expanded="true"> | 140 <expand macro="cv_n_stratification_bins" /> |
141 <expand macro="label_input" label="Select the dataset containing target values"/> | 141 <section name="target_input" title="Target values" expanded="true"> |
| | 142 <expand macro="label_input" label="Select the dataset containing target values" /> |
142 </section> | 143 </section> |
143 </when> | 144 </when> |
144 <when value="RepeatedOrderedKFold"> | 145 <when value="RepeatedOrderedKFold"> |
145 <expand macro="cv_n_splits"/> | 146 <expand macro="cv_n_splits" /> |
146 <param argument="n_repeats" type="integer" value="5"/> | 147 <param argument="n_repeats" type="integer" value="5" /> |
147 <expand macro="random_state"/> | 148 <expand macro="random_state" /> |
148 <section name="target_input" title="Target values" expanded="true"> | 149 <expand macro="cv_n_stratification_bins" /> |
149 <expand macro="label_input" label="Select the dataset containing target values"/> | 150 <section name="target_input" title="Target values" expanded="true"> |
| | 151 <expand macro="label_input" label="Select the dataset containing target values" /> |
150 </section> | 152 </section> |
151 </when> | 153 </when> |
152 <when value="GroupKFold"> | 154 <when value="GroupKFold"> |
153 <expand macro="cv_n_splits"/> | 155 <expand macro="cv_n_splits" /> |
154 <expand macro="cv_groups" /> | 156 <expand macro="cv_groups" /> |
155 </when> | 157 </when> |
156 <when value="GroupShuffleSplit"> | 158 <when value="GroupShuffleSplit"> |
157 <expand macro="cv_n_splits" value="5"/> | 159 <expand macro="cv_n_splits" value="5" /> |
158 <expand macro="cv_test_size"/> | 160 <expand macro="cv_test_size" /> |
159 <expand macro="random_state"/> | 161 <expand macro="random_state" /> |
160 <expand macro="cv_groups"/> | 162 <expand macro="cv_groups" /> |
161 </when> | 163 </when> |
162 <when value="LeaveOneGroupOut"> | 164 <when value="LeaveOneGroupOut"> |
163 <expand macro="cv_groups"/> | 165 <expand macro="cv_groups" /> |
164 </when> | 166 </when> |
165 <when value="LeavePGroupsOut"> | 167 <when value="LeavePGroupsOut"> |
166 <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." /> | 168 <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." /> |
167 <expand macro="cv_groups"/> | 169 <expand macro="cv_groups" /> |
168 </when> | 170 </when> |
169 </conditional> | 171 </conditional> |
170 <param name="nth_split" type="integer" min="1" value="1" label="Type the index of split to output" help="Split index starts from 1 to total = n_splits (x n_repeats). (nth_split)"/> | 172 <param name="nth_split" type="integer" min="1" value="1" label="Type the index of split to output" help="Split index starts from 1 to total = n_splits (x n_repeats). (nth_split)" /> |
171 </when> | 173 </when> |
172 </conditional> | 174 </conditional> |
173 </inputs> | 175 </inputs> |
174 <outputs> | 176 <outputs> |
175 <data format="tabular" name="out_train" label="${tool.name} on ${on_string} (train)"/> | 177 <data format="tabular" name="out_train" label="${tool.name} on ${on_string} (train)" /> |
176 <data format="tabular" name="out_test" label="${tool.name} on ${on_string} (test)"/> | 178 <data format="tabular" name="out_test" label="${tool.name} on ${on_string} (test)" /> |
177 </outputs> | 179 </outputs> |
178 <tests> | 180 <tests> |
179 <test> | 181 <test> |
180 <param name="infile_array" value="regression_X.tabular" ftype="tabular"/> | 182 <param name="infile_array" value="regression_X.tabular" ftype="tabular" /> |
181 <param name="header0" value="true"/> | 183 <param name="header0" value="true" /> |
182 <conditional name="mode_selection"> | 184 <conditional name="mode_selection"> |
183 <param name="selected_mode" value="train_test_split"/> | 185 <param name="selected_mode" value="train_test_split" /> |
184 <section name="options"> | 186 <section name="options"> |
185 <param name="random_state" value="123"/> | 187 <param name="random_state" value="123" /> |
186 <conditional name="shuffle_selection"> | 188 <conditional name="shuffle_selection"> |
187 <param name="shuffle" value="simple"/> | 189 <param name="shuffle" value="simple" /> |
188 </conditional> | 190 </conditional> |
189 </section> | 191 </section> |
190 </conditional> | 192 </conditional> |
191 <output name="out_train" file="train_test_split_train01.tabular" ftype="tabular"/> | 193 <output name="out_train" file="train_test_split_train01.tabular" ftype="tabular" /> |
192 <output name="out_test" file="train_test_split_test01.tabular" ftype="tabular"/> | 194 <output name="out_test" file="train_test_split_test01.tabular" ftype="tabular" /> |
193 </test> | 195 </test> |
194 <test> | 196 <test> |
195 <param name="infile_array" value="regression_X.tabular" ftype="tabular"/> | 197 <param name="infile_array" value="regression_X.tabular" ftype="tabular" /> |
196 <param name="header0" value="true"/> | 198 <param name="header0" value="true" /> |
197 <conditional name="mode_selection"> | 199 <conditional name="mode_selection"> |
198 <param name="selected_mode" value="cv_splitter"/> | 200 <param name="selected_mode" value="cv_splitter" /> |
199 <conditional name="cv_selector"> | 201 <conditional name="cv_selector"> |
200 <param name="selected_cv" value="ShuffleSplit"/> | 202 <param name="selected_cv" value="ShuffleSplit" /> |
201 <param name="random_state" value="123"/> | 203 <param name="random_state" value="123" /> |
202 <param name="n_splits" value="2"/> | 204 <param name="n_splits" value="2" /> |
203 <param name="test_size" value="0.25"/> | 205 <param name="test_size" value="0.25" /> |
204 </conditional> | 206 </conditional> |
205 </conditional> | 207 </conditional> |
206 <output name="out_train" file="train_test_split_train01.tabular" ftype="tabular"/> | 208 <output name="out_train" file="train_test_split_train01.tabular" ftype="tabular" /> |
207 <output name="out_test" file="train_test_split_test01.tabular" ftype="tabular"/> | 209 <output name="out_test" file="train_test_split_test01.tabular" ftype="tabular" /> |
208 </test> | 210 </test> |
209 <test> | 211 <test> |
210 <param name="infile_array" value="imblearn_X.tabular" ftype="tabular"/> | 212 <param name="infile_array" value="imblearn_X.tabular" ftype="tabular" /> |
211 <param name="header0" value="false"/> | 213 <param name="header0" value="false" /> |
212 <conditional name="mode_selection"> | 214 <conditional name="mode_selection"> |
213 <param name="selected_mode" value="train_test_split"/> | 215 <param name="selected_mode" value="train_test_split" /> |
214 <section name="options"> | 216 <section name="options"> |
215 <param name="test_size" value="0.2"/> | 217 <param name="test_size" value="0.2" /> |
216 <param name="random_state" value="123"/> | 218 <param name="random_state" value="123" /> |
217 <conditional name="shuffle_selection"> | 219 <conditional name="shuffle_selection"> |
218 <param name="shuffle" value="stratified"/> | 220 <param name="shuffle" value="stratified" /> |
219 <param name="labels" value="imblearn_y.tabular" ftype="tabular"/> | 221 <param name="labels" value="imblearn_y.tabular" ftype="tabular" /> |
220 <param name="header1" value="false"/> | 222 <param name="header1" value="false" /> |
221 <param name="col" value="1"/> | 223 <param name="col" value="1" /> |
222 </conditional> | 224 </conditional> |
223 </section> | 225 </section> |
224 </conditional> | 226 </conditional> |
225 <output name="out_train" file="train_test_split_train02.tabular" ftype="tabular"/> | 227 <output name="out_train" file="train_test_split_train02.tabular" ftype="tabular" /> |
226 <output name="out_test" file="train_test_split_test02.tabular" ftype="tabular"/> | 228 <output name="out_test" file="train_test_split_test02.tabular" ftype="tabular" /> |
227 </test> | 229 </test> |
228 <test> | 230 <test> |
229 <param name="infile_array" value="imblearn_X.tabular" ftype="tabular"/> | 231 <param name="infile_array" value="imblearn_X.tabular" ftype="tabular" /> |
230 <param name="header0" value="false"/> | 232 <param name="header0" value="false" /> |
231 <conditional name="mode_selection"> | 233 <conditional name="mode_selection"> |
232 <param name="selected_mode" value="cv_splitter"/> | 234 <param name="selected_mode" value="cv_splitter" /> |
233 <conditional name="cv_selector"> | 235 <conditional name="cv_selector"> |
234 <param name="selected_cv" value="StratifiedShuffleSplit"/> | 236 <param name="selected_cv" value="StratifiedShuffleSplit" /> |
235 <param name="random_state" value="123"/> | 237 <param name="random_state" value="123" /> |
236 <param name="test_size" value="0.2"/> | 238 <param name="test_size" value="0.2" /> |
237 <param name="n_splits" value="1"/> | 239 <param name="n_splits" value="1" /> |
238 <section name="target_input"> | 240 <section name="target_input"> |
239 <param name="labels" value="imblearn_y.tabular" ftype="tabular"/> | 241 <param name="labels" value="imblearn_y.tabular" ftype="tabular" /> |
240 <param name="header1" value="false"/> | 242 <param name="header1" value="false" /> |
241 <param name="col" value="1"/> | 243 <param name="col" value="1" /> |
242 </section> | 244 </section> |
243 </conditional> | 245 </conditional> |
244 </conditional> | 246 </conditional> |
245 <output name="out_train" file="train_test_split_train02.tabular" ftype="tabular"/> | 247 <output name="out_train" file="train_test_split_train02.tabular" ftype="tabular" /> |
246 <output name="out_test" file="train_test_split_test02.tabular" ftype="tabular"/> | 248 <output name="out_test" file="train_test_split_test02.tabular" ftype="tabular" /> |
247 </test> | 249 </test> |
248 <test> | 250 <test> |
249 <param name="infile_array" value="regression_X.tabular" ftype="tabular"/> | 251 <param name="infile_array" value="regression_X.tabular" ftype="tabular" /> |
250 <param name="header0" value="true"/> | 252 <param name="header0" value="true" /> |
251 <conditional name="mode_selection"> | 253 <conditional name="mode_selection"> |
252 <param name="selected_mode" value="cv_splitter"/> | 254 <param name="selected_mode" value="cv_splitter" /> |
253 <conditional name="cv_selector"> | 255 <conditional name="cv_selector"> |
254 <param name="selected_cv" value="OrderedKFold"/> | 256 <param name="selected_cv" value="OrderedKFold" /> |
255 <param name="random_state" value="123"/> | 257 <param name="random_state" value="123" /> |
256 <param name="shuffle" value="true"/> | 258 <param name="shuffle" value="true" /> |
257 <param name="n_splits" value="5"/> | 259 <param name="n_splits" value="5" /> |
258 <section name="target_input"> | 260 <section name="target_input"> |
259 <param name="labels" value="regression_y.tabular" ftype="tabular"/> | 261 <param name="labels" value="regression_y.tabular" ftype="tabular" /> |
260 <param name="header1" value="true"/> | 262 <param name="header1" value="true" /> |
261 <param name="col" value="1"/> | 263 <param name="col" value="1" /> |
262 </section> | 264 </section> |
263 </conditional> | 265 </conditional> |
264 </conditional> | 266 </conditional> |
265 <output name="out_train" file="train_test_split_train03.tabular" ftype="tabular"/> | 267 <output name="out_train" file="train_test_split_train03.tabular" ftype="tabular" /> |
266 <output name="out_test" file="train_test_split_test03.tabular" ftype="tabular"/> | 268 <output name="out_test" file="train_test_split_test03.tabular" ftype="tabular" /> |
267 </test> | 269 </test> |
268 </tests> | 270 </tests> |
269 <help><![CDATA[ | 271 <help><![CDATA[ |
270 **What it does** | 272 **What it does** |
271 | 273 This tool implements splitter function and classes from `sklearn.model_selection` module to split contents (rows) of a table into two subsets for training and test, respectively. The simple train test split mode not only supports shuffle split and stratified shuffle split natively carried by the `train_test_split` function, but also gets extended to do group shuffle. The cross-validation splitter mode supports more diverse splitting strategies. Each tool run outputs one split, train and test. To get different splitting sets, for example, nested CV, multiple tool runs are needed with different `nth_split`. |
272 This tool implements splitter function and classes from `sklearn.model_selection` module to split contents (rows) of a table into | |
273 two subsets for training and test, respectively. The simple train test split mode not only supports shuffle split and stratified | |
274 shuffle split natively carried by the `train_test_split` function, but also gets extended to do group shuffle. | |
275 The cross-validation splitter mode supports more diverse splitting strategies. Each tool run outputs one split, train and test. | |
276 To get different splitting sets, for example, nested CV, multiple tool runs are needed with different `nth_split`. | |
277 Example: 6-fold CV. Set `n_splits` to 6. Run the tool 6 times with the same parameters, but set `nth_split` according to the number of the run (1-6). | |
278 | 274 |
279 - Train Test Split mode | 275 - Train Test Split mode |
280 - direct split, no shuffle | 276 - direct split, no shuffle |
281 - shuffle split | 277 - shuffle split |
282 - stratified shuffle split | 278 - stratified shuffle split |
291 Input: a tabular dataset. | 287 Input: a tabular dataset. |
292 | 288 |
293 Output: two tabular datasets containing training and test subsets, respectively. | 289 Output: two tabular datasets containing training and test subsets, respectively. |
294 | 290 |
295 ]]></help> | 291 ]]></help> |
296 <expand macro="sklearn_citation"/> | 292 <expand macro="sklearn_citation" /> |
297 </tool> | 293 </tool> |