Mercurial > repos > bgruening > split_file_to_collection
comparison split_file_to_collection.xml @ 5:e77b954f0da5 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_to_collection commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
author | bgruening |
---|---|
date | Fri, 11 Oct 2019 18:24:43 -0400 |
parents | 0850f2dfba13 |
children | 6cbe2f30c2d7 |
comparison
equal
deleted
inserted
replaced
4:0850f2dfba13 | 5:e77b954f0da5 |
---|---|
1 <tool id="split_file_to_collection" name="Split file" version="0.3.0"> | 1 <tool id="split_file_to_collection" name="Split file" version="0.4.0"> |
2 <description>to dataset collection</description> | 2 <description>to dataset collection</description> |
3 <macros> | 3 <macros> |
4 <xml name="regex_sanitizer"> | 4 <xml name="regex_sanitizer"> |
5 <sanitizer> | 5 <sanitizer> |
6 <valid> | 6 <valid> |
75 --batch | 75 --batch |
76 #end if | 76 #end if |
77 #end if | 77 #end if |
78 #else | 78 #else |
79 #if $split_parms.select_ftype == "generic" | 79 #if $split_parms.select_ftype == "generic" |
80 --generic_re '$split_parms.generic_regex' | 80 #if $split_parms.split_method.select_split_method == "regex" |
81 #if $split_parms.split_after == 'true': | 81 --generic_re '$split_parms.split_method.generic_regex' |
82 --split_after | 82 #if $split_parms.split_method.split_after == 'true': |
83 --split_after | |
84 #end if | |
85 #else | |
86 --generic_num $split_parms.split_method.record_length | |
83 #end if | 87 #end if |
84 #end if | 88 #end if |
85 #if $split_parms.select_mode.mode == "numnew": | 89 #if $split_parms.select_mode.mode == "numnew": |
86 --numnew '$split_parms.select_mode.numnew' | 90 --numnew '$split_parms.select_mode.numnew' |
87 #else | 91 #else |
161 <param name="input" type="data" format="txt" label="Text file to split"/> | 165 <param name="input" type="data" format="txt" label="Text file to split"/> |
162 <expand macro="numnew_fname"/> | 166 <expand macro="numnew_fname"/> |
163 </when> | 167 </when> |
164 <when value="generic"> | 168 <when value="generic"> |
165 <param name="input" type="data" format="txt" label="File to split"/> | 169 <param name="input" type="data" format="txt" label="File to split"/> |
166 <param name="generic_regex" type="text" label="Regex to match record separator" value="^.*"> | 170 <conditional name="split_method"> |
167 <expand macro="regex_sanitizer"/> | 171 <param name="select_split_method" type="select" label="Method to split files"> |
168 </param> | 172 <option value="regex">Specify record separator as regular expression</option> |
173 <option value="number">Specify number of lines after which a record ends</option> | |
174 </param> | |
175 <when value="regex"> | |
176 <param name="generic_regex" type="text" label="Regex to match record separator" value="^.*"> | |
177 <expand macro="regex_sanitizer"/> | |
178 </param> | |
179 <param name="split_after" type="select" value="false" label="Split records before or after the separator?" help="If before, the separator will appear at the start of each record; if after, at the end"> | |
180 <option value="false" selected="true">Before</option> | |
181 <option value="true">After</option> | |
182 </param> | |
183 </when> | |
184 <when value="number"> | |
185 <param name="record_length" type="integer" value="1" label="Record length" help="The number of lines after which each record ends"/> | |
186 </when> | |
187 </conditional> | |
169 <expand macro="numnew_fname"/> | 188 <expand macro="numnew_fname"/> |
170 <param name="split_after" type="select" value="false" label="Split records before or after the separator?" help="If before, the separator will appear at the start of each record; if after, at the end"> | |
171 <option value="false" selected="true">Before</option> | |
172 <option value="true">After</option> | |
173 </param> | |
174 </when> | 189 </when> |
175 </conditional> | 190 </conditional> |
176 </inputs> | 191 </inputs> |
177 <outputs> | 192 <outputs> |
178 <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}"> | 193 <collection name="list_output_tab" type="list" label="${tool.name} on ${on_string}"> |
203 <discover_datasets pattern="__name_and_ext__" directory="out" visible="false"/> | 218 <discover_datasets pattern="__name_and_ext__" directory="out" visible="false"/> |
204 <filter>split_parms['select_ftype'] == "generic"</filter> | 219 <filter>split_parms['select_ftype'] == "generic"</filter> |
205 </collection> | 220 </collection> |
206 </outputs> | 221 </outputs> |
207 <tests> | 222 <tests> |
223 <!-- 1 --> | |
208 <test> | 224 <test> |
209 <param name="input" value="test.tabular" ftype="tabular"/> | 225 <param name="input" value="test.tabular" ftype="tabular"/> |
210 <param name="select_ftype" value="tabular"/> | 226 <param name="select_ftype" value="tabular"/> |
211 <param name="select_split_by" value="col"/> | 227 <param name="select_split_by" value="col"/> |
212 <param name="id_col" value="1"/> | 228 <param name="id_col" value="1"/> |
217 <element name="foo.tab" file="foo.tab" ftype="tabular"/> | 233 <element name="foo.tab" file="foo.tab" ftype="tabular"/> |
218 <element name="foo2.tab" file="foo2.tab" ftype="tabular"/> | 234 <element name="foo2.tab" file="foo2.tab" ftype="tabular"/> |
219 <element name="foo3.tab" file="foo3.tab" ftype="tabular"/> | 235 <element name="foo3.tab" file="foo3.tab" ftype="tabular"/> |
220 </output_collection> | 236 </output_collection> |
221 </test> | 237 </test> |
238 <!-- 2 --> | |
222 <test> | 239 <test> |
223 <param name="input" value="test.tabular" ftype="tabular"/> | 240 <param name="input" value="test.tabular" ftype="tabular"/> |
224 <param name="select_ftype" value="tabular"/> | 241 <param name="select_ftype" value="tabular"/> |
225 <param name="select_split_by" value="row"/> | 242 <param name="select_split_by" value="row"/> |
226 <param name="top" value="2"/> | 243 <param name="top" value="2"/> |
230 <output_collection name="list_output_tab" type="list"> | 247 <output_collection name="list_output_tab" type="list"> |
231 <element name="test_000000.tabular" file="test_0.tabular" ftype="tabular"/> | 248 <element name="test_000000.tabular" file="test_0.tabular" ftype="tabular"/> |
232 <element name="test_000001.tabular" file="test_1.tabular" ftype="tabular"/> | 249 <element name="test_000001.tabular" file="test_1.tabular" ftype="tabular"/> |
233 </output_collection> | 250 </output_collection> |
234 </test> | 251 </test> |
252 <!-- 3 --> | |
235 <test> | 253 <test> |
236 <param name="input" value="test.tabular" ftype="tabular"/> | 254 <param name="input" value="test.tabular" ftype="tabular"/> |
237 <param name="select_ftype" value="tabular"/> | 255 <param name="select_ftype" value="tabular"/> |
238 <param name="select_split_by" value="row"/> | 256 <param name="select_split_by" value="row"/> |
239 <param name="top" value="2"/> | 257 <param name="top" value="2"/> |
244 <output_collection name="list_output_tab" type="list"> | 262 <output_collection name="list_output_tab" type="list"> |
245 <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/> | 263 <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/> |
246 <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/> | 264 <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/> |
247 </output_collection> | 265 </output_collection> |
248 </test> | 266 </test> |
267 <!-- 4 --> | |
249 <test> | 268 <test> |
250 <param name="input" value="test.tabular" ftype="tabular"/> | 269 <param name="input" value="test.tabular" ftype="tabular"/> |
251 <param name="select_ftype" value="tabular"/> | 270 <param name="select_ftype" value="tabular"/> |
252 <param name="select_split_by" value="row"/> | 271 <param name="select_split_by" value="row"/> |
253 <param name="top" value="2"/> | 272 <param name="top" value="2"/> |
258 <output_collection name="list_output_tab" type="list"> | 277 <output_collection name="list_output_tab" type="list"> |
259 <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/> | 278 <element name="batch_tab_000000.tabular" file="batch_tab_0.tabular" ftype="tabular"/> |
260 <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/> | 279 <element name="batch_tab_000001.tabular" file="batch_tab_1.tabular" ftype="tabular"/> |
261 </output_collection> | 280 </output_collection> |
262 </test> | 281 </test> |
282 <!-- 5 --> | |
263 <test> | 283 <test> |
264 <param name="select_ftype" value="txt"/> | 284 <param name="select_ftype" value="txt"/> |
265 <param name="input" value="karyotype.txt" ftype="txt"/> | 285 <param name="input" value="karyotype.txt" ftype="txt"/> |
266 <param name="mode" value="numnew"/> | 286 <param name="mode" value="numnew"/> |
267 <param name="numnew" value="24"/> | 287 <param name="numnew" value="24"/> |
293 <element name="chr_000021.txt" file="chr_000021.txt" ftype="txt"/> | 313 <element name="chr_000021.txt" file="chr_000021.txt" ftype="txt"/> |
294 <element name="chr_000022.txt" file="chr_000022.txt" ftype="txt"/> | 314 <element name="chr_000022.txt" file="chr_000022.txt" ftype="txt"/> |
295 <element name="chr_000023.txt" file="chr_000023.txt" ftype="txt"/> | 315 <element name="chr_000023.txt" file="chr_000023.txt" ftype="txt"/> |
296 </output_collection> | 316 </output_collection> |
297 </test> | 317 </test> |
318 <!-- 6 --> | |
298 <test> | 319 <test> |
299 <param name="input" value="psm.tabular" ftype="tabular"/> | 320 <param name="input" value="psm.tabular" ftype="tabular"/> |
300 <param name="select_ftype" value="tabular"/> | 321 <param name="select_ftype" value="tabular"/> |
301 <param name="select_split_by" value="col"/> | 322 <param name="select_split_by" value="col"/> |
302 <param name="id_col" value="10"/> | 323 <param name="id_col" value="10"/> |
308 <element name="file2.tab" file="file2.tab" ftype="tabular"/> | 329 <element name="file2.tab" file="file2.tab" ftype="tabular"/> |
309 <element name="file3.tab" file="file3.tab" ftype="tabular"/> | 330 <element name="file3.tab" file="file3.tab" ftype="tabular"/> |
310 <element name="file4.tab" file="file4.tab" ftype="tabular"/> | 331 <element name="file4.tab" file="file4.tab" ftype="tabular"/> |
311 </output_collection> | 332 </output_collection> |
312 </test> | 333 </test> |
334 <!-- 7 splitting of mgf --> | |
313 <test> | 335 <test> |
314 <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/> | 336 <param name="input" value="demo758Dacentroid.mgf" ftype="mgf"/> |
315 <param name="select_ftype" value="mgf"/> | 337 <param name="select_ftype" value="mgf"/> |
316 <param name="mode" value="numnew"/> | 338 <param name="mode" value="numnew"/> |
317 <param name="numnew" value="3"/> | 339 <param name="numnew" value="3"/> |
320 <element name="demo_000000.mgf" file="demo_0.mgf" ftype="mgf"/> | 342 <element name="demo_000000.mgf" file="demo_0.mgf" ftype="mgf"/> |
321 <element name="demo_000001.mgf" file="demo_1.mgf" ftype="mgf"/> | 343 <element name="demo_000001.mgf" file="demo_1.mgf" ftype="mgf"/> |
322 <element name="demo_000002.mgf" file="demo_2.mgf" ftype="mgf"/> | 344 <element name="demo_000002.mgf" file="demo_2.mgf" ftype="mgf"/> |
323 </output_collection> | 345 </output_collection> |
324 </test> | 346 </test> |
347 <!-- 8 splitting of fasta + desired number of files--> | |
325 <test> | 348 <test> |
326 <param name="input" value="test.fasta" ftype="fasta"/> | 349 <param name="input" value="test.fasta" ftype="fasta"/> |
327 <param name="select_ftype" value="fasta"/> | 350 <param name="select_ftype" value="fasta"/> |
328 <param name="mode" value="numnew"/> | 351 <param name="mode" value="numnew"/> |
329 <param name="numnew" value="2"/> | 352 <param name="numnew" value="2"/> |
331 <output_collection name="list_output_fasta" type="list"> | 354 <output_collection name="list_output_fasta" type="list"> |
332 <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/> | 355 <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/> |
333 <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/> | 356 <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/> |
334 </output_collection> | 357 </output_collection> |
335 </test> | 358 </test> |
359 <!-- 9 splitting of fasta + desired chunksize --> | |
336 <test> | 360 <test> |
337 <param name="input" value="test.fasta" ftype="fasta"/> | 361 <param name="input" value="test.fasta" ftype="fasta"/> |
338 <param name="select_ftype" value="fasta"/> | 362 <param name="select_ftype" value="fasta"/> |
339 <param name="mode" value="chunk"/> | 363 <param name="mode" value="chunk"/> |
340 <param name="chunksize" value="3"/> | 364 <param name="chunksize" value="3"/> |
342 <output_collection name="list_output_fasta" type="list"> | 366 <output_collection name="list_output_fasta" type="list"> |
343 <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/> | 367 <element name="test_000000.fasta" file="test_0.fasta" ftype="fasta"/> |
344 <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/> | 368 <element name="test_000001.fasta" file="test_1.fasta" ftype="fasta"/> |
345 </output_collection> | 369 </output_collection> |
346 </test> | 370 </test> |
371 <!-- 10 splitting of fastq, specify desired number of files --> | |
347 <test> | 372 <test> |
348 <param name="input" value="test.fastq" ftype="fastq"/> | 373 <param name="input" value="test.fastq" ftype="fastq"/> |
349 <param name="select_ftype" value="fastq"/> | 374 <param name="select_ftype" value="fastq"/> |
350 <param name="mode" value="numnew"/> | 375 <param name="mode" value="numnew"/> |
351 <param name="numnew" value="2"/> | 376 <param name="numnew" value="2"/> |
353 <output_collection name="list_output_fastq" type="list"> | 378 <output_collection name="list_output_fastq" type="list"> |
354 <element name="test_000000.fastq" file="test_0.fastq" ftype="fastq"/> | 379 <element name="test_000000.fastq" file="test_0.fastq" ftype="fastq"/> |
355 <element name="test_000001.fastq" file="test_1.fastq" ftype="fastq"/> | 380 <element name="test_000001.fastq" file="test_1.fastq" ftype="fastq"/> |
356 </output_collection> | 381 </output_collection> |
357 </test> | 382 </test> |
383 <!-- 11 splitting of fastq, specify desired number of files | |
384 same as previous test, but by specifying the number of lines per record | |
385 explicitly (not using the preset of the python script) --> | |
386 <test> | |
387 <param name="input" value="test.fastq" ftype="fastq"/> | |
388 <param name="select_ftype" value="generic"/> | |
389 <param name="select_split_method" value="number"/> | |
390 <param name="record_length" value="4"/> | |
391 <param name="mode" value="numnew"/> | |
392 <param name="numnew" value="2"/> | |
393 <param name="newfilenames" value="test"/> | |
394 <output_collection name="list_output_generic" type="list"> | |
395 <element name="test_000000" file="test_0.fastq" ftype="fastq"/> | |
396 <element name="test_000001" file="test_1.fastq" ftype="fastq"/> | |
397 </output_collection> | |
398 </test> | |
399 <!-- splitting of fasta w random assignment and specific filename prefix --> | |
358 <test> | 400 <test> |
359 <param name="input" value="test.fasta" ftype="fasta"/> | 401 <param name="input" value="test.fasta" ftype="fasta"/> |
360 <param name="select_ftype" value="fasta"/> | 402 <param name="select_ftype" value="fasta"/> |
361 <param name="mode" value="numnew"/> | 403 <param name="mode" value="numnew"/> |
362 <param name="numnew" value="2"/> | 404 <param name="numnew" value="2"/> |
366 <output_collection name="list_output_fasta" type="list"> | 408 <output_collection name="list_output_fasta" type="list"> |
367 <element name="rand_000000.fasta" file="rand_0.fasta" ftype="fasta"/> | 409 <element name="rand_000000.fasta" file="rand_0.fasta" ftype="fasta"/> |
368 <element name="rand_000001.fasta" file="rand_1.fasta" ftype="fasta"/> | 410 <element name="rand_000001.fasta" file="rand_1.fasta" ftype="fasta"/> |
369 </output_collection> | 411 </output_collection> |
370 </test> | 412 </test> |
413 <!-- splitting of fasta w batch assignment and specific filename prefix --> | |
371 <test> | 414 <test> |
372 <param name="input" value="test.fasta" ftype="fasta"/> | 415 <param name="input" value="test.fasta" ftype="fasta"/> |
373 <param name="select_ftype" value="fasta"/> | 416 <param name="select_ftype" value="fasta"/> |
374 <param name="mode" value="numnew"/> | 417 <param name="mode" value="numnew"/> |
375 <param name="numnew" value="2"/> | 418 <param name="numnew" value="2"/> |
378 <output_collection name="list_output_fasta" type="list"> | 421 <output_collection name="list_output_fasta" type="list"> |
379 <element name="fasta_batch_000000.fasta" file="fasta_batch_0.fasta" ftype="fasta"/> | 422 <element name="fasta_batch_000000.fasta" file="fasta_batch_0.fasta" ftype="fasta"/> |
380 <element name="fasta_batch_000001.fasta" file="fasta_batch_1.fasta" ftype="fasta"/> | 423 <element name="fasta_batch_000001.fasta" file="fasta_batch_1.fasta" ftype="fasta"/> |
381 </output_collection> | 424 </output_collection> |
382 </test> | 425 </test> |
426 <!-- splitting of txt w default (alternating assignment) --> | |
383 <test> | 427 <test> |
384 <param name="input" value="test.tabular" ftype="txt"/> | 428 <param name="input" value="test.tabular" ftype="txt"/> |
385 <param name="select_ftype" value="txt"/> | 429 <param name="select_ftype" value="txt"/> |
386 <param name="mode" value="numnew"/> | 430 <param name="mode" value="numnew"/> |
387 <param name="numnew" value="2"/> | 431 <param name="numnew" value="2"/> |
389 <output_collection name="list_output_txt" type="list"> | 433 <output_collection name="list_output_txt" type="list"> |
390 <element name="test_000000.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/> | 434 <element name="test_000000.txt" file="test_0.tabular" ftype="txt" lines_diff="1"/> |
391 <element name="test_000001.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/> | 435 <element name="test_000001.txt" file="test_1.tabular" ftype="txt" lines_diff="1"/> |
392 </output_collection> | 436 </output_collection> |
393 </test> | 437 </test> |
438 <!-- generic-regex splitting (of txt) w default assignment (alternating) --> | |
394 <test> | 439 <test> |
395 <param name="input" value="test.tabular" ftype="txt"/> | 440 <param name="input" value="test.tabular" ftype="txt"/> |
396 <param name="select_ftype" value="generic"/> | 441 <param name="select_ftype" value="generic"/> |
442 <param name="select_split_method" value="regex"/> | |
397 <param name="generic_regex" value="^.*"/> | 443 <param name="generic_regex" value="^.*"/> |
398 <param name="mode" value="numnew"/> | 444 <param name="mode" value="numnew"/> |
399 <param name="numnew" value="2"/> | 445 <param name="numnew" value="2"/> |
400 <param name="newfilenames" value="test"/> | 446 <param name="newfilenames" value="test"/> |
401 <output_collection name="list_output_generic" type="list"> | 447 <output_collection name="list_output_generic" type="list"> |
402 <element name="test_000000" file="test_0.tabular" ftype="txt" lines_diff="1"/> | 448 <element name="test_000000" file="test_0.tabular" ftype="txt" lines_diff="1"/> |
403 <element name="test_000001" file="test_1.tabular" ftype="txt" lines_diff="1"/> | 449 <element name="test_000001" file="test_1.tabular" ftype="txt" lines_diff="1"/> |
404 </output_collection> | 450 </output_collection> |
405 </test> | 451 </test> |
452 <!-- generic-regex splitting (of a fasta) w random assignment --> | |
406 <test> | 453 <test> |
407 <param name="input" value="test.fasta" ftype="fasta"/> | 454 <param name="input" value="test.fasta" ftype="fasta"/> |
408 <param name="select_ftype" value="generic"/> | 455 <param name="select_ftype" value="generic"/> |
456 <param name="select_split_method" value="regex"/> | |
409 <param name="generic_regex" value="^>.*"/> | 457 <param name="generic_regex" value="^>.*"/> |
410 <param name="mode" value="numnew"/> | 458 <param name="mode" value="numnew"/> |
411 <param name="numnew" value="2"/> | 459 <param name="numnew" value="2"/> |
412 <param name="newfilenames" value="rand"/> | 460 <param name="newfilenames" value="rand"/> |
413 <param name="allocate" value="random"/> | 461 <param name="allocate" value="random"/> |
415 <output_collection name="list_output_generic" type="list"> | 463 <output_collection name="list_output_generic" type="list"> |
416 <element name="rand_000000" file="rand_0.fasta" ftype="fasta"/> | 464 <element name="rand_000000" file="rand_0.fasta" ftype="fasta"/> |
417 <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/> | 465 <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/> |
418 </output_collection> | 466 </output_collection> |
419 </test> | 467 </test> |
468 <!-- sdf + specify desired number of files --> | |
420 <test> | 469 <test> |
421 <param name="input" value="3_molecules.sdf" ftype="sdf"/> | 470 <param name="input" value="3_molecules.sdf" ftype="sdf"/> |
422 <param name="select_ftype" value="sdf"/> | 471 <param name="select_ftype" value="sdf"/> |
423 <param name="mode" value="numnew"/> | 472 <param name="mode" value="numnew"/> |
424 <param name="numnew" value="10"/> | 473 <param name="numnew" value="10"/> |
428 <element name="mol_000000.sdf" file="mol_0.sdf" ftype="sdf"/> | 477 <element name="mol_000000.sdf" file="mol_0.sdf" ftype="sdf"/> |
429 <element name="mol_000001.sdf" file="mol_1.sdf" ftype="sdf"/> | 478 <element name="mol_000001.sdf" file="mol_1.sdf" ftype="sdf"/> |
430 <element name="mol_000002.sdf" file="mol_2.sdf" ftype="sdf"/> | 479 <element name="mol_000002.sdf" file="mol_2.sdf" ftype="sdf"/> |
431 </output_collection> | 480 </output_collection> |
432 </test> | 481 </test> |
482 <!-- sdf + specify desired number of records per file (chunksize) --> | |
433 <test> | 483 <test> |
434 <param name="input" value="3_molecules.sdf" ftype="sdf"/> | 484 <param name="input" value="3_molecules.sdf" ftype="sdf"/> |
435 <param name="select_ftype" value="sdf"/> | 485 <param name="select_ftype" value="sdf"/> |
436 <param name="mode" value="chunk"/> | 486 <param name="mode" value="chunk"/> |
437 <param name="chunksize" value="1"/> | 487 <param name="chunksize" value="1"/> |
441 <element name="mol_000000.sdf" file="mol_0.sdf" ftype="sdf"/> | 491 <element name="mol_000000.sdf" file="mol_0.sdf" ftype="sdf"/> |
442 <element name="mol_000001.sdf" file="mol_1.sdf" ftype="sdf"/> | 492 <element name="mol_000001.sdf" file="mol_1.sdf" ftype="sdf"/> |
443 <element name="mol_000002.sdf" file="mol_2.sdf" ftype="sdf"/> | 493 <element name="mol_000002.sdf" file="mol_2.sdf" ftype="sdf"/> |
444 </output_collection> | 494 </output_collection> |
445 </test> | 495 </test> |
496 <!-- test split_after (by splitting fasta files after non-header lines) --> | |
446 <test> | 497 <test> |
447 <param name="input" value="test.fasta" ftype="fasta"/> | 498 <param name="input" value="test.fasta" ftype="fasta"/> |
448 <param name="select_ftype" value="generic"/> | 499 <param name="select_ftype" value="generic"/> |
449 <param name="generic_regex" value="^>.*"/> | 500 <param name="select_split_method" value="regex"/> |
501 <param name="generic_regex" value="^[^>].*"/> | |
450 <param name="split_after" value="true"/> | 502 <param name="split_after" value="true"/> |
451 <param name="mode" value="numnew"/> | 503 <param name="mode" value="numnew"/> |
452 <param name="numnew" value="2"/> | 504 <param name="numnew" value="2"/> |
453 <param name="newfilenames" value="rand"/> | 505 <param name="newfilenames" value="rand"/> |
454 <param name="allocate" value="random"/> | 506 <param name="allocate" value="random"/> |
455 <param name="seed" value="1010"/> | 507 <param name="seed" value="1010"/> |
456 <output_collection name="list_output_generic" type="list"> | 508 <output_collection name="list_output_generic" type="list"> |
457 <element name="rand_000001" file="split_after.fasta" ftype="fasta"/> | 509 <element name="rand_000000" file="rand_0.fasta" ftype="fasta"/> |
510 <element name="rand_000001" file="rand_1.fasta" ftype="fasta"/> | |
458 </output_collection> | 511 </output_collection> |
459 </test> | 512 </test> |
460 </tests> | 513 </tests> |
461 <help><![CDATA[ | 514 <help><![CDATA[ |
462 **Split file into a dataset collection** | 515 **Split file into a dataset collection** |
463 | 516 |
464 This tool splits a data set consisting of records into multiple data sets within a collection. | 517 This tool splits a data set consisting of records into multiple data sets within a collection. |
465 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence | 518 A record can be for instance simply a line, a FASTA sequence (header + sequence), a FASTQ sequence |
466 (headers + sequence + qualities), etc. The important property is that the beginning of a new record | 519 (headers + sequence + qualities), etc. The important property is that the records either have a |
467 can be specified by a regular expression, e.g. ".*" for lines, ">.*" for FASTA, or "@.*" for FASTQ. | 520 specific length (e.g. 4 lines for FASTQ) or that the beginning/end of a new record |
468 The tool has presets for text, tabular data sets (which are split by line), FASTA, FASTQ, SDF and MGF. | 521 can be specified by a regular expression, e.g. ".*" for lines or ">.*" for FASTA. |
469 For other data types the text delimiting records can be specified manually using the generic splitter. | 522 The tool has presets for text, tabular data sets (which are split after each line), FASTA (new records start with ">.*"), FASTQ (records consist of 4 lines), SDF (records end with "^$$$$") and MGF (records start with "^BEGIN IONS"). |
523 For other data types the text delimiting records or the number of lines making up a record can be specified manually using the generic splitter. | |
470 If the generic splitter is used, an option is also available to split records either before or after the | 524 If the generic splitter is used, an option is also available to split records either before or after the |
471 separator. If a preset filetype is used, this is selected automatically (after for SDF, before for all | 525 separator. If a preset filetype is used, this is selected automatically (after for SDF, before for all |
472 others). | 526 others). |
473 | 527 |
474 If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be either done alternatingly, in original record order, or at random. | 528 If splitting by line (or by some other item, like a FASTA entry or an MGF record), the splitting can be either done alternatingly, in original record order, or at random. |