comparison split_file_on_column.xml @ 4:37a53100b67e draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author bgruening
date Thu, 25 Feb 2021 15:54:13 +0000
parents b60f2452580e
children d4b5b70e82cb
comparison
equal deleted inserted replaced
3:b60f2452580e 4:37a53100b67e
1 <tool id="tp_split_on_column" name="Split file" version="0.2"> 1 <tool id="tp_split_on_column" name="Split file" version="0.4">
2 <description>according to the values of a column</description> 2 <description>according to the values of a column</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="4.1.0">gnu_awk</requirement> 4 <requirement type="package" version="5.0.1">gawk</requirement>
5 </requirements> 5 </requirements>
6 <command> 6 <command>
7 <![CDATA[ 7 <![CDATA[
8 mkdir tmp_out && 8 mkdir tmp_out &&
9 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' $infile 9 #if $include_header
10 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile
11 #else
12 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile'
13 #end if
10 ]]> 14 ]]>
11 </command> 15 </command>
12 <inputs> 16 <inputs>
13 <param format="tabular" name="infile" type="data" label="File to select" /> 17 <param format="tabular" name="infile" type="data" label="File to select" />
14 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> 18 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
19
20 <param name="include_header" type="boolean" label="Include the header in all splitted files?"
21 help="Include the first line (the assumed header line) in all splitted files." />
15 </inputs> 22 </inputs>
16 <outputs> 23 <outputs>
17 <collection name="split_output" type="list" label="Table split on first column"> 24 <collection name="split_output" type="list" label="Table split on first column">
18 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" /> 25 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" />
19 </collection> 26 </collection>
20 </outputs> 27 </outputs>
21 <tests> 28 <tests>
22 <test> 29 <test>
23 <param name="infile" value="5cols.tabular" ftype="tabular"/> 30 <param name="infile" value="5cols.tabular" ftype="tabular"/>
24 <param name="column" value="5" /> 31 <param name="column" value="5" />
32 <param name="include_header" value="false"/>
33 <output_collection name="split_output" type="list">
34 <element name="1">
35 <assert_contents>
36 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
37 </assert_contents>
38 </element>
39 <element name="2">
40 <assert_contents>
41 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
42 </assert_contents>
43 </element>
44 </output_collection>
45 </test>
46 <test>
47 <param name="infile" value="5cols-with-header.tabular" ftype="tabular" />
48 <param name="column" value="5" />
49 <param name="include_header" value="true"/>
50 <output_collection name="split_output" type="list">
51 <element name="1">
52 <assert_contents>
53 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
54 <has_n_lines n="3" />
55 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
56 </assert_contents>
57 </element>
58 <element name="2">
59 <assert_contents>
60 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
61 <has_n_lines n="4" />
62 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
63 </assert_contents>
64 </element>
65 </output_collection>
66 </test>
67 <test>
68 <param name="infile" value="5cols-with-header.tabular" ftype="tabular" />
69 <param name="column" value="5" />
70 <param name="include_header" value="false"/>
25 <output_collection name="split_output" type="list"> 71 <output_collection name="split_output" type="list">
26 <element name="1"> 72 <element name="1">
27 <assert_contents> 73 <assert_contents>
28 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> 74 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" />
29 </assert_contents> 75 </assert_contents>
41 87
42 **What it does** 88 **What it does**
43 89
44 This tool splits a file into different smaller files using a specific column. 90 This tool splits a file into different smaller files using a specific column.
45 It will work like the group tool, but every group is saved to its own file. 91 It will work like the group tool, but every group is saved to its own file.
92 You have the option to include the header (first line) in all split files.
93 If you have a header and don't want to keep it, please remove it before you use this tool,
94 for example with the "Remove beginning of a file" tool.
46 95
47 ----- 96 -----
48 97
49 **Example** 98 **Example**
50 99
51 Splitting on column 5 from this:: 100 Splitting a file without header on column 5 from this::
52 101
53 chr7 56632 56652 cluster 1 102 chr7 56632 56652 cluster 1
54 chr7 56736 56756 cluster 1 103 chr7 56736 56756 cluster 1
55 chr7 56761 56781 cluster 2 104 chr7 56761 56781 cluster 2
56 chr7 56772 56792 cluster 2 105 chr7 56772 56792 cluster 2
64 113
65 chr7 56761 56781 cluster 2 114 chr7 56761 56781 cluster 2
66 chr7 56772 56792 cluster 2 115 chr7 56772 56792 cluster 2
67 chr7 56775 56795 cluster 2 116 chr7 56775 56795 cluster 2
68 117
69
70 ]]> 118 ]]>
71 </help> 119 </help>
120 <citations>
121 <citation type="bibtex">
122 @misc{githubsplit_file_on_column,
123 author = {Gruening, Bjoern},
124 year = {2015},
125 title = {split_file_on_column},
126 publisher = {GitHub},
127 journal = {GitHub repository},
128 url = {https://github.com/bgruening/galaxytools},
129 }
130 </citation>
131 </citations>
72 </tool> 132 </tool>
133