Mercurial > repos > bgruening > split_file_on_column
comparison split_file_on_column.xml @ 4:37a53100b67e draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 4d0bfcf37bfbedafc7ff0672dfe452766ca8a606"
author | bgruening |
---|---|
date | Thu, 25 Feb 2021 15:54:13 +0000 |
parents | b60f2452580e |
children | d4b5b70e82cb |
comparison
equal
deleted
inserted
replaced
3:b60f2452580e | 4:37a53100b67e |
---|---|
1 <tool id="tp_split_on_column" name="Split file" version="0.2"> | 1 <tool id="tp_split_on_column" name="Split file" version="0.4"> |
2 <description>according to the values of a column</description> | 2 <description>according to the values of a column</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="4.1.0">gnu_awk</requirement> | 4 <requirement type="package" version="5.0.1">gawk</requirement> |
5 </requirements> | 5 </requirements> |
6 <command> | 6 <command> |
7 <![CDATA[ | 7 <![CDATA[ |
8 mkdir tmp_out && | 8 mkdir tmp_out && |
9 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' $infile | 9 #if $include_header |
10 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile | |
11 #else | |
12 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' | |
13 #end if | |
10 ]]> | 14 ]]> |
11 </command> | 15 </command> |
12 <inputs> | 16 <inputs> |
13 <param format="tabular" name="infile" type="data" label="File to select" /> | 17 <param format="tabular" name="infile" type="data" label="File to select" /> |
14 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> | 18 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" /> |
19 | |
20 <param name="include_header" type="boolean" label="Include the header in all split files?" | |
21 help="Include the first line (the assumed header line) in all split files." /> | |
15 </inputs> | 22 </inputs> |
16 <outputs> | 23 <outputs> |
17 <collection name="split_output" type="list" label="Table split on first column"> | 24 <collection name="split_output" type="list" label="Table split on first column"> |
18 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" /> | 25 <discover_datasets pattern="__name_and_ext__" directory="tmp_out" /> |
19 </collection> | 26 </collection> |
20 </outputs> | 27 </outputs> |
21 <tests> | 28 <tests> |
22 <test> | 29 <test> |
23 <param name="infile" value="5cols.tabular" ftype="tabular"/> | 30 <param name="infile" value="5cols.tabular" ftype="tabular"/> |
24 <param name="column" value="5" /> | 31 <param name="column" value="5" /> |
32 <param name="include_header" value="false"/> | |
33 <output_collection name="split_output" type="list"> | |
34 <element name="1"> | |
35 <assert_contents> | |
36 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> | |
37 </assert_contents> | |
38 </element> | |
39 <element name="2"> | |
40 <assert_contents> | |
41 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> | |
42 </assert_contents> | |
43 </element> | |
44 </output_collection> | |
45 </test> | |
46 <test> | |
47 <param name="infile" value="5cols-with-header.tabular" ftype="tabular" /> | |
48 <param name="column" value="5" /> | |
49 <param name="include_header" value="true"/> | |
50 <output_collection name="split_output" type="list"> | |
51 <element name="1"> | |
52 <assert_contents> | |
53 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> | |
54 <has_n_lines n="3" /> | |
55 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> | |
56 </assert_contents> | |
57 </element> | |
58 <element name="2"> | |
59 <assert_contents> | |
60 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> | |
61 <has_n_lines n="4" /> | |
62 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> | |
63 </assert_contents> | |
64 </element> | |
65 </output_collection> | |
66 </test> | |
67 <test> | |
68 <param name="infile" value="5cols-with-header.tabular" ftype="tabular" /> | |
69 <param name="column" value="5" /> | |
70 <param name="include_header" value="false"/> | |
25 <output_collection name="split_output" type="list"> | 71 <output_collection name="split_output" type="list"> |
26 <element name="1"> | 72 <element name="1"> |
27 <assert_contents> | 73 <assert_contents> |
28 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> | 74 <has_text_matching expression="chr7\t56632\t56652\tcluster\t1" /> |
29 </assert_contents> | 75 </assert_contents> |
41 | 87 |
42 **What it does** | 88 **What it does** |
43 | 89 |
44 This tool splits a file into different smaller files using a specific column. | 90 This tool splits a file into different smaller files using a specific column. |
45 It will work like the group tool, but every group is saved to its own file. | 91 It will work like the group tool, but every group is saved to its own file. |
92 You have the option to include the header (first line) in all split files. | |
93 If you have a header and don't want to keep it, please remove it before you use this tool, | |
94 for example with the "Remove beginning of a file" tool. | |
46 | 95 |
47 ----- | 96 ----- |
48 | 97 |
49 **Example** | 98 **Example** |
50 | 99 |
51 Splitting on column 5 from this:: | 100 Splitting a file without header on column 5 from this:: |
52 | 101 |
53 chr7 56632 56652 cluster 1 | 102 chr7 56632 56652 cluster 1 |
54 chr7 56736 56756 cluster 1 | 103 chr7 56736 56756 cluster 1 |
55 chr7 56761 56781 cluster 2 | 104 chr7 56761 56781 cluster 2 |
56 chr7 56772 56792 cluster 2 | 105 chr7 56772 56792 cluster 2 |
64 | 113 |
65 chr7 56761 56781 cluster 2 | 114 chr7 56761 56781 cluster 2 |
66 chr7 56772 56792 cluster 2 | 115 chr7 56772 56792 cluster 2 |
67 chr7 56775 56795 cluster 2 | 116 chr7 56775 56795 cluster 2 |
68 | 117 |
69 | |
70 ]]> | 118 ]]> |
71 </help> | 119 </help> |
120 <citations> | |
121 <citation type="bibtex"> | |
122 @misc{githubsplit_file_on_column, | |
123 author = {Gruening, Bjoern}, | |
124 year = {2015}, | |
125 title = {split_file_on_column}, | |
126 publisher = {GitHub}, | |
127 journal = {GitHub repository}, | |
128 url = {https://github.com/bgruening/galaxytools}, | |
129 } | |
130 </citation> | |
131 </citations> | |
72 </tool> | 132 </tool> |
133 |