Mercurial > repos > bgruening > split_file_on_column
comparison split_file_on_column.xml @ 6:ff2a81aa3f08 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 6a2deb2f38472a2845123bd54e73b6bd115b3a0b
author | bgruening |
---|---|
date | Tue, 19 Jul 2022 13:25:20 +0000 |
parents | d4b5b70e82cb |
children |
Comparison legend: equal | deleted | inserted | replaced
5:d4b5b70e82cb | 6:ff2a81aa3f08 |
---|---|
1 <tool id="tp_split_on_column" name="Split by group" version="0.5"> | 1 <tool id="tp_split_on_column" name="Split by group" version="0.6"> |
2 <requirements> | 2 <requirements> |
3 <requirement type="package" version="5.0.1">gawk</requirement> | 3 <requirement type="package" version="5.1.0">gawk</requirement> |
4 </requirements> | 4 </requirements> |
5 <command> | 5 <command> |
6 <![CDATA[ | 6 <![CDATA[ |
7 mkdir tmp_out && | 7 mkdir tmp_out && |
8 #if $include_header | 8 #if $include_header |
9 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile | 9 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext"}; {if (!seen[f]++) print hdr>f; print >> f}' $infile |
10 #else | 10 #else |
11 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' | 11 awk -F'\t' '{print >> "tmp_out/"\$$column".$infile.ext" }' '$infile' |
12 #end if | 12 #end if |
13 ]]> | 13 ]]> |
14 </command> | 14 </command> |
15 <inputs> | 15 <inputs> |
16 <param format="tabular" name="infile" type="data" label="File to split" /> | 16 <param format="tabular" name="infile" type="data" label="File to split" /> |
78 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> | 78 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> |
79 </assert_contents> | 79 </assert_contents> |
80 </element> | 80 </element> |
81 </output_collection> | 81 </output_collection> |
82 </test> | 82 </test> |
83 <test><!-- test with unsorted column, no header --> | |
84 <param name="infile" value="5cols-unsorted.tabular" ftype="tabular" /> | |
85 <param name="column" value="5" /> | |
86 <param name="include_header" value="false"/> | |
87 <output_collection name="split_output" type="list"> | |
88 <element name="1"> | |
89 <assert_contents> | |
90 <has_n_lines n="3" /> | |
91 </assert_contents> | |
92 </element> | |
93 <element name="2"> | |
94 <assert_contents> | |
95 <has_n_lines n="2" /> | |
96 </assert_contents> | |
97 </element> | |
98 </output_collection> | |
99 </test> | |
100 <test><!-- test with unsorted column, with header --> | |
101 <param name="infile" value="5cols-unsorted-with-header.tabular" ftype="tabular" /> | |
102 <param name="column" value="5" /> | |
103 <param name="include_header" value="true"/> | |
104 <output_collection name="split_output" type="list"> | |
105 <element name="1"> | |
106 <assert_contents> | |
107 <has_n_lines n="4" /> | |
108 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> | |
109 | |
110 </assert_contents> | |
111 </element> | |
112 <element name="2"> | |
113 <assert_contents> | |
114 <has_n_lines n="3" /> | |
115 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" /> | |
116 </assert_contents> | |
117 </element> | |
118 </output_collection> | |
119 </test> | |
83 </tests> | 120 </tests> |
84 <help> | 121 <help> |
85 <![CDATA[ | 122 <![CDATA[ |
86 | 123 |
87 ======== | 124 ======== |
110 chr1 30 40 | 147 chr1 30 40 |
111 chr2 40 70 | 148 chr2 40 70 |
112 chr4 60 80 | 149 chr4 60 80 |
113 | 150 |
114 | 151 |
115 will produce a collectiion with 4 elements:: | 152 will produce a collection with 3 elements:: |
116 | 153 |
117 chr1 10 20 | 154 chr1 10 20 |
118 chr1 30 40 | 155 chr1 30 40 |
119 | 156 |
120 chr2 40 70 | 157 chr2 40 70 |