comparison split_file_on_column.xml @ 6:ff2a81aa3f08 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/split_file_on_column commit 6a2deb2f38472a2845123bd54e73b6bd115b3a0b
author bgruening
date Tue, 19 Jul 2022 13:25:20 +0000
parents d4b5b70e82cb
children
comparison
equal deleted inserted replaced
5:d4b5b70e82cb 6:ff2a81aa3f08
1 <tool id="tp_split_on_column" name="Split by group" version="0.5"> 1 <tool id="tp_split_on_column" name="Split by group" version="0.6">
2 <requirements> 2 <requirements>
3 <requirement type="package" version="5.0.1">gawk</requirement> 3 <requirement type="package" version="5.1.0">gawk</requirement>
4 </requirements> 4 </requirements>
5 <command> 5 <command>
6 <![CDATA[ 6 <![CDATA[
7 mkdir tmp_out && 7 mkdir tmp_out &&
8 #if $include_header 8 #if $include_header
9 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext";print hdr>f} {print >> f}' $infile 9 awk -F '\t' 'NR==1{hdr=$0;next}f!="tmp_out/"\$$column".$infile.ext"{if(f) close(f); f="tmp_out/"\$$column".$infile.ext"}; {if (!seen[f]++) print hdr>f; print >> f}' $infile
10 #else 10 #else
11 awk -F'\t' '{print > "tmp_out/"\$$column".$infile.ext" }' '$infile' 11 awk -F'\t' '{print >> "tmp_out/"\$$column".$infile.ext" }' '$infile'
12 #end if 12 #end if
13 ]]> 13 ]]>
14 </command> 14 </command>
15 <inputs> 15 <inputs>
16 <param format="tabular" name="infile" type="data" label="File to split" /> 16 <param format="tabular" name="infile" type="data" label="File to split" />
78 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" /> 78 <has_text_matching expression="chr7\t56761\t56781\tcluster\t2" />
79 </assert_contents> 79 </assert_contents>
80 </element> 80 </element>
81 </output_collection> 81 </output_collection>
82 </test> 82 </test>
83 <test><!-- test with unsorted column, no header -->
84 <param name="infile" value="5cols-unsorted.tabular" ftype="tabular" />
85 <param name="column" value="5" />
86 <param name="include_header" value="false"/>
87 <output_collection name="split_output" type="list">
88 <element name="1">
89 <assert_contents>
90 <has_n_lines n="3" />
91 </assert_contents>
92 </element>
93 <element name="2">
94 <assert_contents>
95 <has_n_lines n="2" />
96 </assert_contents>
97 </element>
98 </output_collection>
99 </test>
100 <test><!-- test with unsorted column, with header -->
101 <param name="infile" value="5cols-unsorted-with-header.tabular" ftype="tabular" />
102 <param name="column" value="5" />
103 <param name="include_header" value="true"/>
104 <output_collection name="split_output" type="list">
105 <element name="1">
106 <assert_contents>
107 <has_n_lines n="4" />
108 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
109
110 </assert_contents>
111 </element>
112 <element name="2">
113 <assert_contents>
114 <has_n_lines n="3" />
115 <has_line_matching expression="Column1\tColumn2\tColumn3\tColumn4\tColumn5" />
116 </assert_contents>
117 </element>
118 </output_collection>
119 </test>
83 </tests> 120 </tests>
84 <help> 121 <help>
85 <![CDATA[ 122 <![CDATA[
86 123
87 ======== 124 ========
110 chr1 30 40 147 chr1 30 40
111 chr2 40 70 148 chr2 40 70
112 chr4 60 80 149 chr4 60 80
113 150
114 151
115 will produce a collectiion with 4 elements:: 152 will produce a collection with 3 elements::
116 153
117 chr1 10 20 154 chr1 10 20
118 chr1 30 40 155 chr1 30 40
119 156
120 chr2 40 70 157 chr2 40 70