Mercurial > repos > mvdbeek > collection_column_join
comparison collection_column_join.xml @ 0:4a90bbd2110c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit ac5a5dcefafe63a842e0b04b733cc5ee1177acba-dirty"
author | mvdbeek |
---|---|
date | Mon, 07 Sep 2020 12:50:11 +0000 |
parents | |
children | 06cdbee48b68 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4a90bbd2110c |
---|---|
1 <tool id="collection_column_join" name="Column Join" version="0.0.3+galaxy1"> | |
2 <description>on Collections</description> | |
3 <requirements> | |
4 <requirement type="package" version="8.25">coreutils</requirement> | |
5 </requirements> | |
6 <command detect_errors="exit_code"><![CDATA[ | |
7 #if 'output_shell_script' in str( $include_outputs ).split( "," ): | |
8 cp '${collection_column_join_script}' '${script_output}' && | |
9 #end if | |
10 sh '${collection_column_join_script}' | |
11 ]]> | |
12 </command> | |
13 <configfiles> | |
14 <configfile name="collection_column_join_script"><![CDATA[ | |
15 #!/bin/sh | |
16 touch header0.tmp && | |
17 touch output0.tmp && | |
18 #set $delimiter = '\t' | |
19 #set $left_identifier_column = $identifier_column | |
20 #set $tail_offset = int( str( $has_header ) ) + 1 | |
21 #for $i, $tabular_item in enumerate( $input_tabular ): | |
22 #if $old_col_in_header: | |
23 #if $has_header: | |
24 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && | |
25 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && | |
26 #else: | |
27 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | |
28 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && | |
29 #end if | |
30 #else: | |
31 #if $has_header: | |
32 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && | |
33 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && | |
34 #else: | |
35 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}"); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && | |
36 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && | |
37 #end if | |
38 #end if | |
39 #if $i == 0: | |
40 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && | |
41 #if $has_header: | |
## Split on the tab delimiter like every other step, and print the key through "%s" so a "%" in the header is not read as a printf format. | |
42 awk -F "${delimiter}" '{ printf "%s", \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp && | |
43 #else: | |
44 echo "#KEY" > header${ $i % 2 }.tmp && | |
45 #end if | |
46 #else: | |
47 LC_ALL=C join -o auto -a 1 -a 2 -1 ${left_identifier_column} -2 ${identifier_column} -t "${delimiter}" -e "${fill_char}" output${ $i % 2 }.tmp input_file.tmp > output${ ( $i + 1 ) % 2 }.tmp && | |
48 #set $left_identifier_column = 1 | |
49 #end if | |
50 paste -d "${delimiter}" header${ $i % 2 }.tmp input_header.tmp > header${ ( $i + 1 ) % 2 }.tmp && | |
51 #end for | |
52 cat header${ ( $i + 1 ) % 2 }.tmp output${ ( $i + 1 ) % 2 }.tmp > "${tabular_output}" | |
53 ]]> | |
54 </configfile> | |
55 </configfiles> | |
56 <inputs> | |
57 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/> | |
58 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> | |
59 <param name="identifier_column" type="integer" value="1" min="1" optional="False" label="Identifier column" help="The column that will be used to join the input datasets (1-based: the first column is 1)"/> | |
60 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of header lines in each input file" help="If this is set to 0, a header line will be added containing column names as follows: the identifier column will be named #KEY and the other columns are named by the input dataset names/columns. If you have one or more header lines in your input, set this to the number of header lines."/> | |
61 <param name="old_col_in_header" type="boolean" checked="true" label="Add column name to header" help="Disable if you want column headers to only be composed of the input file names, for example, if you want headers like file1 and not file1_column1, see Help section below. Default: Yes"/> | |
62 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/> | |
63 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> | |
64 <option value="output_shell_script" selected="false">Shell script</option> | |
65 </param> | |
66 </inputs> | |
67 <outputs> | |
68 <data format="tabular" name="tabular_output"/> | |
69 <data format="txt" name="script_output"> | |
70 <filter>include_outputs and "output_shell_script" in include_outputs</filter> | |
71 </data> | |
72 </outputs> | |
73 <tests> | |
74 <test> | |
75 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> | |
76 <param name="identifier_column" value="1"/> | |
77 <param name="has_header" value="1"/> | |
78 <param name="old_col_in_header" value="true"/> | |
79 <param name="fill_char" value="."/> | |
80 <param name="include_outputs" /> | |
81 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> | |
82 </test> | |
83 <test> | |
84 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> | |
85 <param name="identifier_column" value="1"/> | |
86 <param name="has_header" value="0"/> | |
87 <param name="old_col_in_header" value="true"/> | |
88 <param name="fill_char" value="."/> | |
89 <param name="include_outputs" /> | |
90 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> | |
91 </test> | |
92 <test> | |
93 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> | |
94 <param name="identifier_column" value="1"/> | |
95 <param name="has_header" value="1"/> | |
96 <param name="old_col_in_header" value="false"/> | |
97 <param name="fill_char" value="."/> | |
98 <param name="include_outputs" /> | |
99 <output name="tabular_output" file="out_3.tabular" ftype="tabular"/> | |
100 </test> | |
101 <test> | |
102 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> | |
103 <param name="identifier_column" value="1"/> | |
104 <param name="has_header" value="0"/> | |
105 <param name="old_col_in_header" value="false"/> | |
106 <param name="fill_char" value="."/> | |
107 <param name="include_outputs" /> | |
108 <output name="tabular_output" file="out_4.tabular" ftype="tabular"/> | |
109 </test> | |
110 </tests> | |
111 <help> | |
112 <![CDATA[ | |
113 Joins lists of tabular datasets together on a field. | |
114 | |
115 ----- | |
116 | |
117 **Example** | |
118 | |
119 To join three files, with headers, based on the first column: | |
120 | |
121 **First file (in_1)**:: | |
122 | |
123 #KEY c2 c3 c4 | |
124 one 1-1 1-2 1-3 | |
125 two 1-4 1-5 1-6 | |
126 three 1-7 1-8 1-9 | |
127 | |
128 | |
129 **Second file (in_2)**:: | |
130 | |
131 #KEY c2 c3 c4 | |
132 one 2-1 2-2 2-3 | |
133 two 2-4 2-5 2-6 | |
134 three 2-7 2-8 2-9 | |
135 | |
136 **Third file (in_3)**:: | |
137 | |
138 #KEY c2 c3 c4 | |
139 one 3-3 3-2 3-3 | |
140 two 3-4 3-5 3-6 | |
141 three 3-7 3-8 3-9 | |
142 | |
143 | |
144 **Joining** the files, using **identifier column 1** and **1 header line**, will return:: | |
145 | |
146 #KEY in_1_c2 in_1_c3 in_1_c4 in_2_c2 in_2_c3 in_2_c4 in_3_c2 in_3_c3 in_3_c4 | |
147 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 | |
148 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 | |
149 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 | |
150 | |
151 | |
152 **Joining** the files, using **identifier column 1** and **1 header line**, but disabling **Add column name to header**, will return:: | |
153 | |
154 #KEY in_1 in_1 in_1 in_2 in_2 in_2 in_3 in_3 in_3 | |
155 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 | |
156 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 | |
157 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 | |
158 | |
159 ]]> | |
160 </help> | |
161 <citations> | |
162 </citations> | |
163 </tool> |