comparison collection_column_join.xml @ 3:58228a4d58fe draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 9f1c3ab3f41bab8ff962aca1478c75e538e5bf6a
author iuc
date Fri, 06 Apr 2018 03:44:21 -0400
parents dfde09461b1e
children 071084070619
comparison
equal deleted inserted replaced
2:dfde09461b1e 3:58228a4d58fe
1 <tool id="collection_column_join" name="Column Join" version="0.0.2"> 1 <tool id="collection_column_join" name="Column Join" version="0.0.3">
2 <description>on Collections</description> 2 <description>on Collections</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="8.25">coreutils</requirement> 4 <requirement type="package" version="8.25">coreutils</requirement>
5 </requirements> 5 </requirements>
6 <command detect_errors="exit_code"><![CDATA[ 6 <command detect_errors="exit_code"><![CDATA[
17 touch output0.tmp && 17 touch output0.tmp &&
18 #set $delimiter = '\t' 18 #set $delimiter = '\t'
19 #set $left_identifier_column = $identifier_column 19 #set $left_identifier_column = $identifier_column
20 #set $tail_offset = int( str( $has_header ) ) + 1 20 #set $tail_offset = int( str( $has_header ) ) + 1
21 #for $i, $tabular_item in enumerate( $input_tabular ): 21 #for $i, $tabular_item in enumerate( $input_tabular ):
22 #if $has_header: 22 #if $old_col_in_header:
23 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp && 23 #if $has_header:
24 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp && 24 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
25 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
26 #else:
27 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
28 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
29 #end if
25 #else: 30 #else:
26 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}_%s", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp && 31 #if $has_header:
27 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp && 32 head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}" ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
33 tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
34 #else:
35 awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "${tabular_item.element_identifier}"); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
36 LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
37 #end if
28 #end if 38 #end if
29 #if $i == 0: 39 #if $i == 0:
30 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp && 40 mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp &&
31 #if $has_header: 41 #if $has_header:
32 awk '{ printf \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp && 42 awk '{ printf \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp &&
46 <inputs> 56 <inputs>
47 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/> 57 <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/>
48 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> --> 58 <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> -->
49 <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/> 59 <param name="identifier_column" type="integer" value="1" min="0" optional="False" label="Identifier column"/>
50 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/> 60 <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item"/>
61 <param name="old_col_in_header" type="boolean" checked="true" label="Keep original column header" help="Disable if you want columns headers to be only composed of the input dataset names"/>
51 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/> 62 <param name="fill_char" type="text" value="." optional="False" label="Fill character"/>
52 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create"> 63 <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create">
53 <option value="output_shell_script" selected="false">Shell script</option> 64 <option value="output_shell_script" selected="false">Shell script</option>
54 </param> 65 </param>
55 </inputs> 66 </inputs>
62 <tests> 73 <tests>
63 <test> 74 <test>
64 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/> 75 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
65 <param name="identifier_column" value="1"/> 76 <param name="identifier_column" value="1"/>
66 <param name="has_header" value="1"/> 77 <param name="has_header" value="1"/>
78 <param name="old_col_in_header" value="true"/>
67 <param name="fill_char" value="."/> 79 <param name="fill_char" value="."/>
68 <param name="include_outputs" /> 80 <param name="include_outputs" />
69 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/> 81 <output name="tabular_output" file="out_1.tabular" ftype="tabular"/>
70 </test> 82 </test>
71 <test> 83 <test>
72 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/> 84 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
73 <param name="identifier_column" value="1"/> 85 <param name="identifier_column" value="1"/>
74 <param name="has_header" value="0"/> 86 <param name="has_header" value="0"/>
87 <param name="old_col_in_header" value="true"/>
75 <param name="fill_char" value="."/> 88 <param name="fill_char" value="."/>
76 <param name="include_outputs" /> 89 <param name="include_outputs" />
77 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/> 90 <output name="tabular_output" file="out_2.tabular" ftype="tabular"/>
91 </test>
92 <test>
93 <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
94 <param name="identifier_column" value="1"/>
95 <param name="has_header" value="1"/>
96 <param name="old_col_in_header" value="false"/>
97 <param name="fill_char" value="."/>
98 <param name="include_outputs" />
99 <output name="tabular_output" file="out_3.tabular" ftype="tabular"/>
100 </test>
101 <test>
102 <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
103 <param name="identifier_column" value="1"/>
104 <param name="has_header" value="0"/>
105 <param name="old_col_in_header" value="false"/>
106 <param name="fill_char" value="."/>
107 <param name="include_outputs" />
108 <output name="tabular_output" file="out_4.tabular" ftype="tabular"/>
78 </test> 109 </test>
79 </tests> 110 </tests>
80 <help> 111 <help>
81 <![CDATA[ 112 <![CDATA[
82 Joins lists of tabular datasets together on a field. 113 Joins lists of tabular datasets together on a field.
115 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4 146 #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4
116 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 147 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3
117 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 148 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9
118 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6 149 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
119 150
151
152 **Joining** the files, using an **identifier column of 1** and a **header line count of 1**, but with **Keep original column header** disabled, will return::
153
154 #KEY in_1.tabular in_1.tabular in_1.tabular in_2.tabular in_2.tabular in_2.tabular in_3.tabular in_3.tabular in_3.tabular
155 one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3
156 three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9
157 two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
158
120 ]]> 159 ]]>
121 </help> 160 </help>
122 <citations> 161 <citations>
123 </citations> 162 </citations>
124 </tool> 163 </tool>