Mercurial > repos > iuc > collection_column_join
changeset 0:2f120a5c49b1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/collection_column_join commit 3b918f5a99ea13ec5acc7cc5fdd310fadb773ac0
author | iuc |
---|---|
date | Fri, 27 May 2016 11:41:19 -0400 |
parents | |
children | 9c8536c7ed42 |
files | collection_column_join.xml test-data/in_1.tabular test-data/in_1_headerless.tabular test-data/in_2.tabular test-data/in_2_headerless.tabular test-data/in_3.tabular test-data/in_3_headerless.tabular test-data/out_1.tabular test-data/out_2.tabular tool_dependencies.xml |
diffstat | 10 files changed, 165 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/collection_column_join.xml Fri May 27 11:41:19 2016 -0400
@@ -0,0 +1,137 @@
+<tool id="collection_column_join" name="Column Join" version="0.0.1">
+    <!-- Performs a full outer join of the input tabular datasets on one key column by generating a shell script built on GNU coreutils (sort, join, paste). -->
+    <description>
+        on Collections
+    </description>
+    <requirements>
+        <requirement type="package" version="8.22">gnu_coreutils</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
+    <command><![CDATA[
+        #if "output_shell_script" in str( $include_outputs ).split( "," ):
+            cp "${collection_column_join_script}" "${script_output}" &&
+        #end if
+        sh "${collection_column_join_script}"
+    ]]>
+    </command>
+    <configfiles>
+        <configfile name="collection_column_join_script"><![CDATA[
+#!/bin/sh
+touch header0.tmp &&
+touch output0.tmp &&
+## Intermediate results alternate between output0.tmp/output1.tmp and
+## header0.tmp/header1.tmp: item i reads the files numbered i % 2 and
+## writes the files numbered ( i + 1 ) % 2.
+#set $delimiter = '\t'
+#set $left_identifier_column = $identifier_column
+#set $tail_offset = int( str( $has_header ) ) + 1
+#for $i, $tabular_item in enumerate( $input_tabular ):
+    ## Build this item's header cells (one per non-key column, prefixed with the element identifier) and its key-sorted body.
+    #if $has_header:
+        head -n ${has_header} "${tabular_item}" | awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s_%s", "${tabular_item.element_identifier}", arr[i] ); ctr++ } }; printf( "\n" ); }' > input_header.tmp &&
+        tail -n +${tail_offset} "${tabular_item}" | LC_ALL=C sort -t "${delimiter}" -k $identifier_column > input_file.tmp &&
+    #else:
+        awk '{ n = split(\$0,arr,"${delimiter}"); ctr=1; for(i=1;i<=n;i++){ if( i != $identifier_column ){ if( ctr > 1) {printf("${delimiter}")}; printf( "%s_%s", "${tabular_item.element_identifier}", i ); ctr++ } }; exit }' "${tabular_item}" > input_header.tmp &&
+        LC_ALL=C sort -t "${delimiter}" -k $identifier_column "${tabular_item}" > input_file.tmp &&
+    #end if
+    ## The first item seeds the running output and supplies the key column's header; later items are outer-joined onto it.
+    #if $i == 0:
+        mv input_file.tmp output${ ( $i + 1 ) % 2 }.tmp &&
+        #if $has_header:
+            awk -F "${delimiter}" '{ printf "%s", \$${identifier_column}; exit }' "${tabular_item}" > header${ $i % 2 }.tmp &&
+        #else:
+            echo "#KEY" > header${ $i % 2 }.tmp &&
+        #end if
+    #else:
+        LC_ALL=C join -o auto -a 1 -a 2 -1 ${left_identifier_column} -2 ${identifier_column} -t "${delimiter}" -e "${fill_char}" output${ $i % 2 }.tmp input_file.tmp > output${ ( $i + 1 ) % 2 }.tmp &&
+        ## join prints the join field first, so the running output is keyed on column 1 from here on.
+        #set $left_identifier_column = 1
+    #end if
+    paste -d "${delimiter}" header${ $i % 2 }.tmp input_header.tmp > header${ ( $i + 1 ) % 2 }.tmp &&
+#end for
+cat header${ ( $i + 1 ) % 2 }.tmp output${ ( $i + 1 ) % 2 }.tmp > "${tabular_output}"
+    ]]>
+    </configfile>
+    </configfiles>
+    <inputs>
+        <param name="input_tabular" type="data" format="tabular" multiple="True" optional="False" label="Tabular files"/>
+        <!-- <param name="identifier_column" type="data_column" data_ref="input_tabular" value="0" min="0" optional="False" label="Identifier column"/> -->
+        <param name="identifier_column" type="integer" value="1" min="1" optional="False" label="Identifier column" help="1-based number of the column to join on; the same column number is used for every input."/>
+        <param name="has_header" type="integer" value="0" min="0" optional="False" label="Number of Header lines in each item" help="These leading lines are treated as column headers and are excluded from the sort and join."/>
+        <param name="fill_char" type="text" value="." optional="False" label="Fill character" help="Written in place of the values of an input that has no row for a given key."/>
+        <param name="include_outputs" type="select" multiple="True" label="Additional datasets to create">
+            <option value="output_shell_script" selected="false">Shell script</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="tabular_output"/>
+        <data format="txt" name="script_output">
+            <filter>include_outputs and "output_shell_script" in include_outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_tabular" value="in_1.tabular,in_2.tabular,in_3.tabular" ftype="tabular"/>
+            <param name="identifier_column" value="1"/>
+            <param name="has_header" value="1"/>
+            <param name="fill_char" value="."/>
+            <param name="include_outputs" />
+            <output name="tabular_output" file="out_1.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="input_tabular" value="in_1_headerless.tabular,in_2_headerless.tabular,in_3_headerless.tabular" ftype="tabular"/>
+            <param name="identifier_column" value="1"/>
+            <param name="has_header" value="0"/>
+            <param name="fill_char" value="."/>
+            <param name="include_outputs" />
+            <output name="tabular_output" file="out_2.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+Joins lists of tabular datasets together on a field.
+
+-----
+
+**Example**
+
+To join three files, with headers, based on the first column:
+
+**First file (in_1.tabular)**::
+
+    #KEY c2 c3 c4
+    one 1-1 1-2 1-3
+    two 1-4 1-5 1-6
+    three 1-7 1-8 1-9
+
+
+**Second File (in_2.tabular)**::
+
+    #KEY c2 c3 c4
+    one 2-1 2-2 2-3
+    two 2-4 2-5 2-6
+    three 2-7 2-8 2-9
+
+**Third file (in_3.tabular)**::
+
+    #KEY c2 c3 c4
+    one 3-3 3-2 3-3
+    two 3-4 3-5 3-6
+    three 3-7 3-8 3-9
+
+
+**Joining** the files, using **1** as the **Identifier column** and **1** as the **Number of Header lines**, will return::
+
+    #KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4
+    one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3
+    three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9
+    two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
+
+        ]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_1.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,4 @@ +#KEY c2 c3 c4 +one 1-1 1-2 1-3 +two 1-4 1-5 1-6 +three 1-7 1-8 1-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_1_headerless.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,3 @@ +one 1-1 1-2 1-3 +two 1-4 1-5 1-6 +three 1-7 1-8 1-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_2.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,4 @@ +#KEY c2 c3 c4 +one 2-1 2-2 2-3 +two 2-4 2-5 2-6 +three 2-7 2-8 2-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_2_headerless.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,3 @@ +one 2-1 2-2 2-3 +two 2-4 2-5 2-6 +three 2-7 2-8 2-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_3.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,4 @@ +#KEY c2 c3 c4 +one 3-3 3-2 3-3 +two 3-4 3-5 3-6 +three 3-7 3-8 3-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in_3_headerless.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,3 @@ +one 3-3 3-2 3-3 +two 3-4 3-5 3-6 +three 3-7 3-8 3-9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_1.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,4 @@ +#KEY in_1.tabular_c2 in_1.tabular_c3 in_1.tabular_c4 in_2.tabular_c2 in_2.tabular_c3 in_2.tabular_c4 in_3.tabular_c2 in_3.tabular_c3 in_3.tabular_c4 +one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 +three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 +two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_2.tabular Fri May 27 11:41:19 2016 -0400 @@ -0,0 +1,4 @@ +#KEY in_1_headerless.tabular_2 in_1_headerless.tabular_3 in_1_headerless.tabular_4 in_2_headerless.tabular_2 in_2_headerless.tabular_3 in_2_headerless.tabular_4 in_3_headerless.tabular_2 in_3_headerless.tabular_3 in_3_headerless.tabular_4 +one 1-1 1-2 1-3 2-1 2-2 2-3 3-3 3-2 3-3 +three 1-7 1-8 1-9 2-7 2-8 2-9 3-7 3-8 3-9 +two 1-4 1-5 1-6 2-4 2-5 2-6 3-4 3-5 3-6
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Fri May 27 11:41:19 2016 -0400
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- gnu_coreutils 8.22, taken from the iuc-owned Tool Shed repository pinned to the changeset below. -->
+    <package name="gnu_coreutils" version="8.22">
+        <repository name="package_gnu_coreutils_8_22" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="ac64dfe4b1fb"/>
+    </package>
+</tool_dependency>