Mercurial > repos > xuebing > sharplabtool
view tools/filters/joiner.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line source
<!--
  Galaxy tool definition: joins two tabular datasets side by side on one
  column from each input by invoking join.py.
-->
<tool id="join1" name="Join two Datasets" version="2.0.2">
  <description>side by side on a specified field</description>
  <!--
    $unmatched and $partial expand to "-u" / "-p" or to an empty string,
    depending on the select options below. The index depth and buffer
    values are passed through to join.py unchanged.
  -->
  <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
  <inputs>
    <param format="tabular" name="input1" type="data" label="Join"/>
    <param name="field1" label="using column" type="data_column" data_ref="input1" />
    <param format="tabular" name="input2" type="data" label="with" />
    <param name="field2" label="and column" type="data_column" data_ref="input2" />
    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
      <option value="-u">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
      <option value="-p">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <conditional name="fill_empty_columns">
      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
        <option value="no_fill" selected="true">No</option>
        <option value="fill_empty">Yes</option>
      </param>
      <when value="no_fill">
        <!-- do nothing -->
      </when>
      <when value="fill_empty">
        <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
          <option value="fill_unjoined_only" selected="true">Yes</option>
          <option value="fill_all">No</option>
        </param>
        <conditional name="do_fill_empty_columns">
          <param name="column_fill_type" type="select" label="Fill Columns by">
            <option value="single_fill_value" selected="true">Single fill value</option>
            <option value="fill_value_by_column">Values by column</option>
          </param>
          <when value="single_fill_value">
            <param type="text" name="fill_value" label="Fill value" value="."/>
          </when>
          <when value="fill_value_by_column">
            <repeat name="column_fill1" title="Fill Column for Input 1">
              <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
              <param type="text" name="fill_value1" value="."/>
            </repeat>
            <repeat name="column_fill2" title="Fill Column for Input 2">
              <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
              <param type="text" name="fill_value2" value="."/>
            </repeat>
          </when>
        </conditional>
      </when>
    </conditional>
  </inputs>
  <configfiles>
    <!--
      Cheetah template that serialises the fill settings as JSON for join.py.
      When filling is disabled the emitted object is empty. Otherwise it holds:
        fill_unjoined_only : boolean taken from the "fill_columns_by" select
        file1_columns      : one fill value per column of input1
        file2_columns      : one fill value per column of input2
      Per-column values override the defaults using 1-based column numbers.
      The template is wrapped in CDATA because it contains "<%", which is not
      allowed as literal XML character data.
    -->
    <configfile name="fill_options_file"><![CDATA[<%
import simplejson
%>
#set $__fill_options = {}
#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
    #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
    #else:
        #set $__start_fill = ""
    #end if
    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
    #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
        #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
            #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
        #end for
        #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
            #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
        #end for
    #end if
#end if
${simplejson.dumps( __fill_options )}
]]></configfile>
  </configfiles>
  <outputs>
    <data format="input" name="out_file1" metadata_source="input1" />
  </outputs>
  <tests>
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value=""/>
      <param name="partial" value=""/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out1.bed"/>
    </test>
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out2.bed"/>
    </test>
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="single_fill_value"/>
      <param name="fill_value" value="~"/>
      <output name="out_file1" file="joiner_out3.bed"/>
    </test>
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="fill_value_by_column"/>
      <param name="column_number1" value="6"/>
      <param name="fill_value1" value="+"/>
      <param name="column_number2" value="1"/>
      <param name="fill_value2" value="NoChrom"/>
      <output name="out_file1" file="joiner_out4.bed"/>
    </test>
  </tests>
  <help>

.. class:: warningmark

**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
You may choose to include lines of your first input that do not join with your second input.

- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.

-----

**Example**

Dataset1::

  chr1 10 20 geneA
  chr1 50 80 geneB
  chr5 10 40 geneL

Dataset2::

  geneA tumor-suppressor
  geneB Foxp2
  geneC Gnas1
  geneE INK4a

Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2

Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2
  chr5 10 40 geneL

  </help>
</tool>