Mercurial > repos > xuebing > sharplabtool
diff tools/filters/joiner.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/filters/joiner.xml Fri Mar 09 19:37:19 2012 -0500 @@ -0,0 +1,180 @@ +<tool id="join1" name="Join two Datasets" version="2.0.2"> + <description>side by side on a specified field</description> + <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command> + <inputs> + <param format="tabular" name="input1" type="data" label="Join"/> + <param name="field1" label="using column" type="data_column" data_ref="input1" /> + <param format="tabular" name="input2" type="data" label="with" /> + <param name="field2" label="and column" type="data_column" data_ref="input2" /> + <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input"> + <option value="-u">Yes</option> + <option value="" selected="true">No</option> + </param> + <param name="partial" type="select" label="Keep lines of first input that are incomplete"> + <option value="-p">Yes</option> + <option value="" selected="true">No</option> + </param> + <conditional name="fill_empty_columns"> + <param name="fill_empty_columns_switch" type="select" label="Fill empty columns"> + <option value="no_fill" selected="True">No</option> + <option value="fill_empty">Yes</option> + </param> + <when value="no_fill"> + <!-- do nothing --> + </when> + <when value="fill_empty"> + <param type="select" name="fill_columns_by" label="Only fill unjoined rows"> + <option value="fill_unjoined_only" selected="True">Yes</option> + <option value="fill_all">No</option> + </param> + <conditional name="do_fill_empty_columns"> + <param name="column_fill_type" type="select" label="Fill Columns by"> + <option value="single_fill_value" selected="True">Single fill value</option> + <option value="fill_value_by_column">Values by column</option> + </param> + <when value="single_fill_value"> + <param type="text" name="fill_value" 
label="Fill value" value="."/> + </when> + <when value="fill_value_by_column"> + <repeat name="column_fill1" title="Fill Column for Input 1"> + <param name="column_number1" label="Column" type="data_column" data_ref="input1" /> + <param type="text" name="fill_value1" value="."/> + </repeat> + <repeat name="column_fill2" title="Fill Column for Input 2"> + <param name="column_number2" label="Column" type="data_column" data_ref="input2" /> + <param type="text" name="fill_value2" value="."/> + </repeat> + </when> + </conditional> + </when> + </conditional> + </inputs> + <configfiles> + <configfile name="fill_options_file"><% +import simplejson +%> +#set $__fill_options = {} +#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty': + #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only' + #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value': + #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value + #else: + #set $__start_fill = "" + #end if + #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ] + #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ] + #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column': + #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']: + #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value + #end for + #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']: + #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value + #end for + #end if +#end if +${simplejson.dumps( __fill_options )} + </configfile> + </configfiles> + <outputs> + <data format="input" name="out_file1" 
metadata_source="input1" /> + </outputs> + <tests> + <test> + <param name="input1" value="1.bed"/> + <param name="input2" value="2.bed"/> + <param name="field1" value="2"/> + <param name="field2" value="2"/> + <param name="unmatched" value=""/> + <param name="partial" value=""/> + <param name="fill_empty_columns_switch" value="no_fill"/> + <output name="out_file1" file="joiner_out1.bed"/> + </test> + <test> + <param name="input1" value="1.bed"/> + <param name="input2" value="2.bed"/> + <param name="field1" value="2"/> + <param name="field2" value="2"/> + <param name="unmatched" value="Yes"/> + <param name="partial" value="Yes"/> + <param name="fill_empty_columns_switch" value="no_fill"/> + <output name="out_file1" file="joiner_out2.bed"/> + </test> + <test> + <param name="input1" value="1.bed"/> + <param name="input2" value="2.bed"/> + <param name="field1" value="2"/> + <param name="field2" value="2"/> + <param name="unmatched" value="Yes"/> + <param name="partial" value="Yes"/> + <param name="fill_empty_columns_switch" value="fill_empty"/> + <param name="fill_columns_by" value="fill_all"/> + <param name="column_fill_type" value="single_fill_value"/> + <param name="fill_value" value="~"/> + <output name="out_file1" file="joiner_out3.bed"/> + </test> + <test> + <param name="input1" value="1.bed"/> + <param name="input2" value="2.bed"/> + <param name="field1" value="2"/> + <param name="field2" value="2"/> + <param name="unmatched" value="Yes"/> + <param name="partial" value="Yes"/> + <param name="fill_empty_columns_switch" value="fill_empty"/> + <param name="fill_columns_by" value="fill_all"/> + <param name="column_fill_type" value="fill_value_by_column"/> + <param name="column_number1" value="6"/> + <param name="fill_value1" value="+"/> + <param name="column_number2" value="1"/> + <param name="fill_value2" value="NoChrom"/> + <output name="out_file1" file="joiner_out4.bed"/> + </test> + </tests> + <help> + +.. 
class:: warningmark + +**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool. + +.. class:: infomark + +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + +----- + +**Syntax** + +This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier. +You may choose to include lines of your first input that do not join with your second input. + +- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file. + +----- + +**Example** + +Dataset1:: + + chr1 10 20 geneA + chr1 50 80 geneB + chr5 10 40 geneL + +Dataset2:: + + geneA tumor-suppressor + geneB Foxp2 + geneC Gnas1 + geneE INK4a + +Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield:: + + chr1 10 20 geneA geneA tumor-suppressor + chr1 50 80 geneB geneB Foxp2 + +Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield:: + + chr1 10 20 geneA geneA tumor-suppressor + chr1 50 80 geneB geneB Foxp2 + chr5 10 40 geneL + +</help> +</tool>