view tools/filters/joiner.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line source

<tool id="join1" name="Join two Datasets" version="2.0.2">
  <description>side by side on a specified field</description>
  <!-- Runs join.py with, in order: the two input datasets, the join column of each,
       the output dataset, the values of the "unmatched" and "partial" selects
       (-u / -p for "Yes", an empty string for "No"), fixed index-depth and buffer
       settings, and the path of the generated fill_options_file configfile. -->
  <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
  <inputs>
    <!-- The two tabular datasets to join and, for each, the column holding the join key. -->
    <param format="tabular" name="input1" type="data" label="Join"/>
    <param name="field1" label="using column" type="data_column" data_ref="input1"/>
    <param format="tabular" name="input2" type="data" label="with"/>
    <param name="field2" label="and column" type="data_column" data_ref="input2"/>
    <!-- The selected option value of the next two selects is placed verbatim on the
         command line: a flag for "Yes", an empty string for "No". -->
    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
      <option value="-u">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
      <option value="-p">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <!-- Optional filling of empty columns. These settings are read by the
         fill_options_file configfile rather than by the command line directly. -->
    <conditional name="fill_empty_columns">
      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
        <option value="no_fill" selected="true">No</option>
        <option value="fill_empty">Yes</option>
      </param>
      <when value="no_fill">
        <!-- do nothing -->
      </when>
      <when value="fill_empty">
        <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
          <option value="fill_unjoined_only" selected="true">Yes</option>
          <option value="fill_all">No</option>
        </param>
        <conditional name="do_fill_empty_columns">
          <param name="column_fill_type" type="select" label="Fill Columns by">
            <option value="single_fill_value" selected="true">Single fill value</option>
            <option value="fill_value_by_column">Values by column</option>
          </param>
          <when value="single_fill_value">
            <param type="text" name="fill_value" label="Fill value" value="."/>
          </when>
          <when value="fill_value_by_column">
            <!-- One repeat per input; every entry pairs a column with its own fill value. -->
            <repeat name="column_fill1" title="Fill Column for Input 1">
              <param name="column_number1" label="Column" type="data_column" data_ref="input1"/>
              <param type="text" name="fill_value1" label="Fill value" value="."/>
            </repeat>
            <repeat name="column_fill2" title="Fill Column for Input 2">
              <param name="column_number2" label="Column" type="data_column" data_ref="input2"/>
              <param type="text" name="fill_value2" label="Fill value" value="."/>
            </repeat>
          </when>
        </conditional>
      </when>
    </conditional>
  </inputs>
  <configfiles>
    <!-- fill_options_file: a Cheetah template that renders the fill settings as JSON
         (via simplejson.dumps); the path of the rendered file is handed to join.py
         on the command line.
         The rendered object is empty unless "Fill empty columns" is "Yes". In that
         case it carries:
           fill_unjoined_only: true when "Only fill unjoined rows" is "Yes".
           file1_columns / file2_columns: one entry per column of the respective
             input (length taken from the dataset's "columns" metadata), preset to
             the single fill value or to "" and then overridden by each per-column
             repeat entry. Column numbers are turned into list indexes by
             subtracting 1. -->
    <configfile name="fill_options_file">&lt;%
import simplejson
%&gt;
#set $__fill_options = {}
#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
    #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
    #else:
        #set $__start_fill = ""
    #end if
    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
    #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
        #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
            #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
        #end for
        #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
            #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
        #end for
    #end if
#end if
${simplejson.dumps( __fill_options )}
    </configfile>
  </configfiles>
  <outputs>
    <!-- The joined dataset copies its metadata from the first input. format_source
         pins the output datatype to that same input, since format="input" alone does
         not say which of the two inputs is meant. format="input" is retained as the
         fallback for Galaxy releases that do not read format_source (TODO confirm
         against the oldest supported release). -->
    <data name="out_file1" format="input" format_source="input1" metadata_source="input1"/>
  </outputs>
  <tests>
    <!-- Test 1: join 1.bed with 2.bed on column 2 of each; both "keep" selects left
         at their empty value and filling switched off. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value=""/>
      <param name="partial" value=""/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out1.bed"/>
    </test>
    <!-- Test 2: same join with both "keep" selects set to "Yes". Note that "Yes" is
         the option label here, not the -u / -p option value. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out2.bed"/>
    </test>
    <!-- Test 3: as test 2, with filling enabled: fill_all and the single fill value "~". -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="single_fill_value"/>
      <param name="fill_value" value="~"/>
      <output name="out_file1" file="joiner_out3.bed"/>
    </test>
    <!-- Test 4: as test 3, but with per-column fill values: "+" for column 6 of
         input 1 and "NoChrom" for column 1 of input 2. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="fill_value_by_column"/>
      <param name="column_number1" value="6"/>
      <param name="fill_value1" value="+"/>
      <param name="column_number2" value="1"/>
      <param name="fill_value2" value="NoChrom"/>
      <output name="out_file1" file="joiner_out4.bed"/>
    </test>
  </tests>
  <help>

.. class:: warningmark

**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
You may choose to include lines of your first input that do not join with your second input.

- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.

-----

**Example**

Dataset1::

  chr1 10 20 geneA 
  chr1 50 80 geneB
  chr5 10 40 geneL

Dataset2::

  geneA tumor-suppressor
  geneB Foxp2
  geneC Gnas1
  geneE INK4a

Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2

Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2
  chr5 10 40 geneL

</help>
</tool>