diff tools/filters/joiner.xml @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/filters/joiner.xml	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,180 @@
+<tool id="join1" name="Join two Datasets" version="2.0.2">
+  <description>side by side on a specified field</description>
+  <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
+  <inputs>
+    <param format="tabular" name="input1" type="data" label="Join"/>
+    <param name="field1" label="using column" type="data_column" data_ref="input1" />
+    <param format="tabular" name="input2" type="data" label="with" />
+    <param name="field2" label="and column" type="data_column" data_ref="input2" />
+    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
+      <option value="-u">Yes</option>
+      <option value="" selected="true">No</option>
+    </param>
+    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
+      <option value="-p">Yes</option>
+      <option value="" selected="true">No</option>
+    </param>
+    <conditional name="fill_empty_columns">
+      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
+        <option value="no_fill" selected="True">No</option>
+        <option value="fill_empty">Yes</option>
+      </param>
+     <when value="no_fill">
+        <!-- do nothing -->
+     </when>
+     <when value="fill_empty">
+       <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
+         <option value="fill_unjoined_only" selected="True">Yes</option>
+         <option value="fill_all">No</option>
+       </param>
+       <conditional name="do_fill_empty_columns">
+         <param name="column_fill_type" type="select" label="Fill Columns by">
+           <option value="single_fill_value" selected="True">Single fill value</option>
+           <option value="fill_value_by_column">Values by column</option>
+         </param>
+         <when value="single_fill_value">
+           <param type="text" name="fill_value" label="Fill value" value="."/>
+         </when>
+         <when value="fill_value_by_column">
+           <repeat name="column_fill1" title="Fill Column for Input 1">
+             <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
+             <param type="text" name="fill_value1" value="."/>
+           </repeat>
+           <repeat name="column_fill2" title="Fill Column for Input 2">
+             <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
+             <param type="text" name="fill_value2" value="."/>
+           </repeat>
+         </when>
+       </conditional>
+     </when>
+   </conditional>
+  </inputs>
+  <configfiles>
+    <configfile name="fill_options_file">&lt;%
+import simplejson
+%&gt;
+#set $__fill_options = {}
+#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
+    #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
+    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
+        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
+    #else:
+        #set $__start_fill = ""
+    #end if
+    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
+    #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
+    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
+        #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
+            #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
+        #end for
+        #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
+            #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
+        #end for
+    #end if
+#end if
+${simplejson.dumps( __fill_options )}
+    </configfile>
+  </configfiles>
+  <outputs>
+     <data format="input" name="out_file1" metadata_source="input1" />
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="1.bed"/>
+      <param name="input2" value="2.bed"/>
+      <param name="field1" value="2"/>
+      <param name="field2" value="2"/>
+      <param name="unmatched" value=""/>
+      <param name="partial" value=""/>
+      <param name="fill_empty_columns_switch" value="no_fill"/>
+      <output name="out_file1" file="joiner_out1.bed"/>
+    </test>
+    <test>
+      <param name="input1" value="1.bed"/>
+      <param name="input2" value="2.bed"/>
+      <param name="field1" value="2"/>
+      <param name="field2" value="2"/>
+      <param name="unmatched" value="Yes"/>
+      <param name="partial" value="Yes"/>
+      <param name="fill_empty_columns_switch" value="no_fill"/>
+      <output name="out_file1" file="joiner_out2.bed"/>
+    </test>
+    <test>
+      <param name="input1" value="1.bed"/>
+      <param name="input2" value="2.bed"/>
+      <param name="field1" value="2"/>
+      <param name="field2" value="2"/>
+      <param name="unmatched" value="Yes"/>
+      <param name="partial" value="Yes"/>
+      <param name="fill_empty_columns_switch" value="fill_empty"/>
+      <param name="fill_columns_by" value="fill_all"/>
+      <param name="column_fill_type" value="single_fill_value"/>
+      <param name="fill_value" value="~"/>
+      <output name="out_file1" file="joiner_out3.bed"/>
+    </test>
+    <test>
+      <param name="input1" value="1.bed"/>
+      <param name="input2" value="2.bed"/>
+      <param name="field1" value="2"/>
+      <param name="field2" value="2"/>
+      <param name="unmatched" value="Yes"/>
+      <param name="partial" value="Yes"/>
+      <param name="fill_empty_columns_switch" value="fill_empty"/>
+      <param name="fill_columns_by" value="fill_all"/>
+      <param name="column_fill_type" value="fill_value_by_column"/>
+      <param name="column_number1" value="6"/>
+      <param name="fill_value1" value="+"/>
+      <param name="column_number2" value="1"/>
+      <param name="fill_value2" value="NoChrom"/>
+      <output name="out_file1" file="joiner_out4.bed"/>
+    </test>
+  </tests>
+  <help>
+
+.. class:: warningmark
+
+**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**Syntax**
+
+This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
+You may choose to include lines of your first input that do not join with your second input.
+
+- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.
+
+-----
+
+**Example**
+
+Dataset1::
+
+  chr1 10 20 geneA 
+  chr1 50 80 geneB
+  chr5 10 40 geneL
+
+Dataset2::
+
+  geneA tumor-supressor
+  geneB Foxp2
+  geneC Gnas1
+  geneE INK4a
+
+Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::
+
+  chr1 10 20 geneA geneA tumor-suppressor
+  chr1 50 80 geneB geneB Foxp2
+
+Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::
+
+  chr1 10 20 geneA geneA tumor-suppressor
+  chr1 50 80 geneB geneB Foxp2
+  chr5 10 40 geneL
+
+</help>
+</tool>