<tool id="join1" name="Join two Datasets" version="2.0.2">
  <description>side by side on a specified field</description>
  <!-- Runs join.py on the two inputs and their join columns. $unmatched and
       $partial expand to the option values declared in the inputs section
       ("-u" / "-p", or an empty string when "No" is selected).
       $fill_options_file is the rendered config file declared in the
       configfiles section below. -->
  <command interpreter="python">join.py $input1 $input2 $field1 $field2 $out_file1 $unmatched $partial --index_depth=3 --buffer=50000000 --fill_options_file=$fill_options_file</command>
  <inputs>
    <!-- The two tabular datasets and the column of each to join on. -->
    <param format="tabular" name="input1" type="data" label="Join"/>
    <param name="field1" label="using column" type="data_column" data_ref="input1" />
    <param format="tabular" name="input2" type="data" label="with" />
    <param name="field2" label="and column" type="data_column" data_ref="input2" />
    <!-- Each select below passes its option value straight onto the command line. -->
    <param name="unmatched" type="select" label="Keep lines of first input that do not join with second input">
      <option value="-u">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <param name="partial" type="select" label="Keep lines of first input that are incomplete">
      <option value="-p">Yes</option>
      <option value="" selected="true">No</option>
    </param>
    <!-- Optional filling of empty columns. These values are not placed on the
         command line directly; they are read by the fill_options_file
         template. -->
    <conditional name="fill_empty_columns">
      <param name="fill_empty_columns_switch" type="select" label="Fill empty columns">
        <option value="no_fill" selected="true">No</option>
        <option value="fill_empty">Yes</option>
      </param>
      <when value="no_fill">
        <!-- do nothing -->
      </when>
      <when value="fill_empty">
        <param type="select" name="fill_columns_by" label="Only fill unjoined rows">
          <option value="fill_unjoined_only" selected="true">Yes</option>
          <option value="fill_all">No</option>
        </param>
        <conditional name="do_fill_empty_columns">
          <param name="column_fill_type" type="select" label="Fill Columns by">
            <option value="single_fill_value" selected="true">Single fill value</option>
            <option value="fill_value_by_column">Values by column</option>
          </param>
          <when value="single_fill_value">
            <param type="text" name="fill_value" label="Fill value" value="."/>
          </when>
          <when value="fill_value_by_column">
            <!-- One repeat per input: each entry pairs a column with its fill value. -->
            <repeat name="column_fill1" title="Fill Column for Input 1">
              <param name="column_number1" label="Column" type="data_column" data_ref="input1" />
              <param type="text" name="fill_value1" label="Fill value" value="."/>
            </repeat>
            <repeat name="column_fill2" title="Fill Column for Input 2">
              <param name="column_number2" label="Column" type="data_column" data_ref="input2" />
              <param type="text" name="fill_value2" label="Fill value" value="."/>
            </repeat>
          </when>
        </conditional>
      </when>
    </conditional>
  </inputs>
  <configfiles>
    <!-- Cheetah template rendered to a JSON object.
         With filling disabled the object is empty. Otherwise it holds
         "fill_unjoined_only" (boolean) plus "file1_columns" and
         "file2_columns", one fill value per column of the respective input.
         The per-column lists start from the single fill value (or "" in
         by-column mode) and are then overwritten at the 1-based column
         numbers chosen in the repeats.
         The opening and closing markers of the Python block are entity-escaped
         because a literal less-than sign is not allowed in XML text. -->
    <configfile name="fill_options_file">&lt;%
import simplejson
%&gt;
#set $__fill_options = {}
#if $fill_empty_columns['fill_empty_columns_switch'] == 'fill_empty':
    #set $__fill_options['fill_unjoined_only'] = $fill_empty_columns['fill_columns_by'].value == 'fill_unjoined_only'
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'single_fill_value':
        #set $__start_fill = $fill_empty_columns['do_fill_empty_columns']['fill_value'].value
    #else:
        #set $__start_fill = ""
    #end if
    #set $__fill_options['file1_columns'] = [ __start_fill for i in range( int( $input1.metadata.columns ) ) ]
    #set $__fill_options['file2_columns'] = [ __start_fill for i in range( int( $input2.metadata.columns ) ) ]
    #if $fill_empty_columns['do_fill_empty_columns']['column_fill_type'] == 'fill_value_by_column':
        #for column_fill1 in $fill_empty_columns['do_fill_empty_columns']['column_fill1']:
            #set $__fill_options['file1_columns'][ int( column_fill1['column_number1'].value ) - 1 ] = column_fill1['fill_value1'].value
        #end for
        #for column_fill2 in $fill_empty_columns['do_fill_empty_columns']['column_fill2']:
            #set $__fill_options['file2_columns'][ int( column_fill2['column_number2'].value ) - 1 ] = column_fill2['fill_value2'].value
        #end for
    #end if
#end if
${simplejson.dumps( __fill_options )}
    </configfile>
  </configfiles>
  <outputs>
    <!-- Metadata for the joined dataset is taken from the first input. -->
    <data format="input" name="out_file1" metadata_source="input1" />
  </outputs>
  <tests>
    <!-- Plain join on column 2 of both inputs, no extra lines kept, no filling. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value=""/>
      <param name="partial" value=""/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out1.bed"/>
    </test>
    <!-- Keep unmatched and incomplete lines, no filling. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="no_fill"/>
      <output name="out_file1" file="joiner_out2.bed"/>
    </test>
    <!-- Keep unmatched and incomplete lines, fill every row with a single value. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="single_fill_value"/>
      <param name="fill_value" value="~"/>
      <output name="out_file1" file="joiner_out3.bed"/>
    </test>
    <!-- Keep unmatched and incomplete lines, fill every row with per-column values. -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="input2" value="2.bed"/>
      <param name="field1" value="2"/>
      <param name="field2" value="2"/>
      <param name="unmatched" value="Yes"/>
      <param name="partial" value="Yes"/>
      <param name="fill_empty_columns_switch" value="fill_empty"/>
      <param name="fill_columns_by" value="fill_all"/>
      <param name="column_fill_type" value="fill_value_by_column"/>
      <param name="column_number1" value="6"/>
      <param name="fill_value1" value="+"/>
      <param name="column_number2" value="1"/>
      <param name="fill_value2" value="NoChrom"/>
      <output name="out_file1" file="joiner_out4.bed"/>
    </test>
  </tests>
  <help>

.. class:: warningmark

**This tool will attempt to reuse the metadata from your first input.** To change metadata assignments click on the "edit attributes" link of the history item generated by this tool.

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

This tool joins lines of two datasets on a common field. An empty string ("") is not a valid identifier.
You may choose to include lines of your first input that do not join with your second input.

- Columns are referenced with a **number**. For example, **3** refers to the 3rd column of a tab-delimited file.

-----

**Example**

Dataset1::

  chr1 10 20 geneA
  chr1 50 80 geneB
  chr5 10 40 geneL

Dataset2::

  geneA tumor-suppressor
  geneB Foxp2
  geneC Gnas1
  geneE INK4a

Joining the 4th column of Dataset1 with the 1st column of Dataset2 will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2

Joining the 4th column of Dataset1 with the 1st column of Dataset2, while keeping all lines from Dataset1, will yield::

  chr1 10 20 geneA geneA tumor-suppressor
  chr1 50 80 geneB geneB Foxp2
  chr5 10 40 geneL

  </help>
</tool>