Mercurial > repos > saskia-hiltemann > cgatools_v17
comparison tools/cgatools17/join_v17.xml @ 1:3a2e0f376f26 draft
Minor change to tv2vcf.xml to allow for workflow automation
author | dgdekoning |
---|---|
date | Wed, 21 Oct 2015 10:09:15 -0400 (2015-10-21) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:751b62d30ae1 | 1:3a2e0f376f26 |
---|---|
1 <tool id="cg_join" name="Join" version="1.7.1"> | |
2 | |
3 <description>Join two tsv files based on equal fields or overlapping regions.</description> | |
4 | |
5 <requirements> | |
6 <requirement type="package" version="1">cgatools17</requirement> | |
7 </requirements> | |
8 <!-- Echoes the first line of cgatools' own output, then runs "cgatools join". Dataset paths and free-text values are double-quoted so the shell cannot glob-expand or word-split them (the default selection A.*,B.* contains wildcards); $dump stays unquoted because it is either a flag or an empty string. --> | |
9 <command> | |
10 cgatools | head -1; | |
11 cgatools join --beta | |
12 --input "$inputA" | |
13 --input "$inputB" | |
14 --output "$output" | |
15 --output-mode $outmode | |
16 $dump | |
17 --select "$col" | |
18 #for $m in $matches | |
19 --match "${m.match}" | |
20 #end for | |
21 #if $range_overlap.range == 'yes' | |
22 #for $o in $range_overlap.overlaps | |
23 --overlap "${o.overlap}" | |
24 #end for | |
25 --overlap-mode $range_overlap.overlapmode | |
26 --overlap-fraction-A $range_overlap.fractionA | |
27 --boundary-uncertainty-A $range_overlap.boundaryA | |
28 --overlap-fraction-B $range_overlap.fractionB | |
29 --boundary-uncertainty-B $range_overlap.boundaryB | |
30 #end if | |
31 </command> | |
32 | |
33 | |
34 <inputs> | |
35 <param name="inputA" type="data" format="tabular" label="Select input file A "/> | |
36 <param name="inputB" type="data" format="tabular" label="Select input file B "/> | |
37 <param name="col" type="text" value="A.*,B.*" size="40" label="Specify columns for output" help="The default value A.*,B.* prints all columns from both files, other selections enter in the format A.col_name1,A.col_name3,B.col_name1" /> | |
38 | |
39 <param name="outmode" type="select" label="Select output mode"> | |
40 <option value="full" selected="true">full (1 line for each match of records in A and B)</option> | |
41 <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option> | |
42 <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option> | |
43 </param> | |
44 | |
45 <param name="dump" type="select" label="Select records to print"> | |
46 <option value="--always-dump" selected="true">print all records of A even if not matched in B</option> | |
47 <option value="">print only records of A that are matched in B</option> | |
48 </param> | |
49 | |
50 <repeat name="matches" title="Exact match column"> | |
51 <param name="match" type="text" size="40" label="Enter column:column" help="Enter column_from_A:column_from_B, e.g. chromosome:chromosome"/> | |
52 </repeat> | |
53 | |
54 <conditional name="range_overlap"> | |
55 <param name="range" type="select" label="Do you want to match columns by overlapping range?"> | |
56 <option value="no">no</option> | |
57 <option value="yes">yes</option> | |
58 </param> | |
59 <when value="no"> | |
60 <!-- no options --> | |
61 </when> | |
62 <when value="yes"> | |
63 <repeat name="overlaps" title="Range column"> | |
64 <param name="overlap" type="text" size="40" label="Enter column[,column]:column[,column]" help="Enter range_start_from_A[,range_stop_from_A]:range_start_from_B[,range_stop_from_B], e.g. begin,end:begin,end (overlapping range of positions) or begin,end:position"/> | |
65 </repeat> | |
66 <!-- Overlap mode passed to the overlap-mode option. Strict overlap means A begins before B ends AND B begins before A ends; the less-than signs are written as entities because they appear in element text. --> | |
67 <param name="overlapmode" type="select" label="Select overlap mode"> | |
68 <option value="strict" selected="true">strict (overlap if A.begin&lt;B.end and B.begin&lt;A.end)</option> | |
69 <option value="allow-abutting-points">allow-abutting-points (overlap if A.begin&lt;B.end and B.begin&lt;A.end, or if A.begin&lt;=B.end and B.begin&lt;=A.end and either A or B has zero length.)</option> | |
70 </param> | |
71 <!-- Overlap filter for file A. The fraction multiplies a range length (see the help formula below), so it is a float param; an integer param would only accept whole numbers. Boundary uncertainty remains an integer. --> | |
72 <param name="fractionA" type="float" value="0" label="Minimum fraction of A region overlap " /> | |
73 <param name="boundaryA" type="integer" value="0" label="Boundary uncertainty for A for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-A * (A-range-length - boundary-uncertainty-A)"/> | |
74 <!-- Overlap filter for file B. The fraction multiplies a range length (see the help formula below), so it is a float param; an integer param would only accept whole numbers. Boundary uncertainty remains an integer. --> | |
75 <param name="fractionB" type="float" value="0" label="Minimum fraction of B region overlap " /> | |
76 <param name="boundaryB" type="integer" value="0" label="Boundary uncertainty for B for overlap filtering " help="Records failing the following boundary-uncertainty calculation are not included in the output: overlap length >= overlap-fraction-B * (B-range-length - boundary-uncertainty-B)"/> | |
77 </when> | |
78 </conditional> | |
79 | |
80 <!-- prefix for output file so you don't have to manually rename history items --> | |
81 <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/> | |
82 | |
83 </inputs> | |
84 | |
85 <outputs> | |
86 <data format="tabular" name="output" label="$fname ${tool.name} on data ${on_string}" /> | |
87 </outputs> | |
88 | |
89 | |
90 <help> | |
91 | |
92 **What it does** | |
93 | |
94 This tool joins two tab-delimited files based on equal fields or overlapping regions. | |
95 | |
96 **cgatools 1.7.1 Documentation** | |
97 | |
98 Userguide: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-user-guide.pdf | |
99 | |
100 Release notes: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-release-notes.pdf | |
101 | |
102 **Command line reference**:: | |
103 | |
104 COMMAND NAME | |
105 join - Joins two tab-delimited files based on equal fields or overlapping regions. | |
106 | |
107 DESCRIPTION | |
108 Joins two tab-delimited files based on equal fields or overlapping regions. | |
109 By default, an output record is produced for each match found between file | |
110 A and file B, but output format can be controlled by the --output-mode | |
111 parameter. | |
112 | |
113 OPTIONS | |
114 -h [ --help ] | |
115 Print this help message. | |
116 | |
117 --beta | |
118 This is a beta command. To run this command, you must pass the --beta | |
119 flag. | |
120 | |
121 --input arg | |
122 File name to use as input (may be passed in as arguments at the end of | |
123 the command), or omitted for stdin). There must be exactly two input | |
124 files to join. If only one file is specified by name, file A is taken | |
125 to be stdin and file B is the named file. File B is read fully into | |
126 memory, and file A is streamed. File A's columns appear first in the | |
127 output. | |
128 | |
129 --output arg (=STDOUT) | |
130 The output file name (may be omitted for stdout). | |
131 | |
132 --match arg | |
133 A match specification, which is a column from A and a column from B | |
134 separated by a colon. | |
135 | |
136 --overlap arg | |
137 Overlap specification. An overlap specification consists of a range | |
138 definition for files A and B, separated by a colon. A range definition | |
139 may be two columns, in which case they are interpreted as the beginning | |
140 and end of the range. Or it may be one column, in which case the range | |
141 is defined as the 1-base range starting at the given value. The records | |
142 from the two files must overlap in order to be considered for output. | |
143 Two ranges are considered to overlap if the overlap is at least one | |
144 base long, or if one of the ranges is length 0 and the ranges overlap | |
145 or abut. For example, "begin,end:offset" will match wherever end-begin | |
146 > 0, begin<offset+1, and end>offset, or wherever end-begin = 0, | |
147 begin<=offset+1, and end>=offset. | |
148 | |
149 | |
150 -m [ --output-mode ] arg (=full) | |
151 Output mode, one of the following: | |
152 full Print an output record for each match found between | |
153 file A and file B. | |
154 compact Print at most one record for each record of file A, | |
155 joining the file B values by a semicolon and | |
156 suppressing repeated B values and empty B values. | |
157 compact-pct Same as compact, but for each distinct B value, | |
158 annotate with the percentage of the A record that is | |
159 overlapped by B records with that B value. Percentage | |
160 is rounded up to nearest integer. | |
161 | |
162 --overlap-mode arg (=strict) | |
163 Overlap mode, one of the following: | |
164 strict Range A and B overlap if A.begin < B.end and | |
165 B.begin < A.end. | |
166 allow-abutting-points Range A and B overlap they meet the strict | |
167 requirements, or if A.begin <= B.end and | |
168 B.begin <= A.end and either A or B has zero | |
169 length. | |
170 | |
171 --select arg (=A.*,B.*) | |
172 Set of fields to select for output. | |
173 | |
174 -a [ --always-dump ] | |
175 Dump every record of A, even if there are no matches with file B. | |
176 | |
177 --overlap-fraction-A arg (=0) | |
178 Minimum fraction of A region overlap for filtering output. | |
179 | |
180 --boundary-uncertainty-A arg (=0) | |
181 Boundary uncertainty for overlap filtering. Specifically, records | |
182 failing the following predicate are filtered away: overlap >= | |
183 overlap-fraction-A * ( A-range-length - boundary-uncertainty-A ) | |
184 | |
185 --overlap-fraction-B arg (=0) | |
186 Minimum fraction of B region overlap for filtering output. | |
187 | |
188 --boundary-uncertainty-B arg (=0) | |
189 Boundary uncertainty for overlap filtering. Specifically, records | |
190 failing the following predicate are filtered away: overlap >= | |
191 overlap-fraction-B * ( B-range-length - boundary-uncertainty-B ) | |
192 | |
193 SUPPORTED FORMAT_VERSION | |
194 Any | |
195 </help> | |
196 </tool> |