Mercurial > repos > saskia-hiltemann > cgatools_v17
comparison tools/cgatools17/calldiff_v17.xml @ 1:3a2e0f376f26 draft
Minor change to tv2vcf.xml to allow for workflow automation
author | dgdekoning |
---|---|
date | Wed, 21 Oct 2015 10:09:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:751b62d30ae1 | 1:3a2e0f376f26 |
---|---|
1 <tool id="cg_calldiff" name="CallDiff" version="1.7.1"> | |
2 | |
3 <description>Compares two Complete Genomics variant files.</description> | |
4 <!-- runtime dependency: the cgatools17 package; presumably resolved through Galaxy's tool dependency mechanism (TODO confirm that version "1" matches the installed package definition) --> | |
5 <requirements> | |
6 <requirement type="package" version="1">cgatools17</requirement> | |
7 </requirements> | |
8 | |
9 <command> <!-- echo the first line of cgatools' own output, then run calldiff; reports land in the working directory with the cg_ prefix. The selected report names are joined into one comma-separated list: empty selections vanish during shell word splitting and tr turns the remaining separators into commas. --> | |
10 cgatools | head -1; | |
11 cgatools calldiff | |
12 --beta | |
13 --reference '${crr.fields.crr_path}' | |
14 --variantsA '$inputA' | |
15 --variantsB '$inputB' | |
16 $validation | |
17 $diploid | |
18 --locus-stats-column-count $column | |
19 --max-hypothesis-count $hypothesis | |
20 --output-prefix cg_ | |
21 --reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} | tr -s ' ' ','` | |
22 </command> | |
23 | |
24 <inputs> | |
25 <!-- reference build: the crr file is picked from the cg_anno_files data table --> | |
26 <param name="crr" type="select" label="Reference Build"> | |
27 <options from_data_table="cg_anno_files" /> | |
28 </param> | |
29 | |
30 <!-- the two variant files to compare --> | |
31 <param name="inputA" type="data" format="cg_var,tabular" label="Var file A"/> | |
32 <param name="inputB" type="data" format="cg_var,tabular" label="Var file B"/> | |
33 | |
34 <!-- report toggles: a selected value is the report name handed to calldiff, an empty value leaves the report out; the first option listed is the default --> | |
35 <param name="report1" type="select" label="Create report SuperlocusOutput"> | |
36 <option value="">no</option> | |
37 <option value="SuperlocusOutput">yes</option> | |
38 </param> | |
39 <param name="report2" type="select" label="Create report SuperlocusStats"> | |
40 <option value="">no</option> | |
41 <option value="SuperlocusStats">yes</option> | |
42 </param> | |
43 <param name="report3" type="select" label="Create report LocusOutput"> | |
44 <option value="">no</option> | |
45 <option value="LocusOutput">yes</option> | |
46 </param> | |
47 <param name="report4" type="select" label="Create report LocusStats"> | |
48 <option value="">no</option> | |
49 <option value="LocusStats">yes</option> | |
50 </param> | |
51 <param name="report5" type="select" label="Create report VariantOutput" help="Both variant files annotated by comparison results.If the somatic output report is requested, file A is also annotated with the same score ranks as produced in that report."> | |
52 <option value="VariantOutput">yes</option> | |
53 <option value="">no</option> | |
54 </param> | |
55 | |
56 <!-- calldiff tuning options; select values are inserted into the command line verbatim --> | |
57 <param name="diploid" type="select" label="Use diploid variant model" help="Uses varScoreEAF instead of varScoreVAF in somatic score computations. Also, uses diploid variant model instead of variable allele mixture model."> | |
58 <option value="">no</option> | |
59 <option value="--diploid">yes</option> | |
60 </param> | |
61 | |
62 <param name="column" type="integer" value="15" label="Number of columns for locus compare classification in the locus stats file (default 15)"/> | |
63 | |
64 <param name="hypothesis" type="integer" value="32" label="Maximum number of possible phasings to consider for a superlocus (default 32)"/> | |
65 | |
66 <param name="validation" type="select" label="Reference cover validation" help="Turns on/off validation that all bases of a chromosome are covered by calls of the variant file."> | |
67 <option value="">on</option> | |
68 <option value="--no-reference-cover-validation">off</option> | |
69 </param> | |
70 | |
71 <!-- optional prefix placed at the start of every output dataset label, so history items need no manual renaming --> | |
72 <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/> | |
73 </inputs> | |
74 | |
75 <outputs> <!-- one dataset per report file found in the working directory; each filter keeps its dataset only when the matching report was requested --> | |
76 <data name="output1" format="tabular" from_work_dir="cg_SuperlocusOutput.tsv" label="$fname ${tool.name} on ${on_string}: SuperlocusOutput"> | |
77 <filter>report1 == 'SuperlocusOutput'</filter> | |
78 </data> | |
79 <data name="output2" format="tabular" from_work_dir="cg_SuperlocusStats.tsv" label="$fname ${tool.name} on ${on_string}: SuperlocusStats"> | |
80 <filter>report2 == 'SuperlocusStats'</filter> | |
81 </data> | |
82 <data name="output3" format="tabular" from_work_dir="cg_LocusOutput.tsv" label="$fname ${tool.name} on ${on_string}: LocusOutput"> | |
83 <filter>report3 == 'LocusOutput'</filter> | |
84 </data> | |
85 <data name="output4" format="tabular" from_work_dir="cg_LocusStats.tsv" label="$fname ${tool.name} on ${on_string}: LocusStats"> | |
86 <filter>report4 == 'LocusStats'</filter> | |
87 </data> | |
88 <data name="output5a" format="tabular" from_work_dir="cg_VariantsA.tsv" label="$fname ${tool.name} on ${on_string}: VariantsA"> | |
89 <filter>report5 == 'VariantOutput'</filter> | |
90 </data> | |
91 <data name="output5b" format="tabular" from_work_dir="cg_VariantsB.tsv" label="$fname ${tool.name} on ${on_string}: VariantsB"> | |
92 <filter>report5 == 'VariantOutput'</filter> | |
93 </data> | |
94 </outputs> | |
95 | |
96 <tests> | |
97 <test> <!-- compares the two chr22 variant files against hg18 and requests only the SuperlocusOutput report --> | |
98 <param name="inputA" value="HCC1187_T_chr22.tsv" /> | |
99 <param name="inputB" value="HCC1187_N_chr22.tsv" /> | |
100 <param name="crr" value="hg18" /> | |
101 <param name="report1" value="SuperlocusOutput" /> | |
102 <param name="report2" value="" /> | |
103 <param name="report3" value="" /> | |
104 <param name="report4" value="" /> | |
105 <param name="report5" value="" /> | |
106 <param name="diploid" value="" /> | |
107 <param name="column" value="15" /> | |
108 <param name="hypothesis" value="32" /> | |
109 <output name="output1" file="HCC1187_chr22_SuperLocusOutput.tsv" /> | |
110 </test> | |
111 </tests> | |
112 | |
113 <help> | |
114 **What it does** | |
115 | |
116 This tool compares two Complete Genomics variant files. | |
117 | |
118 **cgatools 1.7.1 Documentation** | |
119 | |
120 Userguide: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-user-guide.pdf | |
121 | |
122 Release notes: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-release-notes.pdf | |
123 | |
124 **Command line reference**:: | |
125 | |
126 COMMAND NAME | |
127 calldiff - Compares two Complete Genomics variant files. | |
128 | |
129 DESCRIPTION | |
130 Compares two Complete Genomics variant files. Divides the genome up into | |
131 superloci of nearby variants, then compares the superloci. Also refines the | |
132 comparison to determine per-call or per-locus comparison results. | |
133 | |
134 Comparison results are usually described by a semi-colon separated string, | |
135 one per allele. Each allele's comparison result is one of the following | |
136 classifications: | |
137 | |
138 ref-identical The alleles of the two variant files are identical, and | |
139 they are consistent with the reference. | |
140 alt-identical The alleles of the two variant files are identical, and | |
141 they are inconsistent with the reference. | |
142 ref-consistent The alleles of the two variant files are consistent, | |
143 and they are consistent with the reference. | |
144 alt-consistent The alleles of the two variant files are consistent, | |
145 and they are inconsistent with the reference. | |
146 onlyA The alleles of the two variant files are inconsistent, | |
147 and only file A is inconsistent with the reference. | |
148 onlyB The alleles of the two variant files are inconsistent, | |
149 and only file B is inconsistent with the reference. | |
150 mismatch The alleles of the two variant files are inconsistent, | |
151 and they are both inconsistent with the reference. | |
152 phase-mismatch The two variant files would be consistent if the | |
153 hapLink field had been empty, but they are | |
154 inconsistent. | |
155 ploidy-mismatch The superlocus did not have uniform ploidy. | |
156 | |
157 In some contexts, this classification is rolled up into a simplified | |
158 classification, which is one of "identical", "consistent", "onlyA", | |
159 "onlyB", or "mismatch". | |
160 | |
161 A good place to start looking at the results is the superlocus-output file. | |
162 It has columns defined as follows: | |
163 | |
164 SuperlocusId An identifier given to the superlocus. | |
165 Chromosome The name of the chromosome. | |
166 Begin The 0-based offset of the start of the superlocus. | |
167 End The 0-based offset of the base one past the end of the | |
168 superlocus. | |
169 Classification The match classification of the superlocus. | |
170 Reference The reference sequence. | |
171 AllelesA A semicolon-separated list of the alleles (one per | |
172 haplotype) for variant file A, for the phasing with the | |
173 best comparison result. | |
174 AllelesB A semicolon-separated list of the alleles (one per | |
175 haplotype) for variant file B, for the phasing with the | |
176 best comparison result. | |
177 | |
178 The locus-output file contains, for each locus in file A and file B that is | |
179 not consistent with the reference, an annotated set of calls for the locus. | |
180 The calls are annotated with the following columns: | |
181 | |
182 SuperlocusId The id of the superlocus containing the locus. | |
183 File The variant file (A or B). | |
184 LocusClassification The locus classification is determined by the | |
185 varType column of the call that is inconsistent | |
186 with the reference, concatenated with a | |
187 modifier that describes whether the locus is | |
188 heterozygous, homozygous, or contains no-calls. | |
189 If there is no one variant in the locus (i.e., | |
190 it is heterozygous alt-alt), the locus | |
191 classification begins with "other". | |
192 LocusDiffClassification The match classification for the locus. This is | |
193 defined to be the best of the comparison of the | |
194 locus to the same region in the other file, or | |
195 the comparison of the superlocus. | |
196 | |
197 The somatic output file contains a list of putative somatic variations of | |
198 genome A. The output includes only those loci that can be classified as | |
199 snp, del, ins or sub in file A, and are called reference in the file B. | |
200 Every locus is annotated with the following columns: | |
201 | |
202 VarCvgA The totalReadCount from file A for this locus | |
203 (computed on the fly if file A is not a | |
204 masterVar file). | |
205 VarScoreA The varScoreVAF from file A, or varScoreEAF if | |
206 the "--diploid" option is used. | |
207 RefCvgB The maximum of the uniqueSequenceCoverage | |
208 values for the locus in genome B. | |
209 RefScoreB Minimum of the reference scores of the locus in | |
210 genome B. | |
211 SomaticCategory The category used for determining the | |
212 calibrated scores and the SomaticRank. | |
213 VarScoreACalib The calibrated variant score of file A, under | |
214 the model selected by using or not using the | |
215 "--diploid" option, and corrected for the count | |
216 of heterozygous variants observed in this | |
217 genome. See user guide for more information. | |
218 VarScoreBCalib The calibrated reference score of file B, under | |
219 the model selected by using or not using the | |
220 "--diploid" option, and corrected for the count | |
221 of heterozygous variants observed in this | |
222 genome. See user guide for more information. | |
223 SomaticRank The estimated rank of this somatic mutation, | |
224 amongst all true somatic mutations within this | |
225 SomaticCategory. The value is a number between | |
226 0 and 1; a value of 0.012 means, for example, | |
227 that an estimated 1.2% of the true somatic | |
228 mutations in this somaticCategory have a | |
229 somaticScore less than the somaticScore for | |
230 this mutation. See user guide for more | |
231 information. | |
232 SomaticScore An integer that provides a total order on | |
233 quality for all somatic mutations. It is equal | |
234 to -10*log10( P(false)/P(true) ), under the | |
235 assumption that this genome has a rate of | |
236 somatic mutation equal to 1/Mb for | |
237 SomaticCategory snp, 1/10Mb for SomaticCategory | |
238 ins, 1/10Mb for SomaticCategory del, and 1/20Mb | |
239 for SomaticCategory sub. The computation is | |
240 based on the assumptions described in the user | |
241 guide, and is affected by choice of variant | |
242 model selected by using or not using the | |
243 "--diploid" option. | |
244 SomaticQuality Equal to VQHIGH for all somatic mutations where | |
245 SomaticScore >= -10. Otherwise, this column is | |
246 empty. | |
247 | |
248 OPTIONS | |
249 -h [ --help ] | |
250 Print this help message. | |
251 | |
252 --reference arg | |
253 The input crr file. | |
254 | |
255 --variantsA arg | |
256 The "A" input variant file. | |
257 | |
258 --variantsB arg | |
259 The "B" input variant file. | |
260 | |
261 --output-prefix arg | |
262 The path prefix for all output reports. | |
263 | |
264 --reports arg (=SuperlocusOutput,SuperlocusStats,LocusOutput,LocusStats) | |
265 Comma-separated list of reports to generate. (Beware any reports whose | |
266 name begins with "Debug".) A report is one of: | |
267 SuperlocusOutput Report for superlocus classification. | |
268 SuperlocusStats Report for superlocus classification stats. | |
269 LocusOutput Report for locus classification. | |
270 LocusStats Report for locus stats. | |
271 VariantOutput Both variant files annotated by comparison | |
272 results. If the somatic output report is | |
273 requested, file A is also annotated with the | |
274 same score ranks as produced in that report. | |
275 SomaticOutput Report for the list of simple variations that | |
276 are present only in file "A", annotated with | |
277 the score that indicates the probability of | |
278 the variation being truly somatic. Requires | |
279 beta, genome-rootA, and genome-rootB options | |
280 to be provided as well. Note: generating this | |
281 report slows calldiff by 10x-20x. | |
282 DebugCallOutput Report for call classification. | |
283 DebugSuperlocusOutput Report for debug superlocus information. | |
284 DebugSomaticOutput Report for distribution estimates used for | |
285 somatic rescoring. Only produced if | |
286 SomaticOutput is also turned on. | |
287 | |
288 --diploid | |
289 Uses varScoreEAF instead of varScoreVAF in somatic score computations. | |
290 Also, uses diploid variant model instead of variable allele mixture | |
291 model. | |
292 | |
293 --locus-stats-column-count arg (=15) | |
294 The number of columns for locus compare classification in the locus | |
295 stats file. | |
296 | |
297 --max-hypothesis-count arg (=32) | |
298 The maximum number of possible phasings to consider for a superlocus. | |
299 | |
300 --no-reference-cover-validation | |
301 Turns off validation that all bases of a chromosome are covered by | |
302 calls of the variant file. | |
303 | |
304 --genome-rootA arg | |
305 The "A" genome directory, for example /data/GS00118-DNA_A01; this | |
306 directory is expected to contain ASM/REF and ASM/EVIDENCE | |
307 subdirectories. | |
308 | |
309 --genome-rootB arg | |
310 The "B" genome directory. | |
311 | |
312 --calibration-root arg | |
313 The directory containing calibration data. For example, there should | |
314 exist a file calibration-root/0.0.0/metrics.tsv. | |
315 | |
316 --beta | |
317 This flag enables the SomaticOutput report, which is beta | |
318 functionality. | |
319 | |
320 SUPPORTED FORMAT_VERSION | |
321 0.3 or later | |
322 </help> | |
323 </tool> |