Mercurial > repos > saskia-hiltemann > cgatools_v17
diff tools/cgatools17/calldiff_v17.xml @ 1:3a2e0f376f26 draft
Minor change to tv2vcf.xml to allow for workflow automation
author | dgdekoning |
---|---|
date | Wed, 21 Oct 2015 10:09:15 -0400 |
parents | |
children |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for "cgatools calldiff" (cgatools 1.7.1).
  File: tools/cgatools17/calldiff_v17.xml, newly added in changeset 1:3a2e0f376f26.

  The tool compares two Complete Genomics variant files against a reference
  build and produces up to six tabular outputs, one or two per selected report.
-->
<tool id="cg_calldiff" name="CallDiff" version="1.7.1">

  <description>Compares two Complete Genomics variant files.</description>

  <requirements>
    <requirement type="package" version="1">cgatools17</requirement>
  </requirements>

  <!--
    Command notes:
    * The first call prints the first line of the bare cgatools output to the job log.
    * Each select parameter (validation, diploid, report1..report5) expands either to
      an empty string or to the literal flag / report name, so it is interpolated as-is.
    * The report names are joined by echo with spaces and then converted to a
      comma-separated list. The sed expression uses an explicit one-or-more
      quantifier; a zero-or-more quantifier would also match the empty string and
      insert a comma between every character.
    * File paths are single-quoted so that unusual characters in a path cannot
      split the argument.
    * NOTE(review): if every report is set to "no" the reports option receives an
      empty value; confirm how cgatools handles that.
  -->
  <command> <!--run executable-->
    cgatools | head -1;
    cgatools calldiff
    --beta
    --reference '${crr.fields.crr_path}'
    --variantsA '$inputA'
    --variantsB '$inputB'
    $validation
    $diploid
    --locus-stats-column-count $column
    --max-hypothesis-count $hypothesis
    --output-prefix cg_
    --reports `echo ${report1} ${report2} ${report3} ${report4} ${report5} | sed 's/ \{1,\}/,/g'`
  </command>

  <inputs>
    <!--form field to select crr file-->
    <param name="crr" type="select" label="Reference Build">
      <options from_data_table="cg_anno_files" />
    </param>

    <!-- input files -->
    <param name="inputA" type="data" format="cg_var,tabular" label="Var file A"></param>
    <param name="inputB" type="data" format="cg_var,tabular" label="Var file B"></param>

    <!-- reports: each select yields the report name when enabled, or an empty string -->
    <param name="report1" type="select" label="Create report SuperlocusOutput">
      <option value="">no</option>
      <option value="SuperlocusOutput">yes</option>
    </param>
    <param name="report2" type="select" label="Create report SuperlocusStats">
      <option value="">no</option>
      <option value="SuperlocusStats">yes</option>
    </param>
    <param name="report3" type="select" label="Create report LocusOutput">
      <option value="">no</option>
      <option value="LocusOutput">yes</option>
    </param>
    <param name="report4" type="select" label="Create report LocusStats">
      <option value="">no</option>
      <option value="LocusStats">yes</option>
    </param>
    <!-- "yes" is listed first here, so VariantOutput is the only report enabled by default -->
    <param name="report5" type="select" label="Create report VariantOutput" help="Both variant files annotated by comparison results. If the somatic output report is requested, file A is also annotated with the same score ranks as produced in that report.">
      <option value="VariantOutput">yes</option>
      <option value="">no</option>
    </param>

    <!-- parameters -->
    <param name="diploid" type="select" label="Use diploid variant model" help="Uses varScoreEAF instead of varScoreVAF in somatic score computations. Also, uses diploid variant model instead of variable allele mixture model.">
      <option value="">no</option>
      <option value="--diploid">yes</option>
    </param>

    <param name="column" type="integer" label="Number of columns for locus compare classification in the locus stats file (default 15)" value="15"/>

    <param name="hypothesis" type="integer" label="Maximum number of possible phasings to consider for a superlocus (default 32)" value="32"/>

    <param name="validation" type="select" label="Reference cover validation" help="Turns on/off validation that all bases of a chromosome are covered by calls of the variant file.">
      <option value="">on</option>
      <option value="--no-reference-cover-validation">off</option>
    </param>

    <!-- prefix for output file so you dont have to manually rename history items -->
    <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/>
  </inputs>

  <!--
    Outputs are collected from the working directory using the fixed "cg_" prefix
    passed to the output-prefix option. Each dataset is only created when the
    matching report was selected. VariantOutput yields two files (A and B).
  -->
  <outputs>
    <data format="tabular" name="output1" from_work_dir="cg_SuperlocusOutput.tsv" label="$fname ${tool.name} on ${on_string}: SuperlocusOutput">
      <filter>(report1 == 'SuperlocusOutput')</filter>
    </data>
    <data format="tabular" name="output2" from_work_dir="cg_SuperlocusStats.tsv" label="$fname ${tool.name} on ${on_string}: SuperlocusStats">
      <filter>(report2 == 'SuperlocusStats')</filter>
    </data>
    <data format="tabular" name="output3" from_work_dir="cg_LocusOutput.tsv" label="$fname ${tool.name} on ${on_string}: LocusOutput">
      <filter>(report3 == 'LocusOutput')</filter>
    </data>
    <data format="tabular" name="output4" from_work_dir="cg_LocusStats.tsv" label="$fname ${tool.name} on ${on_string}: LocusStats">
      <filter>(report4 == 'LocusStats')</filter>
    </data>
    <data format="tabular" name="output5a" from_work_dir="cg_VariantsA.tsv" label="$fname ${tool.name} on ${on_string}: VariantsA">
      <filter>(report5 == 'VariantOutput')</filter>
    </data>
    <data format="tabular" name="output5b" from_work_dir="cg_VariantsB.tsv" label="$fname ${tool.name} on ${on_string}: VariantsB">
      <filter>(report5 == 'VariantOutput')</filter>
    </data>
  </outputs>

  <!--
    Functional test: compares two chr22 variant files and checks the
    SuperlocusOutput report only. Both inputs must be set (A and B), and the
    integer parameters need real integer values.
  -->
  <tests>
    <test>
      <param name="inputA" value="HCC1187_T_chr22.tsv" />
      <param name="inputB" value="HCC1187_N_chr22.tsv" />
      <param name="crr" value="hg18" />
      <param name="report1" value="SuperlocusOutput" />
      <param name="report2" value="" />
      <param name="report3" value="" />
      <param name="report4" value="" />
      <param name="report5" value="" />
      <param name="diploid" value="" />
      <param name="column" value="15" />
      <param name="hypothesis" value="32" />
      <output name="output1" file="HCC1187_chr22_SuperLocusOutput.tsv" />
    </test>
  </tests>

  <help>
**What it does**

This tool compares two Complete Genomics variant files.

**cgatools 1.7.1 Documentation**

Userguide: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-user-guide.pdf

Release notes: http://cgatools.sourceforge.net/docs/1.7.1/cgatools-release-notes.pdf

**Command line reference**::

    COMMAND NAME
        calldiff - Compares two Complete Genomics variant files.

    DESCRIPTION
        Compares two Complete Genomics variant files. Divides the genome up into
        superloci of nearby variants, then compares the superloci. Also refines the
        comparison to determine per-call or per-locus comparison results.

        Comparison results are usually described by a semi-colon separated string,
        one per allele. Each allele's comparison result is one of the following
        classifications:

          ref-identical   The alleles of the two variant files are identical, and
                          they are consistent with the reference.
          alt-identical   The alleles of the two variant files are identical, and
                          they are inconsistent with the reference.
          ref-consistent  The alleles of the two variant files are consistent,
                          and they are consistent with the reference.
          alt-consistent  The alleles of the two variant files are consistent,
                          and they are inconsistent with the reference.
          onlyA           The alleles of the two variant files are inconsistent,
                          and only file A is inconsistent with the reference.
          onlyB           The alleles of the two variant files are inconsistent,
                          and only file B is inconsistent with the reference.
          mismatch        The alleles of the two variant files are inconsistent,
                          and they are both inconsistent with the reference.
          phase-mismatch  The two variant files would be consistent if the
                          hapLink field had been empty, but they are
                          inconsistent.
          ploidy-mismatch The superlocus did not have uniform ploidy.

        In some contexts, this classification is rolled up into a simplified
        classification, which is one of "identical", "consistent", "onlyA",
        "onlyB", or "mismatch".

        A good place to start looking at the results is the superlocus-output file.
        It has columns defined as follows:

          SuperlocusId   An identifier given to the superlocus.
          Chromosome     The name of the chromosome.
          Begin          The 0-based offset of the start of the superlocus.
          End            The 0-based offset of the base one past the end of the
                         superlocus.
          Classification The match classification of the superlocus.
          Reference      The reference sequence.
          AllelesA       A semicolon-separated list of the alleles (one per
                         haplotype) for variant file A, for the phasing with the
                         best comparison result.
          AllelesB       A semicolon-separated list of the alleles (one per
                         haplotype) for variant file B, for the phasing with the
                         best comparison result.

        The locus-output file contains, for each locus in file A and file B that is
        not consistent with the reference, an annotated set of calls for the locus.
        The calls are annotated with the following columns:

          SuperlocusId            The id of the superlocus containing the locus.
          File                    The variant file (A or B).
          LocusClassification     The locus classification is determined by the
                                  varType column of the call that is inconsistent
                                  with the reference, concatenated with a
                                  modifier that describes whether the locus is
                                  heterozygous, homozygous, or contains no-calls.
                                  If there is no one variant in the locus (i.e.,
                                  it is heterozygous alt-alt), the locus
                                  classification begins with "other".
          LocusDiffClassification The match classification for the locus. This is
                                  defined to be the best of the comparison of the
                                  locus to the same region in the other file, or
                                  the comparison of the superlocus.

        The somatic output file contains a list of putative somatic variations of
        genome A. The output includes only those loci that can be classified as
        snp, del, ins or sub in file A, and are called reference in the file B.
        Every locus is annotated with the following columns:

          VarCvgA                 The totalReadCount from file A for this locus
                                  (computed on the fly if file A is not a
                                  masterVar file).
          VarScoreA               The varScoreVAF from file A, or varScoreEAF if
                                  the "--diploid" option is used.
          RefCvgB                 The maximum of the uniqueSequenceCoverage
                                  values for the locus in genome B.
          RefScoreB               Minimum of the reference scores of the locus in
                                  genome B.
          SomaticCategory         The category used for determining the
                                  calibrated scores and the SomaticRank.
          VarScoreACalib          The calibrated variant score of file A, under
                                  the model selected by using or not using the
                                  "--diploid" option, and corrected for the count
                                  of heterozygous variants observed in this
                                  genome. See user guide for more information.
          VarScoreBCalib          The calibrated reference score of file B, under
                                  the model selected by using or not using the
                                  "--diploid" option, and corrected for the count
                                  of heterozygous variants observed in this
                                  genome. See user guide for more information.
          SomaticRank             The estimated rank of this somatic mutation,
                                  amongst all true somatic mutations within this
                                  SomaticCategory. The value is a number between
                                  0 and 1; a value of 0.012 means, for example,
                                  that an estimated 1.2% of the true somatic
                                  mutations in this somaticCategory have a
                                  somaticScore less than the somaticScore for
                                  this mutation. See user guide for more
                                  information.
          SomaticScore            An integer that provides a total order on
                                  quality for all somatic mutations. It is equal
                                  to -10*log10( P(false)/P(true) ), under the
                                  assumption that this genome has a rate of
                                  somatic mutation equal to 1/Mb for
                                  SomaticCategory snp, 1/10Mb for SomaticCategory
                                  ins, 1/10Mb for SomaticCategory del, and 1/20Mb
                                  for SomaticCategory sub. The computation is
                                  based on the assumptions described in the user
                                  guide, and is affected by choice of variant
                                  model selected by using or not using the
                                  "--diploid" option.
          SomaticQuality          Equal to VQHIGH for all somatic mutations where
                                  SomaticScore >= -10. Otherwise, this column is
                                  empty.

    OPTIONS
      -h [ --help ]
          Print this help message.

      --reference arg
          The input crr file.

      --variantsA arg
          The "A" input variant file.

      --variantsB arg
          The "B" input variant file.

      --output-prefix arg
          The path prefix for all output reports.

      --reports arg (=SuperlocusOutput,SuperlocusStats,LocusOutput,LocusStats)
          Comma-separated list of reports to generate. (Beware any reports whose
          name begins with "Debug".) A report is one of:
            SuperlocusOutput      Report for superlocus classification.
            SuperlocusStats       Report for superlocus classification stats.
            LocusOutput           Report for locus classification.
            LocusStats            Report for locus stats.
            VariantOutput         Both variant files annotated by comparison
                                  results. If the somatic output report is
                                  requested, file A is also annotated with the
                                  same score ranks as produced in that report.
            SomaticOutput         Report for the list of simple variations that
                                  are present only in file "A", annotated with
                                  the score that indicates the probability of
                                  the variation being truly somatic. Requires
                                  beta, genome-rootA, and genome-rootB options
                                  to be provided as well. Note: generating this
                                  report slows calldiff by 10x-20x.
            DebugCallOutput       Report for call classification.
            DebugSuperlocusOutput Report for debug superlocus information.
            DebugSomaticOutput    Report for distribution estimates used for
                                  somatic rescoring. Only produced if
                                  SomaticOutput is also turned on.

      --diploid
          Uses varScoreEAF instead of varScoreVAF in somatic score computations.
          Also, uses diploid variant model instead of variable allele mixture
          model.

      --locus-stats-column-count arg (=15)
          The number of columns for locus compare classification in the locus
          stats file.

      --max-hypothesis-count arg (=32)
          The maximum number of possible phasings to consider for a superlocus.

      --no-reference-cover-validation
          Turns off validation that all bases of a chromosome are covered by
          calls of the variant file.

      --genome-rootA arg
          The "A" genome directory, for example /data/GS00118-DNA_A01; this
          directory is expected to contain ASM/REF and ASM/EVIDENCE
          subdirectories.

      --genome-rootB arg
          The "B" genome directory.

      --calibration-root arg
          The directory containing calibration data. For example, there should
          exist a file calibration-root/0.0.0/metrics.tsv.

      --beta
          This flag enables the SomaticOutput report, which is beta
          functionality.

    SUPPORTED FORMAT_VERSION
        0.3 or later
  </help>
</tool>