Mercurial > repos > artbio > manta
changeset 5:f55d45b0c6d1 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 86427647db100383faa432008b58e768b56ac416"
| author | artbio | 
|---|---|
| date | Tue, 09 Jun 2020 06:23:39 -0400 | 
| parents | d09254e37c68 | 
| children | cb5691381acb | 
| files | customConfigManta.py customized.ini manta.xml manta_macros.xml test-data/candidateSV.vcf.gz test-data/candidateSmallIndels.vcf.gz test-data/somaticSV.vcf.gz | 
| diffstat | 7 files changed, 137 insertions(+), 125 deletions(-) [+] | 
line wrap: on
 line diff
"""Write a Manta ``configManta.py.ini`` file from command-line options.

customConfigManta.py: each command-line option corresponds to one key of
the ``[manta]`` section of the generated ini file.  The code is kept
compatible with both Python 2 and Python 3.
"""
import argparse


# One (option name, default value, help text) entry per ini key.
# Help texts are built with implicit string concatenation rather than
# backslash continuations so that no source indentation leaks into them.
_OPTIONS = (
    ('minCandidateVariantSize', 8,
     "Run Manta reporting for all SVs/indels at or above this size"),
    ('rnaMinCandidateVariantSize', 1000,
     "Separate option (to provide different default) used for "
     "runs in RNA-mode"),
    ('minEdgeObservations', 3,
     "Remove all edges from the graph unless they're supported "
     "by this many 'observations'"),
    ('graphNodeMaxEdgeCount', 10,
     "If both nodes of an edge have an edge count higher than this, "
     "then skip evaluation of the edge"),
    ('minCandidateSpanningCount', 3,
     "Run discovery and candidate reporting for all SVs/indels with "
     "at least this many spanning support observations"),
    ('minScoredVariantSize', 50,
     "After candidate identification, only score and report "
     "SVs/indels at or above this size"),
    ('minDiploidVariantScore', 10,
     "minimum VCF QUAL score for a variant to be included in "
     "the diploid vcf"),
    ('minPassDiploidVariantScore', 20,
     "VCF QUAL score below which a variant is marked as "
     "filtered in the diploid vcf"),
    ('minPassDiploidGTScore', 15,
     "minimum genotype quality score below which single samples "
     "are filtered for a variant in the diploid vcf"),
    # The original help text was copy-pasted from the diploid option.
    ('minSomaticScore', 10,
     "somatic quality scores below this level are not included in the "
     "somatic vcf"),
    ('minPassSomaticScore', 30,
     "somatic quality scores below this level are filtered in the "
     "somatic vcf"),
    # The help texts of the next two options were swapped in the original.
    ('enableRemoteReadRetrievalForInsertionsInGermlineCallingModes', 1,
     "Set to 1 to enable remote read retrieval for insertions in "
     "germline calling modes, 0 to disable. GermlineCallingModes "
     "includes all calling modes other than tumor-normal subtraction "
     "and tumor-only calling"),
    ('enableRemoteReadRetrievalForInsertionsInCancerCallingModes', 0,
     "Set to 1 to enable remote read retrieval for insertions in "
     "cancer calling modes, 0 to disable. CancerCallingModes includes "
     "tumor-normal subtraction and tumor-only calling"),
    ('useOverlapPairEvidence', 0,
     "Set 1 if an overlapping read pair will be considered as "
     "evidence. Set to 0 to skip overlapping read pairs"),
)


def Parser(argv=None):
    """Parse the Manta configuration options.

    :param argv: optional list of argument strings; when ``None`` (the
        default) argparse reads ``sys.argv[1:]``, as the original did.
    :returns: an ``argparse.Namespace`` holding one integer attribute per
        option declared in ``_OPTIONS``.
    """
    parser = argparse.ArgumentParser()
    for name, default, help_text in _OPTIONS:
        parser.add_argument('--' + name, type=int, default=default,
                            help=help_text)
    return parser.parse_args(argv)


def _build_ini_lines(args_dict):
    """Return the ini file content as a list of lines (no newlines)."""
    # Hard-coded header lines.  The reference path is only a placeholder;
    # NOTE(review): the Galaxy wrapper also passes --referenceFasta to
    # configManta.py, which presumably takes precedence - confirm.
    lines = ['[manta]', 'referenceFasta = /dummy/path/to/genome.fa']
    # One "key = value" line per parsed argument.
    lines.extend("%s = %s" % (name, str(args_dict[name]))
                 for name in args_dict)
    return lines


def main(argv=None, ini_path='configManta.py.ini'):
    """Parse the options and write them to *ini_path*.

    :param argv: argument list forwarded to :func:`Parser`.
    :param ini_path: destination file, overwritten if it exists.
    """
    args = Parser(argv)
    ini_lines = _build_ini_lines(vars(args))
    # The context manager guarantees the file is flushed and closed,
    # which the original bare open() never did explicitly.
    with open(ini_path, 'w') as handler:
        handler.writelines("%s\n" % line for line in ini_lines)


if __name__ == "__main__":
    main()
--- a/customized.ini Mon Jun 08 03:11:56 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ - -# -# This section contains all configuration settings for the top-level manta workflow, -# -[manta] - -referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa - -# Run discovery and candidate reporting for all SVs/indels at or above this size -# Separate option (to provide different default) used for runs in RNA-mode -minCandidateVariantSize = 8 -rnaMinCandidateVariantSize = 1000 - -# Remove all edges from the graph unless they're supported by this many 'observations'. -# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted. -minEdgeObservations = 3 - -# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge. -# Set to 0 to turn this filtration off -graphNodeMaxEdgeCount = 10 - -# Run discovery and candidate reporting for all SVs/indels with at least this -# many spanning support observations -minCandidateSpanningCount = 3 - -# After candidate identification, only score and report SVs/indels at or above this size: -minScoredVariantSize = 50 - -# minimum VCF "QUAL" score for a variant to be included in the diploid vcf: -minDiploidVariantScore = 10 - -# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf: -minPassDiploidVariantScore = 20 - -# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf: -minPassDiploidGTScore = 15 - -# somatic quality scores below this level are not included in the somatic vcf: -minSomaticScore = 10 - -# somatic quality scores below this level are filtered in the somatic vcf: -minPassSomaticScore = 30 - -# Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote -# locations with poor mapping quality, which pair to confidently 
mapping reads near the insertion locus. These reads -# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime -# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read -# retrieval process can be unpredictable. For this reason the feature is disabled by default for somatic variant calling. -# This feature can be enabled/disabled separately for germline and cancer calling below. -# -# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes -# all other calling modes. -enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 -enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 - -# Set if an overlapping read pair will be considered as evidence -# Set to 0 to skip overlapping read pairs -useOverlapPairEvidence = 0
--- a/manta.xml Mon Jun 08 03:11:56 2020 -0400 +++ b/manta.xml Tue Jun 09 06:23:39 2020 -0400 @@ -24,26 +24,27 @@ ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai && #end if - #if str( $set_configuration.set_configuration_switch ) == "Customized": - sed -i 's/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/' ./configManta.py.ini && - sed -i 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/' ./configManta.py.ini && - sed -i 's/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/' ./configManta.py.ini && - sed -i 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/' ./configManta.py.ini && - sed -i 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/' ./configManta.py.ini && - sed -i 's/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/' ./configManta.py.ini && - sed -i 's/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/' ./configManta.py.ini && - sed -i 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/' ./configManta.py.ini && - sed -i 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/' ./configManta.py.ini && - sed -i 's/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/' ./configManta.py.ini && - sed -i 's/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/' ./configManta.py.ini && - sed -i 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/' ./configManta.py.ini && - sed -i 
's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/' ./configManta.py.ini && - sed -i 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/' ./configManta.py.ini && - #end if - #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file": cp '$set_configuration.CustomConfigFile' ./configManta.py.ini && #end if + #if str( $set_configuration.set_configuration_switch ) == "Customized": + rm ./configManta.py.ini && + python $__tool_directory__/customConfigManta.py + --minCandidateVariantSize '$set_configuration.minCandidateVariantSize' + --rnaMinCandidateVariantSize '$set_configuration.rnaMinCandidateVariantSize' + --minEdgeObservations '$set_configuration.minEdgeObservations' + --graphNodeMaxEdgeCount '$set_configuration.graphNodeMaxEdgeCount' + --minCandidateSpanningCount '$set_configuration.minCandidateSpanningCount' + --minScoredVariantSize '$set_configuration.minScoredVariantSize' + --minDiploidVariantScore '$set_configuration.minDiploidVariantScore' + --minPassDiploidVariantScore '$set_configuration.minPassDiploidVariantScore' + --minPassDiploidGTScore '$set_configuration.minPassDiploidGTScore' + --minSomaticScore '$set_configuration.minSomaticScore' + --minPassSomaticScore '$set_configuration.minPassSomaticScore' + --enableRemoteReadRetrievalForInsertionsInGermlineCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes' + --enableRemoteReadRetrievalForInsertionsInCancerCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes' + --useOverlapPairEvidence '$set_configuration.useOverlapPairEvidence' && + #end if configManta.py --referenceFasta='${reference_fasta_filename}' --config='./configManta.py.ini' @@ -53,51 +54,39 @@ --bam='normal.bam' --tumorBam='tumor.bam' #end if - 
--runDir='${run_dir}' - --scanSizeMb=${advanced.scanSizeMb} - --callMemMb=${advanced.callMemMb} && + --runDir='${run_dir}' + --scanSizeMb=${advanced.scanSizeMb} + --callMemMb=${advanced.callMemMb} && - ln -s -f '${run_dir}/runWorkflow.py' '${run_manta_workflow}' && - ln -s -f './configManta.py.ini' '${set_conf_file}' && python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-4} ]]></command> <inputs> <expand macro="reference_source_conditional" /> - <conditional name="bam_input"> - <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files"> + <param name="bam_input_selector" type="select" label="Single 'normal' or 'normal vs tumor' analysis" help="Select between a single normal BAM file or a pair of normal/tumor BAM files"> <option value="not_tumor_bam">Normal</option> <option value="tumor_bam">Normal + Tumor</option> </param> - <when value="not_tumor_bam"> <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." /> </when> - <when value='tumor_bam'> <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." /> <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." /> </when> </conditional> - <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional parameters" > <option value="exome">Set options for WES input: turn off depth filters</option> <option value="rna">Set options for RNA-Seq input. 
Must specify exactly one bam input file</option> <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option> </param> - <section name="advanced" title="Advanced options" expanded="false"> - <param name="callMemMb" type="integer" value="8000" label="Set default task memory requirements" help="The maximum memory size to assign to tasks" /> <param name="scanSizeMb" type="integer" value="12" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" /> <!-- <param name="generateEvidenceBam" type="boolean" checked="False" truevalue="-s" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/> --> - </section> - - <!-- <expand macro="manta_configuration"/> --> - <conditional name="set_configuration"> <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?"> <option value="Default_config_file">Default Manta Configuration File</option> @@ -126,42 +115,32 @@ <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. 
Set this value <= 0 to skip overlapping read pairs."/> </when> </conditional> - - <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="False" help="Show run_workflow file on history"/> <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/> - <param name="candidateSV_check" type="boolean" label="Unscored candidate SV and indels" checked="False" - help="Show unfiltered structural variants"/> - <param name="candidateSmallIndels_check" type="boolean" label="all snvs" checked="False" - help="Subset of the Unscored candidate SV and indels, containing only simple insertion and deletion variants"/> - <param name="diploidSV_check" type="boolean" label="filtered variants in diploid model" checked="False" + <param name="candidateSV_check" type="boolean" label="Unfiltered structural variants" checked="False" + help="All unscored structural variant candidates"/> + <param name="candidateSmallIndels_check" type="boolean" label="Unfiltered small indel candidates" checked="False" + help="Subset of the unscored candidates, containing only small indel variants"/> + <param name="diploidSV_check" type="boolean" label="Score-filtered variants in diploid model" checked="False" help="Show filtered variants in a diploid (only normal) model. 
In the case of a tumor/normal subtraction, the scores in this file *do not* reflect any information from the tumor sample" /> - <param name="somaticSV_check" type="boolean" label="SVs and indels scored under a somatic variant model" checked="False" - help="This file will only be produced if a tumor sample alignment file is supplied during configuration"/> </inputs> - <outputs> - <data format="txt" name="run_manta_workflow" label="Parameters for running Manta"> - <filter>runworkflow_file_check == True</filter> - </data> - - <data format="tabular" name="set_conf_file" label="conf_file.ini"> + <data format="tabular" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini"> <filter>config_file_check == True</filter> </data> - <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered SVs" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz"> + <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered variants" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz"> <filter>candidateSV_check == True</filter> </data> - <data format="vcf_bgzip" name="candidateSmallIndels" label="Manta unfiltered Small Indels" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz"> + <data format="vcf_bgzip" name="candidateSmallIndels" label="Manta unfiltered indels" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz"> <filter>candidateSmallIndels_check == True</filter> </data> - <data format="vcf_bgzip" name="diploidSV" label="Manta SVs (diploid model)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz"> + <data format="vcf_bgzip" name="diploidSV" label="Score-filtered Variants (diploid model)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz"> <filter>diploidSV_check == True</filter> </data> - <data format="vcf_bgzip" name="somaticSV" label="Manta SVs (somatic model)" from_work_dir="MantaWorkflow/results/variants/somaticSV.vcf.gz"> - <filter>somaticSV_check 
== True</filter> + <data format="vcf_bgzip" name="somaticSV" label="Score-filtered Variants (somatic model)" from_work_dir="MantaWorkflow/results/variants/somaticSV.vcf.gz"> + <filter>bam_input['bam_input_selector'] == 'tumor_bam'</filter> </data> </outputs> - <tests> <test> <param name="reference_source_selector" value="cached"/> @@ -172,11 +151,22 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> - <param name="somaticSV_check" value="True"/> - <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/> - <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="4"/> + <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> + <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - <test> + <test> + <param name="reference_source_selector" value="cached"/> + <param name="index" value="hg19"/> + <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/> + <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/> + <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/> + <param name="set_configuration_switch" value="Customized"/> + <param name="callMemMb" value="1000"/> + <param name="candidateSmallIndels_check" value="True"/> + <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> + <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> + </test> + <test> <param name="reference_source_selector" value="cached"/> <param name="index" value="hg19"/> <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/> @@ -185,7 +175,8 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" 
value="True"/> - <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/> + <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> + <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> <test> <param name="reference_source_selector" value="history"/> @@ -196,7 +187,8 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSV_check" value="True"/> - <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/> + <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="6"/> + <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> <test> <param name="reference_source_selector" value="history"/> @@ -207,10 +199,10 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> - <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/> + <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> + <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - </tests> - + </tests> <help><![CDATA[ **Outputs** The primary Manta outputs are a set of VCF 4.1 files. Currently there are 3 VCF files @@ -333,10 +325,8 @@ For further info see: https://github.com/Illumina/manta - ]]></help> - + ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btv710</citation> </citations> - </tool>
--- a/manta_macros.xml Mon Jun 08 03:11:56 2020 -0400 +++ b/manta_macros.xml Tue Jun 09 06:23:39 2020 -0400 @@ -1,7 +1,7 @@ <macros> <token name="@VERSION@">1.6</token> - <token name="@WRAPPER_VERSION@">@VERSION@+galaxy6</token> + <token name="@WRAPPER_VERSION@">@VERSION@+galaxy7</token> <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token> <token name="@set_reference_fasta_filename@"><