changeset 5:f55d45b0c6d1 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 86427647db100383faa432008b58e768b56ac416"
author artbio
date Tue, 09 Jun 2020 06:23:39 -0400 (2020-06-09)
parents d09254e37c68
children cb5691381acb
files customConfigManta.py customized.ini manta.xml manta_macros.xml test-data/candidateSV.vcf.gz test-data/candidateSmallIndels.vcf.gz test-data/somaticSV.vcf.gz
diffstat 7 files changed, 137 insertions(+), 125 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/customConfigManta.py	Tue Jun 09 06:23:39 2020 -0400
@@ -0,0 +1,80 @@
+import argparse
+
+
+def Parser():
+    """Parse Manta configuration options from the command line."""
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument(
+        '--minCandidateVariantSize', type=int, default=8,
+        help="Run Manta reporting for all SVs/indels at or above this size")
+    the_parser.add_argument(
+        '--rnaMinCandidateVariantSize', type=int, default=1000,
+        help="Separate option (to provide different default) used for \
+              runs in RNA-mode")
+    the_parser.add_argument(
+        '--minEdgeObservations', type=int, default=3,
+        help="Remove all edges from the graph unless they're supported \
+              by this many 'observations'")
+    the_parser.add_argument(
+        '--graphNodeMaxEdgeCount', type=int, default=10,
+        help="If both nodes of an edge have an edge count higher than this, \
+              then skip evaluation of the edge")
+    the_parser.add_argument(
+        '--minCandidateSpanningCount', type=int, default=3,
+        help="Run discovery and candidate reporting for all SVs/indels with \
+              at least this many spanning support observations")
+    the_parser.add_argument(
+        '--minScoredVariantSize', type=int, default=50,
+        help="After candidate identification, only score and report \
+              SVs/indels at or above this size")
+    the_parser.add_argument(
+        '--minDiploidVariantScore', type=int, default=10,
+        help="minimum VCF QUAL score for a variant to be included in \
+              the diploid vcf")
+    the_parser.add_argument(
+        '--minPassDiploidVariantScore', type=int, default=20,
+        help="VCF QUAL score below which a variant is marked as \
+              filtered in the diploid vcf")
+    the_parser.add_argument(
+        '--minPassDiploidGTScore', type=int, default=15,
+        help="minimum genotype quality score below which single samples \
+              are filtered for a variant in the diploid vcf")
+    the_parser.add_argument(
+        '--minSomaticScore', type=int, default=10,
+        help="somatic quality scores below this level are not included \
+              in the somatic vcf")
+    the_parser.add_argument(
+        '--minPassSomaticScore', type=int, default=30,
+        help="somatic quality scores below this level are filtered in the \
+              somatic vcf")
+    the_parser.add_argument(
+        '--enableRemoteReadRetrievalForInsertionsInGermlineCallingModes',
+        type=int, default=1,
+        help="GermlineCallingModes includes all other calling modes")
+    the_parser.add_argument(
+        '--enableRemoteReadRetrievalForInsertionsInCancerCallingModes',
+        type=int, default=0,
+        help="includes tumor-normal subtraction and tumor-only calling")
+    the_parser.add_argument(
+        '--useOverlapPairEvidence', type=int, default=0,
+        help="Set to 1 if an overlapping read pair will be considered as \
+              evidence. Set to 0 to skip overlapping read pairs")
+    return the_parser.parse_args()
+
+
+if __name__ == "__main__":
+    # Build a Manta ini file from the command line arguments and write it
+    # to configManta.py.ini in the current working directory
+    args = Parser()
+    # argument names and values as a dictionary
+    argsDict = vars(args)
+    # hard-coded first ini lines: section header and dummy reference path
+    ini_lines = ['[manta]', 'referenceFasta = /dummy/path/to/genome.fa']
+    # one "name = value" ini line per command line argument
+    for argument in argsDict:
+        ini_lines.append("%s = %s" % (argument, str(argsDict[argument])))
+    # the with-statement closes (and thereby flushes) the file
+    # deterministically instead of relying on interpreter exit
+    with open('configManta.py.ini', 'w') as handler:
+        for line in ini_lines:
+            handler.write("%s\n" % line)
--- a/customized.ini	Mon Jun 08 03:11:56 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-
-#
-# This section contains all configuration settings for the top-level manta workflow,
-#
-[manta]
-
-referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
-
-# Run discovery and candidate reporting for all SVs/indels at or above this size
-# Separate option (to provide different default) used for runs in RNA-mode
-minCandidateVariantSize = 8
-rnaMinCandidateVariantSize = 1000
-
-# Remove all edges from the graph unless they're supported by this many 'observations'.
-# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
-minEdgeObservations = 3
-
-# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
-# Set to 0 to turn this filtration off
-graphNodeMaxEdgeCount = 10
-
-# Run discovery and candidate reporting for all SVs/indels with at least this
-# many spanning support observations
-minCandidateSpanningCount = 3
-
-# After candidate identification, only score and report SVs/indels at or above this size:
-minScoredVariantSize = 50
-
-# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
-minDiploidVariantScore = 10
-
-# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
-minPassDiploidVariantScore = 20
-
-# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
-minPassDiploidGTScore = 15
-
-# somatic quality scores below this level are not included in the somatic vcf:
-minSomaticScore = 10
-
-# somatic quality scores below this level are filtered in the somatic vcf:
-minPassSomaticScore = 30
-
-# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote
-# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
-# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime
-# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
-# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.
-# This feature can be enabled/disabled separately for germline and cancer calling below.
-#
-# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
-# all other calling modes.
-enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
-enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
-
-# Set if an overlapping read pair will be considered as evidence
-# Set to 0 to skip overlapping read pairs
-useOverlapPairEvidence = 0
--- a/manta.xml	Mon Jun 08 03:11:56 2020 -0400
+++ b/manta.xml	Tue Jun 09 06:23:39 2020 -0400
@@ -24,26 +24,27 @@
     ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
     #end if
 
-    #if str( $set_configuration.set_configuration_switch ) == "Customized":
-        sed -i 's/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/' ./configManta.py.ini &&
-        sed -i 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/' ./configManta.py.ini &&
-        sed -i 's/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/' ./configManta.py.ini &&
-        sed -i 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/' ./configManta.py.ini &&
-        sed -i 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/' ./configManta.py.ini &&
-        sed -i 's/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/' ./configManta.py.ini &&
-        sed -i 's/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/' ./configManta.py.ini &&
-        sed -i 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/' ./configManta.py.ini &&
-        sed -i 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/' ./configManta.py.ini &&
-        sed -i 's/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/' ./configManta.py.ini &&
-        sed -i 's/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/' ./configManta.py.ini &&
-        sed -i 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/' ./configManta.py.ini &&
-        sed -i 's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/' ./configManta.py.ini &&
-        sed -i 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/' ./configManta.py.ini &&
-    #end if
-
     #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":
         cp '$set_configuration.CustomConfigFile' ./configManta.py.ini &&
     #end if
+    #if str( $set_configuration.set_configuration_switch ) == "Customized":
+        rm ./configManta.py.ini &&
+        python '$__tool_directory__/customConfigManta.py'
+        --minCandidateVariantSize '$set_configuration.minCandidateVariantSize'
+        --rnaMinCandidateVariantSize '$set_configuration.rnaMinCandidateVariantSize'
+        --minEdgeObservations '$set_configuration.minEdgeObservations'
+        --graphNodeMaxEdgeCount '$set_configuration.graphNodeMaxEdgeCount'
+        --minCandidateSpanningCount '$set_configuration.minCandidateSpanningCount'
+        --minScoredVariantSize '$set_configuration.minScoredVariantSize'
+        --minDiploidVariantScore '$set_configuration.minDiploidVariantScore'
+        --minPassDiploidVariantScore '$set_configuration.minPassDiploidVariantScore'
+        --minPassDiploidGTScore '$set_configuration.minPassDiploidGTScore'
+        --minSomaticScore '$set_configuration.minSomaticScore'
+        --minPassSomaticScore '$set_configuration.minPassSomaticScore'
+        --enableRemoteReadRetrievalForInsertionsInGermlineCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes'
+        --enableRemoteReadRetrievalForInsertionsInCancerCallingModes '$set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes'
+        --useOverlapPairEvidence '$set_configuration.useOverlapPairEvidence' &&
+    #end if
     
     configManta.py --referenceFasta='${reference_fasta_filename}'
                    --config='./configManta.py.ini'
@@ -53,51 +54,39 @@
                        --bam='normal.bam'
                        --tumorBam='tumor.bam'
                    #end if
-                  --runDir='${run_dir}'
-                  --scanSizeMb=${advanced.scanSizeMb}
-                  --callMemMb=${advanced.callMemMb} &&
+                      --runDir='${run_dir}'
+                      --scanSizeMb=${advanced.scanSizeMb}
+                      --callMemMb=${advanced.callMemMb} &&
 
-    ln -s -f '${run_dir}/runWorkflow.py' '${run_manta_workflow}' &&
-    ln -s -f './configManta.py.ini' '${set_conf_file}' &&
     python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-4}
 
     ]]></command>
 
     <inputs>
         <expand macro="reference_source_conditional" />
-
         <conditional name="bam_input">
-            <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files">
+            <param name="bam_input_selector" type="select" label="Single 'normal' or 'normal vs tumor' analysis" help="Select between a single normal BAM file or a pair of normal/tumor BAM files">
                 <option value="not_tumor_bam">Normal</option>
                 <option value="tumor_bam">Normal + Tumor</option>
             </param>
-
             <when value="not_tumor_bam">
                 <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
             </when>
-
             <when value='tumor_bam'>
                 <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
                 <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." />
             </when>
         </conditional>
-
         <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional parameters" >
             <option value="exome">Set options for WES input: turn off depth filters</option>
             <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
             <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
         </param>
-
         <section name="advanced" title="Advanced options" expanded="false">
-
             <param name="callMemMb" type="integer" value="8000" label="Set default task memory requirements" help="The maximum memory size to assign to tasks" />
             <param name="scanSizeMb" type="integer" value="12" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" />
             <!-- <param name="generateEvidenceBam" type="boolean" checked="False" truevalue="-s" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/> -->
-
         </section>
-
-        <!-- <expand macro="manta_configuration"/> -->
-
         <conditional name="set_configuration">
             <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
                 <option value="Default_config_file">Default Manta Configuration File</option>    
@@ -126,42 +115,32 @@
                 <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
             </when>
         </conditional>
-
-        <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="False" help="Show run_workflow file on history"/>
         <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/>
-        <param name="candidateSV_check" type="boolean" label="Unscored candidate SV and indels" checked="False"
-               help="Show unfiltered structural variants"/>
-        <param name="candidateSmallIndels_check" type="boolean" label="all snvs" checked="False"
-               help="Subset of the Unscored candidate SV and indels, containing only simple insertion and deletion variants"/>
-        <param name="diploidSV_check" type="boolean" label="filtered variants in diploid model" checked="False"
+        <param name="candidateSV_check" type="boolean" label="Unfiltered structural variants" checked="False"
+               help="All unscored structural variant candidates"/>
+        <param name="candidateSmallIndels_check" type="boolean" label="Unfiltered small indel candidates" checked="False"
+               help="Subset of the unscored candidates, containing only small indel variants"/>
+        <param name="diploidSV_check" type="boolean" label="Score-filtered variants in diploid model" checked="False"
                help="Show filtered variants in a diploid (only normal) model. In the case of a tumor/normal subtraction, the scores in this file *do not*
                      reflect any information from the tumor sample" />
-        <param name="somaticSV_check" type="boolean" label="SVs and indels scored under a somatic variant model" checked="False"
-               help="This file will only be produced if a tumor sample alignment file is supplied during configuration"/>
     </inputs>
-
     <outputs>
-        <data format="txt" name="run_manta_workflow" label="Parameters for running Manta">
-            <filter>runworkflow_file_check == True</filter>
-        </data>
-
-        <data format="tabular" name="set_conf_file" label="conf_file.ini">
+        <data format="tabular" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini">
             <filter>config_file_check == True</filter>
         </data>
-        <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered SVs" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
+        <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered variants" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
             <filter>candidateSV_check == True</filter>
         </data>
-        <data format="vcf_bgzip" name="candidateSmallIndels" label="Manta unfiltered Small Indels" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
+        <data format="vcf_bgzip" name="candidateSmallIndels" label="Manta unfiltered indels" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
             <filter>candidateSmallIndels_check == True</filter>
         </data>
-        <data format="vcf_bgzip" name="diploidSV" label="Manta SVs (diploid model)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
+        <data format="vcf_bgzip" name="diploidSV" label="Score-filtered Variants (diploid model)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
             <filter>diploidSV_check == True</filter>
         </data>
-        <data format="vcf_bgzip" name="somaticSV" label="Manta SVs (somatic model)" from_work_dir="MantaWorkflow/results/variants/somaticSV.vcf.gz">
-            <filter>somaticSV_check == True</filter>
+        <data format="vcf_bgzip" name="somaticSV" label="Score-filtered Variants (somatic model)" from_work_dir="MantaWorkflow/results/variants/somaticSV.vcf.gz">
+            <filter>bam_input['bam_input_selector'] == 'tumor_bam'</filter>
         </data>
     </outputs>
-
     <tests>
         <test>
                 <param name="reference_source_selector" value="cached"/>
@@ -172,11 +151,22 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSmallIndels_check" value="True"/>
-                <param name="somaticSV_check" value="True"/>
-                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
-                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="4"/>
+                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
+                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
-        <test>
+         <test>
+                <param name="reference_source_selector" value="cached"/>
+                <param name="index" value="hg19"/>
+                <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
+                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
+                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
+                <param name="set_configuration_switch" value="Customized"/>
+                <param name="callMemMb" value="1000"/>
+                <param name="candidateSmallIndels_check" value="True"/>
+                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
+                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
+        </test>
+       <test>
                 <param name="reference_source_selector" value="cached"/>
                 <param name="index" value="hg19"/>
                 <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
@@ -185,7 +175,8 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSmallIndels_check" value="True"/>
-                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
+                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
+                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
         <test>
                 <param name="reference_source_selector" value="history"/>
@@ -196,7 +187,8 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSV_check" value="True"/>
-                <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/>
+                <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="6"/>
+                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
         <test>
                 <param name="reference_source_selector" value="history"/>
@@ -207,10 +199,10 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSmallIndels_check" value="True"/>
-                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
+                <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
+                <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
- </tests>
-
+    </tests>
     <help><![CDATA[
 **Outputs**
   The primary Manta outputs are a set of VCF 4.1 files. Currently there are 3 VCF files
@@ -333,10 +325,8 @@
 
     For further info see: https://github.com/Illumina/manta
 
-    ]]></help>
-
+  ]]></help>
     <citations>
         <citation type="doi">10.1093/bioinformatics/btv710</citation>
     </citations>
-
 </tool>
--- a/manta_macros.xml	Mon Jun 08 03:11:56 2020 -0400
+++ b/manta_macros.xml	Tue Jun 09 06:23:39 2020 -0400
@@ -1,7 +1,7 @@
 <macros>
 
     <token name="@VERSION@">1.6</token>
-    <token name="@WRAPPER_VERSION@">@VERSION@+galaxy6</token>
+    <token name="@WRAPPER_VERSION@">@VERSION@+galaxy7</token>
     <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
 
     <token name="@set_reference_fasta_filename@"><![CDATA[
Binary file test-data/candidateSV.vcf.gz has changed
Binary file test-data/candidateSmallIndels.vcf.gz has changed
Binary file test-data/somaticSV.vcf.gz has changed