diff manta.xml @ 0:42ba283a0fe2 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit e6c5d87dcd848fc4910af968e73adc481c811d15"
author artbio
date Wed, 13 May 2020 15:15:07 -0400
parents
children d648e40c6da9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/manta.xml	Wed May 13 15:15:07 2020 -0400
@@ -0,0 +1,324 @@
+<tool id="manta" name="Manta" version="@WRAPPER_VERSION@">
+
+    <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>
+
+    <macros>
+        <import>manta_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+
+    <command detect_errors="exit_code"><![CDATA[
+    @VERSION@
+    @pipefail@
+    @set_reference_fasta_filename@
+    ## Job-local paths: the patched ini is written to the job working directory, never to the (possibly read-only, shared) tool directory.
+    #import os
+    #import random
+    #set job_dir = os.getcwd()
+    #set run_dir = job_dir + '/MantaWorkflow_' + str(random.randint(1, 100000))
+    #set config_file = $__tool_directory__ + '/configManta.py.ini'
+    #set config_file_custom = job_dir + '/customized.ini'
+    ## Stage the BAMs and their indexes under fixed names; the tumor BAM is only linked in the tumor_bam branch.
+    ln -s '$bam_input.normal_bam_file' normal.bam &&
+    ln -s '$bam_input.normal_bam_file.metadata.bam_index' normal.bai &&
+    #if str( $bam_input.bam_input_selector ) == "tumor_bam":
+    ln -s '$bam_input.tumor_bam_file' tumor.bam &&
+    ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
+    #end if
+    ## Customized mode: copy the stock ini and rewrite each default with the value entered in the form.
+    #if str( $set_configuration.set_configuration_switch ) == "Customized":
+    cp '${config_file}' '${config_file_custom}' &&
+    sed -i 's/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/' '${config_file_custom}' &&
+    sed -i 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/' '${config_file_custom}' &&
+    sed -i 's/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/' '${config_file_custom}' &&
+    sed -i 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/' '${config_file_custom}' &&
+    sed -i 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/' '${config_file_custom}' &&
+    sed -i 's/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/' '${config_file_custom}' &&
+    sed -i 's/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/' '${config_file_custom}' &&
+    sed -i 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/' '${config_file_custom}' &&
+    sed -i 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/' '${config_file_custom}' &&
+    sed -i 's/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/' '${config_file_custom}' &&
+    sed -i 's/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/' '${config_file_custom}' &&
+    sed -i 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/' '${config_file_custom}' &&
+    sed -i 's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/' '${config_file_custom}' &&
+    sed -i 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/' '${config_file_custom}' &&
+    #end if
+
+    configManta.py
+    --referenceFasta='${reference_fasta_filename}'
+    ## Choose the ini passed to configManta.py: the stock file unless one of the two branches below overrides it.
+    #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":
+    #set config_file = $set_configuration.CustomConfigFile
+    #else if str( $set_configuration.set_configuration_switch ) == "Customized":
+    #set config_file = config_file_custom
+    #end if
+    --config='${config_file}'
+    --bam=normal.bam
+    #if str( $bam_input.bam_input_selector ) == "tumor_bam":
+    --tumorBam=tumor.bam
+    #end if
+    ## Optional switches ticked in the form; only the three flag names offered by the select are forwarded (an empty selection yields none).
+    #for $flag in str( $additional_param ).split(','):
+    #if $flag in ('exome', 'rna', 'unstrandedRNA'):
+    --$flag
+    #end if
+    #end for
+    #if $advanced.retainTempFiles:
+    --retainTempFiles
+    #end if
+    #if $advanced.generateEvidenceBam:
+    --generateEvidenceBam
+    #end if
+    --runDir='${run_dir}'
+    --scanSizeMb=${advanced.scanSizeMb}
+    --callMemMb=${advanced.callMemMb} &&
+
+    ln -s -f '${run_dir}/runWorkflow.py' '${run_manta_workflow}' &&
+    ln -s -f '${config_file}' '${set_conf_file}' &&
+    python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-8} &&
+    ln -s -f '${run_dir}/results/variants/candidateSV.vcf.gz' '${out_vcf1}' &&
+    ln -s -f '${run_dir}/results/variants/diploidSV.vcf.gz' '${out_vcf2}' &&
+    ln -s -f '${run_dir}/results/variants/candidateSmallIndels.vcf.gz' '${out_vcf3}'
+    ]]></command>
+
+    <inputs>
+        <expand macro="reference_source_conditional" />
+        <!-- Alignments: either a single normal BAM or a normal/tumor pair. -->
+        <conditional name="bam_input">
+            <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files">
+                <option value="not_tumor_bam">Normal</option>
+                <option value="tumor_bam">Normal + Tumor</option>
+            </param>
+
+            <when value="not_tumor_bam">
+                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
+            </when>
+
+            <when value="tumor_bam">
+                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
+                <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." />
+            </when>
+        </conditional>
+
+        <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional options" help="Optional configManta.py switches; each option value is the name of the corresponding command-line flag.">
+            <option value="exome">Set options for WES input: turn off depth filters</option>
+            <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
+            <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
+        </param>
+
+        <section name="advanced" title="Advanced options" expanded="false">
+
+            <param name="callMemMb" type="integer" value="8000" min="1" label="Set default task memory requirements" help="The maximum memory size (in megabytes) to assign to tasks" />
+            <param name="scanSizeMb" type="integer" value="12" min="1" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" />
+            <param name="retainTempFiles" type="boolean" checked="false" truevalue="--retainTempFiles" falsevalue="" label="Keep all temporary files" help="Click yes so all temporary files (for workflow debugging) will be kept."/>
+            <param name="generateEvidenceBam" type="boolean" checked="false" truevalue="--generateEvidenceBam" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/>
+
+        </section>
+
+        <!-- <expand macro="manta_configuration"/> -->
+
+        <conditional name="set_configuration">
+            <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
+                <option value="Default_config_file">Default</option>
+                <option value="Custom_config_file">Upload a different config file</option>
+                <option value="Customized">Customize the options</option>
+            </param>
+            <when value="Default_config_file">
+            </when>
+            <when value="Custom_config_file">
+                <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
+            </when>
+            <when value="Customized">
+                <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
+                <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
+                <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
+                <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
+                <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
+                <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
+                <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
+                <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
+                <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
+                <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
+                <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
+                <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" min="0" max="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
+                <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" min="0" max="1" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
+                <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
+            </when>
+        </conditional>
+        <!-- Checkboxes choosing which result files are added to the history; each one is read by a filter in the outputs section. -->
+        <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="false" help="Show run_workflow file on history"/>
+        <param name="config_file_check" type="boolean" label="output conf file" checked="false" help="Show configuration file on history"/>
+        <param name="O1_check" type="boolean" label="output candidate SVs" checked="false" help="Show candidateSV.vcf.gz on history"/>
+        <param name="O2_check" type="boolean" label="output diploid SVs" checked="false" help="Show diploidSV.vcf.gz on history"/>
+        <param name="O3_check" type="boolean" label="output candidate small indels" checked="false" help="Show candidateSmallIndels.vcf.gz on history"/>
+
+    </inputs>
+
+    <outputs>
+        <!-- Each dataset appears only when its checkbox is ticked. The command links the files into place from a run directory named MantaWorkflow_<random>, so no fixed from_work_dir path can match. -->
+        <data format="txt" name="run_manta_workflow" label="Parameters for running Manta">
+            <filter>runworkflow_file_check == True</filter>
+        </data>
+
+        <data format="tabular" name="set_conf_file" label="conf_file.ini">
+            <filter>config_file_check == True</filter>
+        </data>
+        <data format="vcf_bgzip" name="out_vcf1" label="${tool.name} on ${on_string} (Generating the candidateSV.vcf file)">
+            <filter>O1_check == True</filter>
+        </data>
+        <data format="vcf_bgzip" name="out_vcf2" label="${tool.name} on ${on_string} (Generating the diploidSV.vcf file)">
+            <filter>O2_check == True</filter>
+        </data>
+        <data format="vcf_bgzip" name="out_vcf3" label="${tool.name} on ${on_string} (Generating the candidateSmallIndels.vcf file)">
+            <filter>O3_check == True</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <!-- Tumor/normal pair against a reference FASTA from the history; compares candidateSV.vcf.gz. -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
+            </conditional>
+            <conditional name="bam_input">
+                <param name="bam_input_selector" value="tumor_bam"/>
+                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
+                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
+            </conditional>
+
+            <conditional name="set_configuration">
+                <param name="set_configuration_switch" value="Default_config_file"/>
+            </conditional>
+            <param name="callMemMb" value="1000"/>
+            <param name="O1_check" value="True"/>
+            <output name="out_vcf1" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/>
+        </test>
+        <test>
+            <!-- Same inputs as the first test; compares candidateSmallIndels.vcf.gz instead. -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="history"/>
+                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
+            </conditional>
+            <conditional name="bam_input">
+                <param name="bam_input_selector" value="tumor_bam"/>
+                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
+                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
+            </conditional>
+
+            <conditional name="set_configuration">
+                <param name="set_configuration_switch" value="Default_config_file"/>
+            </conditional>
+            <param name="callMemMb" value="1000"/>
+            <param name="O3_check" value="True"/>
+            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
+        </test>
+        <test>
+            <!-- Cached hg19 reference; dbkey belongs on the BAM datasets, it has no meaning on the select parameter. -->
+            <conditional name="reference_source">
+                <param name="reference_source_selector" value="cached"/>
+                <param name="index" value="hg19"/>
+            </conditional>
+            <conditional name="bam_input">
+                <param name="bam_input_selector" value="tumor_bam"/>
+                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam" dbkey="hg19"/>
+                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam" dbkey="hg19"/>
+            </conditional>
+
+            <conditional name="set_configuration">
+                <param name="set_configuration_switch" value="Default_config_file"/>
+            </conditional>
+            <param name="callMemMb" value="1000"/>
+            <param name="O3_check" value="True"/>
+            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
+        </test>
+    </tests>
+        
+    <help><![CDATA[
+**Manta**
+This script configures the Manta SV analysis pipeline.
+You must specify a BAM or CRAM file for at least one sample.
+Configuration will produce a workflow run script which
+can execute the workflow on a single node or through
+sge and resume any interrupted execution.
+
+**Options**
+  --version             show program's version number and exit
+  -h, --help            show this help message and exit
+  --config=FILE         provide a configuration file to override defaults in
+                        global config file (configManta.py.ini, located next
+                        to configManta.py in the Manta bin directory)
+  --allHelp             show all extended/hidden options
+**Workflow options**
+    --bam=FILE, --normalBam=FILE
+                        Normal sample BAM or CRAM file. May be specified more
+                        than once, multiple inputs will be treated as each BAM
+                        file representing a different sample. [optional] (no
+                        default)
+    --tumorBam=FILE, --tumourBam=FILE
+                        Tumor sample BAM or CRAM file. Only up to one tumor
+                        bam file accepted. [optional] (no default)
+    --exome             Set options for WES input: turn off depth filters
+    --rna               Set options for RNA-Seq input. Must specify exactly
+                        one bam input file
+    --unstrandedRNA     Set if RNA-Seq input is unstranded: Allows splice-
+                        junctions on either strand
+    --referenceFasta=FILE
+                        samtools-indexed reference fasta file [required]
+    --runDir=DIR        Name of directory to be created where all workflow
+                        scripts and output will be written. Each analysis
+                        requires a separate directory. (default:
+                        MantaWorkflow)
+    --callRegions=FILE  Optionally provide a bgzip-compressed/tabix-indexed
+                        BED file containing the set of regions to call. No VCF
+                        output will be provided outside of these regions. The
+                        full genome will still be used to estimate statistics
+                        from the input (such as expected fragment size
+                        distribution). Only one BED file may be specified.
+                        (default: call the entire genome)
+**Extended options**
+    These options are either unlikely to be reset after initial site
+    configuration or only of interest for workflow development/debugging.
+    They will not be printed here if a default exists unless --allHelp is
+    specified
+    --existingAlignStatsFile=FILE
+                        Pre-calculated alignment statistics file. Skips
+                        alignment stats calculation.
+    --useExistingChromDepths
+                        Use pre-calculated chromosome depths.
+    --candidateBins=candidateBins
+                        Provide the total number of tasks which candidate
+                        generation  will be sub-divided into. (default: 256)
+    --retainTempFiles   Keep all temporary files (for workflow debugging)
+    --generateEvidenceBam
+                        Generate a bam of supporting reads for all SVs
+    --outputContig      Output assembled contig sequences in VCF file
+    --scanSizeMb=INT    Maximum sequence region size (in megabases) scanned by
+                        each task during SV Locus graph generation. (default:
+                        12)
+    --region=REGION     Limit the analysis to a region of the genome for
+                        debugging purposes. If this argument is provided
+                        multiple times all specified regions will be analyzed
+                        together. All regions must be non-overlapping to get a
+                        meaningful result. Examples: '--region chr20' (whole
+                        chromosome), '--region chr2:100-2000 --region
+                        chr3:2500-3000' (two regions). If this option is
+                        specified (one or more times) together with the
+                        --callRegions BED file, then all region arguments will
+                        be intersected with the callRegions BED track.
+    --callMemMb=INT     Set default task memory requirement (in megabytes) for
+                        common tasks. This may benefit an analysis of unusual
+                        depth, chimera rate, etc. 'Common' tasks refers to
+                        most compute intensive scatter-phase tasks of graph
+                        creation and candidate generation.
+
+    For further info see: https://github.com/Illumina/manta
+
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv710</citation>
+    </citations>
+
+</tool>