view manta.xml @ 3:d648e40c6da9 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 3b3617515f0e59c35849939c26e4891f02c4de0b"
author artbio
date Sun, 07 Jun 2020 16:43:54 -0400
parents 42ba283a0fe2
children d09254e37c68
line wrap: on
line source

<tool id="manta" name="Manta" version="@WRAPPER_VERSION@">

    <description>calls structural variants (SVs) and indels from mapped paired-end sequencing reads</description>

    <macros>
        <import>manta_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <!--
        Command template (Cheetah, rendered by Galaxy into a single shell command).
        Steps:
          1. Copy the default configManta.py.ini shipped with the tool into the job directory.
          2. Symlink the input BAM file(s) and their indexes under fixed names.
          3. Apply the selected configuration: defaults, defaults patched with sed,
             or a configuration file supplied by the user.
          4. Run configManta.py to create the workflow in ./MantaWorkflow, then execute it.
        The VCF outputs are collected by Galaxy through the from_work_dir attributes
        declared on the output datasets, so no explicit copy is done for them here.
    -->
    <command detect_errors="exit_code"><![CDATA[
    @VERSION@
    @pipefail@
    @set_reference_fasta_filename@
    #set run_dir = './MantaWorkflow'

    ## Work on a private copy of the default configuration shipped with the tool.
    cp '$__tool_directory__/configManta.py.ini' configManta.py.ini &&

    ## The normal BAM is used in both modes; the tumor BAM only in the paired mode.
    ln -s '$bam_input.normal_bam_file' normal.bam &&
    ln -s '$bam_input.normal_bam_file.metadata.bam_index' normal.bai &&
    #if str( $bam_input.bam_input_selector ) == "tumor_bam":
    ln -s '$bam_input.tumor_bam_file' tumor.bam &&
    ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
    #end if

    ## Each sed call rewrites one "key = default" line of the copied configuration,
    ## so it only takes effect while the shipped file still carries that default value.
    #if str( $set_configuration.set_configuration_switch ) == "Customized":
        sed -i 's/minCandidateVariantSize = 8/minCandidateVariantSize = $set_configuration.minCandidateVariantSize/' ./configManta.py.ini &&
        sed -i 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = $set_configuration.rnaMinCandidateVariantSize/' ./configManta.py.ini &&
        sed -i 's/minEdgeObservations = 3/minEdgeObservations = $set_configuration.minEdgeObservations/' ./configManta.py.ini &&
        sed -i 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = $set_configuration.graphNodeMaxEdgeCount/' ./configManta.py.ini &&
        sed -i 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = $set_configuration.minCandidateSpanningCount/' ./configManta.py.ini &&
        sed -i 's/minScoredVariantSize = 50/minScoredVariantSize = $set_configuration.minScoredVariantSize/' ./configManta.py.ini &&
        sed -i 's/minDiploidVariantScore = 10/minDiploidVariantScore = $set_configuration.minDiploidVariantScore/' ./configManta.py.ini &&
        sed -i 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = $set_configuration.minPassDiploidVariantScore/' ./configManta.py.ini &&
        sed -i 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = $set_configuration.minPassDiploidGTScore/' ./configManta.py.ini &&
        sed -i 's/minSomaticScore = 10/minSomaticScore = $set_configuration.minSomaticScore/' ./configManta.py.ini &&
        sed -i 's/minPassSomaticScore = 30/minPassSomaticScore = $set_configuration.minPassSomaticScore/' ./configManta.py.ini &&
        sed -i 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes/' ./configManta.py.ini &&
        sed -i 's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = $set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes/' ./configManta.py.ini &&
        sed -i 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = $set_configuration.useOverlapPairEvidence/' ./configManta.py.ini &&
    #end if

    ## A user-supplied configuration replaces the copied defaults entirely.
    #if str( $set_configuration.set_configuration_switch ) == "Custom_config_file":
        cp '$set_configuration.CustomConfigFile' ./configManta.py.ini &&
    #end if

    configManta.py --referenceFasta='${reference_fasta_filename}'
                   --config='./configManta.py.ini'
                   --bam='normal.bam'
                   #if str( $bam_input.bam_input_selector ) == "tumor_bam":
                       --tumorBam='tumor.bam'
                   #end if
                   ## additional_param is a multi-select: it renders as a comma-separated
                   ## list of the chosen option values, or as "None" when nothing is chosen.
                   ## Only the known flag names are forwarded.
                   #for $flag in str( $additional_param ).split(','):
                       #if $flag in ('exome', 'rna', 'unstrandedRNA'):
                           --${flag}
                       #end if
                   #end for
                   ## NOTE(review): no output dataset collects the temporary files or the
                   ## evidence BAMs; these two flags only affect the job working directory.
                   #if $advanced.retainTempFiles:
                       --retainTempFiles
                   #end if
                   #if $advanced.generateEvidenceBam:
                       --generateEvidenceBam
                   #end if
                   --runDir='${run_dir}'
                   --scanSizeMb=${advanced.scanSizeMb}
                   --callMemMb=${advanced.callMemMb} &&

    ## Copy (rather than symlink with a relative target, which would dangle when the
    ## output file is not located in the working directory) the optional text outputs,
    ## and only when the user asked for them.
    #if $runworkflow_file_check:
    cp '${run_dir}/runWorkflow.py' '${run_manta_workflow}' &&
    #end if
    #if $config_file_check:
    cp './configManta.py.ini' '${set_conf_file}' &&
    #end if

    python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-4}

    ]]></command>

    <inputs>
        <!-- Reference genome selection; the conditional itself is defined in the imported macros file. -->
        <expand macro="reference_source_conditional" />

        <!-- Sample layout: a single normal BAM, or a normal/tumor pair. -->
        <conditional name="bam_input">
            <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files">
                <option value="not_tumor_bam">Normal</option>
                <option value="tumor_bam">Normal + Tumor</option>
            </param>

            <when value="not_tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
            </when>

            <when value="tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)." />
                <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)." />
            </when>
        </conditional>

        <!-- Each option value is the name of the matching configManta.py flag (without the leading dashes). -->
        <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional options" help="Optional flags describing the kind of sequencing data.">
            <option value="exome">Set options for WES input: turn off depth filters</option>
            <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
            <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
        </param>

        <section name="advanced" title="Advanced options" expanded="false">

            <param name="callMemMb" type="integer" value="8000" label="Set default task memory requirements" help="The maximum memory size (in megabytes) to assign to tasks" />
            <param name="scanSizeMb" type="integer" value="12" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)" />
            <!-- truevalue carries the flag each checkbox stands for (both previously shared the meaningless "-s"). -->
            <param name="retainTempFiles" type="boolean" checked="false" truevalue="--retainTempFiles" falsevalue="" label="Keep all temporary files" help="Click yes so all temporary files (for workflow debugging) will be kept."/>
            <param name="generateEvidenceBam" type="boolean" checked="false" truevalue="--generateEvidenceBam" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/>

        </section>

        <!-- <expand macro="manta_configuration"/> -->

        <!--
            Configuration source: the defaults shipped with the tool, a user-supplied
            configuration file, or the defaults with individual values overridden below.
            The value attribute of each "Customized" parameter is the default that the
            command template searches for when patching the configuration file.
        -->
        <conditional name="set_configuration">
            <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
                <option value="Default_config_file">Default</option>
                <option value="Custom_config_file">Upload a different config file</option>
                <option value="Customized">Customize the options</option>
            </param>
            <when value="Default_config_file">
            </when>
            <when value="Custom_config_file">
                <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
            </when>
            <when value="Customized">
                <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
                <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
                <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
                <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
                <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
                <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
                <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
                <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
                <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
                <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
                <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
                <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
                <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
                <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
            </when>
        </conditional>

        <!-- Output toggles: each checkbox is read by the filter of one output dataset. -->
        <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="false" help="Show run_workflow file on history"/>
        <param name="config_file_check" type="boolean" label="output conf file" checked="false" help="Show configuration file on history"/>
        <param name="O1_check" type="boolean" label="output candidate SVs" checked="false" help="Show the candidateSV.vcf.gz file on history"/>
        <param name="O2_check" type="boolean" label="output diploid SVs" checked="false" help="Show the diploidSV.vcf.gz file on history"/>
        <param name="O3_check" type="boolean" label="output candidate small indels" checked="false" help="Show the candidateSmallIndels.vcf.gz file on history"/>

    </inputs>

    <outputs>
        <!--
            Every output is optional: it is only added to the history when the
            matching checkbox among the inputs is ticked.
        -->

        <!-- The runWorkflow.py script generated by configManta.py. -->
        <data format="txt" name="run_manta_workflow" label="Parameters for running Manta">
            <filter>runworkflow_file_check == True</filter>
        </data>

        <!-- The configuration file actually used for the run; an INI file is plain text, not a table. -->
        <data format="txt" name="set_conf_file" label="conf_file.ini">
            <filter>config_file_check == True</filter>
        </data>

        <!-- Compressed VCF results, picked up from the Manta run directory. -->
        <data format="vcf_bgzip" name="out_vcf1" label="${tool.name} on ${on_string} (Generating the candidateSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
            <filter>O1_check == True</filter>
        </data>
        <data format="vcf_bgzip" name="out_vcf2" label="${tool.name} on ${on_string} (Generating the diploidSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
            <filter>O2_check == True</filter>
        </data>
        <data format="vcf_bgzip" name="out_vcf3" label="${tool.name} on ${on_string} (Generating the candidateSmallIndels.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
            <filter>O3_check == True</filter>
        </data>
    </outputs>

    <tests>
        <!--
            Test 1: cached hg19 reference, normal + tumor BAM pair, default configuration.
            Compares the candidate small indels output.
            NOTE(review): dbkey is set on the select parameter rather than on the BAM
            datasets; confirm whether the cached reference lookup depends on it.
        -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="cached"/>
                <param name="index" value="hg19"/>
            </conditional>
            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            </conditional>
            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <param name="callMemMb" value="1000"/>
            <param name="O3_check" value="True"/>
            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
        </test>

        <!--
            Test 2: reference FASTA taken from the history, normal + tumor BAM pair,
            default configuration. Compares the candidate SV output.
        -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            </conditional>
            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam"/>
                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            </conditional>
            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <param name="callMemMb" value="1000"/>
            <param name="O1_check" value="True"/>
            <output name="out_vcf1" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/>
        </test>

        <!--
            Test 3: same inputs as test 2 (history reference, normal + tumor pair,
            default configuration). Compares the candidate small indels output.
        -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            </conditional>
            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam"/>
                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            </conditional>
            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <param name="callMemMb" value="1000"/>
            <param name="O3_check" value="True"/>
            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
        </test>
    </tests>
        
    <help><![CDATA[
**Manta**

This script configures the Manta SV analysis pipeline.
You must specify a BAM or CRAM file for at least one sample.
Configuration will produce a workflow run script which
can execute the workflow on a single node or through
SGE and resume any interrupted execution.

**Options**

  --version             show program's version number and exit
  -h, --help            show this help message and exit
  --config=FILE         provide a configuration file to override defaults in
                        the global config file (configManta.py.ini)
  --allHelp             show all extended/hidden options

**Workflow options**

    --bam=FILE, --normalBam=FILE
                        Normal sample BAM or CRAM file. May be specified more
                        than once, multiple inputs will be treated as each BAM
                        file representing a different sample. [optional] (no
                        default)
    --tumorBam=FILE, --tumourBam=FILE
                        Tumor sample BAM or CRAM file. Only up to one tumor
                        bam file accepted. [optional] (no default)
    --exome             Set options for WES input: turn off depth filters
    --rna               Set options for RNA-Seq input. Must specify exactly
                        one bam input file
    --unstrandedRNA     Set if RNA-Seq input is unstranded: Allows splice-
                        junctions on either strand
    --referenceFasta=FILE
                        samtools-indexed reference fasta file [required]
    --runDir=DIR        Name of directory to be created where all workflow
                        scripts and output will be written. Each analysis
                        requires a separate directory. (default:
                        MantaWorkflow)
    --callRegions=FILE  Optionally provide a bgzip-compressed/tabix-indexed
                        BED file containing the set of regions to call. No VCF
                        output will be provided outside of these regions. The
                        full genome will still be used to estimate statistics
                        from the input (such as expected fragment size
                        distribution). Only one BED file may be specified.
                        (default: call the entire genome)

**Extended options**

    These options are either unlikely to be reset after initial site
    configuration or only of interest for workflow development/debugging.
    They will not be printed here if a default exists unless --allHelp is
    specified
    --existingAlignStatsFile=FILE
                        Pre-calculated alignment statistics file. Skips
                        alignment stats calculation.
    --useExistingChromDepths
                        Use pre-calculated chromosome depths.
    --candidateBins=candidateBins
                        Provide the total number of tasks which candidate
                        generation  will be sub-divided into. (default: 256)
    --retainTempFiles   Keep all temporary files (for workflow debugging)
    --generateEvidenceBam
                        Generate a bam of supporting reads for all SVs
    --outputContig      Output assembled contig sequences in VCF file
    --scanSizeMb=INT    Maximum sequence region size (in megabases) scanned by
                        each task during SV Locus graph generation. (default:
                        12)
    --region=REGION     Limit the analysis to a region of the genome for
                        debugging purposes. If this argument is provided
                        multiple times all specified regions will be analyzed
                        together. All regions must be non-overlapping to get a
                        meaningful result. Examples: '--region chr20' (whole
                        chromosome), '--region chr2:100-2000 --region
                        chr3:2500-3000' (two regions)'. If this option is
                        specified (one or more times) together with the
                        --callRegions BED file, then all region arguments will
                        be intersected with the callRegions BED track.
    --callMemMb=INT     Set default task memory requirement (in megabytes) for
                        common tasks. This may benefit an analysis of unusual
                        depth, chimera rate, etc.. 'Common' tasks refers to
                        most compute intensive scatter-phase tasks of graph
                        creation and candidate generation.

    For further info see: https://github.com/Illumina/manta

    ]]></help>

    <!-- Publication to cite when results produced with this tool are used, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv710</citation>
    </citations>

</tool>