Mercurial > repos > artbio > manta
diff manta.xml @ 0:42ba283a0fe2 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit e6c5d87dcd848fc4910af968e73adc481c811d15"
author | artbio |
---|---|
date | Wed, 13 May 2020 15:15:07 -0400 |
parents | |
children | d648e40c6da9 |
line wrap: on
line diff
<!-- Galaxy wrapper for Manta (manta.xml, first revision of the artbio/manta tool repository). -->
<tool id="manta" name="Manta" version="@WRAPPER_VERSION@">

    <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>

    <macros>
        <import>manta_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <command detect_errors="exit_code"><![CDATA[
        @VERSION@
        @pipefail@
        @set_reference_fasta_filename@

        ## All paths below are relative to the job working directory.
        ## They must not be derived from os.getcwd(): the template is rendered by
        ## Galaxy before the job starts, so that call is not guaranteed to return
        ## the job directory. A fixed run directory name is also what the
        ## from_work_dir attributes of the outputs rely on.
        #set $run_dir = 'MantaWorkflow'
        #set $default_config = $__tool_directory__ + '/configManta.py.ini'
        #set $input_normal = 'normal.bam'
        #set $input_tumor = 'tumor.bam'
        #set $paired = str($bam_input.bam_input_selector) == "tumor_bam"
        #set $config_mode = str($set_configuration.set_configuration_switch)
        #set $extra_flags = str($additional_param)

        ## Stage the BAM file(s) and their indexes under predictable names.
        ln -s '$bam_input.normal_bam_file' $input_normal &&
        ln -s '$bam_input.normal_bam_file.metadata.bam_index' normal.bai &&
        #if $paired
            ln -s '$bam_input.tumor_bam_file' $input_tumor &&
            ln -s '$bam_input.tumor_bam_file.metadata.bam_index' tumor.bai &&
        #end if

        ## Pick the configuration file. A customized configuration is a private copy
        ## made in the job directory; the tool directory is shared between jobs and
        ## may be read-only, so it is never written to.
        #if $config_mode == "Custom_config_file"
            #set $config_file = str($set_configuration.CustomConfigFile)
        #else if $config_mode == "Customized"
            #set $config_file = 'customized.ini'
            cp '${default_config}' '${config_file}' &&
            sed -i
                -e 's/minCandidateVariantSize = 8/minCandidateVariantSize = ${set_configuration.minCandidateVariantSize}/'
                -e 's/rnaMinCandidateVariantSize = 1000/rnaMinCandidateVariantSize = ${set_configuration.rnaMinCandidateVariantSize}/'
                -e 's/minEdgeObservations = 3/minEdgeObservations = ${set_configuration.minEdgeObservations}/'
                -e 's/graphNodeMaxEdgeCount = 10/graphNodeMaxEdgeCount = ${set_configuration.graphNodeMaxEdgeCount}/'
                -e 's/minCandidateSpanningCount = 3/minCandidateSpanningCount = ${set_configuration.minCandidateSpanningCount}/'
                -e 's/minScoredVariantSize = 50/minScoredVariantSize = ${set_configuration.minScoredVariantSize}/'
                -e 's/minDiploidVariantScore = 10/minDiploidVariantScore = ${set_configuration.minDiploidVariantScore}/'
                -e 's/minPassDiploidVariantScore = 20/minPassDiploidVariantScore = ${set_configuration.minPassDiploidVariantScore}/'
                -e 's/minPassDiploidGTScore = 15/minPassDiploidGTScore = ${set_configuration.minPassDiploidGTScore}/'
                -e 's/minSomaticScore = 10/minSomaticScore = ${set_configuration.minSomaticScore}/'
                -e 's/minPassSomaticScore = 30/minPassSomaticScore = ${set_configuration.minPassSomaticScore}/'
                -e 's/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1/enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = ${set_configuration.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes}/'
                -e 's/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0/enableRemoteReadRetrievalForInsertionsInCancerCallingModes = ${set_configuration.enableRemoteReadRetrievalForInsertionsInCancerCallingModes}/'
                -e 's/useOverlapPairEvidence = 0/useOverlapPairEvidence = ${set_configuration.useOverlapPairEvidence}/'
                '${config_file}' &&
        #else
            #set $config_file = $default_config
        #end if

        ## Step 1: generate the workflow script in the run directory.
        configManta.py
            --referenceFasta='${reference_fasta_filename}'
            --config='${config_file}'
            --bam=$input_normal
            #if $paired
                --tumorBam=$input_tumor
            #end if
            ## An empty multi-select renders as the string "None".
            #if $extra_flags not in ('None', '')
                #for $flag in $extra_flags.split(',')
                    --${flag}
                #end for
            #end if
            ${advanced.retainTempFiles}
            ${advanced.generateEvidenceBam}
            --runDir='${run_dir}'
            --scanSizeMb=${advanced.scanSizeMb}
            --callMemMb=${advanced.callMemMb} &&

        ## Optional bookkeeping outputs are copied (not symlinked) so the datasets
        ## do not point into the job directory.
        #if $runworkflow_file_check
            cp '${run_dir}/runWorkflow.py' '${run_manta_workflow}' &&
        #end if
        #if $config_file_check
            cp '${config_file}' '${set_conf_file}' &&
        #end if

        ## Step 2: run the workflow with the number of cores Galaxy allocated.
        ## The VCF outputs are collected through from_work_dir.
        python2 '${run_dir}/runWorkflow.py' -m local -j \${GALAXY_SLOTS:-8}
    ]]></command>

    <inputs>
        <expand macro="reference_source_conditional"/>

        <conditional name="bam_input">
            <param name="bam_input_selector" type="select" label="Just 'normal' BAM file or 'normal' + 'tumor' BAM files" help="Select between a single normal BAM file or a pair of normal / tumor BAM files">
                <option value="not_tumor_bam">Normal</option>
                <option value="tumor_bam">Normal + Tumor</option>
            </param>

            <when value="not_tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)."/>
            </when>

            <when value="tumor_bam">
                <param name="normal_bam_file" type="data" format="bam" label="select normal BAM" help="Select the files you wish to send to Manta (normal sample, it must be in BAM format)."/>
                <param name="tumor_bam_file" type="data" format="bam" label="select tumor BAM" help="Select the files you wish to send to Manta (tumor sample, it must be in BAM format)."/>
            </when>
        </conditional>

        <!-- Each option value is the name of the configManta.py flag it switches on. -->
        <param name="additional_param" type="select" multiple="true" display="checkboxes" label="Additional options" help="Additional parameters.">
            <option value="exome">Set options for WES input: turn off depth filters</option>
            <option value="rna">Set options for RNA-Seq input. Must specify exactly one bam input file</option>
            <option value="unstrandedRNA">Set if RNA-Seq input is unstranded: Allows splice-junctions on either strand</option>
        </param>

        <section name="advanced" title="Advanced options" expanded="false">
            <param name="callMemMb" type="integer" value="8000" label="Set default task memory requirements" help="The maximum memory size (in megabytes) to assign to tasks"/>
            <param name="scanSizeMb" type="integer" value="12" label="Set maximum sequence region size" help="The maximum sequence region size (in megabases) scanned by each task during SV Locus graph generation. (default: 12)"/>
            <param name="retainTempFiles" type="boolean" checked="false" truevalue="--retainTempFiles" falsevalue="" label="Keep all temporary files" help="Click yes so all temporary files (for workflow debugging) will be kept."/>
            <param name="generateEvidenceBam" type="boolean" checked="false" truevalue="--generateEvidenceBam" falsevalue="" label="Generate a bam of supporting reads for all SVs" help="Click yes for generating a BAM of supporting reads for all SVs."/>
        </section>

        <!-- <expand macro="manta_configuration"/> -->

        <conditional name="set_configuration">
            <param name="set_configuration_switch" type="select" label="Do you want to change default configuration settings?">
                <option value="Default_config_file">Default</option>
                <option value="Custom_config_file">Upload a different config file</option>
                <option value="Customized">Customize the options</option>
            </param>
            <when value="Default_config_file">
            </when>
            <when value="Custom_config_file">
                <param format="ini" name="CustomConfigFile" type="data" label="config file"/>
            </when>
            <!-- The default values below must stay in sync with configManta.py.ini:
                 the command section rewrites the "key = default" lines of that file. -->
            <when value="Customized">
                <param name="minCandidateVariantSize" type="integer" value="8" label="minCandidateVariantSize" help="Run discovery and candidate reporting for all SVs/indels at or above this size."/>
                <param name="rnaMinCandidateVariantSize" type="integer" value="1000" label="rnaMinCandidateVariantSize" help="Separate option (to provide different default) used for runs in RNA-mode."/>
                <param name="minEdgeObservations" type="integer" value="3" label="minEdgeObservations" help="Remove all edges from the graph unless they're supported by this many 'observations'."/>
                <param name="graphNodeMaxEdgeCount" type="integer" value="10" label="graphNodeMaxEdgeCount" help="If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge."/>
                <param name="minCandidateSpanningCount" type="integer" value="3" label="minCandidateSpanningCount" help="Run discovery and candidate reporting for all SVs/indels with at least this many spanning support observations."/>
                <param name="minScoredVariantSize" type="integer" value="50" label="minScoredVariantSize" help="After candidate identification, only score and report SVs/indels at or above this size."/>
                <param name="minDiploidVariantScore" type="integer" value="10" label="minDiploidVariantScore" help="Minimum VCF 'QUAL' score for a variant to be included in the diploid vcf."/>
                <param name="minPassDiploidVariantScore" type="integer" value="20" label="minPassDiploidVariantScore" help="VCF 'QUAL' score below which a variant is marked as filtered in the diploid vcf."/>
                <param name="minPassDiploidGTScore" type="integer" value="15" label="minPassDiploidGTScore" help="Minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf."/>
                <param name="minSomaticScore" type="integer" value="10" label="minSomaticScore" help="Somatic quality scores below this level are not included in the somatic vcf."/>
                <param name="minPassSomaticScore" type="integer" value="30" label="minPassSomaticScore" help="Somatic quality scores below this level are filtered in the somatic vcf."/>
                <param name="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" type="integer" value="1" label="enableRemoteReadRetrievalForInsertionsInGermlineCallingModes" help="Remote read retrieval is used to improve the assembly of putative insertions by retrieving any mate reads in remote locations with poor mapping quality. This feature can be enabled/disabled separately for germline and cancer calling below."/>
                <param name="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" type="integer" value="0" label="enableRemoteReadRetrievalForInsertionsInCancerCallingModes" help="Here 'CancerCallingModes' includes tumor-normal subtraction and tumor-only calling. 'GermlineCallingModes' includes all other calling modes."/>
                <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
            </when>
        </conditional>

        <!-- Switches controlling which datasets are added to the history. -->
        <param name="runworkflow_file_check" type="boolean" label="output manta run_workflow file" checked="false" help="Show run_workflow file on history"/>
        <param name="config_file_check" type="boolean" label="output conf file" checked="false" help="Show configuration file on history"/>
        <param name="O1_check" type="boolean" label="output candidate SVs" checked="false" help="Show candidateSV.vcf.gz on history"/>
        <param name="O2_check" type="boolean" label="output diploid SVs" checked="false" help="Show diploidSV.vcf.gz on history"/>
        <param name="O3_check" type="boolean" label="output candidate small indels" checked="false" help="Show candidateSmallIndels.vcf.gz on history"/>

    </inputs>

    <outputs>

        <data format="txt" name="run_manta_workflow" label="Parameters for running Manta">
            <filter>runworkflow_file_check == True</filter>
        </data>

        <data format="tabular" name="set_conf_file" label="conf_file.ini">
            <filter>config_file_check == True</filter>
        </data>

        <!-- The from_work_dir paths match the fixed run directory used in the command section. -->
        <data format="vcf_bgzip" name="out_vcf1" label="${tool.name} on ${on_string} (Generating the candidateSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
            <filter>O1_check == True</filter>
        </data>
        <data format="vcf_bgzip" name="out_vcf2" label="${tool.name} on ${on_string} (Generating the diploidSV.vcf file)" from_work_dir="MantaWorkflow/results/variants/diploidSV.vcf.gz">
            <filter>O2_check == True</filter>
        </data>
        <data format="vcf_bgzip" name="out_vcf3" label="${tool.name} on ${on_string} (Generating the candidateSmallIndels.vcf file)" from_work_dir="MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz">
            <filter>O3_check == True</filter>
        </data>
    </outputs>

    <tests>
        <!-- Reference from history, tumor/normal pair, candidate SV output. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            </conditional>

            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam"/>
                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            </conditional>

            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <section name="advanced">
                <param name="callMemMb" value="1000"/>
            </section>
            <param name="O1_check" value="True"/>
            <output name="out_vcf1" file="candidateSV.vcf.gz" decompress="true" lines_diff="4"/>
        </test>
        <!-- Reference from history, tumor/normal pair, candidate small indels output. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
            </conditional>

            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam"/>
                <param name="normal_bam_file" ftype="bam" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" value="HCC1954_tumor.bam"/>
            </conditional>

            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <section name="advanced">
                <param name="callMemMb" value="1000"/>
            </section>
            <param name="O3_check" value="True"/>
            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
        </test>
        <!-- Cached reference; dbkey is set on the BAM datasets, not on the selector. -->
        <test>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="cached"/>
                <param name="index" value="hg19"/>
            </conditional>

            <conditional name="bam_input">
                <param name="bam_input_selector" value="tumor_bam"/>
                <param name="normal_bam_file" ftype="bam" dbkey="hg19" value="HCC1954_normal.bam"/>
                <param name="tumor_bam_file" ftype="bam" dbkey="hg19" value="HCC1954_tumor.bam"/>
            </conditional>

            <conditional name="set_configuration">
                <param name="set_configuration_switch" value="Default_config_file"/>
            </conditional>
            <section name="advanced">
                <param name="callMemMb" value="1000"/>
            </section>
            <param name="O3_check" value="True"/>
            <output name="out_vcf3" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="4"/>
        </test>
    </tests>

    <help><![CDATA[
**Manta**

This script configures the Manta SV analysis pipeline.
You must specify a BAM or CRAM file for at least one sample.
Configuration will produce a workflow run script which
can execute the workflow on a single node or through
sge and resume any interrupted execution.

**Options**

  --version             show program's version number and exit
  -h, --help            show this help message and exit
  --config=FILE         provide a configuration file to override defaults in
                        global config file (configManta.py.ini)
  --allHelp             show all extended/hidden options

**Workflow options**

  --bam=FILE, --normalBam=FILE
                        Normal sample BAM or CRAM file. May be specified more
                        than once, multiple inputs will be treated as each BAM
                        file representing a different sample. [optional] (no
                        default)
  --tumorBam=FILE, --tumourBam=FILE
                        Tumor sample BAM or CRAM file. Only up to one tumor
                        bam file accepted. [optional] (no default)
  --exome               Set options for WES input: turn off depth filters
  --rna                 Set options for RNA-Seq input. Must specify exactly
                        one bam input file
  --unstrandedRNA       Set if RNA-Seq input is unstranded: Allows splice-
                        junctions on either strand
  --referenceFasta=FILE
                        samtools-indexed reference fasta file [required]
  --runDir=DIR          Name of directory to be created where all workflow
                        scripts and output will be written. Each analysis
                        requires a separate directory. (default:
                        MantaWorkflow)
  --callRegions=FILE    Optionally provide a bgzip-compressed/tabix-indexed
                        BED file containing the set of regions to call. No VCF
                        output will be provided outside of these regions. The
                        full genome will still be used to estimate statistics
                        from the input (such as expected fragment size
                        distribution). Only one BED file may be specified.
                        (default: call the entire genome)

**Extended options**

These options are either unlikely to be reset after initial site
configuration or only of interest for workflow development/debugging.
They will not be printed here if a default exists unless --allHelp is
specified

  --existingAlignStatsFile=FILE
                        Pre-calculated alignment statistics file. Skips
                        alignment stats calculation.
  --useExistingChromDepths
                        Use pre-calculated chromosome depths.
  --candidateBins=candidateBins
                        Provide the total number of tasks which candidate
                        generation will be sub-divided into. (default: 256)
  --retainTempFiles     Keep all temporary files (for workflow debugging)
  --generateEvidenceBam
                        Generate a bam of supporting reads for all SVs
  --outputContig        Output assembled contig sequences in VCF file
  --scanSizeMb=INT      Maximum sequence region size (in megabases) scanned by
                        each task during SV Locus graph generation. (default:
                        12)
  --region=REGION       Limit the analysis to a region of the genome for
                        debugging purposes. If this argument is provided
                        multiple times all specified regions will be analyzed
                        together. All regions must be non-overlapping to get a
                        meaningful result. Examples: '--region chr20' (whole
                        chromosome), '--region chr2:100-2000 --region
                        chr3:2500-3000' (two regions). If this option is
                        specified (one or more times) together with the
                        --callRegions BED file, then all region arguments will
                        be intersected with the callRegions BED track.
  --callMemMb=INT       Set default task memory requirement (in megabytes) for
                        common tasks. This may benefit an analysis of unusual
                        depth, chimera rate, etc. 'Common' tasks refers to
                        most compute intensive scatter-phase tasks of graph
                        creation and candidate generation.

For further info see: https://github.com/Illumina/manta

    ]]></help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btv710</citation>
    </citations>

</tool>