Mercurial > repos > artbio > manta
changeset 7:555971edd46e draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 569d2234f8a576d5c4fdae120a32418c50436ac2
author | artbio |
---|---|
date | Tue, 20 Feb 2024 08:31:27 +0000 |
parents | cb5691381acb |
children | |
files | README.rst candidateSV.vcf.gz candidateSmallIndels.vcf.gz manta.xml manta_macros.xml somaticSV.vcf.gz test-data/candidateSV.vcf.gz test-data/candidateSmallIndels.vcf.gz test-data/conf_file_1.ini test-data/conf_file_2.ini test-data/conf_file_3.ini test-data/conf_file_4.ini test-data/conf_file_5.ini test-data/somaticSV.vcf.gz |
diffstat | 14 files changed, 270 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Thu Jun 08 17:36:38 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,10 +0,0 @@ -# Wrapper of the variant caller 'MANTA', for use it as a Galaxy-based tool : - -Run the following commands in a terminal: - -planemo s - -Open in your browser: - -http://127.0.0.1:9090/ -
--- a/manta.xml Thu Jun 08 17:36:38 2023 +0000 +++ b/manta.xml Tue Feb 20 08:31:27 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="manta" name="Manta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> +<tool id="manta" name="Manta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description> <macros> <import>manta_macros.xml</import> @@ -10,6 +10,9 @@ @pipefail@ @set_reference_fasta_filename@ #set run_dir = './MantaWorkflow' + configManta=\$(which configManta.py) && + PATH=\${configManta/"configManta.py"/}:\$PATH && + printenv && cp $__tool_directory__/configManta.py.ini configManta.py.ini && #if str( $bam_input.bam_input_selector ) == "not_tumor_bam": ln -s '$bam_input.normal_bam_file' normal.bam && @@ -26,7 +29,7 @@ #end if #if str( $set_configuration.set_configuration_switch ) == "Customized": rm ./configManta.py.ini && - python '$__tool_directory__/customConfigManta.py' + python2 '$__tool_directory__/customConfigManta.py' --minCandidateVariantSize '$set_configuration.minCandidateVariantSize' --rnaMinCandidateVariantSize '$set_configuration.rnaMinCandidateVariantSize' --minEdgeObservations '$set_configuration.minEdgeObservations' @@ -112,7 +115,6 @@ <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value <= 0 to skip overlapping read pairs."/> </when> </conditional> - <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/> <param name="candidateSV_check" type="boolean" label="Unfiltered structural variants" checked="False" help="All unscored structural variant candidates"/> <param name="candidateSmallIndels_check" type="boolean" label="Unfiltered small indel candidates" checked="False" @@ -122,9 +124,7 @@ reflect any information from the tumor sample" /> </inputs> <outputs> - <data format="tabular" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini"> - <filter>config_file_check == True</filter> - </data> + <data format="txt" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini"/> <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered variants" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz"> <filter>candidateSV_check == True</filter> </data> @@ -139,7 +139,7 @@ </data> </outputs> <tests> - <test> + <test expect_num_outputs="3"> <param name="reference_source_selector" value="cached"/> <param name="index" value="hg19"/> <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/> @@ -148,10 +148,11 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> + <output name="conf_file" file="conf_file_1.ini" ftype="txt"/> <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - <test> + <test expect_num_outputs="3"> <param name="reference_source_selector" value="cached"/> <param name="index" value="hg19"/> <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/> @@ -160,10 +161,11 @@ <param name="set_configuration_switch" value="Customized"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> + <output name="conf_file" file="conf_file_2.ini" ftype="txt"/> <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - <test> + <test expect_num_outputs="3"> <param name="reference_source_selector" value="cached"/> <param name="index" value="hg19"/> <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/> @@ -172,10 +174,11 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> + <output name="conf_file" file="conf_file_3.ini" ftype="txt"/> <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - <test> + <test expect_num_outputs="3"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="hg19_region.fa"/> <param name="bam_input_selector" value="tumor_bam"/> @@ -184,10 +187,11 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSV_check" value="True"/> + <output name="conf_file" file="conf_file_4.ini" ftype="txt"/> <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="6"/> <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test> - <test> + <test expect_num_outputs="3"> <param name="reference_source_selector" value="history"/> <param name="ref_file" ftype="fasta" value="hg19_region.fa"/> <param name="bam_input_selector" value="tumor_bam"/> @@ -196,6 +200,7 @@ <param name="set_configuration_switch" value="Default_config_file"/> <param name="callMemMb" value="1000"/> <param name="candidateSmallIndels_check" value="True"/> + <output name="conf_file" file="conf_file_5.ini" ftype="txt"/> <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/> <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/> </test>
--- a/manta_macros.xml Thu Jun 08 17:36:38 2023 +0000 +++ b/manta_macros.xml Tue Feb 20 08:31:27 2024 +0000 @@ -1,17 +1,18 @@ <macros> <token name="@TOOL_VERSION@">1.6</token> - <token name="@VERSION_SUFFIX@">8</token> + <token name="@VERSION_SUFFIX@">9</token> + <token name="@PROFILE@">20.05</token> <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token> <token name="@set_reference_fasta_filename@"><![CDATA[ #set $reference_fasta_filename = "localref.fa" #if str( $reference_source.reference_source_selector ) == "history": - ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' && - samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for Manta" >&2 && + ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' && + samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for Manta" >&2 && #else: - #set $reference_fasta_filename = str( $reference_source.index.fields.path ) + #set $reference_fasta_filename = str( $reference_source.index.fields.path ) #end if ]]></token> @@ -28,8 +29,8 @@ <xml name="requirements"> <requirements> + <requirement type="package" version="@TOOL_VERSION@">manta</requirement> <requirement type="package" version="1.7">samtools</requirement> - <requirement type="package" version="@TOOL_VERSION@">manta</requirement> </requirements> </xml>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/conf_file_1.ini Tue Feb 20 08:31:27 2024 +0000 @@ -0,0 +1,58 @@ + +# +# This section contains all configuration settings for the top-level manta workflow, +# +[manta] + +referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa + +# Run discovery and candidate reporting for all SVs/indels at or above this size +# Separate option (to provide different default) used for runs in RNA-mode +minCandidateVariantSize = 8 +rnaMinCandidateVariantSize = 1000 + +# Remove all edges from the graph unless they're supported by this many 'observations'. +# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted. +minEdgeObservations = 3 + +# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge. +# Set to 0 to turn this filtration off +graphNodeMaxEdgeCount = 10 + +# Run discovery and candidate reporting for all SVs/indels with at least this +# many spanning support observations +minCandidateSpanningCount = 3 + +# After candidate identification, only score and report SVs/indels at or above this size: +minScoredVariantSize = 50 + +# minimum VCF "QUAL" score for a variant to be included in the diploid vcf: +minDiploidVariantScore = 10 + +# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf: +minPassDiploidVariantScore = 20 + +# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf: +minPassDiploidGTScore = 15 + +# somatic quality scores below this level are not included in the somatic vcf: +minSomaticScore = 10 + +# somatic quality scores below this level are filtered in the somatic vcf: +minPassSomaticScore = 30 + +# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote +# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads +# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime +# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read +# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling. +# This feature can be enabled/disabled separately for germline and cancer calling below. +# +# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes +# all other calling modes. +enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 +enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 + +# Set if an overlapping read pair will be considered as evidence +# Set to 0 to skip overlapping read pairs +useOverlapPairEvidence = 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/conf_file_2.ini Tue Feb 20 08:31:27 2024 +0000 @@ -0,0 +1,16 @@ +[manta] +referenceFasta = /dummy/path/to/genome.fa +enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 +minPassSomaticScore = 30 +minSomaticScore = 10 +minCandidateVariantSize = 8 +minPassDiploidVariantScore = 20 +useOverlapPairEvidence = 0 +minPassDiploidGTScore = 15 +graphNodeMaxEdgeCount = 10 +minEdgeObservations = 3 +minDiploidVariantScore = 10 +minCandidateSpanningCount = 3 +enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 +rnaMinCandidateVariantSize = 1000 +minScoredVariantSize = 50
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/conf_file_3.ini Tue Feb 20 08:31:27 2024 +0000 @@ -0,0 +1,58 @@ + +# +# This section contains all configuration settings for the top-level manta workflow, +# +[manta] + +referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa + +# Run discovery and candidate reporting for all SVs/indels at or above this size +# Separate option (to provide different default) used for runs in RNA-mode +minCandidateVariantSize = 8 +rnaMinCandidateVariantSize = 1000 + +# Remove all edges from the graph unless they're supported by this many 'observations'. +# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted. +minEdgeObservations = 3 + +# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge. +# Set to 0 to turn this filtration off +graphNodeMaxEdgeCount = 10 + +# Run discovery and candidate reporting for all SVs/indels with at least this +# many spanning support observations +minCandidateSpanningCount = 3 + +# After candidate identification, only score and report SVs/indels at or above this size: +minScoredVariantSize = 50 + +# minimum VCF "QUAL" score for a variant to be included in the diploid vcf: +minDiploidVariantScore = 10 + +# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf: +minPassDiploidVariantScore = 20 + +# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf: +minPassDiploidGTScore = 15 + +# somatic quality scores below this level are not included in the somatic vcf: +minSomaticScore = 10 + +# somatic quality scores below this level are filtered in the somatic vcf: +minPassSomaticScore = 30 + +# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote +# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads +# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime +# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read +# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling. +# This feature can be enabled/disabled separately for germline and cancer calling below. +# +# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes +# all other calling modes. +enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 +enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 + +# Set if an overlapping read pair will be considered as evidence +# Set to 0 to skip overlapping read pairs +useOverlapPairEvidence = 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/conf_file_4.ini Tue Feb 20 08:31:27 2024 +0000 @@ -0,0 +1,58 @@ + +# +# This section contains all configuration settings for the top-level manta workflow, +# +[manta] + +referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa + +# Run discovery and candidate reporting for all SVs/indels at or above this size +# Separate option (to provide different default) used for runs in RNA-mode +minCandidateVariantSize = 8 +rnaMinCandidateVariantSize = 1000 + +# Remove all edges from the graph unless they're supported by this many 'observations'. +# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted. +minEdgeObservations = 3 + +# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge. +# Set to 0 to turn this filtration off +graphNodeMaxEdgeCount = 10 + +# Run discovery and candidate reporting for all SVs/indels with at least this +# many spanning support observations +minCandidateSpanningCount = 3 + +# After candidate identification, only score and report SVs/indels at or above this size: +minScoredVariantSize = 50 + +# minimum VCF "QUAL" score for a variant to be included in the diploid vcf: +minDiploidVariantScore = 10 + +# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf: +minPassDiploidVariantScore = 20 + +# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf: +minPassDiploidGTScore = 15 + +# somatic quality scores below this level are not included in the somatic vcf: +minSomaticScore = 10 + +# somatic quality scores below this level are filtered in the somatic vcf: +minPassSomaticScore = 30 + +# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote +# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads +# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime +# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read +# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling. +# This feature can be enabled/disabled separately for germline and cancer calling below. +# +# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes +# all other calling modes. +enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 +enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 + +# Set if an overlapping read pair will be considered as evidence +# Set to 0 to skip overlapping read pairs +useOverlapPairEvidence = 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/conf_file_5.ini Tue Feb 20 08:31:27 2024 +0000 @@ -0,0 +1,58 @@ + +# +# This section contains all configuration settings for the top-level manta workflow, +# +[manta] + +referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa + +# Run discovery and candidate reporting for all SVs/indels at or above this size +# Separate option (to provide different default) used for runs in RNA-mode +minCandidateVariantSize = 8 +rnaMinCandidateVariantSize = 1000 + +# Remove all edges from the graph unless they're supported by this many 'observations'. +# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted. +minEdgeObservations = 3 + +# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge. +# Set to 0 to turn this filtration off +graphNodeMaxEdgeCount = 10 + +# Run discovery and candidate reporting for all SVs/indels with at least this +# many spanning support observations +minCandidateSpanningCount = 3 + +# After candidate identification, only score and report SVs/indels at or above this size: +minScoredVariantSize = 50 + +# minimum VCF "QUAL" score for a variant to be included in the diploid vcf: +minDiploidVariantScore = 10 + +# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf: +minPassDiploidVariantScore = 20 + +# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf: +minPassDiploidGTScore = 15 + +# somatic quality scores below this level are not included in the somatic vcf: +minSomaticScore = 10 + +# somatic quality scores below this level are filtered in the somatic vcf: +minPassSomaticScore = 30 + +# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote +# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads +# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime +# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read +# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling. +# This feature can be enabled/disabled separately for germline and cancer calling below. +# +# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes +# all other calling modes. +enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1 +enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0 + +# Set if an overlapping read pair will be considered as evidence +# Set to 0 to skip overlapping read pairs +useOverlapPairEvidence = 0