Mercurial > repos > iuc > lofreq_call
view lofreq_call.xml @ 8:b11e8e9c23bf draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit 814935de92514901b397ad335e9a43dba728973f
author | iuc |
---|---|
date | Wed, 02 Oct 2024 08:20:25 +0000 |
parents | 4805fe3d8fda |
children |
line wrap: on
line source
<tool id="lofreq_call" name="Call variants" version="@TOOL_VERSION@+galaxy3" profile="18.01">
    <!--
        Galaxy wrapper around "lofreq call-parallel".
        Takes a BAM dataset of mapped reads plus a reference (provided through
        the macros imported from macros.xml) and writes the called variants to
        a single VCF output dataset.
    -->
    <description>with LoFreq</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## prepare reference genome and mapped reads input
        @PREPARE_REF@
        ln -s '$reads' reads.bam &&
        ln -s -f '${reads.metadata.bam_index}' reads.bam.bai &&

        #if str($call_control.set_call_options) == 'yes':
            ## collect the names of the symlinked "ignore" VCFs; the list is
            ## turned into a comma-separated string for --ign-vcf further down
            #set $ign_vcfs = []
            #if str($call_control.source_qual.use_src_qual.src_qual):
                #for $n, $ign_vcf in enumerate($call_control.source_qual.use_src_qual.ign_vcf):
                    ## lofreq recognizes compressed vcf input based on its .gz
                    ## suffix => symlink all datasets with standard extensions.
                    ## For bgzipped vcfs, also make the index files available.
                    #if $ign_vcf:
                        #if $ign_vcf.is_of_type('vcf_bgzip'):
                            #set $ign_src = 'ign%d.vcf.gz' % $n
                            ln -s '$ign_vcf' $ign_src &&
                            ln -s '$ign_vcf.metadata.tabix_index' ${ign_src}.tbi &&
                        #else:
                            #set $ign_src = 'ign%d.vcf' % $n
                            ln -s '$ign_vcf' $ign_src &&
                        #end if
                        #silent $ign_vcfs.append($ign_src)
                    #end if
                #end for
            #end if
            #set $ign_vcfs = ','.join($ign_vcfs)
        #end if

        ## call variants with lofreq
        lofreq call-parallel --pp-threads \${GALAXY_SLOTS:-1}
        --verbose
        --ref '$reference_fasta_fn' --out variants.vcf
        $variant_types
        #if str($regions.restrict_to_region) == 'regions_from_file':
            --bed '$regions.bed'
        #end if
        #if str($call_control.set_call_options) == 'yes':
            --min-cov $call_control.coverage.min_cov
            --max-depth $call_control.coverage.max_depth
            $call_control.pe.use_orphan
            --min-bq $call_control.bc_quals.min_bq
            ## never pass a --min-alt-bq that is lower than --min-bq
            #if $call_control.bc_quals.min_alt_bq > $call_control.bc_quals.min_bq:
                --min-alt-bq $call_control.bc_quals.min_alt_bq
            #else:
                --min-alt-bq $call_control.bc_quals.min_bq
            #end if
            #if str($call_control.bc_quals.alt_bq.modify):
                --def-alt-bq $call_control.bc_quals.alt_bq.def_alt_bq
            #end if
            ${call_control.align_quals.alnqual.use_alnqual}
            #if str($call_control.align_quals.alnqual.use_alnqual) != '-A -B':
                ${call_control.align_quals.alnqual.alnqual_choice.alnquals_to_use}
                ${call_control.align_quals.alnqual.alnqual_choice.extended_baq}
            #end if
            --min-mq $call_control.map_quals.min_mq
            #if str($call_control.map_quals.use_mq.no_mq):
                $call_control.map_quals.use_mq.no_mq
            #else:
                ## never pass a --max-mq that is lower than --min-mq
                #if $call_control.map_quals.use_mq.max_mq > $call_control.map_quals.min_mq:
                    --max-mq $call_control.map_quals.use_mq.max_mq
                #else:
                    --max-mq $call_control.map_quals.min_mq
                #end if
            #end if
            #if str($call_control.source_qual.use_src_qual.src_qual):
                $call_control.source_qual.use_src_qual.src_qual
                #if $ign_vcfs:
                    --ign-vcf '$ign_vcfs'
                #end if
                --def-nm-q $call_control.source_qual.use_src_qual.def_nm_q
            #end if
            --min-jq $call_control.joint_qual.min_jq
            --min-alt-jq $call_control.joint_qual.min_alt_jq
            --def-alt-jq $call_control.joint_qual.def_alt_jq
        #end if
        --sig $filter_control.sig
        ## an empty or zero Bonferroni factor means "determine dynamically"
        #set $bonf_factor = $filter_control.bonf or 'dynamic'
        --bonf $bonf_factor
        $filter_control.others
        reads.bam 2>&1
        ## in case of errors add the log files produced
        ## by the parallel workers to stderr, then exit with the exit code
        ## of lofreq itself. The steps are sequenced with ";" instead of "&&"
        ## so that a failing cat (e.g. no worker logs were written) cannot
        ## replace the original exit code.
        || (tool_exit_code=\$?; cat "\$TMPDIR"/lofreq2_call_parallel*/*.log 1>&2; exit \$tool_exit_code)

        ## work around a bug in lofreq call-parallel
        ## https://github.com/CSB5/lofreq/issues/85
        ## that causes the output format to be vcf.gz with certain filter
        ## combinations. Issue is closed but still not fixed.
        #if str($bonf_factor) != 'dynamic':
            #if '--no-default-filter' in str($filter_control.others):
                && ln -s variants.vcf variants.vcf.gz
                && gzip -df variants.vcf.gz
            #end if
        #end if
        && echo $filter_control.filter_type
    ]]></command>
    <inputs>
        <param type="data" name="reads" format="bam" label="Input reads in BAM format" />
        <expand macro="reference_interface" />
        <conditional name="regions">
            <param name="restrict_to_region" type="select" label="Call variants across">
                <option value="genome">Whole reference</option>
                <option value="regions_from_file">Regions specified in BED</option>
            </param>
            <when value="genome" />
            <when value="regions_from_file">
                <param argument="--bed" type="data" format="bed" label="BED dataset with regions to examine" />
            </when>
        </conditional>
        <!-- the option values are inserted verbatim into the command line -->
        <param name="variant_types" type="select"
            label="Types of variants to call"
            help="Note: To have indels included in the called variants you HAVE to preprocess your input data using lofreq indelqual to include indel qualities. In addition, it is highly recommended that you calculate and make use of indel alignment qualities. See the detailed tool help below.">
            <option value="--call-indels">SNVs and indels</option>
            <option value="" selected="True">Only SNVs</option>
            <option value="--call-indels --only-indels">Only indels</option>
        </param>
        <conditional name="call_control">
            <param name="set_call_options" type="select" label="Variant calling parameters">
                <option value="no">Use default settings</option>
                <option value="yes">Configure settings</option>
            </param>
            <when value="no" />
            <when value="yes">
                <section name="coverage" title="Coverage" expanded="true">
                    <param name="min_cov" argument="--min-cov" type="integer" value="1" min="1"
                        label="Minimal coverage"
                        help="Do not attempt variant calling at sites that are not covered by at least this number of reads (default: 1)" />
                    <param name="max_depth" argument="--max-depth" type="integer" value="1000000" min="1"
                        label="Coverage cap"
                        help="For efficiency, do not consider more than this number of reads at any site (default: 1,000,000)" />
                </section>
                <section name="pe" title="Paired reads" expanded="true">
                    <param name="use_orphan" argument="--use-orphan" type="boolean" truevalue="--use-orphan" falsevalue="" checked="False"
                        label="Use reads from anomalously mapped pairs"
                        help="Applies to paired-end reads only. If set to true, reads from pairs that are flagged as non-proper pairs (SAM/BAM FLAG field 2) will be used in variant calling. The default is to ignore such reads." />
                </section>
                <section name="bc_quals" title="Base-calling quality" expanded="true">
                    <param name="min_bq" argument="--min-bq" type="integer" value="6"
                        label="Minimum baseQ"
                        help="For variant calling at any given site, do not consider reads for which the base at that site has a base quality less than this value (default: 6)" />
                    <param name="min_alt_bq" argument="--min-alt-bq" type="integer" min="0" value="6"
                        label="Minimum baseQ for alternate bases"
                        help="For variant calling at any given site, do not consider reads that support a non-reference allele at the site if that base has a base quality less than this value (default: 6). Note: this setting will have no effect if the specified value is less than the general Minimum baseQ above." />
                    <conditional name="alt_bq">
                        <param name="modify" type="select"
                            label="Base quality to use for alternate bases"
                            help="After filtering reads according to the Minimum baseQ settings above, you can choose to modify the base qualities of the non-reference bases in the surviving reads before performing further calculations.">
                            <option value="">Use original base qualities</option>
                            <option value="avg">Use the median of the reference base qualities at the site</option>
                            <option value="custom">Use a fixed custom value</option>
                        </param>
                        <when value="" />
                        <when value="avg">
                            <!-- the "median" choice is passed to the command line as a def-alt-bq value of -1 -->
                            <param name="def_alt_bq" type="hidden" value="-1" />
                        </when>
                        <when value="custom">
                            <param name="def_alt_bq" argument="--def-alt-bq" type="integer" min="1" value="1"
                                label="Overwrite original base qualities with this value" />
                        </when>
                    </conditional>
                </section>
                <section name="align_quals" title="Base alignment quality" expanded="true"
                    help="Choose here whether you want to incorporate base and/or indel alignment qualities into lofreq's joint quality model. If you have previously computed and stored (using lofreq alnqual) any of these quality scores into your input dataset, you can tell the tool to reuse them. Alternatively, the tool can calculate the necessary scores on the fly.">
                    <conditional name="alnqual">
                        <param name="use_alnqual" type="select" label="Consider base/indel alignment qualities during variant calling?">
                            <option value="">Yes, and prefer existing alignment qualities encoded in input</option>
                            <option value="--del-baq">Yes, (re)calculate alignment qualities on the fly and use them</option>
                            <option value="-A -B">No, don't make use of alignment qualities</option>
                        </param>
                        <when value="">
                            <expand macro="handle_existing_alnqual" />
                        </when>
                        <when value="--del-baq">
                            <expand macro="handle_alnqual" mode="Add and use" />
                        </when>
                        <when value="-A -B" />
                    </conditional>
                </section>
                <section name="map_quals" title="Mapping quality" expanded="true">
                    <param name="min_mq" argument="--min-mq" type="integer" value="0"
                        label="Minimum mapping quality"
                        help="For variant calling at any given site, do not consider reads with a mapping quality (MAPQ) less than this value (default: 0 = do not filter on read mapping quality)." />
                    <conditional name="use_mq">
                        <param name="no_mq" argument="--no-mq" type="select" label="Consider mapping quality during variant calling?">
                            <option value="">Yes, incorporate MAPQ into joint quality score</option>
                            <option value="--no-mq">No, ignore MAPQ scores during variant calling</option>
                        </param>
                        <when value="">
                            <param name="max_mq" argument="--max-mq" type="integer" value="255"
                                label="Maximum mapping quality"
                                help="For the joint quality model at any site, cap the mapping quality of reads at this value (default: 255 = do not cap mapping qualities). Notes: 1) You cannot cap mapping qualities at less than the minimum MAPQ defined above. If you try, MAPQs will be capped at that minimum quality instead. 2) The special MAPQ value 255 is used by many tools to indicate undefined mapping quality, and lofreq call will ignore such reads during variant calling. Capping, however, will turn 255 into a regular MAPQ score. Thus, if you need to avoid using such reads, you should filter out MAPQ 255 reads from your input data with other tools before using lofreq call with MAPQ capping."/>
                        </when>
                        <when value="--no-mq">
                            <param name="max_mq" type="hidden" value="" />
                        </when>
                    </conditional>
                </section>
                <section name="source_qual" title="Source quality" expanded="true">
                    <conditional name="use_src_qual">
                        <param argument="--src-qual" name="src_qual" type="select" label="Compute source quality and consider it during variant calling">
                            <option value="">No, don't incorporate source quality into joint quality score</option>
                            <option value="--src-qual">Yes, compute source quality and merge it into joint quality score</option>
                        </param>
                        <when value="" />
                        <when value="--src-qual">
                            <param name="def_nm_q" argument="--def-nm-q" type="integer" min="-1" value="-1"
                                label="Replace non-match base qualities with this value"
                                help="For the calculation of the source quality, replace all base qualities of non-match bases with this value (default: -1 = use original base qualities)." />
                            <param name="ign_vcf" argument="--ign-vcf" type="data" format="vcf,vcf_bgzip" optional="true" multiple="true"
                                label="VCF input(s) with variants to ignore for source quality computation"
                                help="Mismatches caused by known true variants in your samples should not lower the source quality estimate. If any read in your input has a base that is non-reference, but supports one of the known variants in the specified VCF datasets, that base will not be treated as a mismatch." />
                        </when>
                    </conditional>
                </section>
                <section name="joint_qual" title="Joint quality" expanded="true">
                    <param name="min_jq" argument="--min-jq" type="integer" min="0" value="0"
                        label="Minimum joinedQ"
                        help="At any site, do not use reads for variant calling, if their calculated joint quality at that site is lower than this value (default: 0 = do not filter based on joint quality)" />
                    <param name="min_alt_jq" argument="--min-alt-jq" type="integer" min="0" value="0"
                        label="Minimum joinedQ for alternate bases"
                        help="At any site, do not use reads for variant calling, if they support a non-reference allele at that site and their calculated joint quality at the site is lower than this value (default: 0 = do not filter based on joint quality). Note: this setting has no effect if the specified value is smaller than the general Minimum joinedQ set above." />
                    <!-- def-alt-jq==-1, though documented, is currently not implemented -->
                    <param name="def_alt_jq" argument="--def-alt-jq" type="integer" min="0" value="0"
                        label="Overwrite joinedQs of alternate bases with this value"
                        help="After filtering according to the Minimum joinedQ settings above, set the joint quality values for all reads surviving filtering and supporting a non-reference allele to this value (default: 0 = use the original calculated joint quality)." />
                </section>
            </when>
        </conditional>
        <!--
            Every branch defines the same three parameters (sig, bonf, others),
            which the command template reads directly. The preset branches
            supply them as hidden values; only "set_custom" exposes them.
        -->
        <conditional name="filter_control">
            <param name="filter_type" type="select" label="Variant filter parameters">
                <option value="set_all_off">Strictly no filtering</option>
                <option value="set_no_default">Preset QUAL score-based filtering</option>
                <option value="set_lofreq_standard" selected="true">Preset filtering on QUAL score + coverage + strand bias (lofreq call default)</option>
                <option value="set_custom">Custom filter settings/combinations</option>
            </param>
            <when value="set_all_off">
                <param name="sig" type="hidden" value="1" />
                <param name="bonf" type="hidden" value="1" />
                <param name="others" type="hidden" value="--no-default-filter" />
            </when>
            <when value="set_no_default">
                <param name="sig" type="hidden" value="0.01" />
                <param name="bonf" type="hidden" value="dynamic" />
                <param name="others" type="hidden" value="--no-default-filter" />
            </when>
            <when value="set_lofreq_standard">
                <param name="sig" type="hidden" value="0.01" />
                <param name="bonf" type="hidden" value="dynamic" />
                <param name="others" type="hidden" value="" />
            </when>
            <when value="set_custom">
                <param name="sig" type="float" value="0.01" min="0" max="1" label="Significance threshold for calls"/>
                <param name="bonf" type="integer" value="0" min="0"
                    label="Bonferroni correction factor for multiple testing"
                    help="Set to zero to determine dynamically from actual number of variant tests performed. Dynamic detection will calculate separate correction factors for SNVs and indels" />
                <param name="others" type="boolean" truevalue="" falsevalue="--no-default-filter" checked="true"
                    label="Apply default coverage and strand-bias filter?"
                    help="" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="variants" from_work_dir="variants.vcf" format="vcf" />
    </outputs>
    <tests>
        <!-- all defaults; also checks the filter options on the generated command line -->
        <test>
            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
            <param name="ref_selector" value="history" />
            <param name="ref" ftype="fasta" value="pBR322.fa" />
            <!-- lines_diff="4" should be sufficient, but https://github.com/CSB5/lofreq/issues/131 -->
            <output name="variants" file="call-out1.vcf" lines_diff="6" />
            <assert_command>
                <has_text text="--sig 0.01"/>
                <has_text text="--bonf dynamic"/>
                <has_text text="--no-default-filter" negate="true"/>
            </assert_command>
        </test>
        <!-- explicit call options left at their default values; same expected output as above -->
        <test>
            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
            <param name="ref_selector" value="history" />
            <param name="ref" ftype="fasta" value="pBR322.fa" />
            <conditional name="call_control">
                <param name="set_call_options" value="yes" />
            </conditional>
            <!-- lines_diff="4" should be sufficient, but https://github.com/CSB5/lofreq/issues/131 -->
            <output name="variants" file="call-out1.vcf" lines_diff="6" />
        </test>
        <!-- source quality enabled, with a VCF of variants to ignore -->
        <test>
            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
            <param name="ref_selector" value="history" />
            <param name="ref" ftype="fasta" value="pBR322.fa" />
            <conditional name="call_control">
                <param name="set_call_options" value="yes" />
                <section name="source_qual">
                    <conditional name="use_src_qual">
                        <param name="src_qual" value="--src-qual" />
                        <param name="def_nm_q" value="40" />
                        <param name="ign_vcf" ftype="vcf" value="call-out1.vcf" />
                    </conditional>
                </section>
            </conditional>
            <!-- lines_diff="4" should be sufficient, but https://github.com/CSB5/lofreq/issues/131 -->
            <output name="variants" file="call-out1.vcf" lines_diff="6" />
        </test>
        <!-- all filters switched off -->
        <test>
            <param name="reads" ftype="bam" value="lofreq-in1.bam" />
            <param name="ref_selector" value="history" />
            <param name="ref" ftype="fasta" value="pBR322.fa" />
            <conditional name="filter_control">
                <param name="filter_type" value="set_all_off" />
            </conditional>
            <output name="variants" file="call-out2.vcf" lines_diff="4" />
        </test>
        <!-- indel-only calling with all filters switched off -->
        <test>
            <param name="reads" ftype="bam" value="indelqual-out3.bam" />
            <param name="ref_selector" value="history" />
            <param name="ref" ftype="fasta" value="pBR322.fa" />
            <param name="variant_types" value="--call-indels --only-indels" />
            <conditional name="filter_control">
                <param name="filter_type" value="set_all_off" />
            </conditional>
            <output name="variants" file="indel-call-out.vcf" lines_diff="4" />
        </test>
    </tests>
    <help><![CDATA[
lofreq call: call variants from BAM file

LoFreq is a fast and sensitive variant-caller for inferring SNVs and indels from
next-generation sequencing data. It makes full use of base-call qualities and
other sources of errors inherent in sequencing, which are usually ignored by
other methods or only used for filtering.

LoFreq can run on almost any type of aligned sequencing data since no machine-
or sequencing-technology dependent thresholds are used. It automatically adapts
to changes in coverage and sequencing quality and can therefore be applied to a
variety of data-sets e.g. viral/quasispecies, bacterial, metagenomics or
somatic data.

While the tool will often give reasonable results with default settings a
variety of options let you control its exact behavior. These advanced options
can be subdivided into those affecting variant calling and those affecting
posterior filtering of the results.

**Variant calling parameters**

At the heart of LoFreq's variant caller is a **joint quality score** that is
computed for every site in every read (that survives filtering) and that
combines some or all of the following read and base quality measures:

- Base/indel quality

  For any read, this is the Phred-scaled likelihood that the base mapped to a
  given site does not represent a sequencing error. For every base, this score
  got computed by the base caller of your sequencing platform and got
  incorporated into your input dataset during read alignment.

  For insertions/deletions this is defined, analogously, as the Phred-scaled
  likelihood that any inserted/deleted base is real, however, you are
  responsible for adding indel qualities, which are required for indel calling
  with lofreq, to your input. For doing so, you can use ``lofreq indelqual``.

- Base/indel alignment quality

  For any read, this is the Phred-scaled likelihood that the read's base or
  indel mapped to a given reference genome position is mapped to this position
  correctly. The tool can calculate these scores for you on the fly.
  Alternatively, you can precalculate them using ``lofreq alnqual``, which will
  incorporate them into your input dataset.

- Mapping quality

  The Phred-scaled likelihood that the read got mapped to the correct place in
  the reference genome. This score got incorporated into your input dataset by
  the aligner you used to map your reads.

- Source quality

  This is the Phred-scaled likelihood that the given read comes from the
  reference genome. The tool can calculate this score for you.

**Variant filter parameters**

After generating a list of called variants, the tool can filter this list based
on:

- the statistical significance of the variant calls
- strand-bias of reads supporting the variant
- coverage of the variant site

While posterior filtering can help reduce false-positive variant calls, please
note that the separate ``lofreq filter``, which can be run on the output of
``lofreq call`` has many more options for configuring filters.

These are the different filter settings supported by the tool:

*Preset filtering on QUAL score + coverage + strand bias*

For variants to pass this filter, the following is required:

- statistical significance of the variant call with a pvalue < 0.01 based on
  the retransformed QUAL score of the variant and multiple-testing corrected
  using a dynamically determined Bonferroni factor (based on the number of
  overall variants considered during calling).
- A strand-bias in supporting reads not significant under a FDR-corrected
  p value of 0.001 and 85% of supporting reads mapped to the same strand of the
  genome.
- A coverage of the variant site of at least 10x.

*Preset QUAL score-based filtering*

Same QUAL-based significance filter as the default, but without the strand-bias
and coverage criteria

*Strictly no filtering*

Do not apply any filters, but produce the original list of all called variants.
You will almost always want to use ``lofreq filter`` to process the resulting
output.

*Custom filter settings/combinations*

Lets you define your own QUAL-based significance filter and, optionally,
combine it with the default strand-bias and coverage filters.
    ]]></help>
    <expand macro="citations" />
</tool>