Mercurial > repos > bgruening > nanopolish_variants
view nanopolish_variants.xml @ 8:bec636361cfd draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/nanopolish commit de2370d1a385731b3c65f1dcc44e7b8558da8fd4
author | bgruening |
---|---|
date | Thu, 30 Nov 2023 17:57:59 +0000 |
parents | 63af3144371a |
children |
line wrap: on
line source
<tool id="nanopolish_variants" name="Nanopolish variants" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper around "nanopolish variants" followed by "nanopolish vcf2fasta".
        Produces a VCF of called variants and a polished FASTA built from that VCF.
        Version tokens, requirements, input staging and citations come from macros.xml.
    -->
    <description>- Find SNPs of basecalled merged Nanopore reads and polishes the consensus sequences</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Input staging token from macros.xml; presumably provides reads.fasta,
        ## reads.bam and genome.fa in the working directory (confirm in macros.xml).
        @PREPROCESS_INPUTS@
        nanopolish variants
            -r reads.fasta
            -b reads.bam
            -g genome.fa
            -o variants.vcf
            #if $consensus:
                --consensus
            #end if
            $snps
            $verbose
            $homopolymer
            $faster
            $all_bases
            ## The numeric tuning options are optional inputs: emit a flag only when a
            ## value is present so an emptied field never leaves a dangling option.
            #if str($min_candidate_frequency):
                -m $min_candidate_frequency
            #end if
            #if str($min_candidate_depth):
                -d $min_candidate_depth
            #end if
            #if str($max_haplotypes):
                -x $max_haplotypes
            #end if
            #if str($max_rounds):
                --max-rounds $max_rounds
            #end if
            --threads "\${GALAXY_SLOTS:-4}"
            #if str($min_flanking_sequence):
                --min-flanking-sequence $min_flanking_sequence
            #end if
            ## A ploidy of -1 is the "disabled" sentinel declared on the input.
            #if str($ploidy) and str($ploidy) != '-1':
                --ploidy $ploidy
            #end if
            #if $w and str($w).strip():
                -w '$w'
            #end if
            #if $methylation_aware and str($methylation_aware).strip():
                -q '$methylation_aware'
            #end if
            #if $adv.input_events_bam:
                -e '$adv.input_events_bam'
            #end if
            #if $adv.input_genotype:
                --genotype '$adv.input_genotype'
            #end if
            #if $adv.input_candidates:
                -c '$adv.input_candidates'
            #end if
            #if $adv.input_alt_bc_bam:
                -a '$adv.input_alt_bc_bam'
            #end if
            ## The models file is declared inside the adv section, so the value must be
            ## read through that section just like the guard does.
            #if $adv.input_models_fofn:
                --models-fofn '$adv.input_models_fofn'
            #end if
        &&
        nanopolish vcf2fasta
            --skip-checks
            -g genome.fa
            variants.vcf
            > polished.fa
    ]]></command>
    <inputs>
        <!-- index inputs -->
        <param type="data" name="input_merged" format="fasta,fastq" label="Basecalled merged reads.fa"/>
        <param type="data" name="input_reads_raw" format="fast5.tar.gz,fast5.tar.bz2,fast5.tar" label="Flat archive file of raw fast5 files"/>

        <!-- variants consensus inputs -->
        <param type="data" argument="-b" format="bam" label="Reads aligned to the reference genome" />
        <!-- Reference genome: either a server-side cached FASTA or a dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Load reference genome from">
                <option value="cached">Local cache</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="REFERENCE_SEQUENCE">
                    <options from_data_table="all_fasta">
                    </options>
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="REFERENCE_SEQUENCE; You can upload a FASTA sequence to the history and use it as reference" />
            </when>
        </conditional>

        <section name="adv" title="Optional data inputs">
            <!-- optional inputs; the command template reads these as adv.NAME -->
            <param type="data" name="input_seq_summary" format="txt" optional="true" label="Sequencing summary file from albacore" help="(-s)"/>
            <param type="data" name="input_events_bam" format="bam" optional="true" label="Events aligned to the reference genome" help="(-e)" />
            <param type="data" name="input_genotype" format="vcf" optional="true" label="Call genotypes for the variants in the vcf file" help="(--genotype)" />
            <param type="data" name="input_candidates" format="vcf" optional="true" label="Use variant candidates, rather than discovering them from aligned reads" help="(-c)" />
            <param type="data" name="input_alt_bc_bam" format="bam" optional="true" label="Alternative basecaller used that does not output event annotations" help="(-a)" />
            <param type="data" name="input_models_fofn" format="txt" optional="true" label="Read alternative k-mer models" help="(--models-fofn)" />
        </section>

        <!-- optional params -->
        <param argument="-w" type="text" optional="true" label="find variants in window of region chromosome:start-end" />
        <param name="methylation_aware" type="text" optional="true" label="methylation aware polishing and test motifs given" help="(-q)"/>
        <param name="min_candidate_frequency" type="float" optional="true" value="0.2" label="Extract if the variant frequency is at least F" help="(-m)"/>
        <param name="min_candidate_depth" type="integer" optional="true" value="20" label="Extract if the depth is at least D" help="(-d)"/>
        <param name="max_haplotypes" type="integer" optional="true" value="1000" label="Consider at most N haplotype combinations" help="(-x)"/>
        <param name="max_rounds" type="integer" optional="true" value="50" label="Perform N rounds of consensus sequence improvement" help="(--max-rounds)"/>
        <param name="ploidy" type="integer" optional="true" value="-1" label="The ploidy level of the sequenced genome. -1:disabled" help="(-p)"/>
        <param name="min_flanking_sequence" type="integer" optional="true" value="" label="Distance from alignment end to calculate variants" help="(--min-flanking-sequence)"/>

        <!-- optional flags; each boolean expands to its flag text or to nothing -->
        <param argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" checked="true" label="Consensus calling mode and output polished sequence" />
        <param argument="--snps" type="boolean" truevalue="--snps" falsevalue="" checked="false" label="only call SNPs"/>
        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="verbose output"/>
        <param name="homopolymer" type="boolean" argument="--fix-homopolymers" truevalue="--fix-homopolymers" falsevalue="" checked="false" label="homopolymer caller" />
        <param argument="--faster" type="boolean" truevalue="--faster" falsevalue="" checked="false" label="speedup while slightly reducing consensus accuracy"/>
        <param name="all_bases" type="boolean" argument="--calculate-all-support" truevalue="--calculate-all-support" falsevalue="" checked="false" label="calculate the support of the 3 other possible bases" />
    </inputs>

    <outputs>
        <!-- variants consensus outputs -->
        <data name="output_polished" format="fasta" from_work_dir="polished.fa" label="polished sequence by consensus calling mode" />
        <data name="output_variants" format="vcf" from_work_dir="variants.vcf" label="Computed variants"/>
    </outputs>

    <tests>
        <!-- Default options, raw signal supplied as a gzip-compressed tar archive. -->
        <test>
            <param name="input_merged" ftype="fasta" value="reads.fasta" />
            <param name="input_reads_raw" ftype="fast5.tar.gz" value="fast5_files.tar.gz" />
            <param name="b" value="reads.sorted.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="draft_single_seq.fa" />
            <param name="w" value="tig00000001:200000-202000" />
            <output name="output_polished" file="polished.fa" />
            <output name="output_variants">
                <assert_contents>
                    <has_text text="TotalReads" />
                    <has_text text="AlleleCount" />
                    <has_text text="SupportFraction" />
                    <has_text text="200061" />
                    <has_text text="200776" />
                    <has_text text="201588" />
                    <has_text text="tig00000001" />
                    <has_n_lines n="27" />
                </assert_contents>
            </output>
        </test>
        <!-- Same run, raw signal supplied as a bzip2-compressed tar archive. -->
        <test>
            <param name="input_merged" ftype="fasta" value="reads.fasta" />
            <param name="input_reads_raw" ftype="fast5.tar.bz2" value="fast5_files.tar.bz2" />
            <param name="b" value="reads.sorted.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="draft_single_seq.fa" />
            <param name="w" value="tig00000001:200000-202000" />
            <output name="output_polished" file="t3_polished.fa" />
            <output name="output_variants">
                <assert_contents>
                    <has_text text="TotalReads" />
                    <has_text text="AlleleCount" />
                    <has_text text="SupportFraction" />
                    <has_text text="200061" />
                    <has_text text="200776" />
                    <has_text text="201588" />
                    <has_text text="tig00000001" />
                    <has_n_lines n="27" />
                </assert_contents>
            </output>
        </test>
        <!-- Same run, raw signal supplied as an uncompressed tar archive. -->
        <test>
            <param name="input_merged" ftype="fasta" value="reads.fasta" />
            <param name="input_reads_raw" ftype="fast5.tar" value="fast5_files.tar" />
            <param name="b" value="reads.sorted.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="draft_single_seq.fa" />
            <param name="w" value="tig00000001:200000-202000" />
            <output name="output_polished" file="t4_polished.fa" />
            <output name="output_variants">
                <assert_contents>
                    <has_text text="TotalReads" />
                    <has_text text="AlleleCount" />
                    <has_text text="SupportFraction" />
                    <has_text text="200061" />
                    <has_text text="200776" />
                    <has_text text="201588" />
                    <has_text text="tig00000001" />
                    <has_n_lines n="27" />
                </assert_contents>
            </output>
        </test>
        <!-- Non-default options: ploidy 2, SNP-only, faster mode, all-base support,
             consensus mode off, wider window and an explicit flanking distance. -->
        <test>
            <param name="input_merged" ftype="fasta" value="reads.fasta" />
            <param name="input_reads_raw" ftype="fast5.tar.gz" value="fast5_files.tar.gz" />
            <param name="b" value="reads.sorted.bam" />
            <param name="reference_source_selector" value="history" />
            <param name="ref_file" value="draft_single_seq.fa" />
            <param name="w" value="tig00000001:198000-202000" />
            <param name="ploidy" value="2" />
            <param name="snps" value="true" />
            <param name="faster" value="true" />
            <param name="all_bases" value="true" />
            <param name="consensus" value="false" />
            <param name="min_flanking_sequence" value="10" />
            <output name="output_polished" file="t2-polished.fa" />
            <output name="output_variants">
                <assert_contents>
                    <has_text text="TotalReads" />
                    <has_text text="AlleleCount" />
                    <has_text text="SupportFraction" />
                    <has_text text="tig00000001" />
                    <has_text text="198000-202000" />
                    <has_n_lines n="15" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Build an index mapping from basecalled reads to the signals measured by the sequencer and find SNPs using a signal-level HMM.

Tutorial and manual available at:
http://nanopolish.readthedocs.io/en/latest/quickstart_consensus.html
    ]]></help>
    <expand macro="citations" />
</tool>