diff nanopolish_variants.xml @ 0:639b3a5bb415 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/nanopolish commit 0e94011a4ea84bf4ae5c2079680a37540e022625
author bgruening
date Wed, 30 May 2018 11:56:35 -0400
parents
children 1ebed8b65752
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nanopolish_variants.xml	Wed May 30 11:56:35 2018 -0400
@@ -0,0 +1,163 @@
+<tool id="nanopolish_variants" name="Nanopolish variants" version="0.1.0">
+    <description>- Find SNPs of basecalled merged Nanopore reads and polishes the consensus sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        ln -s '$input_merged' reads.fasta && 
+        
+        #if $input_reads_raw.extension == 'fast5':
+            mkdir fast5_files && ln -s '$input_reads_raw' fast5_files/read1.fast5 &&
+        #else
+            ln -s '$input_reads_raw' fast5_files.tar.gz &&
+            mkdir fast5_files && tar -xzf fast5_files.tar.gz -C fast5_files &&
+        #end if
+
+        nanopolish index -d fast5_files/ reads.fasta &&
+        ln -s '$b' reads.bam &&
+        ln -s '${b.metadata.bam_index}' reads.bam.bai &&
+        ln -s '$g' genome.fa &&
+        
+        nanopolish variants
+        -r reads.fasta
+        -b reads.bam
+        -g genome.fa
+        -o variants.vcf
+        #if $consensus:
+            --consensus polished.fa
+        #end if
+
+        $snps
+        $verbose
+        $homopolymer
+        $faster
+        $all_bases
+
+        -m $min_candidate_frequency
+        -d $min_candidate_depth
+        -x $max_haplotypes
+        --max-rounds $max_rounds
+        #if $ploidy != -1:
+            --ploidy $ploidy
+        #end if
+
+        #if $w and str($w).strip():
+          -w "${w}"
+        #end if  
+        #if $methylation_aware and str($methylation_aware).strip():
+          -q "${methylation_aware}"
+        #end if           
+        
+        #if $adv.input_events_bam:
+          -e '$adv.input_events_bam'
+        #end if
+        #if $adv.input_genotype:
+          --genotype '$adv.input_genotype'
+        #end if
+        #if $adv.input_candidates:
+          -c '$adv.input_candidates'
+        #end if
+        #if $adv.input_alt_bc_bam:
+          -a '$adv.input_alt_bc_bam'
+        #end if
+        #if $adv.input_models_fofn:
+          --models-fofn '$input_models_fofn'
+        #end if
+       
+
+
+    ]]></command>
+    <inputs>
+      <!-- index inputs -->
+        <param type="data" name="input_merged" format="fasta,fastq" label="Basecalled merged reads.fa"/>
+        <param type="data" name="input_reads_raw" format="h5,fast5.tar.gz" label="Flat archive file of raw fast5 files"/>
+
+        <!-- variants consensus inputs -->
+        <param type="data" argument="-b" format="bam" label="Reads aligned to the reference genome" />
+        <param type="data" argument="-g" format="fasta" label="The reference genome"/>
+
+        <section name="adv" title="Optional data inputs">
+            <!-- optional inputs -->
+            <param type="data" name="input_seq_summary" format="txt" optional="true" label="Sequencing summary file from albacore" help="(-s)"/>       
+            <param type="data" name="input_events_bam" format="bam" optional="true" label="Events aligned to the reference genome" help="(-e)" />
+            <param type="data" name="input_genotype" format="vcf" optional="true" label="Call genotypes for the variants in the vcf file" help="(--genotype)" />
+            <param type="data" name="input_candidates" format="vcf" optional="true" 
+                    label="Use variant candidates, rather than discovering them from aligned reads" help="(-c)" />
+            <param type="data" name="input_alt_bc_bam" format="bam" optional="true" label="Alternative basecaller used that does not output event annotations" help="(-a)" />
+            <param type="data" name="input_models_fofn" format="txt" optional="true" label="Read alternative k-mer models" help="(--models-fofn)" />
+       </section>
+
+        <!-- optional params -->
+        <!-- optional params -->
+        <param argument="-w" type="text" optional="true"
+            label="find variants in window of region chromsome:start-end" />
+        <param name="methylation_aware" type="text" optional="true" label="methylation aware polishing and test motifs given" help="(-q)"/>
+        <param name="min_candidate_frequency" type="float" optional="true" value="0.2" label="Extarct if the variant frequency is at least F" help="(-m)"/>
+        <param name="min_candidate_depth" type="integer" optional="true" value="20" label="Extarct if the depth is at least D" help="(-d)"/>
+        <param name="max_haplotypes" type="integer" optional="true" value="1000" label="Consider at most N haplotype combinations" help="(-x)"/>
+        <param name="max_rounds" type="integer" optional="true" value="50" label="Perform N rounds of consensus sequence improvement" help="(--max_rounds)"/>
+        <param name="ploidy" type="integer" optional="true" value="-1" label="The ploidy level of the sequenced genome. -1:disabled" help="(-p)"/>
+
+        <!-- optional flags -->
+        <param argument="--consensus" type="boolean" truevalue="--consensus" falsevalue="" checked="true" label="Consensus calling mode and output polished sequence" />
+        <param argument="--snps" type="boolean" truevalue="--snps" falsevalue="" checked="false" label="only call SNPs"/>
+        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="verbose output"/>
+        <param name="homopolymer" type="boolean" argument="--fix-homopolymers" truevalue="--fix-homopolymers" falsevalue="" checked="false" label="homopolymer caller" />
+        <param argument="--faster" type="boolean" truevalue="--faster" falsevalue="" checked="false" 
+                label="speedup while slightly reducing consensus accuracy"/>
+        <param name="all_bases" type="boolean" argument="--calculate-all-support" truevalue="--calculate-all-support" falsevalue="" checked="false" 
+                label="calculate the support of the 3 other possible bases" />
+    </inputs>
+
+    <outputs>
+      <!-- variants consensus outputs -->
+        <data name="output_polished" format="fasta" from_work_dir="polished.fa" label="polished sequence by consensus calling mode" />
+        <data name="output_variants" format="vcf" from_work_dir="variants.vcf" label="Computed variants"/>
+    </outputs>
+    <tests>
+        <test>
+      <!-- index test -->
+            <param name="input_merged" ftype="fasta" value="reads.fasta" />
+            <param name="input_reads_raw" ftype="fast5.tar.gz" value="fast5_files.tar.gz" />
+            
+      <!-- variants consensus test -->
+            <param name="b" value="reads.sorted.bam" />
+            <param name="g" value="draft.fa" />
+            <param name="w" value="tig00000001:200000-202000" />
+            
+            <output name="output_polished" file="polished.fa" />
+            <output name="output_variants" file="variants.vcf"/>
+        </test>
+        <test>
+      <!-- index test -->
+            <param name="input_merged" ftype="fasta" value="reads.fasta" />
+            <param name="input_reads_raw" ftype="fast5.tar.gz" value="fast5_files.tar.gz" />
+            
+      <!-- variants consensus test -->
+            <param name="b" value="reads.sorted.bam" />
+            <param name="g" value="draft.fa" />
+            <param name="w" value="tig00000001:200000-202000" />
+            <param name="ploidy" value="2" />
+            <param name="snps" value="true" />            
+            <param name="faster" value="true" />            
+            <param name="all_bases" value="true" /> 
+            <param name="consensus" value="false" /> 
+            <output name="output_polished" file="t2-polished.fa" />
+            <output name="output_variants" file="t2-variants.vcf"/>
+        </test>
+
+    </tests>
+    <help><![CDATA[
+
+        Build an index mapping from basecalled reads to the signals measured by the sequencer
+        and
+        Find SNPs using a signal-level HMM
+
+        Tutorial and manual available at:
+        http://nanopolish.readthedocs.io/en/latest/quickstart_consensus.html
+
+
+           ]]></help>
+    <expand macro="citations" />
+</tool>