view gubbins.xml @ 0:637ec5d5368c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gubbins commit 98be6edc2544513347149b50e0cfb530811f890a
author iuc
date Fri, 23 Jun 2017 08:29:43 -0400
parents
children 96e6283e4745
line wrap: on
line source

<tool id="gubbins" name="Gubbins" version="0.1.0">
    <description>Recombination detection in Bacteria</description>

    <!-- Runtime dependency: the gubbins package, pinned to release 2.2.1. -->
    <requirements>
        <requirement version="2.2.1" type="package">gubbins</requirement>
    </requirements>

    <!-- Command whose output is recorded as the version of the wrapped program. -->
    <version_command>run_gubbins.py --version</version_command>
    
    <command detect_errors="exit_code"><![CDATA[
        ## Expose the input under a fixed name; the outputs section collects
        ## result files named foo.* from the job working directory.
        ln -s '$alignment_file' foo.aln &&

        run_gubbins.py

        ## Use the number of cores granted by Galaxy, falling back to one.
        --threads \${GALAXY_SLOTS:-1}

        ## Integer options are tested via str() rather than by truthiness,
        ## so an explicit 0 (for example a filter percentage of 0) is passed
        ## on the command line instead of being dropped silently.
        #if str($adv.iters) != ''
            -i '$adv.iters'
        #end if

        #if $adv.conv_meth
            -z '$adv.conv_meth'
        #end if

        ## The outgroup is free text and is only passed when it was filled in.
        #if $adv.outgroup
            -o '$adv.outgroup'
        #end if

        #if $really_adv.tree_builder
            -t '$really_adv.tree_builder'
        #end if

        #if str($really_adv.min_snps) != ''
            -m '$really_adv.min_snps'
        #end if

        #if str($really_adv.filter_percentage) != ''
            -f '$really_adv.filter_percentage'
        #end if

        #if str($really_adv.min_window_size) != ''
            -a '$really_adv.min_window_size'
        #end if

        #if str($really_adv.max_window_size) != ''
            -b '$really_adv.max_window_size'
        #end if

        ## Boolean parameter: renders as -d when ticked, as nothing otherwise.
        $really_adv.remove_duplicates

        foo.aln

    ]]></command>
    <inputs>
        <!-- The whole genome alignment to analyse. -->
        <param type="data" name="alignment_file" format="fasta" label="Whole genome alignment file" help="Whole genome alignment file in fasta format"/>

        <!-- Output file picker: each ticked value enables the dataset in the outputs section whose filter tests for it. -->
        <param type="select" name="outfiles" multiple="true" display="checkboxes" label="Select the required output files" help="Default selections are the Final Tree in Newick format, the Recombination Predictions in gff3 format and the Summary of SNP Distribution">
            <option value="ftree" selected="true">Final Tree in newick format</option>
            <option value="gff" selected="true">Recombination Predictions in gff3 format</option>
            <option value="vcf" selected="true">Summary of SNP Distribution in vcf format</option>
            <option value="recomb_embl">Recombination Predictions in embl format</option>
            <option value="fpoly">Filtered Polymorphic Sites in fasta format</option>
            <option value="ppoly">Filtered Polymorphic Sites in phylip format</option>
            <option value="stats">Per Branch Statistics in csv format</option>
            <option value="baseb">Base Branch Reconstruction in embl format</option>
        </param>

        <!-- A semi advanced section, really shouldn't have to change anything. -->
        <section name="adv" title="Advanced options" expanded="true">
            <!-- Passed as -i; a cap below one iteration is rejected in the form. -->
            <param name="iters" type="integer" label="Iterations" value="5" min="1" help="Maximum No. of iterations, default is 5" />
            <!-- Passed as -z: the criterion that decides when iterating stops. -->
            <param name="conv_meth" type="select" label="Convergence method" help="Criteria to use to know when to halt iterations">
                <option value="weighted_robinson_foulds" selected="true">Weighted Robinson Foulds</option>
                <option value="robinson_foulds">Robinson Foulds</option>
                <option value="recombination">Recombination</option>
            </param>
            <!-- Passed as -o; left out of the command line when empty. -->
            <param name="outgroup" type="text" label="Outgroup" value="" help="Outgroup name for rerooting. A list of comma separated names can be used if they form a clade."/>
        </section>

        <!-- A really advanced section, play with this at your peril! -->
        <section name="really_adv" title="Really advanced options - change these if you really know what you are doing." expanded="false">
            <!-- Passed as -t. -->
            <param name="tree_builder" type="select" label="Tree builder" help="Application to use for tree building, default RAxML">
                <option value="raxml" selected="true">RAxML</option>
                <option value="fasttree">FastTree</option>
                <option value="hybrid">Hybrid</option>
            </param>
            <!-- Passed as -m. -->
            <param name="min_snps" type="integer" label="Minimum SNPS" value="3" help="Min SNPs to identify a recombination block, default is 3" />
            <!-- Passed as -f; a percentage, so it is bounded to 0-100. -->
            <param name="filter_percentage" type="integer" label="Filter Percentage" value="25" min="0" max="100" help="Filter out taxa with more than this percentage of gaps, default is 25" />
            <!-- Passed as -a and -b respectively. -->
            <param name="min_window_size" type="integer" label="Minimum Window Size" value="100" help="Minimum window size, default 100" />
            <param name="max_window_size" type="integer" label="Maximum Window Size" value="10000" help="Maximum window size, default 10000" />
            <!-- Renders as -d on the command line when ticked, as nothing otherwise. -->
            <param name="remove_duplicates" type="boolean" label="Remove identical sequences" checked="false" falsevalue="" truevalue="-d" />
        </section>

    </inputs>
    
    <outputs>
        <!-- Each dataset is picked up from the job working directory and is only
             created when its key is ticked in the "outfiles" parameter. -->

        <!-- key: ftree -->
        <data name="final_tree" format="txt" from_work_dir="foo.final_tree.tre" label="${tool.name} on ${on_string} Final Tree">
            <filter>outfiles and 'ftree' in outfiles</filter>
        </data>

        <!-- key: gff -->
        <data name="recomb_pred_gff" format="gff3" from_work_dir="foo.recombination_predictions.gff" label="${tool.name} on ${on_string} Recombinations Prediction gff">
            <filter>outfiles and 'gff' in outfiles</filter>
        </data>

        <!-- key: recomb_embl -->
        <data name="recomb_pred_embl" format="embl" from_work_dir="foo.recombination_predictions.embl" label="${tool.name} on ${on_string} Recombinations Prediction embl">
            <filter>outfiles and 'recomb_embl' in outfiles</filter>
        </data>

        <!-- key: fpoly -->
        <data name="filt_polymorph_fna" format="fasta" from_work_dir="foo.filtered_polymorphic_sites.fasta" label="${tool.name} on ${on_string} Filtered Polymorphic Sites fasta">
            <filter>outfiles and 'fpoly' in outfiles</filter>
        </data>

        <!-- key: ppoly -->
        <data name="filt_polymorph_phy" format="phylip" from_work_dir="foo.filtered_polymorphic_sites.phylip" label="${tool.name} on ${on_string} Filtered Polymorphic Sites phylip">
            <filter>outfiles and 'ppoly' in outfiles</filter>
        </data>

        <!-- key: stats -->
        <data name="per_b_stat_csv" format="csv" from_work_dir="foo.per_branch_statistics.csv" label="${tool.name} on ${on_string} Per Branch Statistics csv">
            <filter>outfiles and 'stats' in outfiles</filter>
        </data>

        <!-- key: vcf -->
        <data name="sum_snp_vcf" format="vcf" from_work_dir="foo.summary_of_snp_distribution.vcf" label="${tool.name} on ${on_string} Summary of SNP Distribution vcf">
            <filter>outfiles and 'vcf' in outfiles</filter>
        </data>

        <!-- key: baseb -->
        <data name="base_branch_embl" format="embl" from_work_dir="foo.branch_base_reconstruction.embl" label="${tool.name} on ${on_string} Branch Base Reconstruction embl">
            <filter>outfiles and 'baseb' in outfiles</filter>
        </data>
    </outputs>
    
    
    <tests>
        <!-- Test 1: all defaults. The default "outfiles" selection (ftree, gff,
             vcf) must yield exactly three datasets. -->
        <test expect_num_outputs="3">
            <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
            <output name="recomb_pred_gff">
                <assert_contents>
                    <has_text text="##gff-version 3" />
                </assert_contents>
            </output>
            <output name="sum_snp_vcf">
                <assert_contents>
                    <has_text text="##fileformat=VCFv4.2" />
                </assert_contents>
            </output>
        </test>

        <!-- Test 2: non-default iteration cap (-i). -->
        <test expect_num_outputs="3">
            <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
            <param name="iters" value="3"/>
            <output name="recomb_pred_gff">
                <assert_contents>
                    <has_text text="##gff-version 3" />
                </assert_contents>
            </output>
            <output name="sum_snp_vcf">
                <assert_contents>
                    <has_text text="##fileformat=VCFv4.2" />
                </assert_contents>
            </output>
        </test>

        <!-- Test 3: non-default convergence criterion (-z). -->
        <test expect_num_outputs="3">
            <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
            <param name="conv_meth" value="recombination" />
            <output name="recomb_pred_gff">
                <assert_contents>
                    <has_text text="##gff-version 3" />
                </assert_contents>
            </output>
            <output name="sum_snp_vcf">
                <assert_contents>
                    <has_text text="##fileformat=VCFv4.2" />
                </assert_contents>
            </output>
        </test>

        <!-- Test 4: every output selected; all eight datasets must be created
             and each one is spot-checked for format-specific content. -->
        <test expect_num_outputs="8">
            <param name="alignment_file" value="multiple_recombinations.aln" ftype="fasta" />
            <param name="conv_meth" value="recombination" />
            <param name="outfiles" value="gff,vcf,ftree,recomb_embl,fpoly,ppoly,stats,baseb"/>
            <output name="recomb_pred_gff">
                <assert_contents>
                    <has_text text="##gff-version 3" />
                </assert_contents>
            </output>
            <output name="sum_snp_vcf">
                <assert_contents>
                    <has_text text="##fileformat=VCFv4.2" />
                </assert_contents>
            </output>
            <output name="filt_polymorph_fna">
                <assert_contents>
                    <has_text text=">sequence_1" />
                </assert_contents>
            </output>
            <output name="recomb_pred_embl">
                <assert_contents>
                    <has_text text="FT" />
                </assert_contents>
            </output>
            <output name="filt_polymorph_phy">
                <assert_contents>
                    <has_text text="sequence_1" />
                </assert_contents>
            </output>
            <output name="per_b_stat_csv">
                <assert_contents>
                    <has_text text="Node" />
                </assert_contents>
            </output>
            <output name="base_branch_embl">
                <assert_contents>
                    <has_text text="FT" />
                </assert_contents>
            </output>
            <output name="final_tree">
                <assert_contents>
                    <has_text text="((sequence_" />
                </assert_contents>
            </output>
        </test>
    </tests>
    
    
    <help><![CDATA[
**Gubbins**

Since the introduction of high-throughput, second-generation DNA sequencing technologies, there has been an enormous increase in the size of datasets being used for estimating bacterial population phylodynamics. Although many phylogenetic techniques are scalable to hundreds of bacterial genomes, methods which have been used for mitigating the effect of mechanisms of horizontal sequence transfer on phylogenetic reconstructions cannot cope with these new datasets. Gubbins (Genealogies Unbiased By recomBinations In Nucleotide Sequences) is an algorithm that iteratively identifies loci containing elevated densities of base substitutions while concurrently constructing a phylogeny based on the putative point mutations outside of these regions. Simulations demonstrate the algorithm generates highly accurate reconstructions under realistic models of short-term bacterial evolution, and can be run in only a few hours on alignments of hundreds of bacterial genome sequences.

**Running Gubbins**

To run Gubbins with default settings:

Supply a "fasta" genome alignment file and press execute.

**Other options** 

**Advanced**

*Iterations*, (-i)	

The maximum number of iterations to perform; the algorithm will stop earlier than this if it converges on the same tree in two successive iterations. Default is 5.

*Converge method*, (-z)

Criteria to use to know when to halt iterations [weighted_robinson_foulds|robinson_foulds|recombination]. Default is weighted_robinson_foulds.

*Outgroup*, (-o)	

The name of a sequence in the alignment on which to root the tree. A comma-separated list of names can be given if they form a clade.

**Really Advanced**

These options are here for completeness and you shouldn't need to change them from their defaults. You really need to know what you are doing before you use these.

*Tree builder*, (-t)

The algorithm to use in the construction of phylogenies in the analysis; can be 'raxml', to use RAxML, 'fasttree', to use FastTree, or 'hybrid', to use FastTree for the first iteration and RAxML in all subsequent iterations. Default is raxml.

*Filter Percentage*, (-f)

Filter out taxa with more than this percentage of missing data. Default is 25%.

*Minimum snps*, (-m)	

The minimum number of base substitutions required to identify a recombination. Default is 3.


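*Minimum window size*, (-a)

The minimum window size. Default is 100.

*Maximum window size*, (-b)

The maximum window size. Default is 10000.

*Remove identical sequences*, (-d)

Remove identical sequences. Off by default.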



**Output files**

* Recombination predictions in EMBL tab file format.


* Recombination predictions in GFF3 format


* Base substitution reconstruction in EMBL tab format.


* VCF file summarising the distribution of SNPs


* Per branch reporting of the base substitutions inside and outside recombination events.


* FASTA format alignment of filtered polymorphic sites used to generate the phylogeny in the final iteration.


* Phylip format alignment of filtered polymorphic sites used to generate the phylogeny in the final iteration.


* Final phylogenetic tree in newick format.

    ]]></help>
    
    
    <citations>
        <!-- A doi citation holds the bare DOI; a "doi:" prefix is not part of
             the identifier and can break the reference lookup. -->
        <citation type="doi">10.1093/nar/gku1196</citation>
    </citations>

</tool>