view antismash.xml @ 0:5db064bbb3be draft

Imported from capsule None
author bgruening
date Tue, 15 Jul 2014 14:34:55 -0400
parents
children 593bb8f5488b
line wrap: on
line source

<tool id="antismash" name="Secondary Metabolites" version="2.0.2.2">
    <description>and Antibiotics Analysis (antiSMASH)</description>
    <!--
        Software packages the tool depends on.
        Every package except glimmer is declared twice: once pinned to an exact
        version and once without a version attribute. glimmer is declared only
        without a version.
        NOTE(review): the unversioned duplicates look like a fallback for
        installations where the pinned version is not available; confirm before
        removing them.
    -->
    <requirements>
        <requirement type="package" version="3.0">hmmer</requirement>
        <requirement type="package">hmmer</requirement>
        <requirement type="package" version="2.2.28">blast+</requirement>
        <requirement type="package">blast+</requirement>
        <requirement type="package" version="3.8.31">muscle</requirement>
        <requirement type="package">muscle</requirement>
        <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement>
        <requirement type="package">straight.plugin</requirement>
        <requirement type="package" version="1.62">biopython</requirement>
        <requirement type="package">biopython</requirement>
        <requirement type="package" version="1.2.6">pyquery</requirement>
        <requirement type="package">pyquery</requirement>
        <requirement type="package" version="0.1.2">helperlibs</requirement>
        <requirement type="package">helperlibs</requirement>
        <requirement type="package" version="0.9">cssselect</requirement>
        <requirement type="package">cssselect</requirement>
        <requirement type="package" version="2.0.2">antismash</requirement>
        <requirement type="package">antismash</requirement>
        <!-- No pinned version is declared for glimmer. -->
        <requirement type="package">glimmer</requirement>
    </requirements>
    <command>
        ## Cheetah template that builds the shell command line for one antiSMASH run.
        ## Steps: link the input, run antiSMASH into the extra-files folder of the
        ## HTML output, then copy/pack the requested result files into the
        ## individual Galaxy output datasets. Shell statements are separated by ';'.
        #import os, glob
        #set $outputfolder = $html.files_path
        #if str($infile.ext) == 'genbank':
            #set $file_extension = 'gb'
        #else:
            ## TODO add embl as input file
            #set $file_extension = 'gb'
        #end if
        ## Values of the "Additional outputs" select, split once and reused below.
        ## Exact list membership avoids substring matches such as
        ## 'archive' inside 'archive_svgs'.
        #set $selected_outputs = str($outputs).split(',')

        ## antiSMASH gets a file name with a recognisable extension
        ln -s $infile #echo 'input_tempfile.' + $file_extension#;
        mkdir -p $outputfolder;
        run_antismash.py 
            --cpus "\${GALAXY_SLOTS:-12}"
            ## With no cluster type ticked the parameter renders as "None";
            ## omit the option in that case so antiSMASH uses its own default
            ## instead of receiving '--enable None'.
            #if str($types) != "None":
                --enable $types
            #end if
            --input-type 'nucl'
            $smcogs
            $clusterblast
            $subclusterblast
            $inclusive
            $full_hmmer
            $full_blast
            $eukaryotic


            #if str($pfam_database) != "None":
                --pfamdir $pfam_database.fields.path
            #end if

            ##--debug

            --disable-embl
            --outputfolder $outputfolder

            #echo 'input_tempfile.' + $file_extension#

            ## leave out the start and end features, it can be easily replaced with Galaxy tools
            ##--from START          Start analysis at nucleotide specified
            ##--to END

        ## merge stderr into stdout
        2>&amp;1

        ##
        ## shuffling files to create the correct outputs for Galaxy
        ##

        ## html output
        ;
        cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null
        
        ## gene clusters
        #if 'geneclusterprots_tabular' in $selected_outputs:
            ;
            cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null
        #end if

        ## The glob may match more than one file; 'cp' with several sources and a
        ## file target fails, so concatenate them (same approach as the GenBank output).
        #if 'geneclusterprots_fasta' in $selected_outputs:
            ;
            cat #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# > $geneclusterprots_fasta 2> /dev/null
        #end if


        ##SVG images
        #if 'archive_svgs' in $selected_outputs:
            ;
            ## '&amp;&amp;' keeps tar from running in the wrong directory if the cd fails
            cd #echo os.path.join($outputfolder, 'svg')# &amp;&amp;
            ## test the rendered flag string: it is empty when the checkbox is off
            #if str($clusterblast):
                tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null
            #else:
                tar cfz $archive_svgs genecluster*
            #end if
        #end if

        ##all files in a archive
        #if 'archive' in $selected_outputs:
            ;
            cd $outputfolder &amp;&amp;
            tar cf $archive *.zip 2> /dev/null
        #end if

        ## genbank
        #if 'gb' in $selected_outputs:
            ;
            cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null
        #end if

    </command>
    <!--
        User-facing parameters. The value of each boolean/select flag parameter
        is the literal antiSMASH command-line option, or an empty string when
        the option is switched off, and is inserted verbatim into the command.
    -->
    <inputs>
        <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>

        <param name="eukaryotic" type="select" label="Origin of DNA">
            <option value="" selected="true">Prokaryotic</option>
            <option value="--eukaryotic">Eukaryotic</option>
        </param>

        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
            help="(--clusterblast)"
            truevalue="--clusterblast" falsevalue="" checked="true" />
        <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
            help="(--subclusterblast)"
            truevalue="--subclusterblast" falsevalue="" checked="false" />
        <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
            help="(--smcogs)"
            truevalue="--smcogs" falsevalue="" checked="true" />

        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
            help="(--full-blast)"
            truevalue="--full-blast" falsevalue="" checked="false" />
        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
            help="(--full-hmmer)"
            truevalue="--full-hmmer" falsevalue="" checked="false" />

        <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection"
            help="(--inclusive)"
            truevalue="--inclusive" falsevalue="" checked="false" />

        <!-- Entries come from antismash.loc; the "path" column is handed to the pfamdir option. -->
        <param name="pfam_database" type="select" optional="true" label="Pfam database"
            help="Directory containing the Pfam profile HMM database (--pfamdir)">
            <options from_file="antismash.loc">
              <column name="value" index="0"/>
              <column name="name" index="1"/>
              <column name="path" index="2"/>
            </options>
        </param>

        <!-- The selected values are passed as one comma-separated list to the enable option. -->
        <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search">
            <option value="t1pks" selected="true">type I polyketide synthases</option>
            <option value="t2pks" selected="true">type II polyketide synthases</option>
            <option value="t3pks" selected="true">type III polyketide synthases</option>
            <option value="t4pks" selected="true">type IV polyketide synthases</option>
            <option value="transatpks" selected="true">trans-AT PKS</option>
            <option value="nrps" selected="true">nonribosomal peptide synthetases</option>
            <option value="terpene" selected="true">terpene synthases</option>
            <option value="lantipeptide" selected="true">lantipeptides</option>
            <option value="bacteriocin" selected="true">bacteriocins</option>
            <option value="blactam" selected="true">beta-lactams</option>
            <option value="amglyccycl" selected="true">aminoglycosides / aminocyclitols</option>
            <option value="aminocoumarin" selected="true">aminocoumarins</option>
            <option value="siderophore" selected="true">siderophores</option>
            <option value="ectoine" selected="true">ectoines</option>
            <option value="butyrolactone" selected="true">butyrolactones</option>
            <option value="indole" selected="true">indoles</option>
            <option value="nucleoside" selected="true">nucleosides</option>
            <option value="phosphoglycolipid" selected="true">phosphoglycolipids</option>
            <option value="oligosaccharide" selected="true">oligosaccharides</option>
            <option value="furan" selected="true">furans</option>
            <option value="hserlactone" selected="true">hserlactones</option>
            <option value="thiopeptide" selected="true">thiopeptides</option>
            <option value="phenazine" selected="true">phenazines</option>
            <option value="phosphonate" selected="true">phosphonates</option>
            <option value="other" selected="true">others</option>
        </param>

        <!-- Each value here is referenced by an output filter and by the command template. -->
        <param name="outputs" type="select" multiple="true" label="Additional outputs">
            <option value="geneclusterprots_fasta" selected="true">Gene cluster proteins (FASTA)</option>
            <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option>
            <option value="archive_svgs">All clusters as image (compressed)</option>
            <option value="archive">All files compressed</option>
            <option value="gb">Annotated genome (GenBank)</option>
        </param>

    </inputs>
    <!--
        Output datasets. Apart from the HTML report, every dataset is created
        only when its value is ticked in the "outputs" parameter. Each filter
        first checks that a selection exists at all, so an empty selection
        (outputs is None) evaluates to false instead of raising a TypeError.
    -->
    <outputs>
        <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gene Cluster Proteins, FASTA)">
          <filter>outputs and 'geneclusterprots_fasta' in outputs</filter>
        </data>
        <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gene Cluster Proteins, tabular)">
          <filter>outputs and 'geneclusterprots_tabular' in outputs</filter>
        </data>
        <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
          <filter>outputs and 'archive' in outputs</filter>
        </data>
        <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
          <filter>outputs and 'archive_svgs' in outputs</filter>
        </data>
        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
          <!-- html is default output at any time.
          <filter>'html' in outputs</filter>
          -->
        </data>
        <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)">
          <filter>outputs and 'gb' in outputs</filter>
        </data>
    </outputs>
  <help>
    
.. class:: infomark

**What it does**

antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. 
It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.


**Input**

The ideal input for antiSMASH is an annotated nucleotide file in GenBank format. If no annotation is available, 
we recommend running your sequence through an annotation pipeline like RAST or the one included in Galaxy.


There are several optional analyses that may or may not be run on your sequence.
Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a 
query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify 
the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.


Also available is the analysis of secondary metabolism gene families (smCOGs). 
This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene 
family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. 
Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.


For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. 
Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage 
(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). 
Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.


**References**

Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, 
Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466.

http://antismash.secondarymetabolites.org/help.html


Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash

  </help>
</tool>