diff antismash.xml @ 0:5db064bbb3be draft

Imported from capsule None
author bgruening
date Tue, 15 Jul 2014 14:34:55 -0400
parents
children 593bb8f5488b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/antismash.xml	Tue Jul 15 14:34:55 2014 -0400
@@ -0,0 +1,253 @@
+<tool id="antismash" name="Secondary Metabolites" version="2.0.2.2">
+    <description>and Antibiotics Analysis (antiSMASH)</description>
+    <requirements>
+        <requirement type="package" version="3.0">hmmer</requirement>
+        <requirement type="package">hmmer</requirement>
+        <requirement type="package" version="2.2.28">blast+</requirement>
+        <requirement type="package">blast+</requirement>
+        <requirement type="package" version="3.8.31">muscle</requirement>
+        <requirement type="package">muscle</requirement>
+        <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement>
+        <requirement type="package">straight.plugin</requirement>
+        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="package">biopython</requirement>
+        <requirement type="package" version="1.2.6">pyquery</requirement>
+        <requirement type="package">pyquery</requirement>
+        <requirement type="package" version="0.1.2">helperlibs</requirement>
+        <requirement type="package">helperlibs</requirement>
+        <requirement type="package" version="0.9">cssselect</requirement>
+        <requirement type="package">cssselect</requirement>
+        <requirement type="package" version="2.0.2">antismash</requirement>
+        <requirement type="package">antismash</requirement>
+        <requirement type="package">glimmer</requirement>
+    </requirements>
+    <command>
+        #import os, glob
+        #set $outputfolder = $html.files_path
+        #if str($infile.ext) == 'genbank':
+            #set $file_extension = 'gb'
+        #else:
+            ## TODO add embl as input file
+            #set $file_extension = 'gb'
+        #end if
+
+        ln -s $infile #echo 'input_tempfile.' + $file_extension#;
+        mkdir -p $outputfolder;
+        run_antismash.py 
+            --cpus "\${GALAXY_SLOTS:-12}"
+            --enable $types
+            --input-type 'nucl'
+            $smcogs
+            $clusterblast
+            $subclusterblast
+            $inclusive
+            $full_hmmer
+            $full_blast
+            $eukaryotic
+
+
+            #if str($pfam_database) != "None":
+                --pfamdir $pfam_database.fields.path
+            #end if
+
+            ##--debug
+
+            --disable-embl
+            --outputfolder $outputfolder
+
+            #echo 'input_tempfile.' + $file_extension#
+
+            ## leave out the start and end features, it can be easily replaced with Galaxy tools
+            ##--from START          Start analysis at nucleotide specified
+            ##--to END
+
+        2>&#38;1
+
+        ##
+        ## shuffling files to create the correct outputs for Galaxy
+        ##
+
+        ## html output
+        ;
+        cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null
+        
+        ## gene clusters
+        #if 'geneclusterprots_tabular' in str($outputs).split(','):
+            ;
+            cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null
+        #end if
+
+        #if 'geneclusterprots_fasta' in str($outputs).split(','):
+            ;
+            cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta 2> /dev/null
+        #end if
+
+
+        ##SVG images
+        #if 'archive_svgs' in str($outputs).split(','):
+            ;
+            cd #echo os.path.join($outputfolder, 'svg')#
+            #if $clusterblast:
+                ;
+                tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null
+            #else:
+                ;
+                tar cfz $archive_svgs genecluster*
+            #end if
+        #end if
+
+        ##all files in a archive
+        #if 'archive' in str($outputs).split(','):
+            ;
+            cd $outputfolder;
+            tar cf $archive *.zip 2> /dev/null
+        #end if
+
+        ## genbank
+        #if 'gb' in str($outputs).split(','):
+            ;
+            cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null
+        #end if
+
+    </command>
+    <inputs>
+        <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>
+
+        <param name="eukaryotic" type="select" label="Origin of DNA">
+            <option value="" selected="True">Prokaryotic</option>
+            <option value="--eukaryotic">Eukaryotic</option>
+        </param>
+
+        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
+            help="(--clusterblast)"
+            truevalue="--clusterblast" falsevalue="" checked="True" />
+        <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
+            help="(--subclusterblast)"
+            truevalue="--subclusterblast" falsevalue="" checked="false" />
+        <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
+            falsevalue="" truevalue="--smcogs" checked="True" />
+
+        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
+            help="(--full-blast)"
+            truevalue="--full-blast" falsevalue="" checked="False" />
+        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
+            help="(--full-hmmer)"
+            truevalue="--full-hmmer" falsevalue="" checked="false" />
+
+        <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection"
+            help="(--inclusive)"
+            truevalue="--inclusive" falsevalue="" checked="false" />
+
+        <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models">
+            <options from_file="antismash.loc">
+              <column name="value" index="0"/>
+              <column name="name" index="1"/>
+              <column name="path" index="2"/>
+            </options>
+        </param>
+
+        <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search">
+            <option value="t1pks" selected="True">type I polyketide synthases</option>
+            <option value="t2pks" selected="True">type II polyketide synthases</option>
+            <option value="t3pks" selected="True">type III polyketide synthases</option>
+            <option value="t4pks" selected="True">type IV polyketide synthases</option>
+            <option value="transatpks" selected="True">trans-AT PKS</option>
+            <option value="nrps" selected="True">nonribosomal peptide synthetases</option>
+            <option value="terpene" selected="True">terpene synthases</option>
+            <option value="lantipeptide" selected="True">lantipeptides</option>
+            <option value="bacteriocin" selected="True">bacteriocins</option>
+            <option value="blactam" selected="True">beta-lactams</option>
+            <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option>
+            <option value="aminocoumarin" selected="True">aminocoumarins</option>
+            <option value="siderophore" selected="True">siderophores</option>
+            <option value="ectoine" selected="True">ectoines</option>
+            <option value="butyrolactone" selected="True">butyrolactones</option>
+            <option value="indole" selected="True">indoles</option>
+            <option value="nucleoside" selected="True">nucleosides</option>
+            <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option>
+            <option value="oligosaccharide" selected="True">oligosaccharides</option>
+            <option value="furan" selected="True">furans</option>
+            <option value="hserlactone" selected="True">hserlactones</option>
+            <option value="thiopeptide" selected="True">thiopeptides</option>
+            <option value="phenazine" selected="True">phenazines</option>
+            <option value="phosphonate" selected="True">phosphonates</option>
+            <option value="other" selected="True">others</option>
+        </param>
+
+        <param name="outputs" type="select" multiple="true" label="Additional outputs">
+            <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option>
+            <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option>
+            <option value="archive_svgs">All clusters as image (compressed)</option>
+            <option value="archive">All files compressed</option>
+            <option value="gb">Annotated genome (GenBank)</option>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
+          <filter>'geneclusterprots_fasta' in outputs</filter>
+        </data>
+        <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
+          <filter>'geneclusterprots_tabular' in outputs</filter>
+        </data>
+        <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
+          <filter>'archive' in outputs</filter>
+        </data>
+        <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
+          <filter>'archive_svgs' in outputs</filter>
+        </data>
+        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
+          <!-- html is default output at any time.
+          <filter>'html' in outputs</filter>
+          -->
+        </data>
+        <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)">
+          <filter>'gb' in outputs</filter>
+        </data>
+    </outputs>
+  <help>
+    
+.. class:: infomark
+
+**What it does**
+
+antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. 
+It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.
+
+
+**Input**
+
+The ideal input for antiSMASH is an annotated nucleotide file in Genbank format. If no annotation is available, 
+we recommend running your sequence through an annotation pipeline like RAST are the one included in Galaxy.
+
+
+There are several optional analyses that may or may not be run on your sequence.
+Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a 
+query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify 
+the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
+
+
+Also available is the analysis of secondary metabolism gene families (smCOGs). 
+This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene 
+family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. 
+Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.
+
+
+For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. 
+Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage 
+(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). 
+Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.
+
+
+**References**
+
+Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, 
+Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466.
+
+http://antismash.secondarymetabolites.org/help.html
+
+
+Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash
+
+  </help>
+</tool>