diff antismash.xml @ 1:593bb8f5488b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/antismash commit 654a4f3b3a1602cec2510d51fb953fd456427e08
author bgruening
date Wed, 07 Feb 2018 06:22:58 -0500
parents 5db064bbb3be
children 3f0077c88c16
line wrap: on
line diff
--- a/antismash.xml	Tue Jul 15 14:34:55 2014 -0400
+++ b/antismash.xml	Wed Feb 07 06:22:58 2018 -0500
@@ -1,253 +1,166 @@
-<tool id="antismash" name="Secondary Metabolites" version="2.0.2.2">
-    <description>and Antibiotics Analysis (antiSMASH)</description>
+<?xml version='1.0' encoding='utf-8'?>
+<tool id="antismash" name="Antismash" version="4.0.2" profile="17.01">
+    <description>allows the genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters</description>
     <requirements>
-        <requirement type="package" version="3.0">hmmer</requirement>
-        <requirement type="package">hmmer</requirement>
-        <requirement type="package" version="2.2.28">blast+</requirement>
-        <requirement type="package">blast+</requirement>
-        <requirement type="package" version="3.8.31">muscle</requirement>
-        <requirement type="package">muscle</requirement>
-        <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement>
-        <requirement type="package">straight.plugin</requirement>
-        <requirement type="package" version="1.62">biopython</requirement>
-        <requirement type="package">biopython</requirement>
-        <requirement type="package" version="1.2.6">pyquery</requirement>
-        <requirement type="package">pyquery</requirement>
-        <requirement type="package" version="0.1.2">helperlibs</requirement>
-        <requirement type="package">helperlibs</requirement>
-        <requirement type="package" version="0.9">cssselect</requirement>
-        <requirement type="package">cssselect</requirement>
-        <requirement type="package" version="2.0.2">antismash</requirement>
-        <requirement type="package">antismash</requirement>
-        <requirement type="package">glimmer</requirement>
+        <requirement type="package" version="4.0.2">antismash</requirement>
     </requirements>
-    <command>
+    <version_command>antismash --version</version_command>
+    <command detect_errors="aggressive">
+<![CDATA[
         #import os, glob
-        #set $outputfolder = $html.files_path
+        #set $htmloutputfolder = $html.files_path
         #if str($infile.ext) == 'genbank':
             #set $file_extension = 'gb'
         #else:
-            ## TODO add embl as input file
-            #set $file_extension = 'gb'
-        #end if
-
-        ln -s $infile #echo 'input_tempfile.' + $file_extension#;
-        mkdir -p $outputfolder;
-        run_antismash.py 
-            --cpus "\${GALAXY_SLOTS:-12}"
-            --enable $types
-            --input-type 'nucl'
-            $smcogs
-            $clusterblast
-            $subclusterblast
-            $inclusive
-            $full_hmmer
-            $full_blast
-            $eukaryotic
-
-
-            #if str($pfam_database) != "None":
-                --pfamdir $pfam_database.fields.path
-            #end if
-
-            ##--debug
-
-            --disable-embl
-            --outputfolder $outputfolder
-
-            #echo 'input_tempfile.' + $file_extension#
-
-            ## leave out the start and end features, it can be easily replaced with Galaxy tools
-            ##--from START          Start analysis at nucleotide specified
-            ##--to END
-
-        2>&#38;1
-
-        ##
-        ## shuffling files to create the correct outputs for Galaxy
-        ##
-
-        ## html output
-        ;
-        cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null
-        
-        ## gene clusters
-        #if 'geneclusterprots_tabular' in str($outputs).split(','):
-            ;
-            cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null
-        #end if
-
-        #if 'geneclusterprots_fasta' in str($outputs).split(','):
-            ;
-            cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta 2> /dev/null
-        #end if
-
-
-        ##SVG images
-        #if 'archive_svgs' in str($outputs).split(','):
-            ;
-            cd #echo os.path.join($outputfolder, 'svg')#
-            #if $clusterblast:
-                ;
-                tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null
-            #else:
-                ;
-                tar cfz $archive_svgs genecluster*
-            #end if
-        #end if
-
-        ##all files in a archive
-        #if 'archive' in str($outputs).split(','):
-            ;
-            cd $outputfolder;
-            tar cf $archive *.zip 2> /dev/null
+            #set $file_extension = $infile.ext
         #end if
 
-        ## genbank
-        #if 'gb' in str($outputs).split(','):
-            ;
-            cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null
-        #end if
+        ln -s '$infile' input_tempfile.$file_extension &&
+
+        ## create html folder (quoted, like every other use of this path below)
+        mkdir -p '${htmloutputfolder}' &&
+
+        antismash
+            --cpus "\${GALAXY_SLOTS:-12}"
+            --taxon '${taxon}'
+            --input-type '${input_type}'
 
+            ${clusterblast}
+            ${subclusterblast}
+            ${knownclusterblast}
+            ${smcogs}
+            ${inclusive}
+            ${borderpredict}
+            ${tta}
+            ${asf}
+            ${full_hmmer}
+            input_tempfile.$file_extension &&
+
+        ## copy all content to html folder
+        cp input_tempfile/index.html '${html}' 2> /dev/null &&
+        cp -r input_tempfile/* '${htmloutputfolder}'
+
+]]>
     </command>
     <inputs>
-        <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>
+        <param name="infile" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank,EMBL or FASTA format"/>
 
-        <param name="eukaryotic" type="select" label="Origin of DNA">
-            <option value="" selected="True">Prokaryotic</option>
-            <option value="--eukaryotic">Eukaryotic</option>
+        <param argument="--taxon" type="select" label="Origin of DNA">
+            <option value="bacteria" selected="True">Bacteria</option>
+            <option value="fungi">Fungi</option>
         </param>
 
-        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
-            help="(--clusterblast)"
-            truevalue="--clusterblast" falsevalue="" checked="True" />
-        <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
-            help="(--subclusterblast)"
-            truevalue="--subclusterblast" falsevalue="" checked="false" />
-        <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
-            falsevalue="" truevalue="--smcogs" checked="True" />
-
-        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
-            help="(--full-blast)"
-            truevalue="--full-blast" falsevalue="" checked="False" />
-        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
-            help="(--full-hmmer)"
-            truevalue="--full-hmmer" falsevalue="" checked="false" />
-
-        <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection"
-            help="(--inclusive)"
-            truevalue="--inclusive" falsevalue="" checked="false" />
-
-        <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models">
-            <options from_file="antismash.loc">
-              <column name="value" index="0"/>
-              <column name="name" index="1"/>
-              <column name="path" index="2"/>
-            </options>
+        <param argument="--input-type" type="select" label="Type of input sequence" help="Select whether the input file contains nucleotide or amino-acid sequences.">
+            <option value="nucl" selected="True">Nucleotide</option>
+            <option value="prot">Amino-acid</option>
         </param>
 
-        <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search">
-            <option value="t1pks" selected="True">type I polyketide synthases</option>
-            <option value="t2pks" selected="True">type II polyketide synthases</option>
-            <option value="t3pks" selected="True">type III polyketide synthases</option>
-            <option value="t4pks" selected="True">type IV polyketide synthases</option>
-            <option value="transatpks" selected="True">trans-AT PKS</option>
-            <option value="nrps" selected="True">nonribosomal peptide synthetases</option>
-            <option value="terpene" selected="True">terpene synthases</option>
-            <option value="lantipeptide" selected="True">lantipeptides</option>
-            <option value="bacteriocin" selected="True">bacteriocins</option>
-            <option value="blactam" selected="True">beta-lactams</option>
-            <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option>
-            <option value="aminocoumarin" selected="True">aminocoumarins</option>
-            <option value="siderophore" selected="True">siderophores</option>
-            <option value="ectoine" selected="True">ectoines</option>
-            <option value="butyrolactone" selected="True">butyrolactones</option>
-            <option value="indole" selected="True">indoles</option>
-            <option value="nucleoside" selected="True">nucleosides</option>
-            <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option>
-            <option value="oligosaccharide" selected="True">oligosaccharides</option>
-            <option value="furan" selected="True">furans</option>
-            <option value="hserlactone" selected="True">hserlactones</option>
-            <option value="thiopeptide" selected="True">thiopeptides</option>
-            <option value="phenazine" selected="True">phenazines</option>
-            <option value="phosphonate" selected="True">phosphonates</option>
-            <option value="other" selected="True">others</option>
-        </param>
+        <param argument="--clusterblast" type="boolean" truevalue="--clusterblast" falsevalue="" checked="False"
+               label="BLAST identified clusters against known clusters"
+               help="Compare identified clusters against a database of antiSMASH-predicted clusters." />
+        <param argument="--subclusterblast" type="boolean" truevalue="--subclusterblast" falsevalue="" checked="True"
+               label="Subcluster BLAST analysis"
+               help="Compare identified clusters against known subclusters responsible for synthesising precursors." />
+        <param argument="--knownclusterblast" type="boolean" truevalue="--knownclusterblast" falsevalue="" checked="True"
+               label="KnownCluster BLAST analysis"
+               help="Compare identified clusters against known gene clusters from the MIBiG database."/>
+        <param argument="--smcogs" type="boolean" checked="True" truevalue="--smcogs" falsevalue=""
+               label="Analysis of secondary metabolism gene families (smCOGs)"
+               help="Look for sec. met. clusters of orthologous groups."/>
+        <param argument="--inclusive" type="boolean" truevalue="--inclusive" falsevalue="" checked="False"
+               label="Inclusive ClusterFinder algorithm"
+               help="Use inclusive ClusterFinder algorithm for additional cluster detection."/>
+        <param argument="--borderpredict" type="boolean" truevalue="--borderpredict" falsevalue="" checked="False"
+               label="Predict gene cluster borders with ClusterFinder"
+               help="Use ClusterFinder algorithm to predict gene cluster borders."/>
+        <param argument="--asf" type="boolean" truevalue="--asf" falsevalue="" checked="True"
+               label="Run active site finder module" />
+        <param argument="--tta" type="boolean" truevalue="--tta" falsevalue="" checked="False"
+               label="Run TTA codon detection module" />
+        <param argument="--full-hmmer" type="boolean" truevalue="--full-hmmer" falsevalue="" checked="False"
+               label="Run a whole-genome Pfam analysis" />
 
-        <param name="outputs" type="select" multiple="true" label="Additional outputs">
-            <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option>
-            <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option>
-            <option value="archive_svgs">All clusters as image (compressed)</option>
-            <option value="archive">All files compressed</option>
-            <option value="gb">Annotated genome (GenBank)</option>
+        <param name="outputs" type="select" multiple="true" label="Outputs">
+            <option value="html" selected="True">HTML file</option>
+            <option value="all">All results</option>
+            <option value="embl">EMBL files</option>
+            <option value="gb">GenBank files</option>
+            <option value="genecluster_tabular">Gene clusters</option>
         </param>
 
     </inputs>
     <outputs>
-        <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
-          <filter>'geneclusterprots_fasta' in outputs</filter>
-        </data>
-        <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gen Cluster Proteins)">
-          <filter>'geneclusterprots_tabular' in outputs</filter>
-        </data>
-        <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
-          <filter>'archive' in outputs</filter>
-        </data>
-        <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
-          <filter>'archive_svgs' in outputs</filter>
-        </data>
-        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
-          <!-- html is default output at any time.
-          <filter>'html' in outputs</filter>
-          -->
-        </data>
-        <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)">
-          <filter>'gb' in outputs</filter>
-        </data>
+        <collection type="list" name="genecluster_tabular" label="${tool.name} on ${on_string} (Gene Cluster)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.txt" directory="input_tempfile" ext="txt" visible="false" />
+            <filter>'genecluster_tabular' in outputs</filter>
+        </collection>
+        <collection name="genbank" type="list" label="${tool.name} on ${on_string} (GenBank)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.gbk" directory="input_tempfile" ext="genbank" visible="false" />
+            <filter>'gb' in outputs</filter>
+        </collection>
+        <collection name="embl" type="list" label="${tool.name} on ${on_string} (EMBL)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.embl" directory="input_tempfile" ext="embl" visible="false" />
+            <filter>'embl' in outputs</filter>
+        </collection>
+        <collection name="archive" type="list" label="${tool.name} on ${on_string} (all files compressed)">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)\.zip" directory="input_tempfile" ext="zip" visible="false" />
+            <filter>'all' in outputs</filter>
+        </collection>
+        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)" />
     </outputs>
-  <help>
-    
-.. class:: infomark
+    <tests>
+        <test>
+            <param name="infile" value="sequence.fasta"/>
+            <output name="html" file="index.html"/>
+        </test>
+        <test>
+            <param name="infile" value="sequence.gb"/>
+            <param name="outputs" value="html,gb"/>
+            <output_collection name="genbank" type="list">
+                <element name="ARBH01000003.1.cluster001" file="ARBH01000003.1.cluster001" ftype="genbank" compare="sim_size" />
+                <element name="ARBH01000003.1.final" file="ARBH01000003.1.final" ftype="genbank"/>
+            </output_collection>
+            <output name="html" file="index.2.html"/>
+        </test>
+    </tests>
+    <help>
+<![CDATA[
 
 **What it does**
 
-antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. 
+AntiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes.
 It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.
 
+antiSMASH is powered by several open source tools: NCBI BLAST+, HMMer 3, Muscle 3, Glimmer 3, FastTree, TreeGraph 2, Indigo-depict, PySVG and JQuery SVG.
 
 **Input**
 
-The ideal input for antiSMASH is an annotated nucleotide file in Genbank format. If no annotation is available, 
-we recommend running your sequence through an annotation pipeline like RAST are the one included in Galaxy.
-
+The ideal input for antiSMASH is an annotated nucleotide file in Genbank format or EMBL format.
+You can either upload a GenBank/EMBL file manually, or simply enter the GenBank/RefSeq accession number of your sequence for antiSMASH to upload it.
+If no annotation is available, we recommend running your sequence through an annotation pipeline like RAST to obtain GBK/EMBL files with high-quality annotations.
 
-There are several optional analyses that may or may not be run on your sequence.
-Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a 
-query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify 
-the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
-
+Alternatively, you can provide a FASTA file containing a single sequence. antiSMASH will generate a preliminary annotation using Prodigal, and use that to run the rest of the analysis.
+You can also provide gene annotations in GFF3 format. Input files should be properly formatted.
+If you are creating your GBK/EMBL/FASTA file manually, be sure to do so in a plain text editor like Notepad or Emacs, and save your files as "All files (.)", ending with the correct extension (for example ".fasta", ".gbk", or ".embl").
 
-Also available is the analysis of secondary metabolism gene families (smCOGs). 
-This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene 
-family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. 
-Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.
+There are several optional analyses that may or may not be run on your sequence. Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
+This analysis is selected by default.
 
+Also available is the analysis of secondary metabolism gene families (smCOGs). This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene family using profile hidden Markov models specific for the conserved sequence region characteristic of this family.
+Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment. This analysis is selected by default.
 
-For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. 
-Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage 
-(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). 
-Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.
+**Output**
 
-
-**References**
+The output of the antiSMASH analysis pipeline is organized in an interactive HTML page with SVG graphics, and different parts of the analysis are displayed in different panels for every gene cluster.
 
-Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, 
-Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466.
-
-http://antismash.secondarymetabolites.org/help.html
+In the upper right, a small list of buttons offers further functionality. The house-shaped button will get you back on the antiSMASH start page.
+The question-mark button will get you to this help page. The exclamation-mark button leads to a page explaining about antiSMASH.
+The downward-pointing arrow will open a menu offering to download the complete set of results from the antiSMASH run, a summary Excel file and the summary EMBL/GenBank output file.
+The EMBL/GenBank file can be viewed in a genome browser such as Artemis.
 
-
-Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash
-
-  </help>
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkv437</citation>
+    </citations>
 </tool>