Mercurial > repos > bgruening > antismash
diff antismash.xml @ 0:5db064bbb3be draft
Imported from capsule None
author | bgruening |
---|---|
date | Tue, 15 Jul 2014 14:34:55 -0400 |
parents | |
children | 593bb8f5488b |
line wrap: on
line diff
<!--
    Galaxy tool wrapper for antiSMASH (antismash.xml, changeset 0:5db064bbb3be).

    The command links the GenBank input under a fixed name, runs run_antismash.py
    with the options chosen in the form, and then copies the requested result
    files out of the HTML report's extra-files directory into the Galaxy output
    datasets. The HTML report is always produced; every other output is created
    only when it is ticked in the "Additional outputs" parameter.
-->
<tool id="antismash" name="Secondary Metabolites" version="2.0.2.2">
    <description>and Antibiotics Analysis (antiSMASH)</description>
    <requirements>
        <!--
            Every pinned requirement is followed by an unversioned entry of the
            same name; glimmer is listed unversioned only.
            NOTE(review): presumably the unversioned entries are a fallback for
            dependency resolution; confirm before removing either form.
        -->
        <requirement type="package" version="3.0">hmmer</requirement>
        <requirement type="package">hmmer</requirement>
        <requirement type="package" version="2.2.28">blast+</requirement>
        <requirement type="package">blast+</requirement>
        <requirement type="package" version="3.8.31">muscle</requirement>
        <requirement type="package">muscle</requirement>
        <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement>
        <requirement type="package">straight.plugin</requirement>
        <requirement type="package" version="1.62">biopython</requirement>
        <requirement type="package">biopython</requirement>
        <requirement type="package" version="1.2.6">pyquery</requirement>
        <requirement type="package">pyquery</requirement>
        <requirement type="package" version="0.1.2">helperlibs</requirement>
        <requirement type="package">helperlibs</requirement>
        <requirement type="package" version="0.9">cssselect</requirement>
        <requirement type="package">cssselect</requirement>
        <requirement type="package" version="2.0.2">antismash</requirement>
        <requirement type="package">antismash</requirement>
        <requirement type="package">glimmer</requirement>
    </requirements>
    <!-- CDATA keeps the shell redirections (2>&1, 2> /dev/null) readable and the document well-formed. -->
    <command><![CDATA[
        #import os, glob
        ## All antiSMASH results are written to the extra-files directory of the HTML output.
        #set $outputfolder = $html.files_path
        #if str($infile.ext) == 'genbank':
            #set $file_extension = 'gb'
        #else:
            ## TODO add embl as input file
            #set $file_extension = 'gb'
        #end if

        ## Link the input under a fixed name that carries the extension chosen above.
        ln -s $infile #echo 'input_tempfile.' + $file_extension#;
        mkdir -p $outputfolder;
        run_antismash.py
            --cpus "\${GALAXY_SLOTS:-12}"
            --enable $types
            --input-type 'nucl'
            $smcogs
            $clusterblast
            $subclusterblast
            $inclusive
            $full_hmmer
            $full_blast
            $eukaryotic


            ## pfam_database is optional; it renders as "None" when nothing is selected.
            #if str($pfam_database) != "None":
                --pfamdir $pfam_database.fields.path
            #end if

            ##--debug

            --disable-embl
            --outputfolder $outputfolder

            #echo 'input_tempfile.' + $file_extension#

            ## leave out the start and end features, it can be easily replaced with Galaxy tools
            ##--from START Start analysis at nucleotide specified
            ##--to END

            ## stderr of antiSMASH is merged into stdout
            2>&1

            ##
            ## shuffling files to create the correct outputs for Galaxy
            ##

            ## html output
            ;
            cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null

            ## gene clusters
            #if 'geneclusterprots_tabular' in str($outputs).split(','):
                ;
                cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null
            #end if

            ## The FASTA name is a glob: concatenate, so that more than one match still
            ## yields a single dataset (cp cannot copy several sources onto one file).
            #if 'geneclusterprots_fasta' in str($outputs).split(','):
                ;
                cat #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# > $geneclusterprots_fasta 2> /dev/null
            #end if


            ##SVG images
            #if 'archive_svgs' in str($outputs).split(','):
                ;
                cd #echo os.path.join($outputfolder, 'svg')#
                ## the *_all.svg files are archived only when clusterblast was requested
                #if $clusterblast:
                    ;
                    tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null
                #else:
                    ;
                    tar cfz $archive_svgs genecluster* 2> /dev/null
                #end if
            #end if

            ##all files in a archive
            #if 'archive' in str($outputs).split(','):
                ;
                cd $outputfolder;
                tar cf $archive *.zip 2> /dev/null
            #end if

            ## genbank
            #if 'gb' in str($outputs).split(','):
                ;
                cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null
            #end if

    ]]></command>
    <inputs>
        <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>

        <!-- The option value is inserted verbatim into the command line; prokaryotic adds nothing. -->
        <param name="eukaryotic" type="select" label="Origin of DNA">
            <option value="" selected="True">Prokaryotic</option>
            <option value="--eukaryotic">Eukaryotic</option>
        </param>

        <!-- Each boolean below expands to its command line flag when checked and to an empty string otherwise. -->
        <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
            help="(--clusterblast)"
            truevalue="--clusterblast" falsevalue="" checked="true" />
        <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
            help="(--subclusterblast)"
            truevalue="--subclusterblast" falsevalue="" checked="false" />
        <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
            falsevalue="" truevalue="--smcogs" checked="true" />

        <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
            help="(--full-blast)"
            truevalue="--full-blast" falsevalue="" checked="false" />
        <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
            help="(--full-hmmer)"
            truevalue="--full-hmmer" falsevalue="" checked="false" />

        <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection"
            help="(--inclusive)"
            truevalue="--inclusive" falsevalue="" checked="false" />

        <!-- Optional; the "path" column of antismash.loc is what the command passes as the Pfam directory. -->
        <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models">
            <options from_file="antismash.loc">
                <column name="value" index="0"/>
                <column name="name" index="1"/>
                <column name="path" index="2"/>
            </options>
        </param>

        <!-- The selected values are handed to the "enable" option of run_antismash.py. -->
        <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search">
            <option value="t1pks" selected="True">type I polyketide synthases</option>
            <option value="t2pks" selected="True">type II polyketide synthases</option>
            <option value="t3pks" selected="True">type III polyketide synthases</option>
            <option value="t4pks" selected="True">type IV polyketide synthases</option>
            <option value="transatpks" selected="True">trans-AT PKS</option>
            <option value="nrps" selected="True">nonribosomal peptide synthetases</option>
            <option value="terpene" selected="True">terpene synthases</option>
            <option value="lantipeptide" selected="True">lantipeptides</option>
            <option value="bacteriocin" selected="True">bacteriocins</option>
            <option value="blactam" selected="True">beta-lactams</option>
            <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option>
            <option value="aminocoumarin" selected="True">aminocoumarins</option>
            <option value="siderophore" selected="True">siderophores</option>
            <option value="ectoine" selected="True">ectoines</option>
            <option value="butyrolactone" selected="True">butyrolactones</option>
            <option value="indole" selected="True">indoles</option>
            <option value="nucleoside" selected="True">nucleosides</option>
            <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option>
            <option value="oligosaccharide" selected="True">oligosaccharides</option>
            <option value="furan" selected="True">furans</option>
            <option value="hserlactone" selected="True">hserlactones</option>
            <option value="thiopeptide" selected="True">thiopeptides</option>
            <option value="phenazine" selected="True">phenazines</option>
            <option value="phosphonate" selected="True">phosphonates</option>
            <option value="other" selected="True">others</option>
        </param>

        <!-- These values are tested both by the command template and by the output filters below. -->
        <param name="outputs" type="select" multiple="true" label="Additional outputs">
            <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option>
            <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option>
            <option value="archive_svgs">All clusters as image (compressed)</option>
            <option value="archive">All files compressed</option>
            <option value="gb">Annotated genome (GenBank)</option>
        </param>

    </inputs>
    <outputs>
        <!-- The two protein outputs carry their format in the label so they can be told apart in the history. -->
        <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gene Cluster Proteins, FASTA)">
            <filter>'geneclusterprots_fasta' in outputs</filter>
        </data>
        <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gene Cluster Proteins, tabular)">
            <filter>'geneclusterprots_tabular' in outputs</filter>
        </data>
        <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
            <filter>'archive' in outputs</filter>
        </data>
        <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
            <filter>'archive_svgs' in outputs</filter>
        </data>
        <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
            <!-- html is default output at any time.
                <filter>'html' in outputs</filter>
            -->
        </data>
        <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)">
            <filter>'gb' in outputs</filter>
        </data>
    </outputs>
    <help>

.. class:: infomark

**What it does**

antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes.
It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.


**Input**

The ideal input for antiSMASH is an annotated nucleotide file in Genbank format. If no annotation is available,
we recommend running your sequence through an annotation pipeline like RAST or the one included in Galaxy.


There are several optional analyses that may or may not be run on your sequence.
Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a
query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify
the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.


Also available is the analysis of secondary metabolism gene families (smCOGs).
This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene
family using profile hidden Markov models specific for the conserved sequence region characteristic of this family.
Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.


For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline.
Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage
(e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite).
Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.


**References**

Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber,
Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466.

http://antismash.secondarymetabolites.org/help.html


Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash

    </help>
</tool>