comparison antismash.xml @ 0:5db064bbb3be draft

Imported from capsule None
author bgruening
date Tue, 15 Jul 2014 14:34:55 -0400
parents
children 593bb8f5488b
comparison
equal deleted inserted replaced
-1:000000000000 0:5db064bbb3be
1 <tool id="antismash" name="Secondary Metabolites" version="2.0.2.2">
2 <description>and Antibiotics Analysis (antiSMASH)</description>
<!-- Tool dependencies. Every package except glimmer is declared twice: once pinned to a
     specific version and once without a version attribute. glimmer is declared only
     without a version.
     NOTE(review): the unversioned duplicates presumably act as a fallback when the pinned
     version cannot be resolved; confirm before removing them. -->
3 <requirements>
4 <requirement type="package" version="3.0">hmmer</requirement>
5 <requirement type="package">hmmer</requirement>
6 <requirement type="package" version="2.2.28">blast+</requirement>
7 <requirement type="package">blast+</requirement>
8 <requirement type="package" version="3.8.31">muscle</requirement>
9 <requirement type="package">muscle</requirement>
10 <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement>
11 <requirement type="package">straight.plugin</requirement>
12 <requirement type="package" version="1.62">biopython</requirement>
13 <requirement type="package">biopython</requirement>
14 <requirement type="package" version="1.2.6">pyquery</requirement>
15 <requirement type="package">pyquery</requirement>
16 <requirement type="package" version="0.1.2">helperlibs</requirement>
17 <requirement type="package">helperlibs</requirement>
18 <requirement type="package" version="0.9">cssselect</requirement>
19 <requirement type="package">cssselect</requirement>
20 <requirement type="package" version="2.0.2">antismash</requirement>
21 <requirement type="package">antismash</requirement>
22 <requirement type="package">glimmer</requirement>
23 </requirements>
24 <command>
## Cheetah template for the job command line. It links the input file, runs
## run_antismash.py and then copies or packs the requested result files into
## the Galaxy output datasets. The individual shell commands are joined with
## semicolons that sit on their own template lines.
25 #import os, glob
## antiSMASH writes all results into the files_path directory of the html output.
26 #set $outputfolder = $html.files_path
## Both branches currently pick 'gb'; the else branch is a placeholder until
## EMBL input is supported (see the TODO below).
27 #if str($infile.ext) == 'genbank':
28 #set $file_extension = 'gb'
29 #else:
30 ## TODO add embl as input file
31 #set $file_extension = 'gb'
32 #end if
33
## Link the input dataset to input_tempfile plus the chosen extension in the
## current directory; that link name is what gets passed to antiSMASH below.
34 ln -s $infile #echo 'input_tempfile.' + $file_extension#;
35 mkdir -p $outputfolder;
36 run_antismash.py
## The backslash keeps Cheetah away from the shell variable: the shell sees
## GALAXY_SLOTS with a default of 12 when it is unset.
37 --cpus "\${GALAXY_SLOTS:-12}"
38 --enable $types
39 --input-type 'nucl'
## Each of the following parameters expands either to its command line flag
## or to an empty string, as defined by the matching input parameter.
40 $smcogs
41 $clusterblast
42 $subclusterblast
43 $inclusive
44 $full_hmmer
45 $full_blast
46 $eukaryotic
47
48
## Only pass a Pfam directory when an entry of the optional select was chosen.
49 #if str($pfam_database) != "None":
50 --pfamdir $pfam_database.fields.path
51 #end if
52
53 ##--debug
54
55 --disable-embl
56 --outputfolder $outputfolder
57
## Positional argument: the symlink created at the start of the command.
58 #echo 'input_tempfile.' + $file_extension#
59
60 ## leave out the start and end features, it can be easily replaced with Galaxy tools
61 ##--from START Start analysis at nucleotide specified
62 ##--to END
63
## Merge stderr of run_antismash.py into stdout.
64 2>&#38;1
65
66 ##
67 ## shuffling files to create the correct outputs for Galaxy
68 ##
## Every copy or pack step below is only emitted when its key is part of the
## "outputs" selection (the html copy is unconditional). Most steps send their
## stderr to /dev/null, so a missing result file does not produce an error message.
69
70 ## html output
71 ;
72 cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null
73
74 ## gene clusters
75 #if 'geneclusterprots_tabular' in str($outputs).split(','):
76 ;
77 cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null
78 #end if
79
80 #if 'geneclusterprots_fasta' in str($outputs).split(','):
81 ;
82 cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta 2> /dev/null
83 #end if
84
85
86 ##SVG images
## Packed from inside the svg subdirectory. With cluster BLAST enabled the
## *_all.svg files are added to the genecluster* files.
87 #if 'archive_svgs' in str($outputs).split(','):
88 ;
89 cd #echo os.path.join($outputfolder, 'svg')#
90 #if $clusterblast:
91 ;
92 tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null
93 #else:
94 ;
95 tar cfz $archive_svgs genecluster*
96 #end if
97 #end if
98
99 ##all files in a archive
## Only the zip files found in the output folder are put into the tar archive.
100 #if 'archive' in str($outputs).split(','):
101 ;
102 cd $outputfolder;
103 tar cf $archive *.zip 2> /dev/null
104 #end if
105
106 ## genbank
## All gbk files in the output folder are concatenated into one dataset.
107 #if 'gb' in str($outputs).split(','):
108 ;
109 cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null
110 #end if
111
112 </command>
<!-- User-facing parameters. The boolean parameters and "Origin of DNA" carry the literal
     run_antismash.py flag (or an empty string) as their value, so the command template
     can insert them verbatim. Each boolean shows its flag in the help text. -->
113 <inputs>
114 <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/>
115
116 <param name="eukaryotic" type="select" label="Origin of DNA">
117 <option value="" selected="True">Prokaryotic</option>
118 <option value="--eukaryotic">Eukaryotic</option>
119 </param>
120
121 <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters"
122 help="(--clusterblast)"
123 truevalue="--clusterblast" falsevalue="" checked="True" />
124 <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis"
125 help="(--subclusterblast)"
126 truevalue="--subclusterblast" falsevalue="" checked="false" />
127 <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)"
128 help="(--smcogs)" falsevalue="" truevalue="--smcogs" checked="True" />
129
130 <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis"
131 help="(--full-blast)"
132 truevalue="--full-blast" falsevalue="" checked="False" />
133 <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis"
134 help="(--full-hmmer)"
135 truevalue="--full-hmmer" falsevalue="" checked="false" />
136
137 <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection"
138 help="(--inclusive)"
139 truevalue="--inclusive" falsevalue="" checked="false" />
140
<!-- Optional. Choices come from antismash.loc: column 0 is the value, column 1 the
     displayed name and column 2 the path that the command template hands to the
     pfamdir option. When nothing is selected the option is not passed at all. -->
141 <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam profile HMM database (--pfamdir)">
142 <options from_file="antismash.loc">
143 <column name="value" index="0"/>
144 <column name="name" index="1"/>
145 <column name="path" index="2"/>
146 </options>
147 </param>
148
<!-- The selected values are passed to the enable option of run_antismash.py.
     All types are selected by default. -->
149 <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search">
150 <option value="t1pks" selected="True">type I polyketide synthases</option>
151 <option value="t2pks" selected="True">type II polyketide synthases</option>
152 <option value="t3pks" selected="True">type III polyketide synthases</option>
153 <option value="t4pks" selected="True">type IV polyketide synthases</option>
154 <option value="transatpks" selected="True">trans-AT PKS</option>
155 <option value="nrps" selected="True">nonribosomal peptide synthetases</option>
156 <option value="terpene" selected="True">terpene synthases</option>
157 <option value="lantipeptide" selected="True">lantipeptides</option>
158 <option value="bacteriocin" selected="True">bacteriocins</option>
159 <option value="blactam" selected="True">beta-lactams</option>
160 <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option>
161 <option value="aminocoumarin" selected="True">aminocoumarins</option>
162 <option value="siderophore" selected="True">siderophores</option>
163 <option value="ectoine" selected="True">ectoines</option>
164 <option value="butyrolactone" selected="True">butyrolactones</option>
165 <option value="indole" selected="True">indoles</option>
166 <option value="nucleoside" selected="True">nucleosides</option>
167 <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option>
168 <option value="oligosaccharide" selected="True">oligosaccharides</option>
169 <option value="furan" selected="True">furans</option>
170 <option value="hserlactone" selected="True">hserlactones</option>
171 <option value="thiopeptide" selected="True">thiopeptides</option>
172 <option value="phenazine" selected="True">phenazines</option>
173 <option value="phosphonate" selected="True">phosphonates</option>
174 <option value="other" selected="True">others</option>
175 </param>
176
<!-- Each selected value switches on the output dataset whose filter tests for it and the
     matching copy or pack step in the command template. The html report is always
     produced and therefore not listed here. -->
177 <param name="outputs" type="select" multiple="true" label="Additional outputs">
178 <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option>
179 <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option>
180 <option value="archive_svgs">All clusters as image (compressed)</option>
181 <option value="archive">All files compressed</option>
182 <option value="gb">Annotated genome (GenBank)</option>
183 </param>
184
185 </inputs>
<!-- Output datasets. Only the html report is always created; every other dataset exists
     only when its key is selected in the "outputs" parameter (note that the genbank
     dataset is switched on by the key 'gb'). The FASTA and tabular protein datasets
     carry their format in the label so they can be told apart in the history. -->
186 <outputs>
187 <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gene Cluster Proteins, FASTA)">
188 <filter>'geneclusterprots_fasta' in outputs</filter>
189 </data>
190 <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gene Cluster Proteins, tabular)">
191 <filter>'geneclusterprots_tabular' in outputs</filter>
192 </data>
193 <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)">
194 <filter>'archive' in outputs</filter>
195 </data>
196 <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)">
197 <filter>'archive_svgs' in outputs</filter>
198 </data>
199 <data format="html" name="html" label="${tool.name} on ${on_string} (html report)">
200 <!-- html is default output at any time.
201 <filter>'html' in outputs</filter>
202 -->
203 </data>
204 <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)">
205 <filter>'gb' in outputs</filter>
206 </data>
207 </outputs>
208 <help>
209
210 .. class:: infomark
211
212 **What it does**
213
214 antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes.
215 It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier.
216
217
218 **Input**
219
220 The ideal input for antiSMASH is an annotated nucleotide file in GenBank format. If no annotation is available,
221 we recommend running your sequence through an annotation pipeline like RAST or the one included in Galaxy.
222
223
224 There are several optional analyses that may or may not be run on your sequence.
225 Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a
226 query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify
227 the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence.
228
229
230 Also available is the analysis of secondary metabolism gene families (smCOGs).
231 This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene
232 family using profile hidden Markov models specific for the conserved sequence region characteristic of this family.
233 Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment.
234
235
236 For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline.
237 Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage
238 (e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite).
239 Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented.
240
241
242 **References**
243
244 Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber,
245 Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466.
246
247 http://antismash.secondarymetabolites.org/help.html
248
249
250 Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash
251
252 </help>
253 </tool>