Mercurial > repos > bgruening > antismash
comparison antismash.xml @ 0:5db064bbb3be draft
Imported from capsule None
author | bgruening |
---|---|
date | Tue, 15 Jul 2014 14:34:55 -0400 |
parents | |
children | 593bb8f5488b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5db064bbb3be |
---|---|
1 <tool id="antismash" name="Secondary Metabolites" version="2.0.2.2"> | |
2 <description>and Antibiotics Analysis (antiSMASH)</description> | |
3 <requirements> <!-- every package is requested twice, once pinned to a version and once unversioned; glimmer is requested unversioned only --> | |
4 <requirement type="package" version="3.0">hmmer</requirement> | |
5 <requirement type="package">hmmer</requirement> | |
6 <requirement type="package" version="2.2.28">blast+</requirement> | |
7 <requirement type="package">blast+</requirement> | |
8 <requirement type="package" version="3.8.31">muscle</requirement> | |
9 <requirement type="package">muscle</requirement> | |
10 <requirement type="package" version="1.4.0-post-1">straight.plugin</requirement> | |
11 <requirement type="package">straight.plugin</requirement> | |
12 <requirement type="package" version="1.62">biopython</requirement> | |
13 <requirement type="package">biopython</requirement> | |
14 <requirement type="package" version="1.2.6">pyquery</requirement> | |
15 <requirement type="package">pyquery</requirement> | |
16 <requirement type="package" version="0.1.2">helperlibs</requirement> | |
17 <requirement type="package">helperlibs</requirement> | |
18 <requirement type="package" version="0.9">cssselect</requirement> | |
19 <requirement type="package">cssselect</requirement> | |
20 <requirement type="package" version="2.0.2">antismash</requirement> | |
21 <requirement type="package">antismash</requirement> | |
22 <requirement type="package">glimmer</requirement> | |
23 </requirements> | |
24 <command> | |
25 #import os, glob | |
26 #set $outputfolder = $html.files_path | |
27 #if str($infile.ext) == 'genbank': | |
28 #set $file_extension = 'gb' | |
29 #else: | |
30 ## TODO add embl as input file; until then both branches use the 'gb' extension | |
31 #set $file_extension = 'gb' | |
32 #end if | |
33 | |
34 ln -s $infile #echo 'input_tempfile.' + $file_extension#; | |
35 mkdir -p $outputfolder; | |
36 run_antismash.py | |
37 --cpus "\${GALAXY_SLOTS:-12}" | |
38 --enable $types | |
39 --input-type 'nucl' | |
40 $smcogs | |
41 $clusterblast | |
42 $subclusterblast | |
43 $inclusive | |
44 $full_hmmer | |
45 $full_blast | |
46 $eukaryotic | |
47 | |
48 | |
49 #if str($pfam_database) != "None": | |
50 --pfamdir $pfam_database.fields.path | |
51 #end if | |
52 | |
53 ##--debug | |
54 | |
55 --disable-embl | |
56 --outputfolder $outputfolder | |
57 | |
58 #echo 'input_tempfile.' + $file_extension# | |
59 | |
60 ## the start and end options (shown commented out below) are left out, this can easily be done with Galaxy tools | |
61 ##--from START Start analysis at nucleotide specified | |
62 ##--to END | |
63 | |
64 2>&1 | |
65 | |
66 ## | |
67 ## copy the antiSMASH results from $outputfolder into the Galaxy output datasets | |
68 ## | |
69 | |
70 ## html output: index.html is always copied to $html | |
71 ; | |
72 cp #echo os.path.join($outputfolder, 'index.html')# $html 2> /dev/null | |
73 | |
74 ## gene cluster proteins (tabular and FASTA), copied only when selected in $outputs | |
75 #if 'geneclusterprots_tabular' in str($outputs).split(','): | |
76 ; | |
77 cp #echo os.path.join($outputfolder, 'geneclusters.txt')# $geneclusterprots_tabular 2> /dev/null | |
78 #end if | |
79 | |
80 #if 'geneclusterprots_fasta' in str($outputs).split(','): | |
81 ; | |
82 cp #echo os.path.join($outputfolder, '*_genecluster_proteins.fa')# $geneclusterprots_fasta 2> /dev/null | |
83 #end if | |
84 | |
85 | |
86 ## SVG images: tar.gz built inside the svg subfolder; the *_all.svg files are added only when clusterblast is enabled. NOTE(review): only the clusterblast branch discards the stderr of tar, confirm this is intended | |
87 #if 'archive_svgs' in str($outputs).split(','): | |
88 ; | |
89 cd #echo os.path.join($outputfolder, 'svg')# | |
90 #if $clusterblast: | |
91 ; | |
92 tar cfz $archive_svgs *_all.svg genecluster* 2> /dev/null | |
93 #else: | |
94 ; | |
95 tar cfz $archive_svgs genecluster* | |
96 #end if | |
97 #end if | |
98 | |
99 ## all files in an archive: tar of the *.zip files found in $outputfolder | |
100 #if 'archive' in str($outputs).split(','): | |
101 ; | |
102 cd $outputfolder; | |
103 tar cf $archive *.zip 2> /dev/null | |
104 #end if | |
105 | |
106 ## genbank: all *.gbk files in $outputfolder are concatenated into $genbank | |
107 #if 'gb' in str($outputs).split(','): | |
108 ; | |
109 cat #echo os.path.join($outputfolder, '*.gbk')# > $genbank 2> /dev/null | |
110 #end if | |
111 | |
112 </command> | |
113 <inputs> <!-- the values of the boolean and select parameters are the literal run_antismash.py flags that the command template inserts --> | |
114 <param name="infile" type="data" format="genbank" label="Nucleotide sequence file in GenBank format"/> | |
115 | |
116 <param name="eukaryotic" type="select" label="Origin of DNA"> | |
117 <option value="" selected="True">Prokaryotic</option> | |
118 <option value="--eukaryotic">Eukaryotic</option> | |
119 </param> | |
120 | |
121 <param name="clusterblast" type="boolean" label="BLAST identified clusters against known clusters" | |
122 help="(--clusterblast)" | |
123 truevalue="--clusterblast" falsevalue="" checked="True" /> | |
124 <param name="subclusterblast" type="boolean" label="Subcluster BLAST analysis" | |
125 help="(--subclusterblast)" | |
126 truevalue="--subclusterblast" falsevalue="" checked="false" /> | |
127 <param name="smcogs" type="boolean" label="Analysis of secondary metabolism gene families (smCOGs)" | |
128 help="(--smcogs)" falsevalue="" truevalue="--smcogs" checked="True" /> | |
129 | |
130 <param name="full_blast" type="boolean" label="Run a whole-genome BLAST analysis" | |
131 help="(--full-blast)" | |
132 truevalue="--full-blast" falsevalue="" checked="False" /> | |
133 <param name="full_hmmer" type="boolean" label="Run a whole-genome Pfam analysis" | |
134 help="(--full-hmmer)" | |
135 truevalue="--full-hmmer" falsevalue="" checked="false" /> | |
136 | |
137 <param name="inclusive" type="boolean" label="Use Cimermancic et al. algorithm for cluster detection" | |
138 help="(--inclusive)" | |
139 truevalue="--inclusive" falsevalue="" checked="false" /> | |
140 | |
141 <param name="pfam_database" type="select" optional="true" label="Pfam database" help="Pfam Covariance models"> | |
142 <options from_file="antismash.loc"> | |
143 <column name="value" index="0"/> | |
144 <column name="name" index="1"/> | |
145 <column name="path" index="2"/> | |
146 </options> | |
147 </param> | |
148 | |
149 <param name="types" type="select" display="checkboxes" multiple="true" label="Gene cluster types to search"> | |
150 <option value="t1pks" selected="True">type I polyketide synthases</option> | |
151 <option value="t2pks" selected="True">type II polyketide synthases</option> | |
152 <option value="t3pks" selected="True">type III polyketide synthases</option> | |
153 <option value="t4pks" selected="True">type IV polyketide synthases</option> | |
154 <option value="transatpks" selected="True">trans-AT PKS</option> | |
155 <option value="nrps" selected="True">nonribosomal peptide synthetases</option> | |
156 <option value="terpene" selected="True">terpene synthases</option> | |
157 <option value="lantipeptide" selected="True">lantipeptides</option> | |
158 <option value="bacteriocin" selected="True">bacteriocins</option> | |
159 <option value="blactam" selected="True">beta-lactams</option> | |
160 <option value="amglyccycl" selected="True">aminoglycosides / aminocyclitols</option> | |
161 <option value="aminocoumarin" selected="True">aminocoumarins</option> | |
162 <option value="siderophore" selected="True">siderophores</option> | |
163 <option value="ectoine" selected="True">ectoines</option> | |
164 <option value="butyrolactone" selected="True">butyrolactones</option> | |
165 <option value="indole" selected="True">indoles</option> | |
166 <option value="nucleoside" selected="True">nucleosides</option> | |
167 <option value="phosphoglycolipid" selected="True">phosphoglycolipids</option> | |
168 <option value="oligosaccharide" selected="True">oligosaccharides</option> | |
169 <option value="furan" selected="True">furans</option> | |
170 <option value="hserlactone" selected="True">hserlactones</option> | |
171 <option value="thiopeptide" selected="True">thiopeptides</option> | |
172 <option value="phenazine" selected="True">phenazines</option> | |
173 <option value="phosphonate" selected="True">phosphonates</option> | |
174 <option value="other" selected="True">others</option> | |
175 </param> | |
176 | |
177 <param name="outputs" type="select" multiple="true" label="Additional outputs"> | |
178 <option value="geneclusterprots_fasta" selected="True">Gene cluster proteins (FASTA)</option> | |
179 <option value="geneclusterprots_tabular">Gene cluster proteins (Tabular)</option> | |
180 <option value="archive_svgs">All clusters as image (compressed)</option> | |
181 <option value="archive">All files compressed</option> | |
182 <option value="gb">Annotated genome (GenBank)</option> | |
183 </param> | |
184 | |
185 </inputs> | |
186 <outputs> <!-- every dataset except the html report is created only when its value is selected in the "outputs" parameter --> | |
187 <data format="fasta" name="geneclusterprots_fasta" label="${tool.name} on ${on_string} (Gene Cluster Proteins, FASTA)"> | |
188 <filter>'geneclusterprots_fasta' in outputs</filter> | |
189 </data> | |
190 <data format="tabular" name="geneclusterprots_tabular" label="${tool.name} on ${on_string} (Gene Cluster Proteins, tabular)"> | |
191 <filter>'geneclusterprots_tabular' in outputs</filter> | |
192 </data> | |
193 <data format="tar" name="archive" label="${tool.name} on ${on_string} (all files compressed)"> | |
194 <filter>'archive' in outputs</filter> | |
195 </data> | |
196 <data format="tar.gz" name="archive_svgs" label="${tool.name} on ${on_string} (SVG images)"> | |
197 <filter>'archive_svgs' in outputs</filter> | |
198 </data> | |
199 <data format="html" name="html" label="${tool.name} on ${on_string} (html report)"> | |
200 <!-- html is default output at any time. | |
201 <filter>'html' in outputs</filter> | |
202 --> | |
203 </data> | |
204 <data name="genbank" format="genbank" label="${tool.name} on ${on_string} (genbank)"> | |
205 <filter>'gb' in outputs</filter> | |
206 </data> | |
207 </outputs> | |
208 <help> | |
209 | |
210 .. class:: infomark | |
211 | |
212 **What it does** | |
213 | |
214 antiSMASH allows the rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial and fungal genomes. | |
215 It integrates and cross-links with a large number of in silico secondary metabolite analysis tools that have been published earlier. | |
216 | |
217 | |
218 **Input** | |
219 | |
220 The ideal input for antiSMASH is an annotated nucleotide file in GenBank format. If no annotation is available, | |
221 we recommend running your sequence through an annotation pipeline like RAST or the one included in Galaxy. | |
222 | |
223 | |
224 There are several optional analyses that may or may not be run on your sequence. | |
225 Highly recommended is the Gene Cluster Blast Comparative Analysis, which runs BlastP using each amino acid sequence from a detected gene cluster as a | |
226 query on a large database of predicted protein sequences from secondary metabolite biosynthetic gene clusters, and pools the results to identify | |
227 the gene clusters that are most homologous to the gene cluster that was detected in your query nucleotide sequence. | |
228 | |
229 | |
230 Also available is the analysis of secondary metabolism gene families (smCOGs). | |
231 This analysis attempts to allocate each gene in the detected gene clusters to a secondary metabolism-specific gene | |
232 family using profile hidden Markov models specific for the conserved sequence region characteristic of this family. | |
233 Additionally, a phylogenetic tree is constructed of each gene together with the (max. 100) sequences of the smCOG seed alignment. | |
234 | |
235 | |
236 For the most thorough genome analysis, we provide genome-wide PFAM HMM analysis of all genes in the genome through modules of the CLUSEAN pipeline. | |
237 Of course, some regions important to secondary metabolism may have been missed in the gene cluster identification stage | |
238 (e.g. because they represent the biosynthetic pathway of a yet unknown secondary metabolite). | |
239 Therefore, when genome-wide PFAM HMM analysis is selected, the PFAM frequencies are also used to find all genome regions in which PFAM domains typical for secondary metabolism are overrepresented. | |
240 | |
241 | |
242 **References** | |
243 | |
244 Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, | |
245 Rainer Breitling and Eriko Takano (2011). antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters. Nucleic Acids Research, doi: 10.1093/nar/gkr466. | |
246 | |
247 http://antismash.secondarymetabolites.org/help.html | |
248 | |
249 | |
250 Bjoern A. Gruening: https://github.com/bgruening/galaxytools/tree/master/antismash | |
251 | |
252 </help> | |
253 </tool> |