Mercurial > repos > bgruening > infernal
comparison cmscan.xml @ 5:6e18e0b098cd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/infernal commit 80c721dcfe02a2b8baf8e2c64b76cbcd71b23d86
author | bgruening |
---|---|
date | Sat, 21 Jan 2017 17:36:57 -0500 |
parents | |
children | c9e29ac5d099 |
comparison
equal
deleted
inserted
replaced
4:c47a7c52ac4f | 5:6e18e0b098cd |
---|---|
1 <tool id="infernal_cmscan" name="cmscan" version="@VERSION@.0"> | |
2 <description> Search sequences against collections of covariance models</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <expand macro="stdio"/> | |
8 <command> | |
9 <![CDATA[ | |
10 ## a temp file is needed, because the standard tabular output from infernal is not useful in Galaxy | |
11 ## it will be converted to a tab delimited file and piped to Galaxy | |
12 temp_tabular_output=\$(mktemp) && | |
13 ## a CM taken from the history is linked under the fixed name cmdb.cm, matching the index links created below | |
14 #if str($cm_opts.cm_opts_selector) == "histdb": | |
15 ln -s '$cm_opts.cmfile' cmdb.cm && | |
16 #end if | |
17 ## unpack the four .i1{fimp} index files and link them as cmdb.cm.i1{fimp} | |
18 tar xvf '$aux_files' && | |
19 ln -s `find *.i1f` cmdb.cm.i1f && | |
20 ln -s `find *.i1i` cmdb.cm.i1i && | |
21 ln -s `find *.i1m` cmdb.cm.i1m && | |
22 ln -s `find *.i1p` cmdb.cm.i1p && | |
23 ## temp_tabular_output is a shell variable (escaped from Cheetah): keep it in double quotes so the shell expands it | |
24 cmscan | |
25 --cpu "\${GALAXY_SLOTS:-2}" | |
26 --tblout "\$temp_tabular_output" | |
27 -o /dev/null | |
28 $g | |
29 #if $Z | |
30 -Z $Z | |
31 #end if | |
32 $verbose | |
33 $other_opts.notrunc | |
34 $other_opts.anytrunc | |
35 $other_opts.nonull3 | |
36 #if $other_opts.smxsize != 128.0 | |
37 --smxsize $other_opts.smxsize | |
38 #end if | |
39 #if $other_opts.mxsize != 128.0 | |
40 --mxsize $other_opts.mxsize | |
41 #end if | |
42 $other_opts.cyk | |
43 $other_opts.acyk | |
44 $other_opts.bottomonly | |
45 $other_opts.toponly | |
46 #if str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incE": | |
47 --incE $inclusion_thresholds_opts.incE | |
48 #elif str($inclusion_thresholds_opts.inclusion_thresholds_selector) == "--incT": | |
49 --incT $inclusion_thresholds_opts.incT | |
50 #end if | |
51 #if str($reporting_thresholds_opts.reporting_thresholds_selector) == "-E": | |
52 -E $reporting_thresholds_opts.E | |
53 #elif str($reporting_thresholds_opts.reporting_thresholds_selector) == "-T": | |
54 -T $reporting_thresholds_opts.T | |
55 #end if | |
56 $model_thresholds.cut_ga | |
57 $model_thresholds.cut_nc | |
58 $model_thresholds.cut_tc | |
59 #if str($acceleration_huristics.acceleration_huristics_selector) == "FZ" | |
60 --FZ $acceleration_huristics.FZ | |
61 #else | |
62 $acceleration_huristics.acceleration_huristics_selector | |
63 #if str($acceleration_huristics.acceleration_huristics_selector) == "--mid" | |
64 --Fmid $acceleration_huristics.Fmid | |
65 #end if | |
66 #end if | |
67 ## CM file from the history or stored as database on disc | |
68 #if str($cm_opts.cm_opts_selector) == "db": | |
69 '$cm_opts.database.fields.path' | |
70 #else: | |
71 ##'$cm_opts.cmfile' | |
72 cmdb.cm | |
73 #end if | |
74 ## sequence file | |
75 '$seqdb' | |
76 && | |
77 sed 's/ \+ /\t/g' "\$temp_tabular_output" > '$outfile' && rm -f "\$temp_tabular_output" | |
78 | |
79 ]]> | |
80 </command> | |
81 <inputs> | |
82 <param name="seqdb" type="data" format="fasta" label="Sequence database <seqfile>"/> | |
83 | |
84 <conditional name="cm_opts"> | |
85 <param name="cm_opts_selector" type="select" label="Subject covariance models <cmdb> "> | |
86 <option value="db" >Locally installed covariance models</option> | |
87 <option value="histdb" selected="True">Covariance model from your history</option> | |
88 </param> | |
89 <when value="db"> | |
90 <param name="database" type="select" label="Covariance models"> | |
91 <options from_file="infernal.loc"> | |
92 <column name="value" index="0"/> | |
93 <column name="name" index="1"/> | |
94 <column name="path" index="2"/> | |
95 </options> | |
96 </param> | |
97 </when> | |
98 <when value="histdb"> | |
99 <param name="cmfile" type="data" format="cm" label="Covariance models file from the history."/> | |
100 </when> | |
101 </conditional> | |
102 <param name="aux_files" type="data" format="tar" label="Auxiliary files" help="A tar file containing the four auxiliary files suffixed .i1{fimp}. These files are generated after pressing the cm files using cmpress"/> | |
103 | |
104 <param argument="-g" truevalue="-g" falsevalue="" checked="False" type="boolean" | |
105 label="Turn on the glocal alignment algorithm" help="... global with respect to the query model and local with respect to the target database."/> | |
106 <param argument="-Z" type="float" optional="true" min="0" label="Search space size in *Mb* for E-value calculations" help="Without the use of this option, the search space size changes for each query sequence"/> | |
107 <param argument="--verbose" truevalue="--verbose" falsevalue="" checked="False" type="boolean" | |
108 label="Be verbose" help="report extra information; mainly useful for debugging"/> | |
109 | |
110 | |
111 | |
112 <!-- Options for inclusion thresholds --> | |
113 <conditional name="inclusion_thresholds_opts"> | |
114 <param name="inclusion_thresholds_selector" type="select" label="Inclusion thresholds" | |
115 help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output."> | |
116 <option value="" selected="true">default</option> | |
117 <option value="--incE">Use E-value</option> | |
118 <option value="--incT">Use bit score</option> | |
119 </param> | |
120 <when value=""/> | |
121 <when value="--incE"> | |
122 <param name="incE" type="float" value="0.01" label="Use E-value" help="of <= X as the hit inclusion threshold."> | |
123 <sanitizer> | |
124 <valid initial="string.printable"> | |
125 <remove value="'"/> | |
126 </valid> | |
127 </sanitizer> | |
128 </param> | |
129 </when> | |
130 <when value="--incT"> | |
131 <param name="incT" type="integer" value="0" label="Use bit score" help="of >= X as the hit inclusion threshold."> | |
132 <sanitizer> | |
133 <valid initial="string.printable"> | |
134 <remove value="'"/> | |
135 </valid> | |
136 </sanitizer> | |
137 </param> | |
138 </when> | |
139 </conditional> | |
140 | |
141 <!-- Options controlling reporting thresholds --> | |
142 | |
143 <conditional name="reporting_thresholds_opts"> | |
144 <param name="reporting_thresholds_selector" type="select" label="reporting thresholds" | |
145 help="Reporting thresholds control which hits are reported in output files"> | |
146 <option value="" selected="true">default</option> | |
147 <option value="-E">Use E-value</option> | |
148 <option value="-T">Use bit score</option> | |
149 </param> | |
150 <when value=""/> | |
151 <when value="-E"> | |
152 <param name="E" type="float" value="10.0" label="Use E-value" help="of <= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise."> | |
153 <sanitizer> | |
154 <valid initial="string.printable"> | |
155 <remove value="'"/> | |
156 </valid> | |
157 </sanitizer> | |
158 </param> | |
159 </when> | |
160 <when value="-T"> | |
161 <param name="T" type="integer" value="0" label="Use bit score" help="of >= X as the hit reporting threshold."> | |
162 <sanitizer> | |
163 <valid initial="string.printable"> | |
164 <remove value="'"/> | |
165 </valid> | |
166 </sanitizer> | |
167 </param> | |
168 </when> | |
169 </conditional> | |
170 | |
171 <section name="model_thresholds" title="Options controlling model-specific reporting thresholds" help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone."> | |
172 <param argument="--cut_ga" truevalue="--cut_ga" falsevalue="" checked="false" type="boolean" | |
173 label="Use CM's GA gathering cutoffs as reporting thresholds" help="GA thresholds are generally considered to be the reliable curated thresholds defining family membership"/> | |
174 <param argument="--cut_nc" truevalue="--cut_nc" falsevalue="" checked="false" type="boolean" | |
175 label="use CM's NC noise cutoffs as reporting thresholds" help="NC thresholds are generally considered to be the score of the highest-scoring known false positive."/> | |
176 <param argument="--cut_tc" truevalue="--cut_tc" falsevalue="" checked="false" type="boolean" | |
177 label="use CM's TC trusted cutoffs as reporting thresholds" help="TC thresholds are generally considered to be the score of the lowest-scoring known true positive that is above all known false positives."/> | |
178 </section> | |
179 | |
180 <conditional name="acceleration_huristics"> | |
181 <param name="acceleration_huristics_selector" type="select" label="Options controlling acceleration heuristics" help="These options are, in order from least strict (slowest but most sensitive) to most strict (fastest but least sensitive)"> | |
182 <option value="--max">Turn all heuristic filters off (--max)</option> | |
183 <option value="--nohmm">Skip all HMM filter stages, use only CM (--nohmm)</option> | |
184 <option value="--mid">Skip first two HMM filter stages (SSV and Vit) (--mid)</option> | |
185 <option value="--default" selected="true">Run search space size-dependent pipeline (--default)</option> | |
186 <option value="--rfam">Use a strict filtering strategy devised for large databases (more than 20 Gb) (--rfam)</option> | |
187 <option value="--hmmonly">Use HMM only, don't use a CM at all (--hmmonly)</option> | |
188 <option value="FZ">set filters to defaults used for a search space of size 'x' Mb (--FZ)</option> | |
189 </param> | |
190 <when value="--max"> | |
191 </when> | |
192 <when value="--nohmm"> | |
193 </when> | |
194 <when value="--mid"> | |
195 <param argument="--Fmid" type="float" value="0.02" label="P-value threshold for HMM stages"/> | |
196 </when> | |
197 <when value="--default"> | |
198 </when> | |
199 <when value="--rfam"> | |
200 </when> | |
201 <when value="--hmmonly"> | |
202 </when> | |
203 <when value="FZ"> | |
204 <param argument="--FZ" type="float" value="125" label="Size of search space in Mb"/> | |
205 </when> | |
206 </conditional> | |
207 | |
208 <section name="other_opts" title="Other options"> | |
209 <param argument="--notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean" | |
210 label="Skip truncated hit detection" help=""/> | |
211 <param argument="--anytrunc" truevalue="--anytrunc" falsevalue="" checked="false" type="boolean" | |
212 label="Allow full and truncated hits anywhere within sequences" help=""/> | |
213 <param argument="--nonull3" truevalue="--nonull3" falsevalue="" checked="false" type="boolean" | |
214 label="Turn off the null3 CM score corrections for biased composition" help="This correction is not used during the HMM filter stages."/> | |
215 <param argument="--mxsize" type="float" value="128.0" min="0.1" | |
216 label="Set the maximum allowable CM DP matrix size to 'x' megabytes" help=""/> | |
217 <param argument="--smxsize" type="float" value="128.0" min="0.1" | |
218 label="Set the maximum allowable CM search DP matrix size to 'x' megabytes." help=""/> | |
219 <param argument="--cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean" | |
220 label="Use the CYK algorithm, not Inside, to determine the final score of all hits" help=""/> | |
221 <param argument="--acyk" truevalue="--acyk" falsevalue="" checked="False" type="boolean" | |
222 label="Use the CYK algorithm to align hits" help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/> | |
223 <param argument="--bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean" | |
224 label="Only search the bottom (Crick) strand of target sequences" help="in the sequence database"/> | |
225 <param argument="--toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean" | |
226 label="Only search the top (Watson) strand of target sequences" help="in the sequence database"/> | |
227 | |
228 </section> | |
229 | |
230 | |
231 | |
232 </inputs> | |
233 <outputs> | |
234 <data format="tabular" name="outfile" label="cmscan on ${on_string}"/> | |
235 </outputs> | |
236 <tests> | |
237 <test> | |
238 <conditional name="cm_opts"> | |
239 <param name="cm_opts_selector" value="histdb"/> | |
240 <param name="cmfile" value="minifam.cm" /> | |
241 </conditional> | |
242 <param name="aux_files" value="minifam.tar" ftype="tar"/> | |
243 <param name="seqdb" value="metag-example.fa"/> | |
244 <output name="outfile"> | |
245 <assert_contents> | |
246 <has_text text="AAGA01015927.1"/> | |
247 </assert_contents> | |
248 </output> | |
249 </test> | |
250 | |
251 </tests> | |
252 <help> | |
253 <![CDATA[ | |
254 | |
255 | |
256 **What it does** | |
257 | |
258 cmscan is used to search sequences against collections of covariance models. | |
259 For each sequence in <seqfile>, use that query sequence to search the target database of CMs in <cmdb>, | |
260 and output ranked lists of the CMs with the most significant matches to the sequence | |
261 | |
262 **Input format** | |
263 | |
264 The <seqfile> may contain more than one query sequence. It can be in FASTA format, or several other common | |
265 sequence file formats (genbank, embl, among others), or in alignment file formats (stockholm, aligned fasta, and | |
266 others). | |
267 | |
268 The <cmdb> needs to be press’ed using cmpress before it can be searched with cmscan. This creates four binary | |
269 files, suffixed .i1{fimp}. Additionally, <cmdb> must have been calibrated for E-values with cmcalibrate before being | |
270 press’ed with cmpress. | |
271 | |
272 NOTE: Please provide a tar file that contains the .cm file in addition to the four binary files, suffixed .i1{fimp}, | |
273 and specify the file type as "tar" before uploading the file. Otherwise Galaxy will not read the binary files properly. | |
274 | |
275 **Output format** | |
276 | |
277 The output format is designed to be human-readable. | |
278 | |
279 For further questions please refer to the Infernal `Userguide <http://eddylab.org/infernal/Userguide.pdf>`_. | |
280 | |
281 | |
282 ]]> | |
283 </help> | |
284 | |
285 <expand macro="citations" /> | |
286 | |
287 </tool> |