1
|
1 <tool id="infernal_cmsearch" name="Search covariance model(s)" version="1.1.0.2">
|
|
<description>against a sequence database (cmsearch)</description>
<!--
    Task splitting: the "seqdb" input is split into pieces (split_mode
    "to_size", split_size 100) and the two outputs listed in merge_outputs
    are merged again afterwards. No inputs are declared as shared.
-->
<parallelism method="multi"
             split_inputs="seqdb"
             split_mode="to_size"
             split_size="100"
             shared_inputs=""
             merge_outputs="outfile,multiple_alignment_output"/>
|
|
<!--
    External packages needed by the command below.
    infernal is listed once, pinned to 1.1: the additional unversioned
    entry was redundant and could resolve to a different installed version.
    gnu_coreutils is pinned to 8.21 (the command uses mktemp).
-->
<requirements>
    <requirement type="package" version="1.1">infernal</requirement>
    <requirement type="package" version="8.21">gnu_coreutils</requirement>
</requirements>
|
|
<!--
    Command template (Cheetah + shell).
    Runs cmsearch with the options selected in <inputs>, writes the hit table
    (tblout) to a temporary file, converts that table to a tab-delimited
    dataset and finally removes the temporary file.
    Wrapped in CDATA so that shell operators such as > and & need no escaping.
-->
<command><![CDATA[
    ## A temp file is needed because the standard tabular output of Infernal is
    ## padded with spaces and therefore not usable as a tabular dataset in Galaxy.
    ## It is converted to a tab-delimited file further down.
    temp_tabular_output=\$(mktemp);

    cmsearch
        ## Infernal options
        --cpu "\${GALAXY_SLOTS:-12}"
        -o /dev/null
        --tformat $seqdb.ext ##target format: fasta, embl, genbank, ddbj, stockholm, pfam, a2m, afa, clustal, and phylip
        $bottomonly
        $toponly
        $cyk
        $acyk
        $notrunc
        $max
        $nohmm
        $mid
        ##$bitscore_thresholds
        --tblout \$temp_tabular_output
        $g
        #if $A:
            $A $multiple_alignment_output
        #end if

        #if $inclusion_thresholds_opts.inclusion_thresholds_selector == "--incE":
            --incE $inclusion_thresholds_opts.incE
        #elif $inclusion_thresholds_opts.inclusion_thresholds_selector == "--incT":
            --incT $inclusion_thresholds_opts.incT
        #end if

        #if $reporting_thresholds_opts.reporting_thresholds_selector == "-E":
            -E $reporting_thresholds_opts.E
        #elif $reporting_thresholds_opts.reporting_thresholds_selector == "-T":
            -T $reporting_thresholds_opts.T
        #end if

        ## CM file from the history or stored as database on disc
        #if $cm_opts.cm_opts_selector == "db":
            $cm_opts.database.fields.path
        #else:
            $cm_opts.cmfile
        #end if

        ## sequence file
        $seqdb
        2>&1
    ;

    ## 1. strip comment text and delete the empty lines this leaves behind
    ## 2. turn every run of spaces into ONE tab (the columns are space-padded,
    ##    so a tab per single space would create empty columns)
    ## 3. turn the 18th and all later tabs back into spaces: the 18th field is
    ##    free text and may contain spaces
    ## The temp file is removed only when the conversion succeeded, so a sed
    ## failure is still visible in the exit status.
    sed -e 's/#.*$//' -e '/^$/d' -e 's/  */\t/g' -e 's/\t/ /18g' \$temp_tabular_output > $outfile
    && rm -f \$temp_tabular_output
]]></command>
<inputs>

    <param name="seqdb" type="data" format="fasta" label="Sequence database"/>

    <!-- Source of the covariance model(s): server-side table or history dataset -->
    <conditional name="cm_opts">
        <param name="cm_opts_selector" type="select" label="Subject covariance models">
            <option value="db" selected="True">Locally installed covariance models</option>
            <option value="histdb">Covariance model from your history</option>
        </param>
        <when value="db">
            <param name="database" type="select" label="Covariance models">
                <!-- infernal.loc columns: 0 = value, 1 = display name, 2 = path to the CM file -->
                <options from_file="infernal.loc">
                    <column name="value" index="0"/>
                    <column name="name" index="1"/>
                    <column name="path" index="2"/>
                </options>
            </param>
        </when>
        <when value="histdb">
            <param name="cmfile" type="data" format="txt" label="Covariance models file from the history."/>
        </when>
    </conditional>

    <param name="g" truevalue="-g" falsevalue="" checked="False" type="boolean"
           label="Turn on the glocal alignment algorithm"
           help="... global with respect to the query model and local with respect to the target database."/>

    <param name="bottomonly" truevalue="--bottomonly" falsevalue="" checked="False" type="boolean"
           label="Only search the bottom (Crick) strand of target sequences"
           help="in the sequence database"/>
    <param name="toponly" truevalue="--toponly" falsevalue="" checked="False" type="boolean"
           label="Only search the top (Watson) strand of target sequences"
           help="in the sequence database"/>

    <param name="cyk" truevalue="--cyk" falsevalue="" checked="False" type="boolean"
           label="Use the CYK algorithm, not Inside, to determine the final score of all hits"
           help=""/>
    <!--
        Previously named "dash dash acyk" with truevalue "dash dash cyk": that
        name cannot be referenced from the command template and the value
        duplicated the option above. The parameter is now "acyk", emits the
        acyk flag and is passed to cmsearch in the command.
    -->
    <param name="acyk" truevalue="--acyk" falsevalue="" checked="False" type="boolean"
           label="Use the CYK algorithm to align hits"
           help="By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues."/>

    <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
           label="Turn off truncated hit detection"
           help=""/>

    <!-- acceleration pipeline -->

    <param name="max" truevalue="--max" falsevalue="" checked="False" type="boolean"
           label="Turn off all filters, and run non-banded Inside on every full-length target sequence"
           help="This increases sensitivity somewhat, at an extremely large cost in speed."/>

    <param name="nohmm" truevalue="--nohmm" falsevalue="" checked="False" type="boolean"
           label="Turn off all HMM filter stages "
           help=""/>

    <param name="mid" truevalue="--mid" falsevalue="" checked="False" type="boolean"
           label="Turn off the HMM SSV and Viterbi filter stages"
           help=""/>

    <!-- Options for model-specific score thresholding (disabled) -->
    <!--
    <param name="bitscore_thresholds" type="select" label="Bit score thresholds" help="Curated CM databases may define specific bit score thresholds for each CM, superseding any thresholding based on statistical significance alone.">
        <option value="" selected="true">None</option>
        <option value=" - -cut_ga">GA (gathering) bit scores</option>
        <option value=" - -cut_nc">NC (noise cutoff) bit score</option>
        <option value=" - -cut_tc">TC (trusted cutoff) bit score</option>
    </param>
    -->

    <!-- Options for inclusion thresholds -->
    <conditional name="inclusion_thresholds_opts">
        <param name="inclusion_thresholds_selector" type="select" label="Inclusion thresholds"
               help="Inclusion thresholds are stricter than reporting thresholds. Inclusion thresholds control which hits are considered to be reliable enough to be included in an output alignment or in a possible subsequent search round, or marked as significant (”!”) as opposed to questionable (”?”) in hit output.">
            <option value="" selected="true">default</option>
            <option value="--incE">Use E-value</option>
            <option value="--incT">Use bit score</option>
        </param>
        <!-- explicit empty value so the "default" option has a matching case -->
        <when value=""/>
        <when value="--incE">
            <param name="incE" type="float" value="0.01" size="5" label="Use E-value"
                   help="of &lt;= X as the hit inclusion threshold.">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </when>
        <when value="--incT">
            <param name="incT" type="integer" size="5" value="0" label="Use bit score"
                   help="of &gt;= X as the hit inclusion threshold.">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </when>
    </conditional>

    <!-- Options controlling reporting thresholds -->
    <conditional name="reporting_thresholds_opts">
        <param name="reporting_thresholds_selector" type="select" label="reporting thresholds"
               help="Reporting thresholds control which hits are reported in output files">
            <option value="" selected="true">default</option>
            <option value="-E">Use E-value</option>
            <option value="-T">Use bit score</option>
        </param>
        <!-- explicit empty value so the "default" option has a matching case -->
        <when value=""/>
        <when value="-E">
            <param name="E" type="float" value="10.0" size="5" label="Use E-value"
                   help="of &lt;= X as the hit reporting threshold. The default is 10.0, meaning that on average, about 10 false positives will be reported per query, so you can see the top of the noise and decide for yourself if it’s really noise.">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </when>
        <when value="-T">
            <param name="T" type="integer" size="5" value="0" label="Use bit score"
                   help="of &gt;= X as the hit reporting threshold.">
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </when>
    </conditional>

    <!-- When checked, the command adds "-A <multiple_alignment_output>" -->
    <param name="A" truevalue="-A" falsevalue="" checked="False" type="boolean"
           label="Save a multiple alignment of all significant hits"
           help="... those satisfying inclusion thresholds"/>

</inputs>
|
|
<outputs>

    <!-- Hit table: cmsearch tblout converted to tab-delimited text by the command -->
    <data format="tabular" name="outfile" label="cmsearch on ${on_string}"/>

    <!--
        File written by cmsearch -A. This is a multiple sequence alignment
        (Stockholm format), not a table, so it is no longer declared as tabular.
        Only created when the "A" checkbox is selected.
    -->
    <data format="stockholm" name="multiple_alignment_output" label="cmsearch on ${on_string} (multi alignment)">
        <filter>A is True</filter>
    </data>

</outputs>
|
|
<!--
    User-facing help (reStructuredText). The previous text described cmalign
    and its options; this tool runs cmsearch, so the description and option
    list now cover cmsearch and the parameters offered in the form above.
    CDATA is used because the text contains the ampersand character.
-->
<help><![CDATA[

**What it does**

cmsearch searches one or more covariance models (CMs) against a sequence database.
Each CM in the covariance model file is used as a query to search the target
sequences, and the sequences with the most significant matches to that CM are reported.

The covariance models are either chosen from the models installed on this Galaxy server
or taken from a CM file in your history. The sequence database is a FASTA dataset from
your history.

The hits are returned as a tab-delimited table with the 18 columns described under
*Output format* below. If *Save a multiple alignment of all significant hits* is
selected, the alignment of all hits that satisfy the inclusion thresholds is returned
as a second dataset.


**Options**

- ``-g``: turn on the glocal alignment algorithm, global with respect to the query model and local with respect to the target database.
- ``--bottomonly``: only search the bottom (Crick) strand of the target sequences.
- ``--toponly``: only search the top (Watson) strand of the target sequences.
- ``--cyk``: use the CYK algorithm, not Inside, to determine the final score of all hits.
- ``--acyk``: use the CYK algorithm to align hits. By default, the Durbin/Holmes optimal accuracy algorithm is used, which finds the alignment that maximizes the expected accuracy of all aligned residues.
- ``--notrunc``: turn off truncated hit detection.
- ``--max``: turn off all filters, and run non-banded Inside on every full-length target sequence. This increases sensitivity somewhat, at an extremely large cost in speed.
- ``--nohmm``: turn off all HMM filter stages.
- ``--mid``: turn off the HMM SSV and Viterbi filter stages.
- ``--incE`` / ``--incT``: use an E-value or a bit score as the hit inclusion threshold. Inclusion thresholds are stricter than reporting thresholds; they control which hits are considered reliable enough to be included in an output alignment, or marked as significant ("!") as opposed to questionable ("?") in the hit output.
- ``-E`` / ``-T``: use an E-value or a bit score as the hit reporting threshold. Reporting thresholds control which hits are reported in the output files.
- ``-A``: save a multiple alignment of all significant hits (those satisfying the inclusion thresholds).


Output format
-------------

(1) target name: The name of the target sequence or profile.
(2) accession: The accession of the target sequence or profile, or '-' if none.
(3) query name: The name of the query sequence or profile.
(4) accession: The accession of the query sequence or profile, or '-' if none.
(5) mdl (model): Which type of model was used to compute the final score. Either 'cm' or 'hmm'. A CM is used to compute the final hit scores unless the model has zero basepairs or the --hmmonly option is used, in which case a HMM will be used.
(6) mdl from (model coord): The start of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.
(7) mdl to (model coord): The end of the alignment of this hit with respect to the profile (CM or HMM), numbered 1..N for a profile of N consensus positions.
(8) seq from (ali coord): The start of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
(9) seq to (ali coord): The end of the alignment of this hit with respect to the sequence, numbered 1..L for a sequence of L residues.
(10) strand: The strand on which the hit occurs on the sequence. '+' if the hit is on the top (Watson) strand, '-' if the hit is on the bottom (Crick) strand. If on the top strand, the "seq from" value will be less than or equal to the "seq to" value, else it will be greater than or equal to it.
(11) trunc: Indicates if this is predicted to be a truncated CM hit or not. This will be "no" if it is a CM hit that is not predicted to be truncated by the end of the sequence, "5'" or "3'" if the hit is predicted to have one or more 5' or 3' residues missing due to an artificial truncation of the sequence, or "5'&3'" if the hit is predicted to have one or more 5' residues missing and one or more 3' residues missing. If the hit is an HMM hit, this will always be '-'.
(12) pass: Indicates what "pass" of the pipeline the hit was detected on. This is probably only useful for testing and debugging. Non-truncated hits are found on the first pass, truncated hits are found on successive passes.
(13) gc: Fraction of G and C nucleotides in the hit.
(14) bias: The biased-composition correction: the bit score difference contributed by the null3 model for CM hits, or the null2 model for HMM hits. High bias scores may be a red flag for a false positive. It is difficult to correct for all possible ways in which nonrandom but nonhomologous biological sequences can appear to be similar, such as short-period tandem repeats, so there are cases where the bias correction is not strong enough (creating false positives).
(15) score: The score (in bits) for this target/query comparison. It includes the biased-composition correction (the "null3" model for CM hits, or the "null2" model for HMM hits).
(16) E-value: The expectation value (statistical significance) of the target. This is a per query E-value; i.e. calculated as the expected number of false positives achieving this comparison's score for a single query against the search space Z. For cmsearch Z is defined as the total number of nucleotides in the target dataset multiplied by 2 because both strands are searched. For cmscan Z is the total number of nucleotides in the query sequence multiplied by 2 because both strands are searched and multiplied by the number of models in the target database. If you search with multiple queries and if you want to control the overall false positive rate of that search rather than the false positive rate per query, you will want to multiply this per-query E-value by how many queries you're doing.
(17) inc: Indicates whether or not this hit achieves the inclusion threshold: '!' if it does, '?' if it does not (and rather only achieves the reporting threshold). By default, the inclusion threshold is an E-value of 0.01 and the reporting threshold is an E-value of 10.0, but these can be changed with command line options as described in the manual pages.
(18) description of target: The remainder of the line is the target's description line, as free text.


For further questions please refer to the Infernal Userguide_.

.. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf


How do I cite Infernal?
-----------------------

The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches, Bioinformatics 29:2933-2935 (2013).

**Galaxy Wrapper Author**

* Bjoern Gruening, University of Freiburg

]]></help>
|
|
374 </tool>
|