annotate cmbuild.xml @ 1:55bb96edfc07 draft

Uploaded
author bgruening
date Thu, 24 Apr 2014 15:02:05 -0400
parents 652f9d550531
children fac157e22e1b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
652f9d550531 Uploaded
bgruening
parents:
diff changeset
1 <tool id="infernal_cmbuild" name="Build covariance models" version="1.1.0.1">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
2 <description>from sequence alignments (cmbuild)</description>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
3 <parallelism method="multi" split_inputs="alignment_infile" split_mode="to_size" split_size="10" shared_inputs="" merge_outputs="cmfile_outfile"></parallelism>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
4 <requirements>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
5 <requirement type="package">infernal</requirement>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
6 <requirement type="package" version="1.1">infernal</requirement>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
7 <requirement type="package" version="8.21">gnu_coreutils</requirement>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
8 </requirements>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
9 <command>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
10 cmbuild
652f9d550531 Uploaded
bgruening
parents:
diff changeset
11 #if $is_summery_output:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
12 -o $summary_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
13 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
14
652f9d550531 Uploaded
bgruening
parents:
diff changeset
15 ## too many outputs, is that one really needed?
652f9d550531 Uploaded
bgruening
parents:
diff changeset
16 ##-O $annotated_source_alignment_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
17
652f9d550531 Uploaded
bgruening
parents:
diff changeset
18 $model_construction_opts.model_construction_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
19 #if str($model_construction_opts.model_construction_opts_selector) == '--fast':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
20 --symfrac $model_construction_opts.symfrac
652f9d550531 Uploaded
bgruening
parents:
diff changeset
21 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
22
652f9d550531 Uploaded
bgruening
parents:
diff changeset
23 $noss
652f9d550531 Uploaded
bgruening
parents:
diff changeset
24
652f9d550531 Uploaded
bgruening
parents:
diff changeset
25 $relative_weights_opts.relative_weights_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
26 #if str($relative_weights_opts.relative_weights_opts_selector) == '--wblosum':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
27 --wid $relative_weights_opts.wid
652f9d550531 Uploaded
bgruening
parents:
diff changeset
28 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
29
652f9d550531 Uploaded
bgruening
parents:
diff changeset
30 $effective_opts.effective_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
31 #if str($effective_opts.effective_opts_selector) == '--eent':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
32 --ere $effective_opts.ere
652f9d550531 Uploaded
bgruening
parents:
diff changeset
33 --eminseq $effective_opts.eminseq
652f9d550531 Uploaded
bgruening
parents:
diff changeset
34 --ehmmre $effective_opts.ehmmre
652f9d550531 Uploaded
bgruening
parents:
diff changeset
35 --eset $effective_opts.eset
652f9d550531 Uploaded
bgruening
parents:
diff changeset
36 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
37
652f9d550531 Uploaded
bgruening
parents:
diff changeset
38 #if str($refining_opts.refining_opts_selector) == '--refine':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
39 #if $refining_opts.refine_output:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
40 --refine $refined_multiple_alignment_output
652f9d550531 Uploaded
bgruening
parents:
diff changeset
41 #else:
652f9d550531 Uploaded
bgruening
parents:
diff changeset
42 --refine /dev/null
652f9d550531 Uploaded
bgruening
parents:
diff changeset
43 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
44
652f9d550531 Uploaded
bgruening
parents:
diff changeset
45 $refining_opts.l
652f9d550531 Uploaded
bgruening
parents:
diff changeset
46 $refining_opts.gibbs_opts.gibbs_opts_selector
652f9d550531 Uploaded
bgruening
parents:
diff changeset
47
652f9d550531 Uploaded
bgruening
parents:
diff changeset
48 #if str($refining_opts.gibbs_opts.gibbs_opts_selector) == '--gibbs':
652f9d550531 Uploaded
bgruening
parents:
diff changeset
49 --seed $refining_opts.gibbs_opts.random_seed
652f9d550531 Uploaded
bgruening
parents:
diff changeset
50 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
51
652f9d550531 Uploaded
bgruening
parents:
diff changeset
52 $refining_opts.notrunc
652f9d550531 Uploaded
bgruening
parents:
diff changeset
53 $refining_opts.cyk
652f9d550531 Uploaded
bgruening
parents:
diff changeset
54 #end if
652f9d550531 Uploaded
bgruening
parents:
diff changeset
55
652f9d550531 Uploaded
bgruening
parents:
diff changeset
56 $cmfile_outfile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
57 $alignment_infile
652f9d550531 Uploaded
bgruening
parents:
diff changeset
58
652f9d550531 Uploaded
bgruening
parents:
diff changeset
59 </command>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
60 <inputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
61 <!-- Stockholm or SELEX
652f9d550531 Uploaded
bgruening
parents:
diff changeset
62 SELEX is defined in EMBOSS datatypes
652f9d550531 Uploaded
bgruening
parents:
diff changeset
63 -->
652f9d550531 Uploaded
bgruening
parents:
diff changeset
64 <param name="alignment_infile" type="data" format="stockholm,selex" label="Sequence database"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
65
652f9d550531 Uploaded
bgruening
parents:
diff changeset
66 <conditional name="model_construction_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
67 <param name="model_construction_opts_selector" type="select" label="These options control how consensus columns are defined in an alignment" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
68 <option value="--fast" selected="true">automatic (--fast)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
69 <option value="--hand">user defined (--hand)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
70 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
71 <when value="--fast">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
72 <param name="symfrac" type="float" value="0.5" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
73 label="Define the residue fraction threshold necessary to define a consensus (--symfrac)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
74 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
75 <when value="--hand"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
76 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
77
652f9d550531 Uploaded
bgruening
parents:
diff changeset
78 <param name="noss" truevalue="--noss" falsevalue="" checked="False" type="boolean"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
79 label="Ignore the secondary structure annotation, if any, in your multiple alignment file (--noss)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
80
652f9d550531 Uploaded
bgruening
parents:
diff changeset
81 <conditional name="relative_weights_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
82 <param name="relative_weights_opts_selector" type="select" label="Options controlling relative weights" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
83 <option value="--wpb" selected="true">Henikoff position-based weights (--wpb)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
84 <option value="--wgsc">Gerstein/Sonnhammer/Chothia (--wgsc)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
85 <option value="--wnone">no sequence weighting (--wnone)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
86 <option value="--wgiven">Sequence weight from given in input file (--wgiven)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
87 <option value="--wblosum">BLOSUM filtering algorithm (--wblosum)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
88 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
89 <when value="--wpb"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
90 <when value="--wgsc"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
91 <when value="--wnone"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
92 <when value="--wgiven"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
93 <when value="--wblosum">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
94 <param name="wid" type="float" value="0.5" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
95 label="Percent identity for clustering the alignment (--wid)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
96 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
97 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
98
652f9d550531 Uploaded
bgruening
parents:
diff changeset
99
652f9d550531 Uploaded
bgruening
parents:
diff changeset
100 <conditional name="effective_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
101 <param name="effective_opts_selector" type="select" label="Options controlling effective sequence number" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
102 <option value="--eent" selected="true">entropy weighting strategy (--eent)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
103 <option value="--enone">Turn off the entropy weighting strategy (--enone)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
104 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
105 <when value="--enone"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
106 <when value="--eent">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
107 <param name="ere" type="float" value="0.59" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
108 label="Set the target mean match state relative entropy (--ere)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
109
652f9d550531 Uploaded
bgruening
parents:
diff changeset
110 <param name="eminseq" type="integer" value="" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
111 label="Define the minimum allowed effective sequence number (--eminseq)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
112
652f9d550531 Uploaded
bgruening
parents:
diff changeset
113 <param name="ehmmre" type="float" value="" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
114 label="Set the target HMM mean match state relative entropy (--ehmmre)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
115
652f9d550531 Uploaded
bgruening
parents:
diff changeset
116 <param name="eset" type="integer" value="" size="5"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
117 label="Set the effective sequence number for entropy weighting (--eset)" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
118 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
119 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
120
652f9d550531 Uploaded
bgruening
parents:
diff changeset
121
652f9d550531 Uploaded
bgruening
parents:
diff changeset
122 <conditional name="refining_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
123 <param name="refining_opts_selector" type="select" label="Options for refining the input alignment" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
124 <option value="" selected="true">No refinement</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
125 <option value="--refine">refine the input alignment</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
126 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
127 <when value=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
128 <when value="--refine">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
129
652f9d550531 Uploaded
bgruening
parents:
diff changeset
130 <conditional name="gibbs_opts">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
131 <param name="gibbs_opts_selector" type="select" label="refinement mode" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
132 <option value="" selected="true">expectation-maximization (EM)</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
133 <option value="--gibbs">Gibbs sampling</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
134 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
135 <when value=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
136 <when value="--gibbs">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
137 <param name="random_seed" type="integer" value="0" label="Random seed (--seed)" help="" />
652f9d550531 Uploaded
bgruening
parents:
diff changeset
138 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
139 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
140
652f9d550531 Uploaded
bgruening
parents:
diff changeset
141 <param name="l" truevalue="-l" falsevalue="" checked="False" type="boolean"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
142 label="Turn on the local alignment algorithm" help="... which allows the alignment to span two or more subsequences if necessary"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
143
652f9d550531 Uploaded
bgruening
parents:
diff changeset
144 <param name="notrunc" truevalue="--notrunc" falsevalue="" checked="False" type="boolean"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
145 label="Turn off the truncated alignment algorithm" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
146
652f9d550531 Uploaded
bgruening
parents:
diff changeset
147 <param name="cyk" type="select" label="Alignment algorithm used during refinement" help="">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
148 <option value="" selected="true">optimal accuracy algorithm</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
149 <option value="--cyk">align with the CYK algorithm</option>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
150 </param>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
151
652f9d550531 Uploaded
bgruening
parents:
diff changeset
152 <param name="refine_output" truevalue="" falsevalue="" checked="False" type="boolean"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
153 label="Output the refined alignment file as it is used to build the covariance model" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
154
652f9d550531 Uploaded
bgruening
parents:
diff changeset
155 </when>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
156 </conditional>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
157
652f9d550531 Uploaded
bgruening
parents:
diff changeset
158
652f9d550531 Uploaded
bgruening
parents:
diff changeset
159 <param name="is_summery_output" truevalue="" falsevalue="" checked="False" type="boolean"
652f9d550531 Uploaded
bgruening
parents:
diff changeset
160 label="Output a summary file?" help=""/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
161
652f9d550531 Uploaded
bgruening
parents:
diff changeset
162 </inputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
163 <outputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
164
652f9d550531 Uploaded
bgruening
parents:
diff changeset
165 <data format="txt" name="summary_outfile" label="cmbuild summary on ${on_string}">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
166 <filter>is_summery_output is True</filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
167 </data>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
168 <!--<data format="stockholm" name="annotated_source_alignment_outfile" label="Annotated alignment from ${on_string}"/>-->
652f9d550531 Uploaded
bgruening
parents:
diff changeset
169 <data format="cm" name="cmfile_outfile" label="Covariance models from ${on_string}"/>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
170
652f9d550531 Uploaded
bgruening
parents:
diff changeset
171 <data format="stockholm" name="refined_multiple_alignment_output" label="refined alignment file of ${on_string}">
652f9d550531 Uploaded
bgruening
parents:
diff changeset
172 <filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
173 ((
652f9d550531 Uploaded
bgruening
parents:
diff changeset
174 refining_opts['refining_opts_selector'] == "--refine" and
652f9d550531 Uploaded
bgruening
parents:
diff changeset
175 refining_opts['refine_output'] is True
652f9d550531 Uploaded
bgruening
parents:
diff changeset
176 ))
652f9d550531 Uploaded
bgruening
parents:
diff changeset
177 </filter>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
178 </data>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
179
652f9d550531 Uploaded
bgruening
parents:
diff changeset
180 </outputs>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
181 <help>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
182
652f9d550531 Uploaded
bgruening
parents:
diff changeset
183
652f9d550531 Uploaded
bgruening
parents:
diff changeset
184 **What it does**
652f9d550531 Uploaded
bgruening
parents:
diff changeset
185
652f9d550531 Uploaded
bgruening
parents:
diff changeset
186 For each multiple sequence alignment build a covariance model.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
187 The alignment file must be in Stockholm or SELEX format, and must contain consensus secondary structure annotation.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
188 cmbuild uses the consensus structure to determine the architecture of the CM.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
189
652f9d550531 Uploaded
bgruening
parents:
diff changeset
190 In addition to writing CM(s) to CMFILE_OUT, cmbuild also outputs a single line for each model created to stdout. Each
652f9d550531 Uploaded
bgruening
parents:
diff changeset
191 line has the following fields: "aln": the index of the alignment used to build the CM; "idx": the index of the CM in the
652f9d550531 Uploaded
bgruening
parents:
diff changeset
192 CMFILE_OUT; "name": the name of the CM; "nseq": the number of sequences in the alignment used to build the CM;
652f9d550531 Uploaded
bgruening
parents:
diff changeset
193 "eff nseq": the effective number of sequences used to build the model; "alen": the length of the alignment used to build
652f9d550531 Uploaded
bgruening
parents:
diff changeset
194 the CM; "clen": the number of columns from the alignment defined as consensus (match) columns; "bps": the number
652f9d550531 Uploaded
bgruening
parents:
diff changeset
195 of basepairs in the CM; "bifs": the number of bifurcations in the CM; "rel entropy: CM": the total relative entropy of the
652f9d550531 Uploaded
bgruening
parents:
diff changeset
196 model divided by the number of consensus columns; "rel entropy: HMM": the total relative entropy of the model ignoring
652f9d550531 Uploaded
bgruening
parents:
diff changeset
197 secondary structure divided by the number of consensus columns; "description": description of the model/alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
198
652f9d550531 Uploaded
bgruening
parents:
diff changeset
199
652f9d550531 Uploaded
bgruening
parents:
diff changeset
200 Options controlling model construction
652f9d550531 Uploaded
bgruening
parents:
diff changeset
201 --------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
202
652f9d550531 Uploaded
bgruening
parents:
diff changeset
203 These options control how consensus columns are defined in an alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
204
652f9d550531 Uploaded
bgruening
parents:
diff changeset
205 * --fast Define consensus columns automatically as those that have a fraction >= symfrac of residues as opposed to gaps. (See below for the --symfrac option.) This is the default.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
206 * --hand Use reference coordinate annotation (#=GC RF line, in Stockholm) to determine which columns are consensus, and which are inserts. Any non-gap character indicates a consensus column. (For example, mark consensus columns with "x", and insert columns with ".".)
652f9d550531 Uploaded
bgruening
parents:
diff changeset
207 * --symfrac Define the residue fraction threshold necessary to define a consensus column when not using --hand. The default is 0.5. The symbol fraction in each column is calculated after taking relative sequence weighting into account. Setting this to 0.0 means that every alignment column will be assigned as consensus, which may be useful in some cases. Setting it to 1.0 means that only columns that include 0 gaps will be assigned as consensus.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
208 * --noss Ignore the secondary structure annotation, if any, in MSA-Infile and build a CM with zero basepairs. This model will be similar to a profile HMM and the cmsearch and cmscan programs will use HMM algorithms which are faster than CM ones for this model. Additionally, a zero basepair model need not be calibrated with cmcalibrate prior to running cmsearch with it. The --noss option must be used if there is no secondary structure annotation in MSA-Infile.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
209
652f9d550531 Uploaded
bgruening
parents:
diff changeset
210
652f9d550531 Uploaded
bgruening
parents:
diff changeset
211 Options controlling relative weights
652f9d550531 Uploaded
bgruening
parents:
diff changeset
212 ------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
213
652f9d550531 Uploaded
bgruening
parents:
diff changeset
214 cmbuild uses an ad hoc sequence weighting algorithm to downweight closely related sequences and upweight distantly
652f9d550531 Uploaded
bgruening
parents:
diff changeset
215 related ones. This has the effect of making models less biased by uneven phylogenetic representation. For example,
652f9d550531 Uploaded
bgruening
parents:
diff changeset
216 two identical sequences would typically each receive half the weight that one sequence would. These options control
652f9d550531 Uploaded
bgruening
parents:
diff changeset
217 which algorithm gets used.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
218
652f9d550531 Uploaded
bgruening
parents:
diff changeset
219 * --wpb Use the Henikoff position-based sequence weighting scheme [Henikoff and Henikoff, J. Mol. Biol. 243:574, 1994]. This is the default.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
220 * --wgsc Use the Gerstein/Sonnhammer/Chothia weighting algorithm [Gerstein et al, J. Mol. Biol. 235:1067, 1994].
652f9d550531 Uploaded
bgruening
parents:
diff changeset
221 * --wnone Turn sequence weighting off; e.g. explicitly set all sequence weights to 1.0.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
222 * --wgiven Use sequence weights as given in annotation in the input alignment file. If no weights were given, assume they are all 1.0. The default is to determine new sequence weights by the Gerstein/Sonnhammer/Chothia algorithm, ignoring any annotated weights.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
223 * --wblosum Use the BLOSUM filtering algorithm to weight the sequences, instead of the default GSC weighting. Cluster the sequences at a given percentage identity (see --wid); assign each cluster a total weight of 1.0, distributed equally amongst the members of that cluster.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
224 * --wid Controls the behavior of the --wblosum weighting option by setting the percent identity for clustering the alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
225
652f9d550531 Uploaded
bgruening
parents:
diff changeset
226
652f9d550531 Uploaded
bgruening
parents:
diff changeset
227 Options controlling effective sequence number
652f9d550531 Uploaded
bgruening
parents:
diff changeset
228 ---------------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
229
652f9d550531 Uploaded
bgruening
parents:
diff changeset
230 After relative weights are determined, they are normalized to sum to a total effective sequence number, eff nseq. This
652f9d550531 Uploaded
bgruening
parents:
diff changeset
231 number may be the actual number of sequences in the alignment, but it is almost always smaller than that. The default
652f9d550531 Uploaded
bgruening
parents:
diff changeset
232 entropy weighting method (--eent) reduces the effective sequence number to reduce the information content (relative
652f9d550531 Uploaded
bgruening
parents:
diff changeset
233 entropy, or average expected score on true homologs) per consensus position. The target relative entropy is controlled
652f9d550531 Uploaded
bgruening
parents:
diff changeset
234 by a two-parameter function, where the two parameters are settable with --ere and --esigma.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
235
652f9d550531 Uploaded
bgruening
parents:
diff changeset
236 * --eent Use the entropy weighting strategy to determine the effective sequence number that gives a target mean match state relative entropy. This option is the default, and can be turned off with --enone. The default target mean match state relative entropy is 0.59 bits for models with at least 1 basepair and 0.38 bits for models with zero basepairs, but can be changed with --ere. The default of 0.59 or 0.38 bits is automatically changed if the total relative entropy of the model (summed match state relative entropy) is less than a cutoff, which is 6.0 bits by default, but can be changed with the expert, undocumented --eX option. If you really want to play with that option, consult the source code.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
237 * --enone Turn off the entropy weighting strategy. The effective sequence number is just the number of sequences in the alignment.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
238 * --ere Set the target mean match state relative entropy. By default the target relative entropy per match position is 0.59 bits for models with at least 1 basepair and 0.38 for models with zero basepairs.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
239 * --eminseq Define the minimum allowed effective sequence number.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
240 * --ehmmre Set the target HMM mean match state relative entropy. Entropy for basepairing match states is calculated using marginalized basepair emission probabilities.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
241 * --eset Set the effective sequence number for entropy weighting.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
242
652f9d550531 Uploaded
bgruening
parents:
diff changeset
243
652f9d550531 Uploaded
bgruening
parents:
diff changeset
244
652f9d550531 Uploaded
bgruening
parents:
diff changeset
245 Options for refining the input alignment
652f9d550531 Uploaded
bgruening
parents:
diff changeset
246 ----------------------------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
247
652f9d550531 Uploaded
bgruening
parents:
diff changeset
248 * --refine Attempt to refine the alignment before building the CM using expectation-maximization (EM). A CM is first built from the initial alignment as usual. Then, the sequences in the alignment are realigned optimally (with the HMM banded CYK algorithm, optimal means optimal given the bands) to the CM, and a new CM is built from the resulting alignment. The sequences are then realigned to the new CM, and a new CM is built from that alignment. This is continued until convergence, specifically when the alignments for two successive iterations are not significantly different (the summed bit scores of all the sequences in the alignment changes less than 1% between two successive iterations).
652f9d550531 Uploaded
bgruening
parents:
diff changeset
249 * -l Turn on the local alignment algorithm, which allows the alignment to span two or more subsequences if necessary (e.g. if the structures of the query model and target sequence are only partially shared), allowing certain large insertions and deletions in the structure to be penalized differently than normal indels. The default is to globally align the query model to the target sequences.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
250 * --gibbs Modifies the behavior of --refine so Gibbs sampling is used instead of EM. The difference is that during the alignment stage the alignment is not necessarily optimal; instead, an alignment (parsetree) for each sequence is sampled from the posterior distribution of alignments as determined by the Inside algorithm. Due to this sampling step --gibbs is non-deterministic, so different runs with the same alignment may yield different results. This is not true when --refine is used without the --gibbs option, in which case the final alignment and CM will always be the same. When --gibbs is enabled, the --seed "number" option can be used to seed the random number generator predictably, making the results reproducible. The goal of the --gibbs option is to help expert RNA alignment curators refine structural alignments by allowing them to observe alternative high scoring alignments.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
251 * --seed Seed the random number generator with an integer >= 0. This option can only be used in combination with --gibbs. If the given number is nonzero, stochastic sampling of alignments will be reproducible; the same command will give the same results. If the given number is 0, the random number generator is seeded arbitrarily, and stochastic samplings may vary from run to run of the same command. The default seed is 0.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
252 * --cyk With --refine, align with the CYK algorithm. By default the optimal accuracy algorithm is used. There is more information on this in the cmalign manual page.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
253 * --notrunc With --refine, turn off the truncated alignment algorithm. There is more information on this in the cmalign manual page.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
254
652f9d550531 Uploaded
bgruening
parents:
diff changeset
255
652f9d550531 Uploaded
bgruening
parents:
diff changeset
256 For further questions please refer to the Infernal Userguide_.
652f9d550531 Uploaded
bgruening
parents:
diff changeset
257
652f9d550531 Uploaded
bgruening
parents:
diff changeset
258 .. _Userguide: http://selab.janelia.org/software/infernal/Userguide.pdf
652f9d550531 Uploaded
bgruening
parents:
diff changeset
259
652f9d550531 Uploaded
bgruening
parents:
diff changeset
260
652f9d550531 Uploaded
bgruening
parents:
diff changeset
261 How do I cite Infernal?
652f9d550531 Uploaded
bgruening
parents:
diff changeset
262 -----------------------
652f9d550531 Uploaded
bgruening
parents:
diff changeset
263
652f9d550531 Uploaded
bgruening
parents:
diff changeset
264 The recommended citation for using Infernal 1.1 is E. P. Nawrocki and S. R. Eddy, Infernal 1.1: 100-fold faster RNA homology searches , Bioinformatics 29:2933-2935 (2013).
652f9d550531 Uploaded
bgruening
parents:
diff changeset
265
652f9d550531 Uploaded
bgruening
parents:
diff changeset
266 **Galaxy Wrapper Author**::
652f9d550531 Uploaded
bgruening
parents:
diff changeset
267
652f9d550531 Uploaded
bgruening
parents:
diff changeset
268 * Bjoern Gruening, University of Freiburg
652f9d550531 Uploaded
bgruening
parents:
diff changeset
269
652f9d550531 Uploaded
bgruening
parents:
diff changeset
270 </help>
652f9d550531 Uploaded
bgruening
parents:
diff changeset
271 </tool>