Mercurial > repos > petr-novak > repeatrxplorer
comparison lib/tarean_output_help.html @ 0:1d1b9e1b2e2f draft
Uploaded
author | petr-novak |
---|---|
date | Thu, 19 Dec 2019 10:24:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1d1b9e1b2e2f |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | |
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
3 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
4 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> | |
5 <head> | |
6 <!-- 2016-10-21 Pá 11:06 --> | |
7 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> | |
8 <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
9 <title>TAREAN output description</title> | |
10 <meta name="generator" content="Org-mode" /> | |
11 <meta name="author" content="petr" /> | |
12 <style type="text/css"> | |
13 <!--/*--><![CDATA[/*><!--*/ | |
14 .title { text-align: center; | |
15 margin-bottom: .2em; } | |
16 .subtitle { text-align: center; | |
17 font-size: medium; | |
18 font-weight: bold; | |
19 margin-top:0; } | |
20 .todo { font-family: monospace; color: red; } | |
21 .done { font-family: monospace; color: green; } | |
22 .priority { font-family: monospace; color: orange; } | |
23 .tag { background-color: #eee; font-family: monospace; | |
24 padding: 2px; font-size: 80%; font-weight: normal; } | |
25 .timestamp { color: #bebebe; } | |
26 .timestamp-kwd { color: #5f9ea0; } | |
27 .org-right { margin-left: auto; margin-right: 0px; text-align: right; } | |
28 .org-left { margin-left: 0px; margin-right: auto; text-align: left; } | |
29 .org-center { margin-left: auto; margin-right: auto; text-align: center; } | |
30 .underline { text-decoration: underline; } | |
31 #postamble p, #preamble p { font-size: 90%; margin: .2em; } | |
32 p.verse { margin-left: 3%; } | |
33 pre { | |
34 border: 1px solid #ccc; | |
35 box-shadow: 3px 3px 3px #eee; | |
36 padding: 8pt; | |
37 font-family: monospace; | |
38 overflow: auto; | |
39 margin: 1.2em; | |
40 } | |
41 pre.src { | |
42 position: relative; | |
43 overflow: visible; | |
44 padding-top: 1.2em; | |
45 } | |
46 pre.src:before { | |
47 display: none; | |
48 position: absolute; | |
49 background-color: white; | |
50 top: -10px; | |
51 right: 10px; | |
52 padding: 3px; | |
53 border: 1px solid black; | |
54 } | |
55 pre.src:hover:before { display: inline;} | |
56 pre.src-sh:before { content: 'sh'; } | |
57 pre.src-bash:before { content: 'sh'; } | |
58 pre.src-emacs-lisp:before { content: 'Emacs Lisp'; } | |
59 pre.src-R:before { content: 'R'; } | |
60 pre.src-perl:before { content: 'Perl'; } | |
61 pre.src-java:before { content: 'Java'; } | |
62 pre.src-sql:before { content: 'SQL'; } | |
63 | |
64 table { border-collapse:collapse; } | |
65 caption.t-above { caption-side: top; } | |
66 caption.t-bottom { caption-side: bottom; } | |
67 td, th { vertical-align:top; } | |
68 th.org-right { text-align: center; } | |
69 th.org-left { text-align: center; } | |
70 th.org-center { text-align: center; } | |
71 td.org-right { text-align: right; } | |
72 td.org-left { text-align: left; } | |
73 td.org-center { text-align: center; } | |
74 dt { font-weight: bold; } | |
75 .footpara { display: inline; } | |
76 .footdef { margin-bottom: 1em; } | |
77 .figure { padding: 1em; } | |
78 .figure p { text-align: center; } | |
79 .inlinetask { | |
80 padding: 10px; | |
81 border: 2px solid gray; | |
82 margin: 10px; | |
83 background: #ffffcc; | |
84 } | |
85 #org-div-home-and-up | |
86 { text-align: right; font-size: 70%; white-space: nowrap; } | |
87 textarea { overflow-x: auto; } | |
88 .linenr { font-size: smaller } | |
89 .code-highlighted { background-color: #ffff00; } | |
90 .org-info-js_info-navigation { border-style: none; } | |
91 #org-info-js_console-label | |
92 { font-size: 10px; font-weight: bold; white-space: nowrap; } | |
93 .org-info-js_search-highlight | |
94 { background-color: #ffff00; color: #000000; font-weight: bold; } | |
95 /*]]>*/--> | |
96 </style> | |
97 <link rel="stylesheet" type="text/css" href="style1.css" /> | |
98 <script type="text/javascript"> | |
99 /* | |
100 @licstart The following is the entire license notice for the | |
101 JavaScript code in this tag. | |
102 | |
103 Copyright (C) 2012-2013 Free Software Foundation, Inc. | |
104 | |
105 The JavaScript code in this tag is free software: you can | |
106 redistribute it and/or modify it under the terms of the GNU | |
107 General Public License (GNU GPL) as published by the Free Software | |
108 Foundation, either version 3 of the License, or (at your option) | |
109 any later version. The code is distributed WITHOUT ANY WARRANTY; | |
110 without even the implied warranty of MERCHANTABILITY or FITNESS | |
111 FOR A PARTICULAR PURPOSE. See the GNU GPL for more details. | |
112 | |
113 As additional permission under GNU GPL version 3 section 7, you | |
114 may distribute non-source (e.g., minimized or compacted) forms of | |
115 that code without the copy of the GNU GPL normally required by | |
116 section 4, provided you include this license notice and a URL | |
117 through which recipients can access the Corresponding Source. | |
118 | |
119 | |
120 @licend The above is the entire license notice | |
121 for the JavaScript code in this tag. | |
122 */ | |
123 <!--/*--><![CDATA[/*><!--*/ | |
124 function CodeHighlightOn(elem, id) | |
125 { | |
126 var target = document.getElementById(id); | |
127 if(null != target) { | |
128 elem.cacheClassElem = elem.className; | |
129 elem.cacheClassTarget = target.className; | |
130 target.className = "code-highlighted"; | |
131 elem.className = "code-highlighted"; | |
132 } | |
133 } | |
134 function CodeHighlightOff(elem, id) | |
135 { | |
136 var target = document.getElementById(id); | |
137 if(elem.cacheClassElem) | |
138 elem.className = elem.cacheClassElem; | |
139 if(elem.cacheClassTarget) | |
140 target.className = elem.cacheClassTarget; | |
141 } | |
142 /*]]>*///--> | |
143 </script> | |
144 </head> | |
145 <body> | |
146 <div id="content"> | |
147 <h1 class="title">TAREAN output description</h1> | |
148 <div id="table-of-contents"> | |
149 <h2>Table of Contents</h2> | |
150 <div id="text-table-of-contents"> | |
151 <ul> | |
152 <li><a href="#orgheadline1">1. Introduction</a></li> | |
153 <li><a href="#orgheadline3">2. Main HTML report</a> | |
154 <ul> | |
155 <li><a href="#orgheadline2">2.1. Table legend</a></li> | |
156 </ul> | |
157 </li> | |
158 <li><a href="#orgheadline5">3. Detailed cluster report</a> | |
159 <ul> | |
160 <li><a href="#orgheadline4">3.1. Table legend</a></li> | |
161 </ul> | |
162 </li> | |
163 <li><a href="#orgheadline7">4. Structure of the output archive</a> | |
164 <ul> | |
165 <li><a href="#orgheadline6">4.1. structure of cluster directories</a></li> | |
166 </ul> | |
167 </li> | |
168 </ul> | |
169 </div> | |
170 </div> | |
171 | |
172 <div id="outline-container-orgheadline1" class="outline-2"> | |
173 <h2 id="orgheadline1"><span class="section-number-2">1</span> Introduction</h2> | |
174 <div class="outline-text-2" id="text-1"> | |
175 <p> | |
176 TAREAN output includes <b>HTML report</b> with list of all analyzed clusters; the clusters are classified into five categories: | |
177 </p> | |
178 <ul class="org-ul"> | |
179 <li>high confidence satellites</li> | |
180 <li>low confidence satellites</li> | |
181 <li>potential LTR elements</li> | |
182 <li>rDNA</li> | |
183 <li>other clusters</li> | |
184 </ul> | |
185 <p> | |
186 Each cluster for which consensus sequences were reconstructed also has its own detailed report, linked to the main report. | |
187 </p> | |
188 </div> | |
189 </div> | |
190 | |
191 <div id="outline-container-orgheadline3" class="outline-2"> | |
192 <h2 id="orgheadline3"><span class="section-number-2">2</span> Main HTML report</h2> | |
193 <div class="outline-text-2" id="text-2"> | |
194 <p> | |
195 This report contains basic information about all clusters larger than the specified threshold (default value is 0.01% of analyzed reads). | |
196 </p> | |
197 </div> | |
198 <div id="outline-container-orgheadline2" class="outline-3"> | |
199 <h3 id="orgheadline2"><span class="section-number-3">2.1</span> Table legend</h3> | |
200 <div class="outline-text-3" id="text-2-1"> | |
201 <dl class="org-dl"> | |
202 <dt>Cluster</dt><dd>Cluster identifier</dd> | |
203 <dt>Genome Proportion<code>[%]</code></dt><dd><i>(Number of sequences in cluster/Number of sequences in clustering) x 100%</i></dd> | |
204 <dt>Size</dt><dd>Number of reads in the cluster</dd> | |
205 <dt>Satellite probability</dt><dd>Empirical probability estimate that cluster sequences | |
206 are derived from satellite repeat. This estimate is based on analysis of more | |
207 than xxx clusters including yyy manually annotated and zzz experimentally | |
208 validated satellite repeats</dd> | |
209 <dt>Consensus</dt><dd>Consensus sequence is outcome of kmer-based | |
210 analysis and represents the most probable satellite monomer | |
211 sequence</dd> | |
212 <dt>Kmer analysis</dt><dd>link to analysis report for individual clusters</dd> | |
213 <dt>Graph layout</dt><dd>Graph-based visualization of similarities among sequence | |
214 reads</dd> | |
215 <dt>Connected component index</dt><dd>Proportion of nodes of the graph which are part | |
216 of the largest strongly connected component</dd> | |
217 <dt>Pair completeness index</dt><dd>Proportion of reads with available | |
218 mate-pair within the same cluster</dd> | |
219 <dt>Kmer coverage</dt><dd>Sum of relative frequencies of all kmers used for consensus | |
220 sequence reconstruction</dd> | |
221 <dt>|V|</dt><dd>Number of vertices of the graph</dd> | |
222 <dt>|E|</dt><dd>Number of edges of the graph</dd> | |
223 <dt>PBS score</dt><dd>Primer binding site detection score</dd> | |
224 <dt>The longest ORF length</dt><dd>Length of the longest open reading frame found in | |
225 any of the possible six reading frames. Search was done on dimer of | |
226 consensus so ORFs can be longer than 'monomer' length</dd> | |
227 <dt>Similarity-based annotation</dt><dd>Annotation based on | |
228 similarity search using blastn/blastx against database of known | |
229 repeats.</dd> | |
230 </dl> | |
231 </div> | |
232 </div> | |
233 </div> | |
234 <div id="outline-container-orgheadline5" class="outline-2"> | |
235 <h2 id="orgheadline5"><span class="section-number-2">3</span> Detailed cluster report</h2> | |
236 <div class="outline-text-2" id="text-3"> | |
237 <p> | |
238 Cluster report includes a list of major monomer sequence variants reconstructed from the most frequent k-mers. The reconstructed consensus sequences are sorted based on their significance (that is, what proportion of k-mers they represent). | |
239 </p> | |
240 </div> | |
241 <div id="outline-container-orgheadline4" class="outline-3"> | |
242 <h3 id="orgheadline4"><span class="section-number-3">3.1</span> Table legend</h3> | |
243 <div class="outline-text-3" id="text-3-1"> | |
244 <dl class="org-dl"> | |
245 <dt>kmer</dt><dd>length of kmer used for consensus reconstruction.</dd> | |
246 <dt>variant</dt><dd>identifier of consensus variant.</dd> | |
247 <dt>total score</dt><dd>measure of significance of consensus variant. Score is calculated as a sum of weights of all k-mers used for consensus reconstruction.</dd> | |
248 <dt>monomer length</dt><dd>length of the consensus</dd> | |
249 <dt>consensus</dt><dd>consensus sequence without ambiguous bases.</dd> | |
250 <dt>graph image</dt><dd>part of de-Bruijn graph based on the abundant k-mers. Size of | |
251 vertices corresponds to k-mer frequencies. Paths in the graph which were used | |
252 for reconstruction of consensus sequences are colored gray.</dd> | |
253 <dt>logo image</dt><dd>consensus sequences shown as DNA logo. Height of letters corresponds to kmer frequencies. Logo images are linked to corresponding position probability matrices.</dd> | |
254 </dl> | |
255 </div> | |
256 </div> | |
257 </div> | |
258 | |
259 <div id="outline-container-orgheadline7" class="outline-2"> | |
260 <h2 id="orgheadline7"><span class="section-number-2">4</span> Structure of the output archive</h2> | |
261 <div class="outline-text-2" id="text-4"> | |
262 <p> | |
263 Complete results from TAREAN analysis can be downloaded as a zip archive which contains the following | |
264 files and directories: | |
265 </p> | |
266 | |
267 <div class="org-src-container"> | |
268 | |
269 <pre class="src src-files">. | |
270 . | |
271 ├── clusters_info.csv <------------ list of clusters in tab delimited format | |
272 ├── index.html <------------ main html report | |
273 ├── seqclust | |
274 │ ├── assembly # not implemented yet | |
275 │ ├── blastn <------------ results of read comparison with DNA database | |
276 │ ├── blastx <------------ results of read comparison with protein database | |
277 │ ├── clustering | |
278 │ │ ├── clusters | |
279 │ │ │ ├── dir_CL0001 <----┐- detailed information about clusters | |
280 │ │ │ ├── dir_CL0002 <----│ | |
281 │ │ │ ├── dir_CL0003 <----│ | |
282 │ │ │ .... <----┘ | |
283 │ │ │ | |
284 │ │ └── hitsort.cls <--------- list of reads in individual clusters | |
285 │ ├── mgblast | |
286 │ ├── prerun | |
287 │ └── sequences <--------- input reads | |
288 ├── summary # not implemented yet | |
289 ├── TR_consensus_rank_1_.fasta <-- reconstructed monomer sequences for HIGH confidence satellites | |
290 ├── TR_consensus_rank_2_.fasta <-- reconstructed monomer sequences for LOW confidence satellites | |
291 ├── TR_consensus_rank_3_.fasta <-- reconstructed sequences of potential LTR elements | |
292 └── TR_consensus_rank_4_.fasta <-- reconstructed consensus for rDNA | |
293 </pre> | |
294 </div> | |
295 | |
296 <p> | |
297 List of all clusters which is available in HTML file <code>index.html</code> is also | |
298 available in tab delimited format in the file <code>clusters_info.csv</code> which can be | |
299 easily viewed and edited in spreadsheet editing programs. List of all clusters | |
300 and the corresponding reads is in the file <code>hitsort.cls</code> which has the following | |
301 format: | |
302 </p> | |
303 | |
304 <pre class="example"> | |
305 >CL1 11 | |
306 134234r 55494f 85525f 136746r 96742f 91926f 239729r 105445f 222518r 136402r 9013 | |
307 >CL2 10 | |
308 76205r 120735r 69527r 12235r 176778f 189307f 131952f 163507f 100038r 178475r | |
309 >CL3 6 | |
310 99835r 222598f 29715r 102023f 99524r 30116f | |
311 >CL4 6 | |
312 51723r 69073r 218774r 146425f 136314r 41744f | |
313 >CL5 5 | |
314 70686f 65565f 234078r 50430r 68247r | |
315 </pre> | |
316 | |
317 <p> | |
318 where <code>CL1 11</code> is the cluster ID followed by number of reads in the cluster; | |
319 next line contains list of all read names belonging to the cluster. | |
320 </p> | |
321 </div> | |
322 <div id="outline-container-orgheadline6" class="outline-3"> | |
323 <h3 id="orgheadline6"><span class="section-number-3">4.1</span> structure of cluster directories</h3> | |
324 <div class="outline-text-3" id="text-4-1"> | |
325 <p> | |
326 Detailed information for each cluster is stored in subdirectories: | |
327 </p> | |
328 | |
329 <div class="org-src-container"> | |
330 | |
331 <pre class="src src-folder">dir_CL0011 | |
332 ├── blast.csv <------------tab delimited file, all-to-all comparison of reads within cluster | |
333 ├── CL11_directed_graph.RData <----directed graph representation of cluster saved as R igraph object | |
334 ├── CL11.GL <-----------------undirected graph representation of cluster saved as R igraph object | |
335 ├── CL11.png <-----------┐- images with graph visualization | |
336 ├── CL11_tmb.png <-----------┘ | |
337 ├── dna_database_annotation.csv <-- annotation of cluster reads based on the DNA database of repeats | |
338 ├── reads_all.fas <---------------- all reads included in the cluster in fasta format | |
339 ├── reads.fas <---------------- subset of reads used for monomer reconstruction | |
340 ├── reads_oriented.fas <------------ subset of reads all in the same orientation | |
341 └── tarean | |
342 ├── consensus.fasta <----------- fasta file with tandem repeat consensus variants | |
343 ├── ggmin.RData | |
344 ├── img | |
345 │ ├── graph_11mer_1.png <-----┐ | |
346 │ ├── graph_11mer_2.png <-----│ | |
347 │ ├── graph_15mer_2.png <-----│ | |
348 │ ├── graph_15mer_3.png <-----│ | |
349 │ ├── graph_15mer_4.png <-----│ images of kmer-based graphs used for reconstruction of | |
350 │ ├── graph_19mer_2.png <-----│ monomer variants | |
351 │ ├── graph_19mer_4.png <-----│ | |
352 │ ├── graph_19mer_5.png <-----│ | |
353 │ ├── graph_23mer_2.png <-----│ | |
354 │ ├── graph_27mer_3.png <-----┘ | |
355 │ │ | |
356 │ ├── logo_11mer_1.png <-----┐ | |
357 │ ├── logo_11mer_2.png <-----│ | |
358 │ ├── logo_15mer_2.png <-----│ | |
359 │ ├── logo_15mer_3.png <-----│ | |
360 │ ├── logo_15mer_4.png <-----│ images with DNA logos representing consensus sequences | |
361 │ ├── logo_19mer_2.png <-----│ of monomer variants | |
362 │ ├── logo_19mer_4.png <-----│ | |
363 │ ├── logo_19mer_5.png <-----│ | |
364 │ ├── logo_23mer_2.png <-----│ | |
365 │ └── logo_27mer_3.png <-----┘ | |
366 │ | |
367 ├── ppm_11mer_1.csv <-----┐ | |
368 ├── ppm_11mer_2.csv <-----│ | |
369 ├── ppm_15mer_2.csv <-----│ | |
370 ├── ppm_15mer_3.csv <-----│ | |
371 ├── ppm_15mer_4.csv <-----│ position probability matrices for individual monomer | |
372 ├── ppm_19mer_2.csv <-----│ variants derived from k-mer frequencies | |
373 ├── ppm_19mer_4.csv <-----│ | |
374 ├── ppm_19mer_5.csv <-----│ | |
375 ├── ppm_23mer_2.csv <-----│ | |
376 ├── ppm_27mer_3.csv <-----┘ | |
377 │ | |
378 ├── reads_oriented.fas_11.kmers <-----┐ | |
379 ├── reads_oriented.fas_15.kmers <-----│ | |
380 ├── reads_oriented.fas_19.kmers <-----│ k-mer frequencies calculated on oriented reads | |
381 ├── reads_oriented.fas_23.kmers <-----│ for k-mer lengths 11 - 27 | |
382 ├── reads_oriented.fas_27.kmers <-----┘ | |
383 ├── reads_oriented.fasblast_out.cvs <---------┐results of blastn search against database of tRNA | |
384 ├── reads_oriented.fasblast_out.cvs_L.csv <----│for purposes of LTR detection | |
385 ├── reads_oriented.fasblast_out.cvs_R.csv <----┘ | |
386 └── report.html <--- cluster analysis HTML summary | |
387 </pre> | |
388 </div> | |
389 </div> | |
390 </div> | |
391 </div> | |
392 </div> | |
393 <div id="postamble" class="status"> | |
394 <p class="author">Author: petr</p> | |
395 <p class="date">Created: 2016-10-21 Pá 11:06</p> | |
396 <p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p> | |
397 </div> | |
398 </body> | |
399 </html> |