comparison lib/tarean_output_help.html @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1d1b9e1b2e2f
1 <?xml version="1.0" encoding="utf-8"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
5 <head>
6 <!-- 2016-10-21 Pá 11:06 -->
7 <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
8 <meta name="viewport" content="width=device-width, initial-scale=1" />
9 <title>TAREAN output description</title>
10 <meta name="generator" content="Org-mode" />
11 <meta name="author" content="petr" />
12 <style type="text/css">
13 <!--/*--><![CDATA[/*><!--*/
14 .title { text-align: center;
15 margin-bottom: .2em; }
16 .subtitle { text-align: center;
17 font-size: medium;
18 font-weight: bold;
19 margin-top:0; }
20 .todo { font-family: monospace; color: red; }
21 .done { font-family: monospace; color: green; }
22 .priority { font-family: monospace; color: orange; }
23 .tag { background-color: #eee; font-family: monospace;
24 padding: 2px; font-size: 80%; font-weight: normal; }
25 .timestamp { color: #bebebe; }
26 .timestamp-kwd { color: #5f9ea0; }
27 .org-right { margin-left: auto; margin-right: 0px; text-align: right; }
28 .org-left { margin-left: 0px; margin-right: auto; text-align: left; }
29 .org-center { margin-left: auto; margin-right: auto; text-align: center; }
30 .underline { text-decoration: underline; }
31 #postamble p, #preamble p { font-size: 90%; margin: .2em; }
32 p.verse { margin-left: 3%; }
33 pre {
34 border: 1px solid #ccc;
35 box-shadow: 3px 3px 3px #eee;
36 padding: 8pt;
37 font-family: monospace;
38 overflow: auto;
39 margin: 1.2em;
40 }
41 pre.src {
42 position: relative;
43 overflow: visible;
44 padding-top: 1.2em;
45 }
46 pre.src:before {
47 display: none;
48 position: absolute;
49 background-color: white;
50 top: -10px;
51 right: 10px;
52 padding: 3px;
53 border: 1px solid black;
54 }
55 pre.src:hover:before { display: inline;}
56 pre.src-sh:before { content: 'sh'; }
57 pre.src-bash:before { content: 'sh'; }
58 pre.src-emacs-lisp:before { content: 'Emacs Lisp'; }
59 pre.src-R:before { content: 'R'; }
60 pre.src-perl:before { content: 'Perl'; }
61 pre.src-java:before { content: 'Java'; }
62 pre.src-sql:before { content: 'SQL'; }
63
64 table { border-collapse:collapse; }
65 caption.t-above { caption-side: top; }
66 caption.t-bottom { caption-side: bottom; }
67 td, th { vertical-align:top; }
68 th.org-right { text-align: center; }
69 th.org-left { text-align: center; }
70 th.org-center { text-align: center; }
71 td.org-right { text-align: right; }
72 td.org-left { text-align: left; }
73 td.org-center { text-align: center; }
74 dt { font-weight: bold; }
75 .footpara { display: inline; }
76 .footdef { margin-bottom: 1em; }
77 .figure { padding: 1em; }
78 .figure p { text-align: center; }
79 .inlinetask {
80 padding: 10px;
81 border: 2px solid gray;
82 margin: 10px;
83 background: #ffffcc;
84 }
85 #org-div-home-and-up
86 { text-align: right; font-size: 70%; white-space: nowrap; }
87 textarea { overflow-x: auto; }
88 .linenr { font-size: smaller }
89 .code-highlighted { background-color: #ffff00; }
90 .org-info-js_info-navigation { border-style: none; }
91 #org-info-js_console-label
92 { font-size: 10px; font-weight: bold; white-space: nowrap; }
93 .org-info-js_search-highlight
94 { background-color: #ffff00; color: #000000; font-weight: bold; }
95 /*]]>*/-->
96 </style>
97 <link rel="stylesheet" type="text/css" href="style1.css" />
98 <script type="text/javascript">
99 /*
100 @licstart The following is the entire license notice for the
101 JavaScript code in this tag.
102
103 Copyright (C) 2012-2013 Free Software Foundation, Inc.
104
105 The JavaScript code in this tag is free software: you can
106 redistribute it and/or modify it under the terms of the GNU
107 General Public License (GNU GPL) as published by the Free Software
108 Foundation, either version 3 of the License, or (at your option)
109 any later version. The code is distributed WITHOUT ANY WARRANTY;
110 without even the implied warranty of MERCHANTABILITY or FITNESS
111 FOR A PARTICULAR PURPOSE. See the GNU GPL for more details.
112
113 As additional permission under GNU GPL version 3 section 7, you
114 may distribute non-source (e.g., minimized or compacted) forms of
115 that code without the copy of the GNU GPL normally required by
116 section 4, provided you include this license notice and a URL
117 through which recipients can access the Corresponding Source.
118
119
120 @licend The above is the entire license notice
121 for the JavaScript code in this tag.
122 */
123 <!--/*--><![CDATA[/*><!--*/
124 function CodeHighlightOn(elem, id) /* Highlight elem and the element whose id is `id`. */
125 {
126 var target = document.getElementById(id);
127 if(null != target) { /* do nothing when no element has that id */
128 elem.cacheClassElem = elem.className; /* both original class names are cached on elem ... */
129 elem.cacheClassTarget = target.className; /* ... where CodeHighlightOff reads them back */
130 target.className = "code-highlighted"; /* overwrites the whole class attribute, not appended */
131 elem.className = "code-highlighted";
132 }
133 }
134 function CodeHighlightOff(elem, id) /* Undo CodeHighlightOn: restore the class names cached on elem. */
135 {
136 var target = document.getElementById(id);
137 if(elem.cacheClassElem) /* NOTE(review): an empty cached class name is falsy, so it is not restored here */
138 elem.className = elem.cacheClassElem;
139 if(elem.cacheClassTarget) /* NOTE(review): target is not null-checked before the assignment below */
140 target.className = elem.cacheClassTarget;
141 }
142 /*]]>*///-->
143 </script>
144 </head>
145 <body>
146 <div id="content">
147 <h1 class="title">TAREAN output description</h1>
148 <div id="table-of-contents">
149 <h2>Table of Contents</h2>
150 <div id="text-table-of-contents">
151 <ul>
152 <li><a href="#orgheadline1">1. Introduction</a></li>
153 <li><a href="#orgheadline3">2. Main HTML report</a>
154 <ul>
155 <li><a href="#orgheadline2">2.1. Table legend</a></li>
156 </ul>
157 </li>
158 <li><a href="#orgheadline5">3. Detailed cluster report</a>
159 <ul>
160 <li><a href="#orgheadline4">3.1. Table legend</a></li>
161 </ul>
162 </li>
163 <li><a href="#orgheadline7">4. Structure of the output archive</a>
164 <ul>
165 <li><a href="#orgheadline6">4.1. structure of cluster directories</a></li>
166 </ul>
167 </li>
168 </ul>
169 </div>
170 </div>
171
172 <div id="outline-container-orgheadline1" class="outline-2">
173 <h2 id="orgheadline1"><span class="section-number-2">1</span> Introduction</h2>
174 <div class="outline-text-2" id="text-1">
175 <p>
176 TAREAN output includes an <b>HTML report</b> with a list of all analyzed clusters; the clusters are classified into five categories:
177 </p>
178 <ul class="org-ul">
179 <li>high confidence satellites</li>
180 <li>low confidence satellites</li>
181 <li>potential LTR elements</li>
182 <li>rDNA</li>
183 <li>other clusters</li>
184 </ul>
185 <p>
186 Each cluster for which consensus sequences were reconstructed also has its own detailed report, linked from the main report.
187 </p>
188 </div>
189 </div>
190
191 <div id="outline-container-orgheadline3" class="outline-2">
192 <h2 id="orgheadline3"><span class="section-number-2">2</span> Main HTML report</h2>
193 <div class="outline-text-2" id="text-2">
194 <p>
195 This report contains basic information about all clusters larger than the specified threshold (the default value is 0.01% of analyzed reads).
196 </p>
197 </div>
198 <div id="outline-container-orgheadline2" class="outline-3">
199 <h3 id="orgheadline2"><span class="section-number-3">2.1</span> Table legend</h3>
200 <div class="outline-text-3" id="text-2-1">
201 <dl class="org-dl">
202 <dt>Cluster</dt><dd>Cluster identifier</dd>
203 <dt>Genome Proportion<code>[%]</code></dt><dd><i>(Number of sequences in cluster/Number of sequences in clustering) x 100%</i></dd>
204 <dt>Size</dt><dd>Number of reads in the cluster</dd>
205 <dt>Satellite probability</dt><dd>Empirical probability estimate that cluster sequences
206 are derived from a satellite repeat. This estimate is based on analysis of more
207 than xxx clusters including yyy manually annotated and zzz experimentally
208 validated satellite repeats</dd>
209 <dt>Consensus</dt><dd>Consensus sequence is outcome of kmer-based
210 analysis and represents the most probable satellite monomer
211 sequence</dd>
212 <dt>Kmer analysis</dt><dd>link to analysis report for individual clusters</dd>
213 <dt>Graph layout</dt><dd>Graph-based visualization of similarities among sequence
214 reads</dd>
215 <dt>Connected component index</dt><dd>Proportion of nodes of the graph which are part
216 of the largest strongly connected component</dd>
217 <dt>Pair completeness index</dt><dd>Proportion of reads with available
218 mate-pair within the same cluster</dd>
219 <dt>Kmer coverage</dt><dd>Sum of relative frequencies of all kmers used for consensus
220 sequence reconstruction</dd>
221 <dt>|V|</dt><dd>Number of vertices of the graph</dd>
222 <dt>|E|</dt><dd>Number of edges of the graph</dd>
223 <dt>PBS score</dt><dd>Primer binding site detection score</dd>
224 <dt>The longest ORF length</dt><dd>Length of the longest open reading frame found in
225 any of the possible six reading frames. Search was done on dimer of
226 consensus so ORFs can be longer than 'monomer' length</dd>
227 <dt>Similarity-based annotation</dt><dd>Annotation based on
228 similarity search using blastn/blastx against database of known
229 repeats.</dd>
230 </dl>
231 </div>
232 </div>
233 </div>
234 <div id="outline-container-orgheadline5" class="outline-2">
235 <h2 id="orgheadline5"><span class="section-number-2">3</span> Detailed cluster report</h2>
236 <div class="outline-text-2" id="text-3">
237 <p>
238 Cluster report includes a list of major monomer sequence variants reconstructed from the most frequent k-mers. The reconstructed consensus sequences are sorted based on their significance (that is, what proportion of k-mers they represent).
239 </p>
240 </div>
241 <div id="outline-container-orgheadline4" class="outline-3">
242 <h3 id="orgheadline4"><span class="section-number-3">3.1</span> Table legend</h3>
243 <div class="outline-text-3" id="text-3-1">
244 <dl class="org-dl">
245 <dt>kmer</dt><dd>length of kmer used for consensus reconstruction.</dd>
246 <dt>variant</dt><dd>identifier of consensus variant.</dd>
247 <dt>total score</dt><dd>measure of significance of consensus variant. Score is calculated as a sum of weights of all k-mers used for consensus reconstruction.</dd>
248 <dt>monomer length</dt><dd>length of the consensus</dd>
249 <dt>consensus</dt><dd>consensus sequence without ambiguous bases.</dd>
250 <dt>graph image</dt><dd>part of de-Bruijn graph based on the abundant k-mers. Size of
251 vertices corresponds to k-mer frequencies. Paths in the graph which were used
252 for reconstruction of consensus sequences are colored gray.</dd>
253 <dt>logo image</dt><dd>consensus sequences shown as DNA logo. Height of letters corresponds to kmer frequencies. Logo images are linked to corresponding position probability matrices.</dd>
254 </dl>
255 </div>
256 </div>
257 </div>
258
259 <div id="outline-container-orgheadline7" class="outline-2">
260 <h2 id="orgheadline7"><span class="section-number-2">4</span> Structure of the output archive</h2>
261 <div class="outline-text-2" id="text-4">
262 <p>
263 Complete results from TAREAN analysis can be downloaded as a zip archive which contains the following
264 files and directories:
265 </p>
266
267 <div class="org-src-container">
268
269 <pre class="src src-files">.
270 .
271 ├── clusters_info.csv &lt;------------ list of clusters in tab delimited format
272 ├── index.html &lt;------------ main html report
273 ├── seqclust
274 │   ├── assembly # not implemented yet
275 │   ├── blastn &lt;------------ results of read comparison with DNA database
276 │   ├── blastx &lt;------------ results of read comparison with protein database
277 │   ├── clustering
278 │   │   ├── clusters
279 │   │   │   ├── dir_CL0001 &lt;----┐- detailed information about clusters
280 │   │   │   ├── dir_CL0002 &lt;----│
281 │   │   │   ├── dir_CL0003 &lt;----│
282 │ │ │ .... &lt;----┘
283 │ │ │
284 │   │   └── hitsort.cls &lt;--------- list of reads in individual clusters
285 │   ├── mgblast
286 │   ├── prerun
287 │   └── sequences &lt;--------- input reads
288 ├── summary # not implemented yet
289 ├── TR_consensus_rank_1_.fasta &lt;-- reconstructed monomer sequences for HIGH confidence satellites
290 ├── TR_consensus_rank_2_.fasta &lt;-- reconstructed monomer sequences for LOW confidence satellites
291 ├── TR_consensus_rank_3_.fasta &lt;-- reconstructed sequences of potential LTR elements
292 └── TR_consensus_rank_4_.fasta &lt;-- reconstructed consensus for rDNA
293 </pre>
294 </div>
295
296 <p>
297 List of all clusters which is available in HTML file <code>index.html</code> is also
298 available in tab delimited format in the file <code>clusters_info.csv</code> which can be
299 easily viewed and edited in spreadsheet editing programs. List of all clusters
300 and the corresponding reads is in the file <code>hitsort.cls</code> which has the following
301 format:
302 </p>
303
304 <pre class="example">
305 &gt;CL1 11
306 134234r 55494f 85525f 136746r 96742f 91926f 239729r 105445f 222518r 136402r 9013
307 &gt;CL2 10
308 76205r 120735r 69527r 12235r 176778f 189307f 131952f 163507f 100038r 178475r
309 &gt;CL3 6
310 99835r 222598f 29715r 102023f 99524r 30116f
311 &gt;CL4 6
312 51723r 69073r 218774r 146425f 136314r 41744f
313 &gt;CL5 5
314 70686f 65565f 234078r 50430r 68247r
315 </pre>
316
317 <p>
318 where <code>CL1 11</code> is the cluster ID followed by number of reads in the cluster;
319 next line contains list of all read names belonging to the cluster.
320 </p>
321 </div>
322 <div id="outline-container-orgheadline6" class="outline-3">
323 <h3 id="orgheadline6"><span class="section-number-3">4.1</span> structure of cluster directories</h3>
324 <div class="outline-text-3" id="text-4-1">
325 <p>
326 Detailed information for each cluster is stored in subdirectories:
327 </p>
328
329 <div class="org-src-container">
330
331 <pre class="src src-folder">dir_CL0011
332 ├── blast.csv &lt;------------tab delimited file, all-to-all comparison of reads within cluster
333 ├── CL11_directed_graph.RData &lt;----directed graph representation of cluster saved as R igraph object
334 ├── CL11.GL &lt;-----------------undirected graph representation of cluster saved as R igraph object
335 ├── CL11.png &lt;-----------┐- images with graph visualization
336 ├── CL11_tmb.png &lt;-----------┘
337 ├── dna_database_annotation.csv &lt;-- annotation of cluster reads based on the DNA database of repeats
338 ├── reads_all.fas &lt;---------------- all reads included in the cluster in fasta format
339 ├── reads.fas &lt;---------------- subset of reads used for monomer reconstruction
340 ├── reads_oriented.fas &lt;------------ subset of reads all in the same orientation
341 └── tarean
342 ├── consensus.fasta &lt;----------- fasta file with tandem repeat consensus variants
343 ├── ggmin.RData
344 ├── img
345 │   ├── graph_11mer_1.png &lt;-----┐
346 │   ├── graph_11mer_2.png &lt;-----│
347 │   ├── graph_15mer_2.png &lt;-----│
348 │   ├── graph_15mer_3.png &lt;-----│
349 │   ├── graph_15mer_4.png &lt;-----│ images of kmer-based graphs used for reconstruction of
350 │   ├── graph_19mer_2.png &lt;-----│ monomer variants
351 │   ├── graph_19mer_4.png &lt;-----│
352 │   ├── graph_19mer_5.png &lt;-----│
353 │   ├── graph_23mer_2.png &lt;-----│
354 │   ├── graph_27mer_3.png &lt;-----┘
355 │ │
356 │   ├── logo_11mer_1.png &lt;-----┐
357 │   ├── logo_11mer_2.png &lt;-----│
358 │   ├── logo_15mer_2.png &lt;-----│
359 │   ├── logo_15mer_3.png &lt;-----│
360 │   ├── logo_15mer_4.png &lt;-----│ images with DNA logos representing consensus sequences
361 │   ├── logo_19mer_2.png &lt;-----│ of monomer variants
362 │   ├── logo_19mer_4.png &lt;-----│
363 │   ├── logo_19mer_5.png &lt;-----│
364 │   ├── logo_23mer_2.png &lt;-----│
365 │   └── logo_27mer_3.png &lt;-----┘
366
367 ├── ppm_11mer_1.csv &lt;-----┐
368 ├── ppm_11mer_2.csv &lt;-----│
369 ├── ppm_15mer_2.csv &lt;-----│
370 ├── ppm_15mer_3.csv &lt;-----│
371 ├── ppm_15mer_4.csv &lt;-----│ position probability matrices for individual monomer
372 ├── ppm_19mer_2.csv &lt;-----│ variants derived from k-mer frequencies
373 ├── ppm_19mer_4.csv &lt;-----│
374 ├── ppm_19mer_5.csv &lt;-----│
375 ├── ppm_23mer_2.csv &lt;-----│
376 ├── ppm_27mer_3.csv &lt;-----┘
377
378 ├── reads_oriented.fas_11.kmers &lt;-----┐
379 ├── reads_oriented.fas_15.kmers &lt;-----│
380 ├── reads_oriented.fas_19.kmers &lt;-----│ k-mer frequencies calculated on oriented reads
381 ├── reads_oriented.fas_23.kmers &lt;-----│ for k-mer lengths 11 - 27
382 ├── reads_oriented.fas_27.kmers &lt;-----┘
383 ├── reads_oriented.fasblast_out.cvs &lt;---------┐results of blastn search against database of tRNA
384 ├── reads_oriented.fasblast_out.cvs_L.csv &lt;----│for purposes of LTR detection
385 ├── reads_oriented.fasblast_out.cvs_R.csv &lt;----┘
386 └── report.html &lt;--- cluster analysis HTML summary
387 </pre>
388 </div>
389 </div>
390 </div>
391 </div>
392 </div>
393 <div id="postamble" class="status">
394 <p class="author">Author: petr</p>
395 <p class="date">Created: 2016-10-21 Pá 11:06</p>
396 <p class="validation"><a href="http://validator.w3.org/check?uri=referer">Validate</a></p>
397 </div>
398 </body>
399 </html>