Mercurial > repos > vipints > deseq_hts
comparison deseq-hts_1.0/mex/read.cpp @ 0:94a108763d9e draft
deseq-hts version 1.0 wraps the DESeq 1.6.0
author | vipints |
---|---|
date | Wed, 09 May 2012 20:43:47 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:94a108763d9e |
---|---|
1 /* | |
2 * This program is free software; you can redistribute it and/or modify | |
3 * it under the terms of the GNU General Public License as published by | |
4 * the Free Software Foundation; either version 3 of the License, or | |
5 * (at your option) any later version. | |
6 * | |
7 * Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch | |
8 * Copyright (C) 2010-2011 Max Planck Society | |
9 */ | |
10 | |
11 | |
12 #include "read.h" | |
13 | |
14 CRead::CRead() { | |
15 read_id = NULL; | |
16 sam_line = NULL; | |
17 start_pos = 0; | |
18 matches = 0; | |
19 mismatches = 0; | |
20 multiple_alignment_index = 0; | |
21 strand = NULL; | |
22 left = false; | |
23 right = false; | |
24 reverse = false; | |
25 } | |
26 | |
27 CRead::~CRead() { | |
28 delete[] read_id; | |
29 delete[] sam_line; | |
30 delete[] strand; | |
31 } | |
32 | |
33 /* | |
34 * Augments 'coverage' array at the positions covered by the read in the queried interval. | |
35 */ | |
36 void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage) | |
37 { | |
38 // block1 block2 | |
39 // |=====|======|============|===========|======|====| | |
40 // ^ ^ ^ | |
41 // p_start_pos | p_end_pos | |
42 // start_pos | |
43 // |0000001111111111111000000000000111111100000| | |
44 // *coverage | |
45 int len = p_end_pos-p_start_pos+1; | |
46 for (uint32_t n = 0; n < block_starts.size(); n++) { | |
47 int32_t from, to; | |
48 from = block_starts[n]+start_pos-p_start_pos; | |
49 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; | |
50 if (from < 0) | |
51 from = 0; | |
52 if (to < 0) | |
53 continue; | |
54 else if (to > len) | |
55 to = len; | |
56 for (int bp=from; bp<to; bp++) { | |
57 coverage[bp]++; | |
58 } | |
59 } | |
60 } | |
61 int CRead::get_last_position() | |
62 { | |
63 if (block_starts.size()>0) // this if for some reason zero in case of softclips | |
64 return start_pos+block_starts.back()+block_lengths.back(); | |
65 return -1; | |
66 } | |
67 | |
68 /* | |
69 * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read). | |
70 * These indices can be used to build up a sparse matrix of reads x positions. | |
71 */ | |
72 void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) { | |
73 int len = p_end_pos-p_start_pos+1; | |
74 for (uint32_t n = 0; n < block_starts.size(); n++) { | |
75 uint32_t from, to; | |
76 if (block_starts[n]+start_pos-p_start_pos >= 0) | |
77 from = block_starts[n]+start_pos-p_start_pos; | |
78 else | |
79 from = 0; | |
80 if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0) | |
81 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; | |
82 else | |
83 to = 0; | |
84 for (int bp=from; bp<to&bp<len; bp++) { | |
85 reads[reads_c] = row_idx+1; // row indices for sparse matrix | |
86 reads[reads_c+1] = bp+1; // column indices for sparse matrix | |
87 reads_c += 2; | |
88 } | |
89 } | |
90 } | |
91 | |
92 void CRead::get_acc_splice_sites(vector<int>* acc_pos) | |
93 { | |
94 if (strand[0]=='+') | |
95 { | |
96 for (int k=1;k<block_starts.size(); k++) | |
97 acc_pos->push_back(start_pos+block_starts[k]-1); | |
98 } | |
99 else if (strand[0]=='-') | |
100 { | |
101 for (int k=1;k<block_starts.size(); k++) | |
102 acc_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); | |
103 } | |
104 } | |
105 | |
106 void CRead::get_don_splice_sites(vector<int>* don_pos) | |
107 { | |
108 | |
109 if (strand[0]=='+') | |
110 { | |
111 for (int k=1;k<block_starts.size(); k++) | |
112 don_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); | |
113 } | |
114 else if (strand[0]=='-') | |
115 { | |
116 for (int k=1;k<block_starts.size(); k++) | |
117 don_pos->push_back(start_pos+block_starts[k]-1); | |
118 } | |
119 } | |
120 | |
121 int CRead::min_exon_len() | |
122 { | |
123 int min = 1e8; | |
124 for (int k=0;k<block_starts.size(); k++) | |
125 if (block_lengths[k]<min) | |
126 min = block_lengths[k]; | |
127 return min; | |
128 } | |
129 | |
130 int CRead::max_intron_len() | |
131 { | |
132 int max = 0; | |
133 for (int k=1;k<block_starts.size(); k++) | |
134 if (block_starts[k]-(block_starts[k-1]+block_lengths[k-1])>max) | |
135 max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]); | |
136 return max; | |
137 } | |
138 | |
139 /* | |
140 * Adds start and end of introns in the read consecutively to the 'introns' vector. | |
141 */ | |
142 void CRead::get_introns(vector<int>* introns) | |
143 { | |
144 for (int i=1; i<block_starts.size(); i++) | |
145 { | |
146 int istart = block_starts[i-1]+block_lengths[i-1]+start_pos; | |
147 int iend = block_starts[i]+start_pos-1; | |
148 introns->push_back(istart); | |
149 introns->push_back(iend); | |
150 //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend); | |
151 } | |
152 } | |
153 void CRead::get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2) | |
154 { | |
155 for (int i=1; i<block_starts.size(); i++) | |
156 { | |
157 uint32_t istart = block_starts[i-1]+block_lengths[i-1]+start_pos; | |
158 uint32_t iend = block_starts[i]+start_pos-1; | |
159 intron_starts->push_back(istart); | |
160 intron_ends->push_back(iend); | |
161 block_len1->push_back(block_lengths[i-1]) ; | |
162 block_len2->push_back(block_lengths[i]) ; | |
163 } | |
164 } | |
165 | |
166 bool CRead::operator==(const CRead& read) const | |
167 { | |
168 if (block_starts.size()!=read.block_starts.size()) | |
169 return false; | |
170 if (block_lengths.size()!=read.block_lengths.size()) | |
171 return false; | |
172 if (start_pos!=read.start_pos) | |
173 return false; | |
174 if (strand[0] != read.strand[0]) | |
175 return false; | |
176 for (int i=0; i<block_starts.size(); i++) | |
177 if (block_starts[i]!=read.block_starts[i]) | |
178 return false; | |
179 for (int i=0; i<block_lengths.size(); i++) | |
180 if (block_lengths[i]!=read.block_lengths[i]) | |
181 return false; | |
182 return true; | |
183 } | |
184 | |
185 void CRead::print() | |
186 { | |
187 fprintf(stdout, "start_pos: %d\n", start_pos); | |
188 fprintf(stdout, "starts:"); | |
189 for (int i=0; i<block_starts.size(); i++) | |
190 { | |
191 fprintf(stdout, " %d", block_starts[i]); | |
192 } | |
193 fprintf(stdout, "\n"); | |
194 | |
195 fprintf(stdout, "lengths:"); | |
196 for (int i=0; i<block_starts.size(); i++) | |
197 { | |
198 fprintf(stdout, " %d", block_lengths[i]); | |
199 } | |
200 fprintf(stdout, "\n"); | |
201 } | |
202 | |
203 void CRead::set_strand(char s) | |
204 { | |
205 delete[] strand; | |
206 strand = new char [2]; | |
207 strand[0] = s; | |
208 strand[1] = '0'; | |
209 } | |
210 | |
211 int CRead::get_mismatches() | |
212 { | |
213 return mismatches ; | |
214 } |