comparison rDiff/mex/read.cpp @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 3 of the License, or
5 * (at your option) any later version.
6 *
7 * Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
8 * Copyright (C) 2010-2011 Max Planck Society
9 */
10
11
12 #include "read.h"
13
14 CRead::CRead() {
15 read_id = NULL;
16 sam_line = NULL;
17 start_pos = 0;
18 matches = 0;
19 mismatches = 0;
20 multiple_alignment_index = 0;
21 strand = NULL;
22 left = false;
23 right = false;
24 reverse = false;
25 }
26
27 CRead::~CRead() {
28 delete[] read_id;
29 delete[] sam_line;
30 delete[] strand;
31 }
32
33 /*
34 * Augments 'coverage' array at the positions covered by the read in the queried interval.
35 */
36 void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage)
37 {
38 // block1 block2
39 // |=====|======|============|===========|======|====|
40 // ^ ^ ^
41 // p_start_pos | p_end_pos
42 // start_pos
43 // |0000001111111111111000000000000111111100000|
44 // *coverage
45 int len = p_end_pos-p_start_pos+1;
46 for (uint32_t n = 0; n < block_starts.size(); n++) {
47 int32_t from, to;
48 from = block_starts[n]+start_pos-p_start_pos;
49 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n];
50 if (from < 0)
51 from = 0;
52 if (to < 0)
53 continue;
54 else if (to > len)
55 to = len;
56 for (int bp=from; bp<to; bp++) {
57 coverage[bp]++;
58 }
59 }
60 }
61 int CRead::get_last_position()
62 {
63 if (block_starts.size()>0) // this if for some reason zero in case of softclips
64 return start_pos+block_starts.back()+block_lengths.back();
65 return -1;
66 }
67
68 /*
69 * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read).
70 * These indices can be used to build up a sparse matrix of reads x positions.
71 */
72 void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) {
73 int len = p_end_pos-p_start_pos+1;
74 for (uint32_t n = 0; n < block_starts.size(); n++) {
75 uint32_t from, to;
76 if (block_starts[n]+start_pos-p_start_pos >= 0)
77 from = block_starts[n]+start_pos-p_start_pos;
78 else
79 from = 0;
80 if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0)
81 to = block_starts[n]+start_pos-p_start_pos+block_lengths[n];
82 else
83 to = 0;
84 for (int bp=from; bp<to&bp<len; bp++) {
85 reads[reads_c] = row_idx+1; // row indices for sparse matrix
86 reads[reads_c+1] = bp+1; // column indices for sparse matrix
87 reads_c += 2;
88 }
89 }
90 }
91
92 void CRead::get_acc_splice_sites(vector<int>* acc_pos)
93 {
94 if (strand[0]=='+')
95 {
96 for (int k=1;k<block_starts.size(); k++)
97 acc_pos->push_back(start_pos+block_starts[k]-1);
98 }
99 else if (strand[0]=='-')
100 {
101 for (int k=1;k<block_starts.size(); k++)
102 acc_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2);
103 }
104 }
105
106 void CRead::get_don_splice_sites(vector<int>* don_pos)
107 {
108
109 if (strand[0]=='+')
110 {
111 for (int k=1;k<block_starts.size(); k++)
112 don_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2);
113 }
114 else if (strand[0]=='-')
115 {
116 for (int k=1;k<block_starts.size(); k++)
117 don_pos->push_back(start_pos+block_starts[k]-1);
118 }
119 }
120
121 int CRead::min_exon_len()
122 {
123 int min = 1e8;
124 for (int k=0;k<block_starts.size(); k++)
125 if (block_lengths[k]<min)
126 min = block_lengths[k];
127 return min;
128 }
129
130 int CRead::max_intron_len()
131 {
132 int max = 0;
133 for (int k=1;k<block_starts.size(); k++)
134 if (block_starts[k]-(block_starts[k-1]+block_lengths[k-1])>max)
135 max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]);
136 return max;
137 }
138
139 /*
140 * Adds start and end of introns in the read consecutively to the 'introns' vector.
141 */
142 void CRead::get_introns(vector<int>* introns)
143 {
144 for (int i=1; i<block_starts.size(); i++)
145 {
146 int istart = block_starts[i-1]+block_lengths[i-1]+start_pos;
147 int iend = block_starts[i]+start_pos-1;
148 introns->push_back(istart);
149 introns->push_back(iend);
150 //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend);
151 }
152 }
153 void CRead::get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2)
154 {
155 for (int i=1; i<block_starts.size(); i++)
156 {
157 uint32_t istart = block_starts[i-1]+block_lengths[i-1]+start_pos;
158 uint32_t iend = block_starts[i]+start_pos-1;
159 intron_starts->push_back(istart);
160 intron_ends->push_back(iend);
161 block_len1->push_back(block_lengths[i-1]) ;
162 block_len2->push_back(block_lengths[i]) ;
163 }
164 }
165
166 bool CRead::operator==(const CRead& read) const
167 {
168 if (block_starts.size()!=read.block_starts.size())
169 return false;
170 if (block_lengths.size()!=read.block_lengths.size())
171 return false;
172 if (start_pos!=read.start_pos)
173 return false;
174 if (strand[0] != read.strand[0])
175 return false;
176 for (int i=0; i<block_starts.size(); i++)
177 if (block_starts[i]!=read.block_starts[i])
178 return false;
179 for (int i=0; i<block_lengths.size(); i++)
180 if (block_lengths[i]!=read.block_lengths[i])
181 return false;
182 return true;
183 }
184
185 void CRead::print()
186 {
187 fprintf(stdout, "start_pos: %d\n", start_pos);
188 fprintf(stdout, "starts:");
189 for (int i=0; i<block_starts.size(); i++)
190 {
191 fprintf(stdout, " %d", block_starts[i]);
192 }
193 fprintf(stdout, "\n");
194
195 fprintf(stdout, "lengths:");
196 for (int i=0; i<block_starts.size(); i++)
197 {
198 fprintf(stdout, " %d", block_lengths[i]);
199 }
200 fprintf(stdout, "\n");
201 }
202
203 void CRead::set_strand(char s)
204 {
205 delete[] strand;
206 strand = new char [2];
207 strand[0] = s;
208 strand[1] = '0';
209 }
210
211 int CRead::get_mismatches()
212 {
213 return mismatches ;
214 }