annotate rDiff/src/tools/read_utils/get_reads.cpp @ 3:29a698dc5c7e default tip

Merge multiple heads.
author Dave Bouvier <dave@bx.psu.edu>
date Mon, 27 Jan 2014 14:15:36 -0500
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 /* written by Jonas Behr, Regina Bohnert and Gunnar Raetsch, FML Tuebingen, Germany, 2010 */
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3 #include <stdio.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 #include <string.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5 #include <signal.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 #include <mex.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 #include <algorithm>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 #include <vector>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 using std::vector;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 #include "get_reads_direct.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11 #include "mex_input.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 #include "read.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 #define MAXLINE 10000
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 /*
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 * input:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18 * 1 bam file
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 * 2 chromosome
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 * 3 region start (1-based index)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21 * 4 region end (1-based index)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 * 5 strand (either '+' or '-' or '0')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 * [6] collapse flag: if true the reads are collapsed to a coverage track
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 * [8] intron length filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 * [9] exon length filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27 * [10] mismatch filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 * [11] bool: use mapped reads for coverage
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 * [12] bool: use spliced reads for coverage
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30 * [13] return maxminlen
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31 * [14] return pair coverage and pair index list
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 * [15] only_clipped
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 * [15] switch of pair filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34 *
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35 * output:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36 * 1 coverage
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 * [2] intron cell array
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 * [3] pair coverage
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39 * [4] pair list
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40 *
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 * example call:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 */
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 if (nrhs<5 || nrhs>16 || (nlhs<1 || nlhs>4)) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 fprintf(stderr, "usage: [x [introns] [pair]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches], [mapped], [spliced], [maxminlen], [pair], [only clipped], [all pairs]);\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 return;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 /* obligatory arguments
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 * **********************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53 char *fname = get_string(prhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 //fprintf(stdout, "arg1: %s\n", fname);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55 char *chr = get_string(prhs[1]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56 //fprintf(stdout, "arg2: %s\n", chr);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 int from_pos = get_int(prhs[2]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 //fprintf(stdout, "arg3: %d\n", from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59 int to_pos = get_int(prhs[3]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 //fprintf(stdout, "arg4: %d\n", to_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 char *strand = get_string(prhs[4]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 //fprintf(stdout, "arg5: %s\n", strand);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 if (from_pos>to_pos)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 /* optional arguments
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71 * ******************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 int collapse = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73 if (nrhs>=6)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 collapse = get_int(prhs[5]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76 int subsample = 1000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77 if (nrhs>=7)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 subsample = get_int(prhs[6]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80 int intron_len_filter = 1e9;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81 if (nrhs>=8)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 intron_len_filter = get_int(prhs[7]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 int exon_len_filter = -1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 if (nrhs>=9)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86 exon_len_filter = get_int(prhs[8]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88 int filter_mismatch = 1e9;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 if (nrhs>=10)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90 filter_mismatch = get_int(prhs[9]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 int mapped = 1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93 if (nrhs>=11)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94 mapped = get_int(prhs[10]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
96 int spliced = 1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
97 if (nrhs>=12)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
98 spliced = get_int(prhs[11]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
99
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
100 int maxminlen = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
101 if (nrhs>=13)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
102 maxminlen = get_int(prhs[12]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
103
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
104 int pair_cov = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
105 if (nrhs>=14)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
106 pair_cov = get_int(prhs[13]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
107
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
108 int only_clipped = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
109 if (nrhs>=15)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
110 only_clipped = get_int(prhs[14]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
111
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
112 int no_pair_filter = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
113 if (nrhs>=16)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
114 no_pair_filter = get_int(prhs[15]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
115
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
116
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
117 /* call function to get reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
118 * **************************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
119 char region[MAXLINE];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
120 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
121
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
122 vector<CRead*> all_reads;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
123
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
124 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
125
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
126 for (int i=0; i<all_reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
127 all_reads[i]->strip_leftright_tag() ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
128 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
129
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
130 /* filter reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
131 * **************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
132 int left = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
133 int right = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
134
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
135 vector<CRead*> reads;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
136 for (int i=0; i<all_reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
137 if (all_reads[i]->left)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
138 left++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
139 if (all_reads[i]->right)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
140 right++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
141 //if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
142 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && (only_clipped==0 || all_reads[i]->is_clipped))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
143 reads.push_back(all_reads[i]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
144 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
145
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
146
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
147 /* prepare output
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
148 * **************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
149 int num_rows = reads.size();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
150 int num_pos = to_pos-from_pos+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
151
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
152 if (pair_cov==1 && nlhs>=3) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
153 // sort reads by read_id
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
154 //printf("\n\nleft:%i right:%i \n\n", left, right);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
155 //printf("\nreads[0]->read_id: %s\n", reads[0]->read_id);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
156 sort(reads.begin(), reads.end(), CRead::compare_by_read_id);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
157 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
158
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
159 // read coverages collapsed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
160 if (collapse) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
161 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
162 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
163 if (num_pos>0 && mask_ret==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
164 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
165 if (mapped && spliced) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
166 for (int i=0; i<reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
167 reads[i]->get_coverage(from_pos, to_pos, mask_ret);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
168 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
169 } else {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
170 for (int i=0; i<reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
171 ssize_t num_exons = reads[i]->block_starts.size();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
172 if ((num_exons==1 && mapped) || (num_exons>1 && spliced))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
173 reads[i]->get_coverage(from_pos, to_pos, mask_ret);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
174 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
175 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
176 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
177 // reads not collapsed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
178 else {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
179 uint32_t nzmax = 0; // maximal number of nonzero elements
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
180 int len = to_pos-from_pos+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
181 for (uint i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
182 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
183 ssize_t num_exons = reads[i]->block_starts.size();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
184 if (!((mapped && spliced) || (num_exons==1 && mapped) || (num_exons>1 && spliced)))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
185 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
186 continue;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
187 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
188 for (uint n = 0; n < reads[i]->block_starts.size(); n++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
189 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
190 uint32_t from, to;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
191 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
192 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
193 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
194 from = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
195 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
196 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
197 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
198 to = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
199 for (int bp=from; bp<to&bp<len; bp++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
200 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
201 nzmax++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
202 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
203 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
204 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
205 // 1st row: row indices
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
206 // 2nd row: column indices
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
207 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
208 double *mask_ret = (double*) mxGetData(plhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
209 if (nzmax>0 && mask_ret==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
210 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
211 uint32_t mask_ret_c = 0; // counter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
212 for (uint i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
213 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
214 ssize_t num_exons = reads[i]->block_starts.size();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
215 if (!((mapped && spliced) || (num_exons==1 && mapped) || (num_exons>1 && spliced)))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
216 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
217 continue;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
218 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
219 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
220 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
221 if (mask_ret_c!=2*nzmax)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
222 mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
223 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
224 // introns
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
225 if (maxminlen==0 && nlhs>=2) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
226 vector<int> intron_list;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
227 for (int i=0; i<reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
228 reads[i]->get_introns(&intron_list);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
229 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
230
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
231 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
232 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
233 for (int p = 0; p<intron_list.size(); p++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
234 p_intron_list[p] = intron_list[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
235 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
236 intron_list.clear();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
237 } else if (nlhs>=2) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
238 vector<uint32_t> intron_starts;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
239 vector<uint32_t> intron_ends;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
240 vector<uint32_t> block_len1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
241 vector<uint32_t> block_len2;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
242 for (int i=0; i<reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
243 reads[i]->get_introns(&intron_starts, &intron_ends, &block_len1, &block_len2);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
244 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
245
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
246 plhs[1] = mxCreateNumericMatrix(4, intron_starts.size(), mxINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
247 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
248 for (int p = 0; p<intron_starts.size(); p++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
249 p_intron_list[4*p] = intron_starts[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
250 p_intron_list[(4*p)+1] = intron_ends[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
251 p_intron_list[(4*p)+2] = block_len1[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
252 p_intron_list[(4*p)+3] = block_len2[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
253 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
254 intron_starts.clear() ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
255 intron_ends.clear() ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
256 block_len1.clear() ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
257 block_len2.clear() ;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
258 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
259 if (pair_cov==1 && nlhs>=3) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
260 plhs[2] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
261 uint32_t *p_pair_map = (uint32_t*) mxGetData(plhs[2]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
262 if (num_pos>0 && p_pair_map==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
263 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
264
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
265 vector<int> pair_ids;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
266
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
267 int take_cnt = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
268 int discard_cnt = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
269 int discard_strand_cnt = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
270 //printf("reads.size(): %i\n", reads.size());
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
271 // find consecutive reads with the same id
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
272 for (int i=0; i<((int) reads.size())-1; i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
273 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
274 //printf("reads[%i]->read_id: %s\n", i, reads[i]->read_id);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
275 int j = i+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
276 while(j<reads.size() && strcmp(reads[i]->read_id, reads[j]->read_id) == 0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
277 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
278 if ((reads[i]->left && reads[j]->right) || (reads[j]->left && reads[i]->right) && (reads[i]->reverse != reads[j]->reverse))
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
279 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
280 if (reads[i]->get_last_position()==-1 || reads[j]->get_last_position()==-1)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
281 break;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
282 if (false)//(reads[i]->strand[0]=='0' && reads[j]->strand[0]=='0' )
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
283 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
284 // discard pairs without strand information
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
285 discard_strand_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
286 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
287 else if (reads[i]->get_last_position()<reads[j]->start_pos && reads[j]->start_pos-reads[i]->get_last_position()<60000)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
288 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
289 int from = std::max(0, reads[i]->get_last_position()-from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
290 int to = std::min(num_pos-1, reads[j]->start_pos-from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
291 pair_ids.push_back(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
292 pair_ids.push_back(j);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
293 for (int k=from; k<to; k++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
294 p_pair_map[k]++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
295 take_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
296 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
297 else if (reads[i]->start_pos>reads[j]->get_last_position() && reads[j]->get_last_position()-reads[i]->start_pos<60000)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
298 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
299 int from = std::max(0, reads[j]->get_last_position()-from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
300 int to = std::min(num_pos-1, reads[i]->start_pos-from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
301 pair_ids.push_back(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
302 pair_ids.push_back(j);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
303 for (int k=from; k<to; k++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
304 p_pair_map[k]++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
305 take_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
306 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
307 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
308 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
309 if (no_pair_filter>0 && reads[i]->start_pos<reads[j]->start_pos)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
310 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
311 pair_ids.push_back(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
312 pair_ids.push_back(j);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
313 take_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
314 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
315 else if (no_pair_filter>0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
316 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
317 pair_ids.push_back(j);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
318 pair_ids.push_back(i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
319 take_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
320 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
321 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
322 discard_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
323 //printf("istart:%i, ilast:%i jstart:%i, jlast: %i\n", reads[i]->start_pos, reads[i]->get_last_position(), reads[j]->start_pos, reads[j]->get_last_position());
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
324 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
325 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
326 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
327 discard_cnt++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
328 j++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
329 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
330 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
331 //printf("take:%i, discard:%i discard_strand_cnt:%i\n", take_cnt, discard_cnt+discard_strand_cnt, discard_strand_cnt);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
332
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
333 if (nlhs>=4) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
334 plhs[3] = mxCreateNumericMatrix(2, pair_ids.size()/2, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
335 uint32_t *pair_ids_ret = (uint32_t*) mxGetData(plhs[3]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
336 if (pair_ids.size()>0 && pair_ids_ret==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
337 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
338 for (int i=0; i<pair_ids.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
339 pair_ids_ret[i] = pair_ids[i];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
340 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
341 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
342 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
343 for (int i=0; i<all_reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
344 delete all_reads[i];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
345 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
346