0
|
1 function [reads1] = get_reads_for_gene(CFG,gene)
|
|
2
|
|
3
|
|
4 % Get the reads from the bam-file
|
|
5 if strcmp(gene.strand,'-')
|
|
6 [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start+1,gene.stop+1, '0');
|
|
7 else
|
|
8 [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start,gene.stop, '0');
|
|
9 end
|
|
10
|
|
11 % Bring the reads into a matrix form
|
|
12 if isempty(mask1)
|
|
13 reads1=zeros(0,gene.stop-gene.start+1);
|
|
14 return
|
|
15 else
|
|
16 reads1=sparse(mask1(1,:)',mask1(2,:)',ones(size(mask1,2),1),max(mask1(1,:)),gene.stop-gene.start+1);
|
|
17 end
|
|
18
|
|
19 % remove reads which are shorter than CFG.min_read_length
|
|
20 reads1=reads1(sum(reads1,2)>CFG.min_read_length,:);
|
|
21
|
|
22 if isempty(reads1)
|
|
23 return
|
|
24 end
|
|
25 %remove reads which could stem from other genes
|
|
26 [reads1,FLAG]=remove_reads_from_other_genes(reads1,gene);
|
|
27
|
|
28 %Subsample reads
|
|
29 if isempty(reads1)
|
|
30 return
|
|
31 end
|
|
32 if CFG.rDiff_subsample>0
|
|
33 if size(reads1,1)>CFG.rDiff_subsample
|
|
34 RP=randperm(size(reads1,1));
|
|
35 reads1=reads1(RP(1:CFG.rDiff_subsample),:);
|
|
36 end
|
|
37 end
|
|
38
|
|
39 %Clip reads
|
|
40 if isempty(reads1)
|
|
41 return
|
|
42 end
|
|
43 if CFG.bases_to_clip>0
|
|
44 CFG.bases_to_clip=3;
|
|
45 [reads1]=clip_reads(reads1,CFG.bases_to_clip);
|
|
46 end
|
|
47
|