diff rDiff/src/tools/read_utils/get_reads_for_gene.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rDiff/src/tools/read_utils/get_reads_for_gene.m	Thu Feb 14 23:38:36 2013 -0500
@@ -0,0 +1,47 @@
+function [reads1] = get_reads_for_gene(CFG,gene)
+  
+
+% Get the reads from the bam-file
+if strcmp(gene.strand,'-')
+    [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start+1,gene.stop+1, '0');
+else 
+    [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start,gene.stop, '0');
+end
+
+% Bring the reads into a matrix form
+if isempty(mask1)
+    reads1=zeros(0,gene.stop-gene.start+1);
+    return
+else
+    reads1=sparse(mask1(1,:)',mask1(2,:)',ones(size(mask1,2),1),max(mask1(1,:)),gene.stop-gene.start+1);
+end
+
+% remove reads which are shorter than CFG.min_read_length
+reads1=reads1(sum(reads1,2)>CFG.min_read_length,:);
+
+if isempty(reads1)
+    return
+end
+%remove reads which could stem from other genes
+[reads1,FLAG]=remove_reads_from_other_genes(reads1,gene); 
+
+%Subsample reads
+if isempty(reads1)
+    return
+end
+if CFG.rDiff_subsample>0
+    if size(reads1,1)>CFG.rDiff_subsample
+        RP=randperm(size(reads1,1));
+        reads1=reads1(RP(1:CFG.rDiff_subsample),:);
+    end
+end
+
+%Clip reads
+if isempty(reads1)
+    return
+end
+if CFG.bases_to_clip>0
+    CFG.bases_to_clip=3;
+    [reads1]=clip_reads(reads1,CFG.bases_to_clip); 
+end
+