Mercurial > repos > vipints > rdiff
diff rDiff/src/tools/read_utils/get_reads_for_gene.m @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rDiff/src/tools/read_utils/get_reads_for_gene.m Thu Feb 14 23:38:36 2013 -0500 @@ -0,0 +1,47 @@ +function [reads1] = get_reads_for_gene(CFG,gene) + + +% Get the reads from the bam-file +if strcmp(gene.strand,'-') + [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start+1,gene.stop+1, '0'); +else + [mask1, read_intron_list] = get_reads(CFG.curr_bamfile, [CFG.chr_prefix gene.chr],gene.start,gene.stop, '0'); +end + +% Bring the reads into a matrix form +if isempty(mask1) + reads1=zeros(0,gene.stop-gene.start+1); + return +else + reads1=sparse(mask1(1,:)',mask1(2,:)',ones(size(mask1,2),1),max(mask1(1,:)),gene.stop-gene.start+1); +end + +% remove reads which are shorter than CFG.min_read_length +reads1=reads1(sum(reads1,2)>CFG.min_read_length,:); + +if isempty(reads1) + return +end +%remove reads which could stem from other genes +[reads1,FLAG]=remove_reads_from_other_genes(reads1,gene); + +%Subsample reads +if isempty(reads1) + return +end +if CFG.rDiff_subsample>0 + if size(reads1,1)>CFG.rDiff_subsample + RP=randperm(size(reads1,1)); + reads1=reads1(RP(1:CFG.rDiff_subsample),:); + end +end + +%Clip reads +if isempty(reads1) + return +end +if CFG.bases_to_clip>0 + CFG.bases_to_clip=3; + [reads1]=clip_reads(reads1,CFG.bases_to_clip); +end +