comparison rDiff/src/tools/sanitize_genes.m @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 function [genes]=sanitize_genes(genes,CFG)
2 %This function removes trnascript and genes which have a invalid
3 %structure and recomputes the splicegraph
4
5
6 %Mark genes with eronous exon definitions
7 RM_GENES_IDX=[]; %genes to keep
8 for i=1:size(genes,2)
9 %remove transcripts which have a length smaller than
10 %readlength
11 RM_TR_IDX=[];
12 START_MIN=inf;
13 STOP_MAX=0;
14 for j=1:size(genes(i).transcripts,2)
15 if sum(genes(i).exons{j}(:,2)-genes(i).exons{j}(:,1))< CFG.sequenced_length
16 RM_TR_IDX=[RM_TR_IDX,j];
17 else
18 START_MIN=min(START_MIN,genes(i).exons{j}(1,1));
19 STOP_MAX=max(STOP_MAX,genes(i).exons{j}(end,2));
20 end
21 end
22 if ~isempty(RM_TR_IDX)
23 genes(i).exons(RM_TR_IDX)=[];
24 genes(i).transcripts(RM_TR_IDX)=[];
25 genes(i).start=START_MIN;
26 genes(i).stop=STOP_MAX;
27 end
28 if genes(i).start>START_MIN
29 genes(i).start=START_MIN;
30 end
31 if genes(i).stop<STOP_MAX
32 genes(i).stop=STOP_MAX;
33 end
34
35
36 %Check if exons are eronous
37 CHECK=1;
38
39 if size(genes(i).transcripts,2)==0
40 CHECK=0;
41 end
42
43 for j=1:size(genes(i).transcripts,2)
44 for k=1:(size(genes(i).exons{j},1)-1)
45 if (genes(i).exons{j}(k,2)> genes(i).exons{j}(k+1,1))
46 CHECK=0;
47 break;
48 end
49 end
50
51 if isempty(genes(i).exons{j})
52 CHECK=0;
53 break;
54 end
55 if CHECK==0
56 break
57 end
58 end
59
60 if genes(i).stop-genes(i).start<=CFG.sequenced_length
61 CHECK=0;
62 end
63 if CHECK==0
64 RM_GENES_IDX=[RM_GENES_IDX;i];
65 genes(i).do_not_quant=1;
66 else
67 genes(i).do_not_quant=0;
68 end
69 end
70 %genes(RM_GENES_IDX)=[];
71
72 % Create splicegraph
73 for i=1:size(genes,2)
74 gene=genes(i);
75 ALL_EXO=[];
76
77 for j=1:size(gene.exons,2)
78 ALL_EXO=[ALL_EXO;gene.exons{j}];
79 end
80 ALL_EXO=unique(ALL_EXO,'rows');
81 GRAPH=zeros(size(ALL_EXO,1),size(ALL_EXO,1));
82 for j=1:size(gene.exons,2)
83 for k=1:(size(gene.exons{j},1)-1)
84 [A,B]=intersect(ALL_EXO,gene.exons{j}(k,:),'rows');
85 [A,C]=intersect(ALL_EXO,gene.exons{j}(k+1,:),'rows');
86 GRAPH(B,C)=1;
87 end
88 end
89 GRAPH=GRAPH+GRAPH';
90 genes(i).splicegraph{1}=ALL_EXO';
91 genes(i).splicegraph{2}=GRAPH;
92 end
93
94
95