annotate rDiff/src/tools/detect_overlapping_regions.m @ 2:233c30f91d66

updated python based GFF parsing module which will handle GTF/GFF/GFF3 file types
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 07:15:44 -0400
parents 0f80a5141704
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
function [new_genes]=detect_overlapping_regions(genes)
% DETECT_OVERLAPPING_REGIONS  Mark the parts of each gene shared with other genes.
%
%   new_genes = detect_overlapping_regions(genes)
%
%   For every gene, the regions of the other genes on the same chromosome
%   that intersect the gene's own [start, stop] interval are collected and
%   stored in the field "non_unique_regions" of the returned struct array.
%
%   Input:
%     genes      struct array (row or column). The fields read here are
%                chr, start, stop, transcripts and exons. exons is indexed
%                as a cell array with one entry per transcript.
%
%   Output:
%     new_genes  copy of genes with the extra field "non_unique_regions".
%                Each row is a region taken from another gene; columns 1
%                and 2 are compared against the gene's start and stop.
%                The field is [] when the gene has no candidate neighbour.
%
%   Only chr, start and stop decide which genes are compared; no other
%   field (e.g. a strand) is consulted. An empty input is returned as is.

new_genes=genes;
if isempty(genes)
  return
end

% numel instead of size(genes,2) so that column struct arrays are handled.
NUM_GENES=numel(genes);

% Map every gene to the index of its chromosome in the sorted unique list.
[CHROMOSOMES,FIRST_OCC,CHR_IDX]=unique({genes.chr});

% One row per gene: [gene index, start, stop, chromosome index].
INFO=zeros(NUM_GENES,4);
for i=1:NUM_GENES
  INFO(i,:)=[i,genes(i).start,genes(i).stop,CHR_IDX(i)];
end

for chr=1:length(CHROMOSOMES)
  % Genes of this chromosome, ordered by start position.
  GENES_ON_CHR=INFO(INFO(:,4)==chr,:);
  [SORTED_STARTS,POS]=sort(GENES_ON_CHR(:,2));
  GENES_ON_CHR=GENES_ON_CHR(POS,:);
  STARTS=GENES_ON_CHR(:,2);
  STOPS=GENES_ON_CHR(:,3);

  for i=1:size(GENES_ON_CHR,1)
    % First gene (in start order) ending at or after this gene's start and
    % last gene starting at or before this gene's stop. Everything between
    % them, except the gene itself, is a candidate neighbour.
    FIRST=find(STOPS>=STARTS(i),1,'first');
    LAST=find(STARTS<=STOPS(i),1,'last');
    NEIGHBOURS=[];
    if ~isempty(FIRST)
      NEIGHBOURS=[NEIGHBOURS,FIRST:(i-1)];
    end
    if ~isempty(LAST)
      NEIGHBOURS=[NEIGHBOURS,(i+1):LAST];
    end

    REGIONS=[];
    for CURR=NEIGHBOURS
      REGIONS=[REGIONS;collect_candidate_regions(genes(GENES_ON_CHR(CURR,1)))];
    end

    % Candidates are bounded by start order only, so some of their regions
    % may lie outside this gene; keep just the rows that intersect it.
    % The emptiness guard avoids indexing columns of a 0x0 matrix.
    if ~isempty(REGIONS)
      KEEP=(REGIONS(:,2)>=STARTS(i)) & (REGIONS(:,1)<=STOPS(i));
      REGIONS=REGIONS(KEEP,:);
    end
    new_genes(GENES_ON_CHR(i,1)).non_unique_regions=REGIONS;
  end
end

function regions=collect_candidate_regions(gene)
% Regions contributed by one neighbouring gene: the exon rows of each of
% its transcripts, or its [start, stop] span when the gene has no
% transcripts or no exon annotation (one span row per transcript in the
% latter case).
regions=[];
if isempty(gene.transcripts)
  regions=[gene.start,gene.stop];
  return
end
for tra=1:size(gene.transcripts,2)
  if ~isempty(gene.exons)
    regions=[regions;gene.exons{tra}];
  else
    regions=[regions;gene.start,gene.stop];
  end
end