Mercurial > repos > jeremyjliu > region_motif_enrichment
comparison region_motif_lib/regions.r @ 0:5c044273554d draft
initial commit
author | jeremyjliu |
---|---|
date | Tue, 05 Aug 2014 13:56:22 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5c044273554d |
---|---|
1 | |
2 # SHOULD ONLY OCCUR IN ONE FILE | |
3 #common.dir = "/Users/jeremyliu1/galaxy-dist/tools/my_tools" | |
4 | |
5 # commonDir from region_motif_intersect.r sourcing file | |
6 dyn.load(paste(commonDir, "/region_motif_lib/regions.so",sep="")) | |
7 | |
8 ##reg = matrix(cbind(from,to)) from<to | |
9 ##region[[chr]] = reg | |
10 ##pos = unique(integer()) | |
11 ##poslist = list(chr,pos, optional(strand=c(-1,0,+1))) | |
12 | |
13 # USED | |
14 merge.reg <- function(...,sep=1) { | |
15 ##This function returns union of regs. | |
16 reg = rbind(...) | |
17 x=.C("merge_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(sep)) | |
18 reg=matrix(x[[1]][1:(x[[2]]*2)],ncol=2,byrow=TRUE) | |
19 reg = matrix(reg[which(reg[,2]>reg[,1]),],ncol=2) | |
20 reg[which(reg==0)]=1 | |
21 return(reg) | |
22 } | |
23 | |
24 merge.regions<-function(...,sep=1) { | |
25 ##This function returns union of regions. | |
26 regions=list(...) | |
27 chrs = unique(unlist(lapply(regions,names),use.names=F)) | |
28 region = list() | |
29 for(chr in chrs) { | |
30 region[[chr]] = do.call("merge.reg",c(lapply(regions,function(i) i[[chr]]),sep=sep)) | |
31 } | |
32 return(region) | |
33 } | |
34 | |
35 plot.reg<-function(reg,xlim=NULL,y=NULL,vertical=FALSE,...) { | |
36 ##This function does not stack if reg is overlapping. | |
37 ##new plot is made unless y is specified. | |
38 if(nrow(reg)==0) return() | |
39 if(is.null(xlim)) xlim=range(reg) | |
40 if(is.null(y)) { | |
41 plot(xlim,c(0,1),type="n",axes=FALSE,xlab=" ",ylab=" ") | |
42 y=0.5 | |
43 } | |
44 segments(reg[,1],y,reg[,2],...) | |
45 if(vertical) abline(v=reg) | |
46 } | |
47 | |
48 distance.to.closest.reg.of.reg <- function(reg,reg2) { | |
49 ##for each element of reg, what is the closest distance to any element of reg2? | |
50 reg2 = merge.reg(reg2) | |
51 reg2 = c(-Inf,t(reg2),Inf) | |
52 s=reg[,1] | |
53 e=reg[,2] | |
54 sbin = as.integer(cut(s,reg2)) | |
55 ebin = as.integer(cut(e,reg2)) | |
56 d = pmin(s-reg2[sbin], reg2[sbin+1]-s, e-reg2[ebin], reg2[ebin+1]-e) | |
57 d[which(sbin!=ebin | sbin%%2==0)] = 0 | |
58 return(d) | |
59 } | |
60 | |
61 # USED | |
62 distance.to.closest.reg.of.pos <- function(pos,reg) { | |
63 ##for each element of pos, what is the closest distance to any element of reg? | |
64 reg = merge.reg(reg) | |
65 reg = c(-Inf,t(reg),Inf) | |
66 pbin = as.integer(cut(pos,reg)) | |
67 d = pmin(pos-reg[pbin], reg[pbin+1]-pos) | |
68 d[which(pbin%%2==0)] = 0 | |
69 return(d) | |
70 } | |
71 | |
72 distance.to.closest.pos.of.reg <- function(reg,pos,pos.strand=NULL,index.return=FALSE) { | |
73 ##for each element of reg, what is the closest distance to any element of pos? | |
74 ##if strand is provided, distance is along strand | |
75 o = order(pos) | |
76 pos = c(-Inf,pos[o],Inf) | |
77 o = c(o[1],o,o[length(o)]) | |
78 | |
79 s=reg[,1] | |
80 e=reg[,2] | |
81 sbin = as.integer(cut(s,pos)) | |
82 ebin = as.integer(cut(e,pos)) | |
83 | |
84 d=integer(nrow(reg)) | |
85 s.is.closer = s-pos[sbin] < pos[sbin+1]-e | |
86 if(index.return) { | |
87 return(ifelse(s.is.closer,o[sbin],o[sbin+1])) | |
88 } | |
89 d = ifelse(s.is.closer, s-pos[sbin], e-pos[sbin+1]) | |
90 d[which(sbin!=ebin)] = 0 | |
91 if(!is.null(pos.strand)) { | |
92 reg.strand = ifelse(s.is.closer,pos.strand[o][sbin],pos.strand[o][sbin+1]) | |
93 d = d * reg.strand | |
94 } | |
95 return(d) | |
96 } | |
97 | |
98 if(F) { | |
99 pos = sample(seq(0,1000,200)) | |
100 pos2 = sample(seq(10,1010,100)) | |
101 pos.strand = sample(c(1,-1),6,replace = T) | |
102 pos2.strand = sample(c(1,-1),11,replace = T) | |
103 } | |
104 | |
105 distance.to.closest.pos.of.pos <- function(pos,pos2,pos.strand=NULL,pos2.strand=NULL, ignore.pos.strand=TRUE,index.return=FALSE) { | |
106 ##for each element of pos, what is the closest distance to any element of pos2? | |
107 ##if index.return==TRUE, index of pos2 closest to pos is returned | |
108 ##else if strand2 is provided, distance is along strand2 | |
109 ##if strand and strand2 are both provided and !ignore.pos.strand | |
110 ## then output is a list giving plus.up, plus.down, minus.up, minus.down | |
111 ## plus.up: distance to closest upstream on the same same strand etc. etc. | |
112 o = order(pos2) | |
113 pos2 = c(-Inf,pos2[o],Inf) | |
114 if(!is.null(pos2.strand)) pos2.strand = c(-Inf,pos2.strand[o],Inf) | |
115 | |
116 if(is.null(pos2.strand) | is.null(pos.strand) | ignore.pos.strand) { | |
117 pbin = as.integer(cut(pos,pos2)) | |
118 | |
119 pbin = ifelse(pos-pos2[pbin] < pos2[pbin+1]-pos,pbin,pbin+1) | |
120 d = pos-pos2[pbin] | |
121 if(!is.null(pos2.strand)) d = d * pos2.strand[pbin] | |
122 | |
123 if(index.return) return(o[pbin-1]) | |
124 return(d) | |
125 } | |
126 strands = list(plus=1,minus=-1) | |
127 relcoords = list(up=0,down=1) | |
128 ind = lapply(strands,function(strand) { | |
129 ind.p = c(1,which(pos2.strand==strand),length(pos2)) | |
130 pbin.p = cut(pos,pos2[ind.p],labels=FALSE) | |
131 as.data.frame(lapply(relcoords,function(i) ind.p[pbin.p+i])) | |
132 }) | |
133 ind.temp = ind | |
134 ind.minus = which(pos.strand==-1) | |
135 if(length(ind.minus)>0) { | |
136 ind[[1]][ind.minus,]=ind.temp[[2]][ind.minus,2:1] | |
137 ind[[2]][ind.minus,]=ind.temp[[1]][ind.minus,2:1] | |
138 } | |
139 ind = unlist(ind,recursive=FALSE) | |
140 if(index.return) { | |
141 return( lapply(ind,function(i) { | |
142 i[which(i==1)]=NA | |
143 i[which(i==length(pos2))]=NA | |
144 o[i-1] | |
145 }) ) | |
146 } | |
147 return(lapply(ind,function(i) pos.strand*(pos2[i]-pos))) | |
148 } | |
149 | |
150 distance.to.closest.region.of.region <- function(region,region2) { | |
151 ##for each element of region[[chr]], what is the closest distance to any element of region2[[chr]]? | |
152 ##returns d[[chr]] | |
153 chrs = names(region) | |
154 d=list() | |
155 for(chr in chrs) { | |
156 if(is.null(region2[[chr]])) { | |
157 d[[chr]] = rep(Inf,nrow(region[[chr]])) | |
158 } else { | |
159 d[[chr]] = distance.to.closest.reg.of.reg(region[[chr]],region2[[chr]]) | |
160 } | |
161 } | |
162 return(d) | |
163 } | |
164 | |
165 # USED | |
166 distance.to.closest.region.of.poslist <- function(poslist,region) { | |
167 ##for each element of poslist, what is the closest distance to any element of region? | |
168 chrs = names(table(poslist$chr)) | |
169 d=integer() | |
170 for(chr in chrs) { | |
171 ind = which(poslist$chr==chr) | |
172 pos=poslist$pos[ind] | |
173 if(is.null(region[[chr]])) { | |
174 d[ind] = Inf | |
175 } else { | |
176 d[ind] = distance.to.closest.reg.of.pos(pos,region[[chr]]) | |
177 } | |
178 } | |
179 return(d) | |
180 } | |
181 distance.to.closest.poslist.of.region <- function(region,poslist,index.return=FALSE) { | |
182 ##for each element of region, what is the closest distance to any element of poslist? | |
183 chrs = names(region) | |
184 d=list() | |
185 for(chr in chrs) { | |
186 ind = which(poslist$chr==chr) | |
187 pos=poslist$pos[ind] | |
188 pos.strand=poslist$strand[ind] | |
189 d[[chr]] = distance.to.closest.pos.of.reg(region[[chr]],pos,pos.strand,index.return=index.return) | |
190 if(index.return) d[[chr]] = ind[d[[chr]]] | |
191 } | |
192 return(d) | |
193 } | |
194 | |
195 distance.to.closest.poslist.of.poslist <- function(poslist,poslist2,ignore.poslist.strand=TRUE,index.return=FALSE) { | |
196 ##for each element of poslist, what is the closest distance to any element of poslist2? | |
197 ##if poslist2$strand is provided, distance is along strand2 | |
198 ##if strand and strand2 are provided and no ignore.poslist.strand | |
199 ## then output is a list giving plus.up, plus.down, minus.up, minus.down | |
200 ## plus.up: distance to closest upstream on the same same strand etc. etc. | |
201 ##if index.return==TRUE, index of pos2 closest to pos is returned | |
202 | |
203 chrs = names(table(poslist$chr)) | |
204 | |
205 d=integer() | |
206 stranded = !(is.null(poslist2$strand) | is.null(poslist$strand) | ignore.poslist.strand) | |
207 if(stranded) { | |
208 brs = c("plus.up","plus.down","minus.up","minus.down") | |
209 d=list() | |
210 for(br in brs) d[[br]]=integer() | |
211 } | |
212 | |
213 for(chr in chrs) { | |
214 ind = which(poslist$chr==chr) | |
215 ind2 = which(poslist2$chr==chr) | |
216 pos=poslist$pos[ind] | |
217 pos2=poslist2$pos[ind2] | |
218 pos.strand=poslist$strand[ind] | |
219 pos2.strand=poslist2$strand[ind2] | |
220 if(!stranded) { | |
221 d[ind] = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand,index.return=index.return) | |
222 if(index.return) d[ind] = ind2[d[ind]] | |
223 } else { | |
224 x = distance.to.closest.pos.of.pos(pos,pos2,pos.strand,pos2.strand,ignore.poslist.strand) | |
225 for(br in brs) { | |
226 d[[br]][ind] = x[[br]] | |
227 if(index.return) d[[br]][ind] = ind2[d[[br]][ind]] | |
228 } | |
229 } | |
230 } | |
231 return(d) | |
232 } | |
233 | |
234 | |
235 reg.minus.reg <- function(reg,reg2) { | |
236 x = .C("region_minus_region",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]] | |
237 x=x[which(x>=0)] | |
238 return(matrix(x,ncol=2,byrow=TRUE)) | |
239 } | |
240 | |
241 intersection.of.regs <- function(reg,reg2) { | |
242 x = .C("intersection_of_regions",as.integer(t(reg)),as.integer(nrow(reg)),as.integer(t(reg2)),as.integer(nrow(reg2)),integer((nrow(reg)+nrow(reg2))*2))[[5]] | |
243 x=x[which(x>=0)] | |
244 return(matrix(x,ncol=2,byrow=TRUE)) | |
245 } | |
246 | |
247 region.minus.region<-function(region,region2) { | |
248 chrs = names(region) | |
249 for(chr in chrs) { | |
250 if(is.null(region[[chr]])) next | |
251 if(!is.null(region2[[chr]])) { | |
252 region[[chr]] = reg.minus.reg(region[[chr]],region2[[chr]]) | |
253 } | |
254 } | |
255 return(region) | |
256 } | |
257 | |
258 intersection.of.regions<-function(region,region2) { | |
259 chrs = names(region) | |
260 for(chr in chrs) { | |
261 if(is.null(region2[[chr]])) { | |
262 region[[chr]]<-NULL | |
263 } else { | |
264 region[[chr]] = intersection.of.regs(region[[chr]],region2[[chr]]) | |
265 } | |
266 } | |
267 return(region) | |
268 } | |
269 | |
270 reg.around.pos <-function(pos,range=500,strand=NULL) { | |
271 if(length(range)==1) range=c(range,range) | |
272 if(is.null(strand)) strand = 1; | |
273 reg = cbind(pos-range[1]*strand,pos+range[2]*strand); | |
274 ind = which(reg[,2]<reg[,1]) | |
275 reg[ind,] = reg[ind,2:1] | |
276 ind = which(reg<=0) | |
277 reg[ind] = 1 | |
278 return(reg) | |
279 } | |
280 | |
281 | |
282 region.around.poslist <-function(poslist,range=500) { | |
283 chrs = names(table(poslist$chr)) | |
284 region=list() | |
285 for(chr in chrs) { | |
286 ind = which(poslist$chr==chr) | |
287 pos=poslist$pos[ind] | |
288 strand = 1 | |
289 if(!is.null(poslist$strand)) { | |
290 strand = poslist$strand[ind] | |
291 } | |
292 region[[chr]] = reg.around.pos(pos,range,strand) | |
293 } | |
294 return(region) | |
295 } | |
296 | |
297 | |
298 poslist.of.region.centers <-function(region) { | |
299 chrs = names(region) | |
300 n=sapply(region,nrow) | |
301 return(data.frame(chr=rep(chrs,n),pos=unlist(lapply(region,function(chr)(chr[,1]+chr[,2])/2),use.names = FALSE))) | |
302 } | |
303 | |
304 write.gff.region<-function(region,outfname) { | |
305 region = lapply(region,function(chr) list(s=chr[,1],e=chr[,2])) | |
306 out=unlist.chr(region) | |
307 out$chr=rep(names(region),sapply(region,function(i) length(i$s))) | |
308 empty=rep(".",length(out$chr)) | |
309 write.table(data.frame(out$chr,empty,empty,out$s,out$e,empty,empty,empty,empty),quote=FALSE,sep="\t",file=outfname,col.names=FALSE,row.names=FALSE) | |
310 } | |
311 | |
312 number.of.regions = function(region)sum(sapply(region,nrow)) | |
313 size.of.regions = function(region) sum(sapply(merge.regions(region),function(reg) sum(reg[,2]-reg[,1]))) |