comparison mytools/cdf.r @ 0:39217fa39ff2

Uploaded
author xuebing
date Tue, 13 Mar 2012 23:34:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:39217fa39ff2
1 # bin and average
2 binaverage = function(x,nbin,rankORvalue){
3 # use x[,1] to bin x[,2]
4 binned = list()
5 if (rankORvalue == 'value'){
6 mi = min(x[,1])
7 ma = max(x[,1])
8 bins = seq(mi,ma,length.out=nbin+1)
9 bins[1] = bins[1] - abs(mi)/100
10 bins[nbin+1] = bins[nbin+1] + abs(ma)/100
11 for (i in 1:nbin){
12 binned[[i]] = x[x[,1] >= bins[i] & x[,1] < bins[i+1],2]
13 }
14 bins[1] = bins[1] + abs(mi)/100
15 bins[nbin+1] = bins[nbin+1] - abs(ma)/100
16 } else {
17 x = x[order(x[,1]),]
18 step = round(nrow(x)/nbin)
19 bins = x[1,1]
20 for (i in 1:(nbin-1)){
21 binned[[i]] = x[((i-1)*step+1):(i*step),2]
22 bins = c(bins,x[i*step+1,1])
23 }
24 binned[[nbin]] = x[((nbin-1)*step+1):nrow(x),2]
25 bins[nbin+1] = x[nrow(x),1]
26 }
27 # bin lavel
28 labels = character(0)
29 for (i in 1:nbin){
30 labels = c(labels,paste(format(bins[i],digits=2,nsmall=2),format(bins[i+1],digits=2,nsmall=2),sep='~'))
31 }
32 list(binned=binned,bins=bins,labels=labels)
33 }
34 #res = binaverage(x,3,'rank')
35
36 # CDF plot and KS.test
37 mycdf = function(list,title,labels,legendposition,xlabel,legend_title){
38 L = length(list)
39
40 # barplot for mean and std
41 avg = numeric(L)
42 err = numeric(L)
43 for (i in 1:L){
44 avg[i] = mean(list[[i]])
45 err[i] = sd(list[[i]])
46 }
47 #print(list[[1]])
48 #print(list[[2]])
49 #print(avg)
50 #print(err)
51 par(cex=1.5,mar=c(8,6,6,4))
52 xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=c(0,max(avg+err)))
53 arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0)
54
55 if (L>1){
56 # ks test
57 cat('\nKS test:\n')
58 cat('sample1\tsample2\tp-value\n')
59 cat('-------------------------------------------------\n')
60 for (i in 1:(L-1)){
61 for (j in (i+1):L){
62 cat(labels[i],'\t',labels[j],'\t')
63 ks = ks.test(list[[i]],list[[j]])
64 pv = max(2.2e-16,ks$p.value)
65 pv = format(pv,digits=3,nsmall=2)
66 cat(pv,'\n')
67 }
68 }
69 cat('-------------------------------------------------\n')
70 }
71 if (L == 2){
72 title = paste(title,'\np=',pv,sep='')
73 }
74 # cdf plot
75 listx = list()
76 listy = list()
77 mi = 1e10
78 ma = 1e-10
79 for (i in 1:L){
80 mi = min(mi,list[[i]])
81 ma = max(ma,list[[i]])
82 }
83 for (i in 1:L){
84 listx[[i]] = c(mi,listx[i],ma)
85 listy[[i]] = c(0,listy[i],1)
86 }
87 for (i in 1:L){
88 mi = min(mi,list[[i]])
89 ma = max(ma,list[[i]])
90 listx[[i]] = sort(list[[i]])
91 listy[[i]] = c(1:length(list[[i]]))/length(list[[i]])
92 }
93 #par(xlog=(xlog=='xlog'))
94 plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency')
95 for (i in 2:L){
96 lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1)
97 }
98 # legend
99 for (i in 1:L){
100 labels[i] = paste(labels[i],', n=',length(list[[i]]),sep='')
101 }
102 legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title)
103 }