annotate mytools/cdf.r @ 7:f0dc65e7f6c0

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:59:07 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
# Bin the values in x[,2] using the keys in x[,1].
#
# Arguments:
#   x            two-column matrix or data frame; column 1 is the binning
#                key, column 2 holds the values collected into each bin.
#   nbin         number of bins (a single number >= 1).
#   rankORvalue  'value' builds nbin equal-width bins spanning the range of
#                x[,1]; any other value (e.g. 'rank') sorts the rows by
#                x[,1] and cuts them into nbin consecutive groups of
#                round(nrow(x)/nbin) rows, the last group taking the rest.
#
# Returns a list with
#   binned  list of length nbin; element i holds the x[,2] values of bin i
#   bins    the nbin+1 bin edges
#   labels  one 'lower~upper' label per bin
binaverage <- function(x,nbin,rankORvalue){
  if (length(nbin) != 1 || is.na(nbin) || nbin < 1){
    stop('nbin must be a single number >= 1')
  }
  binned <- vector('list',nbin)
  if (rankORvalue == 'value'){
    key <- x[,1]
    bins <- seq(min(key),max(key),length.out=nbin+1)
    for (i in seq_len(nbin)){
      inbin <- key >= bins[i]
      # Every bin is half-open [lower, upper) except the last one, which is
      # closed so the maximum key is always kept (widening the edge by
      # abs(max)/100 loses the maximum when it is exactly 0).
      if (i < nbin){
        inbin <- inbin & key < bins[i+1]
      }
      binned[[i]] <- x[inbin,2]
    }
  } else {
    # drop=FALSE keeps a one-row matrix two-dimensional so nrow() still works
    x <- x[order(x[,1]),,drop=FALSE]
    n <- nrow(x)
    step <- round(n/nbin)
    # The last bin starts at row (nbin-1)*step+1; refuse to index past the data.
    if (step < 1 || (nbin-1)*step >= n){
      stop('cannot split ',n,' rows into ',nbin,' rank bins')
    }
    starts <- (seq_len(nbin)-1)*step+1
    ends <- c(starts[-1]-1,n)
    for (i in seq_len(nbin)){
      binned[[i]] <- x[starts[i]:ends[i],2]
    }
    # edges: first key of every bin, then the largest key
    bins <- c(x[starts,1],x[n,1])
  }
  # bin labels; each edge is formatted on its own so the width/precision of
  # one edge does not influence the others
  edge <- vapply(seq_along(bins),
                 function(k) format(bins[k],digits=2,nsmall=2),
                 character(1))
  labels <- paste(edge[-length(edge)],edge[-1],sep='~')
  list(binned=binned,bins=bins,labels=labels)
}
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
34 #res = binaverage(x,3,'rank')
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
35
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
# Bar plot of mean and standard deviation, pairwise KS tests, and a plot of
# the empirical CDF of every sample.
#
# Arguments:
#   list            list of numeric vectors, one per sample (at least one)
#   title           main title of the CDF plot; with exactly two samples the
#                   KS p-value is appended on a second line
#   labels          one label per sample (bar names, KS table, legend)
#   legendposition  position argument handed to legend()
#   xlabel          x-axis label of the CDF plot and y-axis label of the
#                   bar plot
#   legend_title    legend title and x-axis label of the bar plot
#
# Side effects: draws two plots on the current graphics device, prints the
# KS table with cat(), and sets par(cex, mar) without restoring them.
# Returns the value of the final legend() call.
mycdf <- function(list,title,labels,legendposition,xlabel,legend_title){
  L <- length(list)
  if (L < 1){
    stop('list must contain at least one sample')
  }

  # barplot for mean and std
  avg <- vapply(list,mean,numeric(1),USE.NAMES=FALSE)
  err <- vapply(list,sd,numeric(1),USE.NAMES=FALSE)
  par(cex=1.5,mar=c(8,6,6,4))
  # Finite range covering 0, the bars and both ends of the error bars:
  # sd() is NA for a one-element sample, and means may be negative.
  barlim <- range(0,avg,avg+err,avg-err,finite=TRUE)
  xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=barlim)
  arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0)

  pv <- NULL
  if (L>1){
    # ks test for every pair of samples
    cat('\nKS test:\n')
    cat('sample1\tsample2\tp-value\n')
    cat('-------------------------------------------------\n')
    for (i in seq_len(L-1)){
      for (j in (i+1):L){
        cat(labels[i],'\t',labels[j],'\t')
        ks <- ks.test(list[[i]],list[[j]])
        # floor the reported p-value at 2.2e-16
        pv <- format(max(2.2e-16,ks$p.value),digits=3,nsmall=2)
        cat(pv,'\n')
      }
    }
    cat('-------------------------------------------------\n')
  }
  if (L == 2){
    # with two samples there is exactly one pair, so pv is its p-value
    title <- paste(title,'\np=',pv,sep='')
  }

  # cdf plot: sorted values against cumulative fraction
  listx <- lapply(list,sort)
  listy <- lapply(list,function(v) seq_along(v)/length(v))
  # Axis limits come from all samples so no curve is clipped to the range
  # of the first one.
  xlim <- range(unlist(list,use.names=FALSE),na.rm=TRUE)
  #par(xlog=(xlog=='xlog'))
  plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency',xlim=xlim,ylim=c(0,1))
  # seq_len(L)[-1] is empty when L == 1 (2:L would run 2,1)
  for (i in seq_len(L)[-1]){
    lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1)
  }

  # legend: append the sample size to every label
  idx <- seq_len(L)
  sizes <- vapply(list,length,integer(1),USE.NAMES=FALSE)
  labels[idx] <- paste(labels[idx],', n=',sizes,sep='')
  legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title)
}