diff mytools/cdf.r @ 0:39217fa39ff2

Uploaded
author xuebing
date Tue, 13 Mar 2012 23:34:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mytools/cdf.r	Tue Mar 13 23:34:52 2012 -0400
@@ -0,0 +1,103 @@
+# bin and average
+binaverage = function(x,nbin,rankORvalue){
+# use x[,1] to bin x[,2]
+	binned = list()
+	if (rankORvalue == 'value'){
+		mi = min(x[,1])
+		ma = max(x[,1])
+		bins = seq(mi,ma,length.out=nbin+1)
+		bins[1] = bins[1] - abs(mi)/100
+		bins[nbin+1] = bins[nbin+1] + abs(ma)/100
+		for (i in 1:nbin){
+			binned[[i]] = x[x[,1] >= bins[i] & x[,1] < bins[i+1],2]
+		}
+		bins[1] = bins[1] + abs(mi)/100
+		bins[nbin+1] = bins[nbin+1] - abs(ma)/100
+	} else {
+		x = x[order(x[,1]),]
+		step = round(nrow(x)/nbin)
+		bins = x[1,1]
+		for (i in 1:(nbin-1)){
+			binned[[i]] = x[((i-1)*step+1):(i*step),2]
+			bins = c(bins,x[i*step+1,1])
+		}
+		binned[[nbin]] = x[((nbin-1)*step+1):nrow(x),2]
+		bins[nbin+1] = x[nrow(x),1]
+	}
+# bin lavel
+	labels = character(0)
+	for (i in 1:nbin){
+		labels = c(labels,paste(format(bins[i],digits=2,nsmall=2),format(bins[i+1],digits=2,nsmall=2),sep='~'))	
+	}
+    list(binned=binned,bins=bins,labels=labels)
+}
+#res = binaverage(x,3,'rank')
+
+# CDF plot and KS.test
+mycdf = function(list,title,labels,legendposition,xlabel,legend_title){
+    L = length(list)
+    
+    # barplot for mean and std
+    avg = numeric(L)
+    err = numeric(L)
+    for (i in 1:L){
+        avg[i] = mean(list[[i]])
+        err[i] = sd(list[[i]])
+    }
+    #print(list[[1]])
+    #print(list[[2]])
+    #print(avg)
+    #print(err)
+	par(cex=1.5,mar=c(8,6,6,4))
+    xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=c(0,max(avg+err)))
+    arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0)
+    
+	if (L>1){
+    # ks test
+	cat('\nKS test:\n')
+	cat('sample1\tsample2\tp-value\n')
+	cat('-------------------------------------------------\n')
+    for (i in 1:(L-1)){
+        for (j in (i+1):L){
+        cat(labels[i],'\t',labels[j],'\t')
+        ks = ks.test(list[[i]],list[[j]])
+        pv = max(2.2e-16,ks$p.value)
+	    pv = format(pv,digits=3,nsmall=2)
+		cat(pv,'\n')
+        }
+    }
+	cat('-------------------------------------------------\n')
+	}
+	if (L == 2){
+		title = paste(title,'\np=',pv,sep='')
+	}
+    # cdf plot
+    listx = list()
+    listy = list()
+    mi = 1e10
+    ma = 1e-10
+    for (i in 1:L){
+        mi = min(mi,list[[i]])
+        ma = max(ma,list[[i]])
+    }
+    for (i in 1:L){
+        listx[[i]] = c(mi,listx[i],ma)
+        listy[[i]] = c(0,listy[i],1)
+    }
+    for (i in 1:L){
+        mi = min(mi,list[[i]])
+        ma = max(ma,list[[i]])
+        listx[[i]] = sort(list[[i]])
+        listy[[i]] = c(1:length(list[[i]]))/length(list[[i]])          
+    }
+#par(xlog=(xlog=='xlog'))
+    plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency')
+    for (i in 2:L){
+        lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1)
+    }
+    # legend
+    for (i in 1:L){
+        labels[i] = paste(labels[i],', n=',length(list[[i]]),sep='')
+    }
+    legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title)
+}