Mercurial > repos > xuebing > sharplabtool
view cdf.r @ 11:b7f1d9f8f3bc
Uploaded
author | xuebing |
---|---|
date | Sat, 10 Mar 2012 07:59:27 -0500 |
parents | |
children |
line wrap: on
line source
# bin and average binaverage = function(x,nbin,rankORvalue){ # use x[,1] to bin x[,2] binned = list() if (rankORvalue == 'value'){ mi = min(x[,1]) ma = max(x[,1]) bins = seq(mi,ma,length.out=nbin+1) bins[1] = bins[1] - abs(mi)/100 bins[nbin+1] = bins[nbin+1] + abs(ma)/100 for (i in 1:nbin){ binned[[i]] = x[x[,1] >= bins[i] & x[,1] < bins[i+1],2] } bins[1] = bins[1] + abs(mi)/100 bins[nbin+1] = bins[nbin+1] - abs(ma)/100 } else { x = x[order(x[,1]),] step = round(nrow(x)/nbin) bins = x[1,1] for (i in 1:(nbin-1)){ binned[[i]] = x[((i-1)*step+1):(i*step),2] bins = c(bins,x[i*step+1,1]) } binned[[nbin]] = x[((nbin-1)*step+1):nrow(x),2] bins[nbin+1] = x[nrow(x),1] } # bin lavel labels = character(0) for (i in 1:nbin){ labels = c(labels,paste(format(bins[i],digits=2,nsmall=2),format(bins[i+1],digits=2,nsmall=2),sep='~')) } list(binned=binned,bins=bins,labels=labels) } #res = binaverage(x,3,'rank') # CDF plot and KS.test mycdf = function(list,title,labels,legendposition,xlabel,legend_title){ L = length(list) # barplot for mean and std avg = numeric(L) err = numeric(L) for (i in 1:L){ avg[i] = mean(list[[i]]) err[i] = sd(list[[i]]) } #print(list[[1]]) #print(list[[2]]) #print(avg) #print(err) par(cex=1.5,mar=c(8,6,6,4)) xticks <- barplot(avg,names.arg=labels,las=2,ylab=xlabel,main='mean and standard deviation',xlab=legend_title,ylim=c(0,max(avg+err))) arrows(xticks,avg+err, xticks, avg-err, angle=90, code=3, length=0.0) if (L>1){ # ks test cat('\nKS test:\n') cat('sample1\tsample2\tp-value\n') cat('-------------------------------------------------\n') for (i in 1:(L-1)){ for (j in (i+1):L){ cat(labels[i],'\t',labels[j],'\t') ks = ks.test(list[[i]],list[[j]]) pv = max(2.2e-16,ks$p.value) pv = format(pv,digits=3,nsmall=2) cat(pv,'\n') } } cat('-------------------------------------------------\n') } if (L == 2){ title = paste(title,'\np=',pv,sep='') } # cdf plot listx = list() listy = list() mi = 1e10 ma = 1e-10 for (i in 1:L){ mi = min(mi,list[[i]]) ma = max(ma,list[[i]]) } for (i in 1:L){ listx[[i]] = c(mi,listx[i],ma) listy[[i]] = c(0,listy[i],1) } for (i in 1:L){ mi = min(mi,list[[i]]) ma = max(ma,list[[i]]) listx[[i]] = sort(list[[i]]) listy[[i]] = c(1:length(list[[i]]))/length(list[[i]]) } #par(xlog=(xlog=='xlog')) plot(listx[[1]],listy[[1]],type='l',lty=1,lwd=2,col=2,main=title,xlab=xlabel,ylab='cumulative frequency') for (i in 2:L){ lines(listx[[i]],listy[[i]],type='l',lty=i,lwd=2,col=i+1) } # legend for (i in 1:L){ labels[i] = paste(labels[i],', n=',length(list[[i]]),sep='') } legend(legendposition,legend=labels,col=2:(L+1), lty=1:L,lwd=2, bty='n',title=legend_title) }