Mercurial > repos > xuebing > sharplabtool
comparison tools/stats/plot_from_lda.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 <tool id="plot_for_lda_output1" name="Draw ROC plot" version="1.0.1"> | |
2 <description>on "Perform LDA" output</description> | |
3 <command interpreter="sh">r_wrapper.sh $script_file</command> | |
4 | |
5 <inputs> | |
6 <param format="txt" name="input" type="data" label="Source file"> </param> | |
7 <param name="my_title" size="30" type="text" value="My Figure" label="Title of your plot" help="See syntax below"> </param> | |
8 <param name="X_axis" size="30" type="text" value="Text for X axis" label="Legend of X axis in your plot" help="See syntax below"> </param> | |
9 <param name="Y_axis" size="30" type="text" value="Text for Y axis" label="Legend of Y axis in your plot" help="See syntax below"> </param> | |
10 </inputs> | |
11 <outputs> | |
12 <data format="pdf" name="pdf_output" /> | |
13 </outputs> | |
14 | |
15 <tests> | |
16 <test> | |
17 <param name="input" value="lda_analy_output.txt"/> | |
18 <param name="my_title" value="Test Plot1"/> | |
19 <param name="X_axis" value="Test Plot2"/> | |
20 <param name="Y_axis" value="Test Plot3"/> | |
21 <output name="pdf_output" file="plot_for_lda_output.pdf"/> | |
22 </test> | |
23 </tests> | |
24 | |
25 <configfiles> | |
26 <configfile name="script_file"> | |
27 | |
28 rm(list = objects() ) | |
29 | |
30 ############# FORMAT X DATA ######################### | |
31 format<-function(data) { | |
32 ind=NULL | |
33 for(i in 1 : ncol(data)){ | |
34 if (is.na(data[nrow(data),i])) { | |
35 ind<-c(ind,i) | |
36 } | |
37 } | |
38 #print(is.null(ind)) | |
39 if (!is.null(ind)) { | |
40 data<-data[,-c(ind)] | |
41 } | |
42 | |
43 data | |
44 } | |
45 | |
46 ########GET RESPONSES ############################### | |
47 get_resp<- function(data) { | |
48 resp1<-as.vector(data[,ncol(data)]) | |
49 resp=numeric(length(resp1)) | |
50 for (i in 1:length(resp1)) { | |
51 if (resp1[i]=="Control ") { | |
52 resp[i] = 0 | |
53 } | |
54 if (resp1[i]=="XLMR ") { | |
55 resp[i] = 1 | |
56 } | |
57 } | |
58 return(resp) | |
59 } | |
60 | |
61 ######## CHARS TO NUMBERS ########################### | |
62 f_to_numbers<- function(F) { | |
63 ind<-NULL | |
64 G<-matrix(0,nrow(F), ncol(F)) | |
65 for (i in 1:nrow(F)) { | |
66 for (j in 1:ncol(F)) { | |
67 G[i,j]<-as.integer(F[i,j]) | |
68 } | |
69 } | |
70 return(G) | |
71 } | |
72 | |
73 ###################NORMALIZING######################### | |
74 norm <- function(M, a=NULL, b=NULL) { | |
75 C<-NULL | |
76 ind<-NULL | |
77 | |
78 for (i in 1: ncol(M)) { | |
79 if (sd(M[,i])!=0) { | |
80 M[,i]<-(M[,i]-mean(M[,i]))/sd(M[,i]) | |
81 } | |
82 # else {print(mean(M[,i]))} | |
83 } | |
84 return(M) | |
85 } | |
86 | |
87 ##### LDA DIRECTIONS ################################# | |
88 lda_dec <- function(data, k){ | |
89 priors=numeric(k) | |
90 grandmean<-numeric(ncol(data)-1) | |
91 means=matrix(0,k,ncol(data)-1) | |
92 B = matrix(0, ncol(data)-1, ncol(data)-1) | |
93 N=nrow(data) | |
94 for (i in 1:k){ | |
95 priors[i]=sum(data[,1]==i)/N | |
96 grp=subset(data,data\$group==i) | |
97 means[i,]=mean(grp[,2:ncol(data)]) | |
98 #print(means[i,]) | |
99 #print(priors[i]) | |
100 #print(priors[i]*means[i,]) | |
101 grandmean = priors[i]*means[i,] + grandmean | |
102 } | |
103 | |
104 for (i in 1:k) { | |
105 B= B + priors[i]*((means[i,]-grandmean)%*%t(means[i,]-grandmean)) | |
106 } | |
107 | |
108 W = var(data[,2:ncol(data)]) | |
109 svdW = svd(W) | |
110 inv_sqrtW =solve(svdW\$v %*% diag(sqrt(svdW\$d)) %*% t(svdW\$v)) | |
111 B_star= t(inv_sqrtW)%*%B%*%inv_sqrtW | |
112 B_star_decomp = svd(B_star) | |
113 directions = inv_sqrtW%*%B_star_decomp\$v | |
114 return( list(directions, B_star_decomp\$d) ) | |
115 } | |
116 | |
117 ################ NAIVE BAYES FOR 1D SIR OR LDA ############## | |
118 naive_bayes_classifier <- function(resp, tr_data, test_data, k=2, tau) { | |
119 tr_data=data.frame(resp=resp, dir=tr_data) | |
120 means=numeric(k) | |
121 #print(k) | |
122 cl=numeric(k) | |
123 predclass=numeric(length(test_data)) | |
124 for (i in 1:k) { | |
125 grp = subset(tr_data, resp==i) | |
126 means[i] = mean(grp\$dir) | |
127 #print(i, means[i]) | |
128 } | |
129 cutoff = tau*means[1]+(1-tau)*means[2] | |
130 #print(tau) | |
131 #print(means) | |
132 #print(cutoff) | |
133 if (cutoff>means[1]) { | |
134 cl[1]=1 | |
135 cl[2]=2 | |
136 } | |
137 else { | |
138 cl[1]=2 | |
139 cl[2]=1 | |
140 } | |
141 | |
142 for (i in 1:length(test_data)) { | |
143 | |
144 if (test_data[i] <= cutoff) { | |
145 predclass[i] = cl[1] | |
146 } | |
147 else { | |
148 predclass[i] = cl[2] | |
149 } | |
150 } | |
151 #print(means) | |
152 #print(mean(means)) | |
153 #X11() | |
154 #plot(test_data,pch=predclass, col=resp) | |
155 predclass | |
156 } | |
157 | |
158 ################# EXTENDED ERROR RATES ################# | |
159 ext_error_rate <- function(predclass, actualclass,msg=c("you forgot the message"), pr=1) { | |
160 er=sum(predclass != actualclass)/length(predclass) | |
161 | |
162 matr<-data.frame(predclass=predclass,actualclass=actualclass) | |
163 escapes = subset(matr, actualclass==1) | |
164 subjects = subset(matr, actualclass==2) | |
165 er_esc=sum(escapes\$predclass != escapes\$actualclass)/length(escapes\$predclass) | |
166 er_subj=sum(subjects\$predclass != subjects\$actualclass)/length(subjects\$predclass) | |
167 | |
168 if (pr==1) { | |
169 # print(paste(c(msg, 'overall : ', (1-er)*100, "%."),collapse=" ")) | |
170 # print(paste(c(msg, 'within escapes : ', (1-er_esc)*100, "%."),collapse=" ")) | |
171 # print(paste(c(msg, 'within subjects: ', (1-er_subj)*100, "%."),collapse=" ")) | |
172 } | |
173 return(c((1-er)*100, (1-er_esc)*100, (1-er_subj)*100)) | |
174 } | |
175 | |
176 ## Main Function ## | |
177 | |
178 files_alias<-c("${my_title}") | |
179 tau=seq(0,1,by=0.005) | |
180 nfiles=1 | |
181 f = c("${input}") | |
182 | |
183 rez_ext<-list() | |
184 for (i in 1:nfiles) { | |
185 rez_ext[[i]]<-dget(paste(f[i], sep="",collapse="")) | |
186 } | |
187 | |
188 tau<-tau[1:(length(tau)-1)] | |
189 for (i in 1:nfiles) { | |
190 rez_ext[[i]]<-rez_ext[[i]][,1:(length(tau)-1)] | |
191 } | |
192 | |
193 ######## OPTIMAIL TAU ########################### | |
194 | |
195 #rez_ext | |
196 | |
197 rate<-c("Optimal tau","Tr total", "Tr Y", "Tr X") | |
198 | |
199 m_tr<-numeric(nfiles) | |
200 m_xp22<-numeric(nfiles) | |
201 m_x<-numeric(nfiles) | |
202 | |
203 for (i in 1:nfiles) { | |
204 r<-rez_ext[[i]] | |
205 #tr | |
206 # rate<-rbind(rate, c(files_alias[i]," "," "," ") ) | |
207 mm<-which((r[3,])==max(r[3,])) | |
208 | |
209 m_tr[i]<-mm[1] | |
210 rate<-rbind(rate,c(tau[m_tr[i]],r[,m_tr[i]])) | |
211 } | |
212 print(rate) | |
213 | |
214 pdf(file= paste("${pdf_output}")) | |
215 | |
216 plot(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlim=c(0,100), ylim=c(0,100), xlab="${X_axis} [1-FP(False Positive)]", ylab="${Y_axis} [1-FP(False Positive)]", type="l", lty=1, col="blue", xaxt='n', yaxt='n') | |
217 for (i in 1:nfiles) { | |
218 lines(rez_ext[[i]][2,]~rez_ext[[i]][3,], xlab="${X_axis} [1-FP(False Positive)]", ylab="${Y_axis} [1-FP(False Positive)]", type="l", lty=1, col=i) | |
219 # pt=c(r,) | |
220 points(x=rez_ext[[i]][3,m_tr[i]],y=rez_ext[[i]][2,m_tr[i]], pch=16, col=i) | |
221 } | |
222 | |
223 | |
224 title(main="${my_title}", adj=0, cex.main=1.1) | |
225 axis(2, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%')) | |
226 axis(1, at=c(0,20,40,60,80,100), labels=c('0','20','40','60','80','100%')) | |
227 | |
228 #leg=c("10 kb","50 kb","100 kb") | |
229 #legend("bottomleft",legend=leg , col=c(1,2,3), lty=c(1,1,1)) | |
230 | |
231 #dev.off() | |
232 | |
233 </configfile> | |
234 </configfiles> | |
235 | |
236 | |
237 <help> | |
238 .. class:: infomark | |
239 | |
240 **What it does** | |
241 | |
242 This tool generates a Receiver Operating Characteristic (ROC) plot that shows LDA classification success rates for different values of the tuning parameter tau as Figure 3 in Carrel et al., 2006 (PMID: 17009873). | |
243 | |
244 *Carrel L, Park C, Tyekucheva S, Dunn J, Chiaromonte F, et al. (2006) Genomic Environment Predicts Expression Patterns on the Human Inactive X Chromosome. PLoS Genet 2(9): e151. doi:10.1371/journal.pgen.0020151* | |
245 | |
246 ----- | |
247 | |
248 .. class:: warningmark | |
249 | |
250 **Note** | |
251 | |
252 - Output from "Perform LDA" tool is used as input file for this tool. | |
253 | |
254 </help> | |
255 | |
256 | |
257 | |
258 </tool> |