comparison predict_activity.R @ 0:20df9782b07a draft

planemo upload commit a1f4dd8eb560c649391ada1a6bb9505893a35272
author anmoljh
date Thu, 31 May 2018 11:33:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:20df9782b07a
##########
# Command-line interface: the script expects three trailing arguments.
#   arg1  input CSV file with the compounds to score
#   arg2  R data file holding the fitted model
#   arg3  path of the prediction report to write
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable
# that can be reassigned.
args <- commandArgs(trailingOnly = TRUE)
arg1 <- args[1]
arg2 <- args[2]
arg3 <- args[3]
# Earlier versions sourced the implementation from a separate file:
# source("~/galaxy-dist/tools/mpdstoolsV2/tool3/Preold.R")
# pre(arg1, arg2, arg3)
# Fixed seed so the random noise drawn inside pre() is reproducible.
set.seed(1)
# Predict the activity of the compounds in a descriptor table with a stored
# model and write a tab-separated report.
#
# Arguments:
#   args1  Path to a CSV file with a header row. The first column supplies the
#          row names (reported as COMPOUND); the remaining columns are
#          descriptors and must include every column used to build the model.
#   args2  Path to an R data file that provides the fitted model as `Fit` and,
#          optionally, a preprocessing object as `ppInfo`.
#   args3  Path of the report to write: one row per compound holding the
#          compound name, the class probabilities and the predicted activity.
#
# Returns NULL invisibly; the function is called for the side effect of
# writing `args3`. Stops with an error when `Fit` is absent from the model
# file or when a descriptor column required by the model is missing.
pre <- function(args1, args2, args3) {
  # Coerce one column to numeric (via character, so factor labels rather than
  # factor codes are parsed) and replace NA/NaN/Inf entries with the median of
  # the finite entries only, so an infinite value can never become the fill.
  impute_column <- function(x) {
    x <- as.numeric(as.character(x))
    unusable <- !is.finite(x)
    x[unusable] <- median(x[!unusable])
    x
  }

  # Assemble the cleaned columns into a numeric matrix. Building the matrix
  # explicitly keeps two dimensions even for a single row, where apply()
  # would collapse the result to a plain vector.
  to_numeric_matrix <- function(df) {
    matrix(
      unlist(lapply(df, impute_column), use.names = FALSE),
      nrow = nrow(df),
      ncol = ncol(df),
      dimnames = list(NULL, colnames(df))
    )
  }

  # Names of the descriptor columns the model expects. For the two SVM
  # methods the names come from the stored training data, for every other
  # method from `finalModel$xNames`.
  required_columns <- function(model) {
    if (isTRUE(model$method %in% c("svmRadial", "svmLinear"))) {
      training_cols <- colnames(model$trainingData)
      if (".outcome" %in% training_cols) {
        # NOTE(review): caret is understood to store the response as
        # `.outcome` (and case weights as `.weights`) -- confirm against the
        # caret version used to train the model. Excluding them by name does
        # not depend on where they sit in the data frame.
        setdiff(training_cols, c(".outcome", ".weights"))
      } else {
        # Fallback: treat the last column as the outcome.
        training_cols[-length(training_cols)]
      }
    } else {
      model$finalModel$xNames
    }
  }

  nTrain <- read.csv(args1, row.names = 1, header = TRUE)

  # Load into a private environment so that only objects actually stored in
  # the model file are considered; a stray `ppInfo` elsewhere on the search
  # path cannot be picked up by accident.
  model_env <- new.env()
  load(args2, envir = model_env)
  if (!exists("Fit", envir = model_env, inherits = FALSE)) {
    stop("model file does not contain an object named 'Fit'", call. = FALSE)
  }
  modelFit <- get("Fit", envir = model_env, inherits = FALSE)
  has_preprocessing <- exists("ppInfo", envir = model_env, inherits = FALSE)

  # Columns without a single observed value are filled with tiny random noise.
  # The three draws are recycled over all affected cells, which also works
  # when the table has fewer than three rows.
  newdata <- nTrain
  empty_cols <- which(
    vapply(newdata, function(column) all(is.na(column)), logical(1))
  )
  if (length(empty_cols) > 0) {
    n_rows <- nrow(newdata)
    noise <- rep_len(runif(3, 0, 0.00001), n_rows * length(empty_cols))
    for (k in seq_along(empty_cols)) {
      cells <- seq_len(n_rows) + (k - 1L) * n_rows
      newdata[[empty_cols[[k]]]] <- noise[cells]
    }
  }

  # Loaded here, as before, ahead of the predict() calls on the model objects.
  library(caret)

  reqcol <- required_columns(modelFit)
  if (length(reqcol) == 0) {
    stop(
      "cannot determine which descriptor columns the model requires",
      call. = FALSE
    )
  }
  absent <- setdiff(reqcol, colnames(newdata))
  if (length(absent) > 0) {
    stop(
      "input file lacks descriptor column(s) required by the model: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # drop = FALSE keeps a data frame even when the model uses one descriptor.
  predictors <- to_numeric_matrix(newdata[, reqcol, drop = FALSE])
  if (has_preprocessing) {
    # Apply the preprocessing stored alongside the model.
    ppInfo <- get("ppInfo", envir = model_env, inherits = FALSE)
    predictors <- predict(ppInfo, predictors)
  }

  testpredict <- predict(modelFit, predictors)
  probpredict <- as.data.frame(predict(modelFit, predictors, type = "prob"))

  # Label the probability columns with the class levels of the prediction.
  class_labels <- levels(testpredict)
  if (length(class_labels) == ncol(probpredict)) {
    colnames(probpredict) <- class_labels
  }

  compound <- data.frame(COMPOUND = rownames(nTrain))
  activity <- as.data.frame(testpredict)
  colnames(activity) <- "PREDICTED ACTIVITY"

  report <- format(cbind(compound, probpredict, activity), justify = "centre")
  write.table(report, file = args3, row.names = FALSE, sep = "\t")
  invisible(NULL)
}
# Script entry point: run the prediction with the three command-line arguments.
pre(arg1, arg2, arg3)