Mercurial > repos > anmoljh > activity_predict
comparison predict_activity.R @ 0:20df9782b07a draft
planemo upload commit a1f4dd8eb560c649391ada1a6bb9505893a35272
author | anmoljh |
---|---|
date | Thu, 31 May 2018 11:33:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:20df9782b07a |
---|---|
1 ########## | |
# Command-line interface (positional, in this order):
#   1. CSV file with the compounds to score (passed to pre() as `args1`)
#   2. RData file holding the trained model  (passed to pre() as `args2`)
#   3. path of the report to write           (passed to pre() as `args3`)
# Only the arguments that follow the script name are wanted, so the flag is
# spelled out as `trailingOnly = TRUE` rather than the reassignable `T`.
args <- commandArgs(trailingOnly = TRUE)
arg1 <- args[1]
arg2 <- args[2]
arg3 <- args[3]

# Fix the RNG state so the small random noise that pre() may add to empty
# descriptor columns is the same on every run.
set.seed(1)
# Coerce one descriptor column to numeric and replace every unusable entry
# (NA, NaN, +/-Inf) with the median of that column.
#
# The value goes through as.character() first so that a factor yields its
# labels rather than its internal integer codes.
clean_descriptor_column <- function(x) {
  x <- as.numeric(as.character(x))
  # is.na() is TRUE for NaN as well, so one test covers NA and NaN.
  x[is.na(x) | is.infinite(x)] <- median(x, na.rm = TRUE)
  x
}

# Replace columns that hold no observed value at all with tiny positive noise
# in [0, 0.00001), so that they are numeric and not exactly constant.
#
# Three random draws are taken and recycled column by column. This reproduces
# the values the previous implicit vector recycling produced, but without the
# "longer object length is not a multiple" warning.
fill_empty_columns <- function(descriptors) {
  is_empty <- vapply(
    descriptors,
    function(column) all(is.na(column)),
    logical(1)
  )
  empty_idx <- unname(which(is_empty))
  if (length(empty_idx) > 0) {
    n_rows <- nrow(descriptors)
    noise <- matrix(
      rep_len(runif(3, 0, 0.00001), n_rows * length(empty_idx)),
      nrow = n_rows,
      ncol = length(empty_idx)
    )
    for (k in seq_along(empty_idx)) {
      descriptors[[empty_idx[k]]] <- noise[, k]
    }
  }
  descriptors
}

# Names of the descriptor columns the fitted model expects.
#
# For svmRadial / svmLinear (and any other model whose final model is an S4
# object, on which `$xNames` cannot be used) the names are taken from the
# stored training data minus its last column.
# NOTE(review): the last column is presumably caret's `.outcome` -- confirm.
# All other models report their predictors in `finalModel$xNames`.
required_columns <- function(fit) {
  uses_training_data <- fit$method %in% c("svmRadial", "svmLinear") ||
    isS4(fit$finalModel)
  if (uses_training_data) {
    head(colnames(fit$trainingData), -1L)
  } else {
    fit$finalModel$xNames
  }
}

# Predict the activity class of new compounds with a previously trained model
# and write a tab-separated report.
#
# args1: path of a CSV file; the first column holds the compound names (used
#        as row names), the remaining columns are descriptors. It must contain
#        every column the model was built with.
# args2: path of an RData file containing the fitted model as `Fit` and,
#        optionally, a pre-processing object `ppInfo` that is applied to the
#        descriptors before prediction.
# args3: path of the report. One row per compound: COMPOUND, one probability
#        column per class, PREDICTED ACTIVITY.
#
# Called for its side effect (the report file); returns NULL invisibly.
# Stops with an error when the input is empty, the model file has no `Fit`,
# or required descriptor columns are missing.
pre <- function(args1, args2, args3) {
  nTrain <- read.csv(args1, row.names = 1, header = TRUE)
  if (nrow(nTrain) == 0) {
    stop("Input file contains no compounds: ", args1, call. = FALSE)
  }

  # Load into a private environment: the file can then neither overwrite
  # local variables nor be confused with a `ppInfo` defined somewhere else.
  model_env <- new.env(parent = emptyenv())
  load(args2, envir = model_env)
  if (!exists("Fit", envir = model_env, inherits = FALSE)) {
    stop("Model file does not contain an object named 'Fit': ", args2,
         call. = FALSE)
  }
  modelFit <- get("Fit", envir = model_env, inherits = FALSE)
  has_pp_info <- exists("ppInfo", envir = model_env, inherits = FALSE)

  # Attached here so the predict() calls below can dispatch on caret objects.
  library(caret)

  descriptors <- fill_empty_columns(nTrain)

  reqcol <- required_columns(modelFit)
  if (length(reqcol) == 0) {
    stop("Could not determine the descriptor columns required by the model",
         call. = FALSE)
  }
  missing_cols <- setdiff(reqcol, colnames(descriptors))
  if (length(missing_cols) > 0) {
    stop("Input file lacks descriptor column(s) required by the model: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # drop = FALSE and an explicitly allocated result keep a proper matrix even
  # for a single compound or a single descriptor.
  raw <- as.matrix(descriptors[, reqcol, drop = FALSE])
  cleaned <- matrix(
    NA_real_,
    nrow = nrow(raw),
    ncol = ncol(raw),
    dimnames = list(NULL, colnames(raw))
  )
  for (j in seq_len(ncol(raw))) {
    cleaned[, j] <- clean_descriptor_column(raw[, j])
  }

  model_input <- if (has_pp_info) {
    predict(get("ppInfo", envir = model_env, inherits = FALSE), cleaned)
  } else {
    cleaned
  }

  predicted <- predict(modelFit, model_input)
  class_prob <- predict(modelFit, model_input, type = "prob")
  # Label every probability column with its class (not only the first two).
  colnames(class_prob) <- levels(predicted)

  compound <- as.data.frame(rownames(nTrain))
  colnames(compound) <- "COMPOUND"
  activity <- as.data.frame(predicted)
  colnames(activity) <- "PREDICTED ACTIVITY"

  report <- format(
    cbind(compound, as.data.frame(class_prob), activity),
    justify = "centre"
  )
  write.table(report, file = args3, row.names = FALSE, sep = "\t")
  invisible(NULL)
}
# Script entry point: run the prediction with the three command-line values.
pre(args1 = arg1, args2 = arg2, args3 = arg3)