annotate aggregation.R @ 71:37d3d073b51d draft

Uploaded
author nicolas
date Fri, 28 Oct 2016 08:44:42 -0400
parents 3da64b89dc00
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
68
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
2 #
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 25/10/16
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 25/10/16
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
6 #
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
7 ########################################################
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
8
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
9 suppressWarnings(suppressMessages(library(GA)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
10 library("miscTools")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
11 library(rpart)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
12 suppressWarnings(suppressMessages(library(randomForest)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
13 library(e1071)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
14 suppressWarnings(suppressMessages(library(glmnet)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
15 options(warn=-1)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
16 ############################ helper functions #######################
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
17
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
18 ##### Genetic algorithm
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
19
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
20 # compute r2 by computing the classic formula
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
21 # compare the sum of square difference from target to prediciton
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
22 # to the sum of square difference from target to the mean of the target
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
23 r2 <- function(target, prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
24 sst <- sum((target-mean(target))^2)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
25 ssr <- sum((target-prediction)^2)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
26 return(1-ssr/sst)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
27 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
28
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
29 optimizeOneIndividual <- function(values, trueValue) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
30 # change the value into a function
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
31 f <- function(w) {sum(values * w/sum(w))}
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
32 fitness <- function(x) {1/abs(trueValue-f(x))}
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
33 resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)),
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
34 maxiter = 1000, monitor = NULL, keepBest = T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
35 resp@solution <- resp@solution/sum(resp@solution)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
36 return(resp)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
37 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
38
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
39 optimizeWeight <- function(values, trueValue, n=1000) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
40 fitnessAll <- function(w) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
41 predicted <- apply(values, 1, weightedPrediction.vec, w)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
42 return(mean(r2(trueValue, predicted)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
43 #return(mean(1/abs(trueValue-predicted)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
44 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
45 resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)),
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
46 maxiter = n, monitor = NULL, keepBest = T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
47 resp@solution <- resp@solution/sum(resp@solution)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
48 return(resp)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
49 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
50
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
51 weightedPrediction <- function(classifiers, w) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
52 if(length(w) > ncol(classifiers)) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
53 warning("more weights than classifiers, extra weigths are ignored")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
54 w <- w[1:ncol(classifiers)]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
55 } else if(length(w) < ncol(classifiers)) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
56 warning("less weights than classifiers, extra classifiers are ignored")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
57 classifiers <- classifiers[,1:length(w)]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
58 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
59 prediction <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
60 prediction <- c(prediction, apply(classifiers, 1, weightedPrediction.vec, w))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
61 return(prediction)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
62 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
63
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
64 weightedPrediction.vec <- function(values, w) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
65 return(sum(values * w/sum(w)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
66 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
67
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
68 ##### meta-decision tree
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
69
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
70 tuneTree <- function(data, target) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
71 data <- data.frame(data, target=target)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
72 size <- nrow(data)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
73 xerror <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
74 split <- 1:ceiling(size/5)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
75 leafSize <- 1:ceiling(size/10)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
76 xerror <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
77 cp <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
78 for(i in 1:length(split)) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
79 for(j in 1:length(leafSize)) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
80 op <- list(minsplit=split[i], minbucket=leafSize[j])
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
81 tree <- rpart(target ~., data=data, control=op, method="anova")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
82 xerror[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"xerror"]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
83 cp[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"CP"]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
84 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
85 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
86 index <- which(xerror==min(xerror), arr.ind = T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
87 op <- list(minsplit=split[index[1]], minbucket=leafSize[index[2]], cp=cp[index[1], index[2]])
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
88 return(op)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
89 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
90
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
91 ###### meta-LASSO
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
92 # create fold by picking at random row indexes
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
93 createFolds <- function(nbObs, n) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
94 # pick indexes
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
95 index <- sample(1:n, size=nbObs, replace = T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
96 # populate folds
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
97 folds <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
98 for(i in 1:n) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
99 folds <- c(folds, list(which(index==i)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
100 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
101 return(folds)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
102 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
103
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
104 searchParamLASSO <- function(genotype, phenotype, alpha=seq(0,1,0.1), n=7) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
105 folds <- createFolds(nrow(genotype), n = n)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
106 acc <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
107 indexAlpha <- 1
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
108 for(a in alpha) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
109 curAcc <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
110 for(i in 1:n) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
111 train <- genotype[-folds[[i]],]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
112 test <- genotype[folds[[i]],]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
113 phenoTrain <- phenotype[-folds[[i]]]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
114 phenoTest <- phenotype[folds[[i]]]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
115 cv <- cv.glmnet(x=as.matrix(train), y=phenoTrain, alpha=a)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
116 model <- glmnet(x=as.matrix(train), y=phenoTrain, alpha=a, lambda = cv$lambda.1se)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
117 pred <- predict(model, test, type = "response")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
118 curAcc <- c(curAcc, r2(phenoTest, pred))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
119 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
120 acc <- c(acc, mean(curAcc))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
121 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
122 names(acc) <- alpha
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
123 return(as.numeric(names(acc)[which.max(acc)]))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
124 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
125
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
126 ###### meta-random forest
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
127
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
128 searchParamRF <- function(genotype, phenotype, rangeNtree, mtry=ncol(genotype)) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
129 n <- ceiling(nrow(genotype)/3)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
130 indexTest <- sample(1:nrow(genotype), size=n)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
131 train <- genotype[-indexTest,]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
132 test <- genotype[indexTest,]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
133 phenoTrain <- phenotype[-indexTest]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
134 phenoTest <- phenotype[indexTest]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
135 acc <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
136 indexNtree <- 1
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
137 for(ntree in rangeNtree) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
138 model <- randomForest(x=train, y=phenoTrain, ntree = ntree, mtry = mtry)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
139 pred <- predict(model, test)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
140 acc <- c(acc, r2(phenoTest, pred))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
141 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
142 names(acc) <- rangeNtree
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
143 best <- which.max(acc)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
144 return(as.numeric(names(acc)[best]))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
145 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
146
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
147 ###### meta-SVM
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
148 searchParamSVM <- function(train, target, kernel="radial") {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
149 # tuning parameters then train
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
150 model <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
151 switch(kernel,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
152 sigmoid={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
153 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), kernel="sigmoid");
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
154 g <- tune$best.parameters[[1]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
155 c <- tune$best.parameters[[2]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
156 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
157 linear={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
158 tune <- tune.svm(train, target, cost = 10^(0:2), kernel="linear");
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
159 c <- tune$best.parameters[[1]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
160 model <- svm(x=train, y=target, cost = c, kernel = "linear")},
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
161 polynomial={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
162 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), degree = 0:4, coef0 = 0:3, kernel="polynomial");
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
163 d <- tune$best.parameters[[1]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
164 g <- tune$best.parameters[[2]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
165 coef <- tune$best.parameters[[3]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
166 c <- tune$best.parameters[[4]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
167 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
168 {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
169 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:3), kernel="radial");
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
170 g <- tune$best.parameters[[1]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
171 c <- tune$best.parameters[[2]];
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
172 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
173 )
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
174 return(model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
175 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
176
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
177 #################### upper level functions #####################
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
178
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
179 aggregateDT <- function(classifiers, target=NULL, prediction=F, model=NULL, out) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
180 if(!prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
181 treeParam <- tuneTree(classifiers, target)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
182 data <- data.frame(classifiers, target)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
183 model <- rpart(target ~., data=data, method = "anova", control = treeParam)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
184 model <- prune(model, cp=treeParam["cp"])
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
185 out <- paste(out, ".rds", sep = "")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
186 saveRDS(model, out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
187 return(out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
188 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
189 return(predict(model, classifiers))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
190 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
191 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
192
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
193 aggregateGeneticMean <- function(classifiers, target=NULL, prediction=F, model=NULL, out){
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
194 if(!prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
195 opt <- optimizeWeight(values = classifiers, trueValue = target)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
196 out <- paste(out, ".rds", sep = "")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
197 saveRDS(opt@solution, out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
198 return(out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
199 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
200 return(weightedPrediction.vec(classifiers, model))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
201 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
202 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
203
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
204 aggregateLASSO <- function(classifiers, target=NULL, prediction=F, model=NULL, alpha=NULL, out) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
205 if(!prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
206 alpha <- searchParamLASSO(classifiers, target)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
207 cv <- cv.glmnet(x=as.matrix(classifiers), y=target, alpha=alpha)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
208 model <- glmnet(x=as.matrix(classifiers), y=target, alpha=alpha, lambda = cv$lambda.1se)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
209 out <- paste(out, ".rds", sep = "")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
210 saveRDS(model, out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
211 return(out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
212 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
213 return(predict(model, classifiers))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
214 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
215 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
216
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
217 aggregateRF <- function(classifiers, target=NULL, model=NULL, ntree=NULL, prediction=F, out) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
218 if(!prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
219 ntree <- searchParamRF(genotype = classifiers, phenotype = target,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
220 rangeNtree = seq(100, 1000, 100))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
221 model <- randomForest(x=classifiers, y=target, ntree = ntree, mtry = ncol(classifiers))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
222 out <- paste(out, ".rds", sep = "")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
223 saveRDS(model, out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
224 return(out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
225 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
226 return(predict(model, classifiers))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
227 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
228 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
229
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
230 aggregateSVM <- function(classifiers, target=NULL, prediction=F,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
231 model=NULL, c=NULL, g=NULL, d=NULL, coef=NULL, kernel="radial", out) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
232 if(!prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
233 model <- searchParamSVM(train = classifiers, target = target, kernel = kernel)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
234 out <- paste(out, ".rds", sep="")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
235 saveRDS(model, out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
236 return(out)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
237 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
238 return(predict(model, classifiers))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
239 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
240 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
241
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
242 ################################### main #############################
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
243 # # load argument
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
244 cmd <- commandArgs(T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
245 source(cmd[1])
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
246 # check if evaluation is required
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
247 evaluation <- F
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
248 if(as.integer(doEvaluation) == 1) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
249 evaluation <- T
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
250 con = file(folds)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
251 folds <- readLines(con = con, n = 1, ok=T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
252 close(con)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
253 folds <- readRDS(folds)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
254 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
255 # check for model
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
256 if(model == "None") {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
257 model <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
258 prediction <- F
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
259 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
260 prediction <- T
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
261 con = file(model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
262 model <- readLines(con = con, n = 1, ok=T)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
263 close(con)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
264 model <- readRDS(model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
265 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
266 # load classifiers and phenotype
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
267 classifiers <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
268 classifNames <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
269 if(lassoPred !="None"){
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
270 classifiers <- c(classifiers, lassoPred)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
271 classifNames <- c(classifNames, "lasso")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
272 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
273 if(rrBLUPPred !="None"){
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
274 classifiers <- c(classifiers, rrBLUPPred)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
275 classifNames <- c(classifNames, "rrBLUP")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
276 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
277 if(rfPred !="None"){
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
278 classifiers <- c(classifiers, rfPred)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
279 classifNames <- c(classifNames, "rf")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
280 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
281 if(svmPred !="None"){
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
282 classifiers <- c(classifiers, svmPred)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
283 classifNames <- c(classifNames, "svm")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
284 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
285 classifPrediction <- NULL
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
286 for(classif in classifiers) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
287 classifPrediction <- c(classifPrediction, list(read.table(classif, sep="\t", h=T)))
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
288 }
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
289 classifPrediction <- data.frame(classifPrediction)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
290 colnames(classifPrediction) <- classifNames
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
291 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
292 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
293 # aggregate !
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
294 switch(method,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
295 geneticMean={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
296 res <- aggregateGeneticMean(classifiers = classifPrediction, target = phenotype,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
297 out = out, prediction = prediction, model=model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
298 },
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
299 dt={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
300 res <- aggregateDT(classifiers = classifPrediction, target = phenotype,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
301 out = out, prediction = prediction, model=model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
302 },
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
303 lasso={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
304 res <- aggregateLASSO(classifiers = data.matrix(classifPrediction), target = phenotype,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
305 out = out, prediction = prediction, model=model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
306 },
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
307 rf={
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
308 res <- aggregateRF(classifiers = classifPrediction, target = phenotype,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
309 out = out, prediction = prediction, model=model)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
310 },
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
311 # svm
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
312 {res <- aggregateSVM(classifiers = classifPrediction, target = phenotype, kernel = kernel,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
313 out = out, prediction = prediction, model = model)}
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
314 )
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
315 if(prediction) {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
316 write.table(data.frame(lines=rownames(classifPrediction), res), out,
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
317 sep="\t", row.names = F)
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
318 } else {
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
319 cat(paste(out, ".rds", sep=""), "\n", sep="")
3da64b89dc00 Uploaded
nicolas
parents:
diff changeset
320 }