annotate evaluate_aggregation.R @ 103:e7115e44d8d8 draft default tip

Uploaded
author nicolas
date Mon, 31 Oct 2016 07:20:49 -0400
parents f90c741e2a32
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
94
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
2 #
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 10/10/16
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 29/10/16
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
6 #
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
7 ########################################################
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
8
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
9 suppressWarnings(suppressMessages(library(GA)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
10 library("miscTools")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
11 library(rpart)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
12 suppressWarnings(suppressMessages(library(randomForest)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
13 library(e1071)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
14 suppressWarnings(suppressMessages(library(glmnet)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
15 library(rrBLUP)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
16 options(warn=-1)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
17 ############################ helper functions #######################
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
18
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
19 ##### classifiers
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
20 prediction <- function(genotype, model, classifier="unknown") {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
21 # run prediction according to the classifier
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
22 switch(classifier,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
23 rrBLUP={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
24 predictions <- as.matrix(genotype) %*% as.matrix(model$u);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
25 predictions <- predictions[,1]+model$beta;
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
26 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
27 rf={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
28 predictions <- predict(model, genotype);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
29 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
30 svm={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
31 predictions <- predict(model, genotype);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
32 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
33 lasso={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
34 predictions <- predict(model, as.matrix(genotype), type = "response");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
35 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
36 {warning("unkonwn classifier, please choose among the following : rrBLUP, rf, svm, lasso")})
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
37 return(predictions)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
38 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
39
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
40 # extract parameter from a model, excluding rrBLUP which auto-optimize
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
41 extractParameter <- function(model, classifierName) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
42 param <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
43 switch(classifierName,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
44 # random forest
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
45 rf={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
46 param <- model$ntree
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
47 param <- c(param, list(model$mtry))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
48 names(param) <- c("ntree", "mtry")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
49 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
50 # svm
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
51 svm={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
52 param <- as.numeric(model$cost)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
53 param <- c(param, list(model$gamma))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
54 param <- c(param, list(model$coef0))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
55 param <- c(param, list(model$degree))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
56 param <- c(param, list(model$kernel))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
57 names(param) <- c("c", "g", "coef", "d", "kernel")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
58 switch((model$kernel+1),
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
59 param$kernel <- "linear",
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
60 param$kernel <- "polynomial",
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
61 param$kernel <- "radial",
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
62 param$kernel <- "sigmoid"
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
63 )
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
64 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
65 # lasso
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
66 lasso={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
67 param <- as.list(model$lambda)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
68 names(param) <- "lambda"
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
69 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
70 {print(paste("unknown classifier, please choose among rf, svm, lasso"));
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
71 stop()}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
72 )
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
73 return(param)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
74 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
75
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
76 ##### Genetic algorithm
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
77
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
78 # compute r2 by computing the classic formula
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
79 # compare the sum of square difference from target to prediciton
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
80 # to the sum of square difference from target to the mean of the target
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
81 r2 <- function(target, prediction) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
82 sst <- sum((target-mean(target))^2)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
83 ssr <- sum((target-prediction)^2)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
84 return(1-ssr/sst)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
85 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
86
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
87 optimizeOneIndividual <- function(values, trueValue) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
88 # change the value into a function
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
89 f <- function(w) {sum(values * w/sum(w))}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
90 fitness <- function(x) {1/abs(trueValue-f(x))}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
91 resp <- ga(type = "real-valued", fitness = fitness, min = rep(0, length(values)), max = rep(1, length(values)),
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
92 maxiter = 1000, monitor = NULL, keepBest = T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
93 resp@solution <- resp@solution/sum(resp@solution)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
94 return(resp)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
95 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
96
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
97 optimizeWeight <- function(values, trueValue, n=1000) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
98 fitnessAll <- function(w) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
99 predicted <- apply(values, 1, weightedPrediction.vec, w)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
100 return(mean(r2(trueValue, predicted)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
101 #return(mean(1/abs(trueValue-predicted)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
102 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
103 resp <- ga(type = "real-valued", fitness = fitnessAll, min = rep(0, ncol(values)), max = rep(1, ncol(values)),
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
104 maxiter = n, monitor = NULL, keepBest = T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
105 resp@solution <- resp@solution/sum(resp@solution)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
106 return(resp)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
107 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
108
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
109 weightedPrediction <- function(classifiers, w) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
110 if(length(w) > ncol(classifiers)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
111 warning("more weights than classifiers, extra weigths are ignored")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
112 w <- w[1:ncol(classifiers)]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
113 } else if(length(w) < ncol(classifiers)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
114 warning("less weights than classifiers, extra classifiers are ignored")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
115 classifiers <- classifiers[,1:length(w)]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
116 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
117 prediction <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
118 prediction <- c(prediction, apply(classifiers, 1, weightedPrediction.vec, w))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
119 return(prediction)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
120 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
121
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
122 weightedPrediction.vec <- function(values, w) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
123 return(sum(values * w/sum(w)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
124 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
125
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
126 ##### meta-decision tree
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
127
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
128 tuneTree <- function(data, target) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
129 data <- data.frame(data, target=target)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
130 size <- nrow(data)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
131 xerror <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
132 split <- 1:ceiling(size/5)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
133 leafSize <- 1:ceiling(size/10)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
134 xerror <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
135 cp <- matrix(rep(-1, length(split)*length(leafSize)), ncol=length(leafSize))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
136 for(i in 1:length(split)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
137 for(j in 1:length(leafSize)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
138 op <- list(minsplit=split[i], minbucket=leafSize[j])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
139 tree <- rpart(target ~., data=data, control=op, method="anova")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
140 xerror[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"xerror"]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
141 cp[i,j] <- tree$cptable[which.min(tree$cptable[,"xerror"]),"CP"]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
142 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
143 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
144 index <- which(xerror==min(xerror), arr.ind = T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
145 op <- list(minsplit=split[index[1]], minbucket=leafSize[index[2]], cp=cp[index[1], index[2]])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
146 return(op)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
147 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
148
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
149 ###### meta-LASSO
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
150 # create fold by picking at random row indexes
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
151 createFolds <- function(nbObs, n) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
152 # pick indexes
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
153 index <- sample(1:n, size=nbObs, replace = T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
154 # populate folds
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
155 folds <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
156 for(i in 1:n) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
157 folds <- c(folds, list(which(index==i)))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
158 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
159 return(folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
160 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
161
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
162 searchParamLASSO <- function(genotype, phenotype, alpha=seq(0,1,0.1), n=7) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
163 folds <- createFolds(nrow(genotype), n = n)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
164 acc <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
165 indexAlpha <- 1
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
166 for(a in alpha) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
167 curAcc <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
168 for(i in 1:n) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
169 train <- genotype[-folds[[i]],]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
170 test <- genotype[folds[[i]],]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
171 phenoTrain <- phenotype[-folds[[i]]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
172 phenoTest <- phenotype[folds[[i]]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
173 cv <- cv.glmnet(x=as.matrix(train), y=phenoTrain, alpha=a)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
174 model <- glmnet(x=as.matrix(train), y=phenoTrain, alpha=a, lambda = cv$lambda.1se)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
175 pred <- predict(model, test, type = "response")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
176 curAcc <- c(curAcc, r2(phenoTest, pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
177 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
178 acc <- c(acc, mean(curAcc))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
179 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
180 names(acc) <- alpha
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
181 return(as.numeric(names(acc)[which.max(acc)]))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
182 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
183
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
184 ###### meta-random forest
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
185
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
186 searchParamRF <- function(genotype, phenotype, rangeNtree, mtry=ncol(genotype)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
187 n <- ceiling(nrow(genotype)/3)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
188 indexTest <- sample(1:nrow(genotype), size=n)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
189 train <- genotype[-indexTest,]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
190 test <- genotype[indexTest,]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
191 phenoTrain <- phenotype[-indexTest]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
192 phenoTest <- phenotype[indexTest]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
193 acc <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
194 indexNtree <- 1
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
195 for(ntree in rangeNtree) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
196 model <- randomForest(x=train, y=phenoTrain, ntree = ntree, mtry = mtry)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
197 pred <- predict(model, test)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
198 acc <- c(acc, r2(phenoTest, pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
199 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
200 names(acc) <- rangeNtree
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
201 best <- which.max(acc)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
202 return(as.numeric(names(acc)[best]))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
203 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
204
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
205 ###### meta-SVM
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
206 searchParamSVM <- function(train, target, kernel="radial") {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
207 # tuning parameters then train
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
208 model <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
209 switch(kernel,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
210 sigmoid={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
211 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), kernel="sigmoid");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
212 g <- tune$best.parameters[[1]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
213 c <- tune$best.parameters[[2]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
214 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
215 linear={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
216 tune <- tune.svm(train, target, cost = 10^(0:2), kernel="linear");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
217 c <- tune$best.parameters[[1]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
218 model <- svm(x=train, y=target, cost = c, kernel = "linear")},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
219 polynomial={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
220 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:2), degree = 0:4, coef0 = 0:3, kernel="polynomial");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
221 d <- tune$best.parameters[[1]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
222 g <- tune$best.parameters[[2]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
223 coef <- tune$best.parameters[[3]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
224 c <- tune$best.parameters[[4]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
225 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
226 {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
227 tune <- tune.svm(train, target, gamma = 10^(-6:-1), cost = 10^(0:3), kernel="radial");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
228 g <- tune$best.parameters[[1]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
229 c <- tune$best.parameters[[2]];
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
230 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
231 )
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
232 return(model)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
233 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
234
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
235 #################### upper level functions #####################
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
236
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
237 aggregateDT <- function(train, test, target, folds) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
238 r2Aggreg <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
239 for (i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
240 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
241 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
242 treeParam <- tuneTree(train[[i]], target[trainIndex])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
243 data <- data.frame(train[[i]], target=target[trainIndex])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
244 model <- rpart(target ~., data=data, method = "anova", control = treeParam)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
245 model <- prune(model, cp=treeParam["cp"])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
246 pred <- predict(model, data.frame(test[[i]]))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
247 r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
248 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
249 return(r2Aggreg)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
250 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
251
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
252 aggregateGeneticMean <- function(train, test, target, folds) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
253 r2Aggreg <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
254 for (i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
255 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
256 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
257 opt <- optimizeWeight(values = train[[i]], trueValue = target[trainIndex])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
258 pred <- weightedPrediction(test[[i]], opt@solution)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
259 r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
260 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
261 return(r2Aggreg)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
262 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
263
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
264 aggregateLASSO <- function(train, test, target, folds) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
265 r2Aggreg <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
266 for (i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
267 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
268 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
269 alpha <- searchParamLASSO(train[[i]], target[trainIndex])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
270 cv <- cv.glmnet(x=as.matrix(train[[i]]), y=target[trainIndex], alpha=alpha)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
271 model <- glmnet(x=as.matrix(train[[i]]), y=target[trainIndex], alpha=alpha, lambda = cv$lambda.1se)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
272 pred <- predict(model, test[[i]])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
273 r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
274 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
275 return(r2Aggreg)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
276 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
277
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
278 aggregateRF <- function(train, test, target, folds) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
279 r2Aggreg <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
280 for (i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
281 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
282 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
283 ntree <- searchParamRF(genotype = train[[i]], phenotype = target[trainIndex],
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
284 rangeNtree = seq(100, 1000, 100))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
285 model <- randomForest(x=as.matrix(train[[i]]), y=target[trainIndex],
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
286 ntree = ntree, mtry = ncol(train[[i]]))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
287 pred <- predict(model, test[[i]])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
288 r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
289 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
290 return(r2Aggreg)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
291 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
292
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
293 aggregateSVM <- function(train, test, target, folds, kernel="linear") {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
294 r2Aggreg <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
295 for (i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
296 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
297 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
298 model <- searchParamSVM(train = train[[i]], target = target[trainIndex], kernel = kernel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
299 pred <- predict(model, test[[i]])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
300 r2Aggreg <- c(r2Aggreg, r2(target[testIndex], pred))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
301 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
302 return(r2Aggreg)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
303 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
304
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
305 ################################### main #############################
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
306 # # load argument
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
307 cmd <- commandArgs(T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
308 source(cmd[1])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
309 # load folds
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
310 con = file(folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
311 folds <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
312 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
313 folds <- readRDS(folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
314 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
315 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
316 # load genotype
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
317 con = file(genotype)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
318 genotype <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
319 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
320 genotype <- read.table(genotype, sep="\t", h=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
321 # find which classifiers will be used for aggregation
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
322 classifNames <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
323 if(lassoModel !="None"){
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
324 classifNames <- c(classifNames, "lasso")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
325 con = file(lassoModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
326 lassoModel <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
327 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
328 lassoModel <- readRDS(lassoModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
329 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
330 if(rrBLUPModel !="None"){
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
331 classifNames <- c(classifNames, "rrBLUP")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
332 con = file(rrBLUPModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
333 rrBLUPModel <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
334 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
335 rrBLUPModel <- readRDS(rrBLUPModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
336 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
337 if(rfModel !="None"){
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
338 classifNames <- c(classifNames, "rf")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
339 con = file(rfModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
340 rfModel <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
341 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
342 rfModel <- readRDS(rfModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
343 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
344 if(svmModel !="None"){
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
345 classifNames <- c(classifNames, "svm")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
346 con = file(svmModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
347 svmModel <- readLines(con = con, n = 1, ok=T)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
348 close(con)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
349 svmModel <- readRDS(svmModel)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
350 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
351
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
352 # compute prediction of the training set and test set for each fold and each classifiers
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
353 # train predictions and test prediction are stored in separate lists
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
354 # where each element of the list represent a folds
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
355 predictionTrain.list <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
356 predictionTest.list <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
357 r2Classif.list <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
358 for(i in 1:length(folds)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
359 # for the current fold, create training set and test set
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
360 trainIndex <- unlist(folds[-i])
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
361 testIndex <- folds[[i]]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
362 train <- genotype[trainIndex,]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
363 phenoTrain <- phenotype[trainIndex]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
364 test <- genotype[testIndex,]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
365 phenoTest <- phenotype[testIndex]
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
366 # only to intialize data frame containing predictions
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
367 predictionTrain <- matrix(rep(-1, length(classifNames)*length(trainIndex)),
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
368 ncol=length(classifNames))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
369 colnames(predictionTrain) <- classifNames
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
370 predictionTest <- matrix(rep(-1, length(classifNames)*length(testIndex)),
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
371 ncol=length(classifNames))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
372 colnames(predictionTest) <- classifNames
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
373 r2Classif <- NULL
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
374 # for each classifiers, compute prediction on both sets
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
375 # and evaluate r2 to find the best classifier
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
376 for(j in 1:length(classifNames)) {
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
377 switch(classifNames[j],
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
378 # random forest
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
379 rf={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
380 # predict train and test
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
381 param <- extractParameter(rfModel, "rf")
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
382 model <- randomForest(x=train, y=phenoTrain, mtry = param$mtry,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
383 ntree = param$ntree);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
384 predictionTrain[,j] <- prediction(train, model, classifier = "rf");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
385 predictionTest[,j] <- prediction(test, model, classifier = "rf");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
386 r2Classif <- c(r2Classif, rf=r2(phenoTest, predictionTest[,"rf"]))},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
387 # svm
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
388 svm={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
389 # predict train and test
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
390 param <- extractParameter(svmModel, "svm");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
391 model <- svm(train, phenoTrain, kernel = param$kernel, cost = param$c,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
392 gamma=param$g, degree = param$d, coef0 = param$coef, scale = F)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
393 predictionTrain[,j] <- prediction(train, model, classifier = "svm");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
394 predictionTest[,j] <- prediction(test, model, classifier = "svm");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
395 r2Classif <- c(r2Classif, svm=r2(phenoTest, predictionTest[,"svm"]))},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
396 # lasso
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
397 lasso={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
398 # predict train and test
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
399 param <- extractParameter(lassoModel, "lasso");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
400 model <- glmnet(x=as.matrix(train), y=phenoTrain, lambda = param$lambda);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
401 predictionTrain[,j] <- prediction(train, model, classifier = "lasso");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
402 predictionTest[,j] <- prediction(test, model, classifier = "lasso");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
403 r2Classif <- c(r2Classif, lasso=r2(phenoTest, predictionTest[,"lasso"]))},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
404 # rrBLUP
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
405 rrBLUP={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
406 # predict train and test
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
407 model <- mixed.solve(phenoTrain, Z=train,K=NULL, SE=F,return.Hinv = F);
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
408 predictionTrain[,j] <- prediction(train, model, classifier = "rrBLUP");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
409 predictionTest[,j] <- prediction(test, model, classifier = "rrBLUP");
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
410 r2Classif <- c(r2Classif, rrBLUP=r2(phenoTest, predictionTest[,"rrBLUP"]))},
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
411 {print(paste("unknown classifier, please choose among rf, svm, lasso, rrBLUP"));
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
412 stop()}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
413 )
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
414 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
415 predictionTrain.list <- c(predictionTrain.list, list(predictionTrain))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
416 predictionTest.list <- c(predictionTest.list, list(predictionTest))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
417 r2Classif.list <- c(r2Classif.list, list(r2Classif))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
418 }
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
419 # aggregate !
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
420 switch(method,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
421 geneticMean={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
422 aggreg <- aggregateGeneticMean(train=predictionTrain.list, test=predictionTest.list,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
423 target = phenotype, folds=folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
424 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
425 dt={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
426 aggreg <- aggregateDT(train=predictionTrain.list, test=predictionTest.list,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
427 target = phenotype, folds=folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
428 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
429 lasso={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
430 aggreg <- aggregateLASSO(train=predictionTrain.list, test=predictionTest.list,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
431 target = phenotype, folds=folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
432 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
433 rf={
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
434 aggreg <- aggregateRF(train=predictionTrain.list, test=predictionTest.list,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
435 target = phenotype, folds=folds)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
436 },
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
437 # svm, by default
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
438 {aggreg <- aggregateSVM(train=predictionTrain.list, test=predictionTest.list,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
439 target = phenotype, folds=folds, kernel=kernel)}
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
440 )
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
441 # determine best classifier
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
442 # first, transform list into a matrix
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
443 r2Classif.list <- t(data.frame(r2Classif.list))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
444 # then, compute the mean r2 for each classifier
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
445 meanR2Classif <- apply(r2Classif.list, 2, mean)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
446 # choose the best one
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
447 bestClassif <- which.max(meanR2Classif)
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
448 # compare aggregation and best classifiers
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
449 finalRes <- cbind(bestClassif=r2Classif.list[,bestClassif], aggreg=aggreg,
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
450 diff=(aggreg-r2Classif.list[,bestClassif]))
f90c741e2a32 Uploaded
nicolas
parents:
diff changeset
451 print(apply(finalRes, 2, mean))