annotate svm.R @ 22:f0d89ff35ad2 draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 10:35:13 -0400
parents f9d2d5058395
children 8cdeaa91ebc3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
2 #
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 07/01/16
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 03/07/16
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
6 # owner : IRRI
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
7 #
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
8 ########################################################
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
# Send R's message stream (the "message" sink type) to log_SVM.txt in the
# current working directory.
log <- file(file.path(getwd(), "log_SVM.txt"), open = "wt")
sink(log, type = "message")

# e1071 provides svm() and tune.svm(), used by the functions below.
library(e1071)
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
12 ############################ helper functions #######################
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # Tune the hyper-parameters of an SVM with tune.svm(), then fit the final
  # model with svm() using the best combination found.
  #
  # Args:
  #   train:  predictors, passed as `x` to tune.svm() and svm().
  #   target: response, passed as `y` to tune.svm() and svm().
  #   kernel: "sigmoid", "linear" or "polynomial"; any other value falls
  #           through to the radial kernel.
  #   g:      candidate gamma value(s); NULL means the grid 10^(-6:0).
  #   c:      candidate cost value(s); NULL means the grid 10^(0:2).
  #   coef:   candidate coef0 value(s), polynomial kernel only; NULL means 0:3.
  #   d:      candidate degree value(s), polynomial kernel only; NULL means 0:4.
  #
  # Returns:
  #   The object returned by svm() for the selected kernel.
  if (is.null(g)) {
    g <- 10^(-6:0)
  }
  if (is.null(c)) {
    c <- 10^(0:2)
  }

  # Tuned values are read from best.parameters by column name rather than by
  # position, so the lookup stays correct whatever subset of parameters was
  # tuned (the linear kernel only tunes `cost`, giving a one-column result).
  model <- switch(kernel,
    sigmoid = {
      tuned <- tune.svm(train, target, gamma = g, cost = c, kernel = "sigmoid")
      best <- tuned$best.parameters
      svm(x = train, y = target, gamma = best[["gamma"]],
          cost = best[["cost"]], kernel = "sigmoid")
    },
    linear = {
      # Only cost is tuned here, so gamma is not forwarded to svm().
      tuned <- tune.svm(train, target, cost = c, kernel = "linear")
      best <- tuned$best.parameters
      svm(x = train, y = target, cost = best[["cost"]], kernel = "linear")
    },
    polynomial = {
      if (is.null(coef)) {
        coef <- 0:3
      }
      if (is.null(d)) {
        d <- 0:4
      }
      tuned <- tune.svm(train, target, gamma = g, cost = c, degree = d,
                        coef0 = coef, kernel = "polynomial")
      best <- tuned$best.parameters
      svm(x = train, y = target, gamma = best[["gamma"]],
          cost = best[["cost"]], kernel = "polynomial",
          degree = best[["degree"]], coef0 = best[["coef0"]])
    },
    {
      # Default branch: radial kernel.
      tuned <- tune.svm(train, target, gamma = g, cost = c, kernel = "radial")
      best <- tuned$best.parameters
      svm(x = train, y = target, gamma = best[["gamma"]],
          cost = best[["cost"]], kernel = "radial")
    }
  )
  model
}
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
45 ################################## main function ###########################
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
svmSelection <- function(genotype, evaluation = TRUE, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # Fit SVM model(s) on `genotype` and save the result as "<outFile>.rds".
  #
  # Args:
  #   genotype:   table whose column named "label" is the response; every
  #               other column is used as a predictor.
  #   evaluation: if TRUE, run a cross-validation over `folds` and save the
  #               list of per-fold predictions; if FALSE, train one model on
  #               all rows and save that model.
  #   outFile:    output path without extension (".rds" is appended).
  #   folds:      list of row selectors, one element per fold; only read when
  #               `evaluation` is TRUE.
  #   kernel, g, c, coef, d: forwarded unchanged to svmModel().
  #
  # Returns:
  #   The value of saveRDS(); the function is called for its side effect.
  labelIndex <- match("label", colnames(genotype))
  if (is.na(labelIndex)) {
    stop("genotype must contain a column named \"label\"", call. = FALSE)
  }

  if (evaluation) {
    # For each fold: train on every other fold, predict the held-out rows.
    result <- lapply(seq_along(folds), function(i) {
      test <- folds[[i]]
      train <- unlist(folds[-i])
      fit <- svmModel(train = genotype[train, -labelIndex],
                      target = genotype[train, labelIndex],
                      kernel = kernel, g = g, c = c, coef = coef, d = d)
      predict(fit, genotype[test, -labelIndex])
    })
  } else {
    result <- svmModel(train = genotype[, -labelIndex],
                       target = genotype[, labelIndex],
                       kernel = kernel, g = g, c = c, coef = coef, d = d)
  }
  saveRDS(result, file = paste0(outFile, ".rds"))
}
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
63
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
64 ############################ main #############################
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
65
f9d2d5058395 Uploaded
nicolas
parents:
diff changeset
# The first command-line argument is an R file that is source()d below.
# NOTE(review): that file is expected to define g, c, coef, d, doEvaluation,
# folds, genotype, phenotype, out and kernel, all of which are read here.
# Confirm against the tool wrapper that generates it.
cmd <- commandArgs(TRUE)
if (length(cmd) < 1L) {
  stop("usage: svm.R <configuration file>", call. = FALSE)
}
source(cmd[1])

# A value of -1 means "not set": it is replaced by NULL. Any other value is
# kept as supplied.
nullIfUnset <- function(value) {
  if (length(value) == 1L && isTRUE(as.numeric(value) == -1)) NULL else value
}
g <- nullIfUnset(g)
c <- nullIfUnset(c)
coef <- nullIfUnset(coef)
d <- nullIfUnset(d)

# check if evaluation is required
evaluation <- FALSE
if (as.integer(doEvaluation) == 1) {
  evaluation <- TRUE
  # `folds` names a file whose first line is the path of the RDS file that
  # holds the folds. readLines() on a path opens and closes the connection
  # itself.
  folds <- readRDS(readLines(folds, n = 1, ok = TRUE))
}

# load genotype and phenotype
# `genotype` uses the same indirection: the first line of the named file is
# the path of the tab-separated table.
genotype <- read.table(readLines(genotype, n = 1, ok = TRUE),
                       sep = "\t", header = TRUE)
# phenotype is written as a table (in columns); only its first column is
# used, as a vector.
phenotype <- read.table(phenotype, sep = "\t", header = TRUE)[, 1]

# run !
svmSelection(genotype = data.frame(genotype, label = phenotype,
                                   check.names = FALSE,
                                   stringsAsFactors = FALSE),
             evaluation = evaluation, outFile = out, folds = folds,
             g = g, c = c, coef = coef, d = d, kernel = kernel)
# Print the path of the file that was written.
cat(paste0(out, ".rds"), "\n", sep = "")