annotate svm.R @ 103:e7115e44d8d8 draft default tip

Uploaded
author nicolas
date Mon, 31 Oct 2016 07:20:49 -0400
parents c2efdf0c23a1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
89
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
2 #
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 07/01/16
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 03/07/16
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
6 # owner : IRRI
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
7 #
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
8 ########################################################
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
9 library("e1071")
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
10 options(warn=-1)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
11 ############################ helper functions #######################
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
12 # optimize svm parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
14 # tuning parameters then train
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
15 model <- NULL
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
16 if(is.null(g)){g <- 10^(-6:0)}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
17 if(is.null(c)){c <- 10^(-1:0)}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
18 # optimize parameter for the kernel in use
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
19 switch(kernel,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
20 # sigmoid kernel : need gamma, cost and coef
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
21 sigmoid={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
22 if(is.null(coef)){coef <- 0:3};
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
23 # optimize then extract best parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef);
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
25 g <- tune$best.parameters[[1]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
26 c <- tune$best.parameters[[3]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
27 coef <- tune$best.parameters[[2]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
28 # compute model
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
30 # linear kernel, only cost is required
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
31 linear={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
32 # optimize then extract best parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
33 tune <- tune.svm(train, target, cost = c, kernel="linear");
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
34 c <- tune$best.parameters[[1]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
35 # compute model
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
37 # polynomial kernel, use degree, gamma, cost and coef as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
38 polynomial={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
39 # optimize and extract best parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
40 if(is.null(coef)){coef <- 0:3};
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
41 if(is.null(d)){d <- 0:4};
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
43 d <- tune$best.parameters[[1]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
44 g <- tune$best.parameters[[2]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
45 coef <- tune$best.parameters[[3]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
46 c <- tune$best.parameters[[4]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
47 # compute model
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
49 # default : radial kernel, use gamma and cost as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
50 {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
51 # optimize and extract parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial");
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
53 g <- tune$best.parameters[[1]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
54 c <- tune$best.parameters[[2]];
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
55 # compute model
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
57 )
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
58 return(model)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
59 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
60 ################################## main function ###########################
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
62 # optimize classifier if any parameter is NULL
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
63 switch(kernel,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
64 # sigmoid kernel : need gamma, cost and coef
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
65 sigmoid={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
66 if(any(is.null(c(coef, c, g)))){
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid",
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
68 g = g, c=c, coef = coef);
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
69 c <- fit$cost;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
70 g <- fit$gamma;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
71 coef <- fit$coef0;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
72 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
73 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
74 # linear kernel, only cost is required
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
75 linear={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c);
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
77 c <- fit$cost;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
78 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
79 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
80 # polynomial kernel, use degree, gamma, cost and coef as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
81 polynomial={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial",
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
83 g = g, c=c, coef = coef, d = d);
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
84 c <- fit$cost;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
85 g <- fit$gamma;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
86 coef <- fit$coef0;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
87 d <- fit$degree
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
88 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
89 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
90 # default : radial kernel, use gamma and cost as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial",
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
92 g = g, c=c);
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
93 c <- fit$cost;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
94 g <- fit$gamma;
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
95 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
96 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
97 )
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
98 # do evaluation
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
99 if(evaluation) {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
100 prediction <- NULL
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
101 # iterate over folds
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
102 for(i in 1:length(folds)) {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
103 # prepare indexes for training and test
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
104 test <- folds[[i]]
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
105 train <- unlist(folds[-i])
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
106 # compute model
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
108 g=g, c=c, coef=coef, d=d)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
109 # predict on test set of the current fold
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,])))
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
111 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
112 # save all prediction for further evaluation
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
113 saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
114 } else {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
115 # compute model and save it
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
116 switch(kernel,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
117 # sigmoid kernel : need gamma, cost and coef
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
118 sigmoid={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
120 cost =c, coef0=coef)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
121 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
122 # linear kernel, only cost is required
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
123 linear={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
125 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
126 # polynomial kernel, use degree, gamma, cost and coef as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
127 polynomial={
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
129 coef0=coef, degree =d)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
130 },
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
131 # default : radial kernel, use gamma and cost as parameters
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
133 })
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
134 saveRDS(model, file=paste(outFile, ".rds", sep = ""))
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
135 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
136 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
137
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
138 ############################ main #############################
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
139 # load argument
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
140 cmd <- commandArgs(T)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
141 source(cmd[1])
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
142 # check for svm paramater, especially to know if optimization is requiered
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
143 if(as.numeric(g) == -1) {g <- NULL}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
144 if(as.numeric(c) == -1) {c <- NULL}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
145 if(as.numeric(coef) == -1) {coef <- NULL}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
146 if(as.numeric(d) == -1) {d <- NULL}
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
147 # check if evaluation is required
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
148 evaluation <- F
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
149 if(as.integer(doEvaluation) == 1) {
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
150 evaluation <- T
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
151 con = file(folds)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
152 folds <- readLines(con = con, n = 1, ok=T)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
153 close(con)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
154 folds <- readRDS(folds)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
155 }
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
156 # load genotype and phenotype
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
157 con = file(genotype)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
158 genotype <- readLines(con = con, n = 1, ok=T)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
159 close(con)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
160 genotype <- read.table(genotype, sep="\t", h=T)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
163 # run !
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
164 svmClassifier(genotype = genotype, phenotype = phenotype,
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
166 # retunr path of the result file to galaxy
c2efdf0c23a1 Uploaded
nicolas
parents:
diff changeset
167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))