89
|
1 ########################################################
|
|
2 #
|
|
3 # creation date : 07/01/16
|
|
4 # last modification : 03/07/16
|
|
5 # author : Dr Nicolas Beaume
|
|
6 # owner : IRRI
|
|
7 #
|
|
8 ########################################################
|
|
9 library("e1071")
|
|
10 options(warn=-1)
|
|
11 ############################ helper functions #######################
|
|
12 # optimize svm parameters
|
|
13 optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
|
|
14 # tuning parameters then train
|
|
15 model <- NULL
|
|
16 if(is.null(g)){g <- 10^(-6:0)}
|
|
17 if(is.null(c)){c <- 10^(-1:0)}
|
|
18 # optimize parameter for the kernel in use
|
|
19 switch(kernel,
|
|
20 # sigmoid kernel : need gamma, cost and coef
|
|
21 sigmoid={
|
|
22 if(is.null(coef)){coef <- 0:3};
|
|
23 # optimize then extract best parameters
|
|
24 tune <- tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef);
|
|
25 g <- tune$best.parameters[[1]];
|
|
26 c <- tune$best.parameters[[3]];
|
|
27 coef <- tune$best.parameters[[2]];
|
|
28 # compute model
|
|
29 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
|
|
30 # linear kernel, only cost is required
|
|
31 linear={
|
|
32 # optimize then extract best parameters
|
|
33 tune <- tune.svm(train, target, cost = c, kernel="linear");
|
|
34 c <- tune$best.parameters[[1]];
|
|
35 # compute model
|
|
36 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
|
|
37 # polynomial kernel, use degree, gamma, cost and coef as parameters
|
|
38 polynomial={
|
|
39 # optimize and extract best parameters
|
|
40 if(is.null(coef)){coef <- 0:3};
|
|
41 if(is.null(d)){d <- 0:4};
|
|
42 tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
|
|
43 d <- tune$best.parameters[[1]];
|
|
44 g <- tune$best.parameters[[2]];
|
|
45 coef <- tune$best.parameters[[3]];
|
|
46 c <- tune$best.parameters[[4]];
|
|
47 # compute model
|
|
48 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
|
|
49 # default : radial kernel, use gamma and cost as parameters
|
|
50 {
|
|
51 # optimize and extract parameters
|
|
52 tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial");
|
|
53 g <- tune$best.parameters[[1]];
|
|
54 c <- tune$best.parameters[[2]];
|
|
55 # compute model
|
|
56 model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
|
|
57 )
|
|
58 return(model)
|
|
59 }
|
|
60 ################################## main function ###########################
|
|
61 svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
|
|
62 # optimize classifier if any parameter is NULL
|
|
63 switch(kernel,
|
|
64 # sigmoid kernel : need gamma, cost and coef
|
|
65 sigmoid={
|
|
66 if(any(is.null(c(coef, c, g)))){
|
|
67 fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid",
|
|
68 g = g, c=c, coef = coef);
|
|
69 c <- fit$cost;
|
|
70 g <- fit$gamma;
|
|
71 coef <- fit$coef0;
|
|
72 }
|
|
73 },
|
|
74 # linear kernel, only cost is required
|
|
75 linear={
|
|
76 if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c);
|
|
77 c <- fit$cost;
|
|
78 }
|
|
79 },
|
|
80 # polynomial kernel, use degree, gamma, cost and coef as parameters
|
|
81 polynomial={
|
|
82 if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial",
|
|
83 g = g, c=c, coef = coef, d = d);
|
|
84 c <- fit$cost;
|
|
85 g <- fit$gamma;
|
|
86 coef <- fit$coef0;
|
|
87 d <- fit$degree
|
|
88 }
|
|
89 },
|
|
90 # default : radial kernel, use gamma and cost as parameters
|
|
91 {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial",
|
|
92 g = g, c=c);
|
|
93 c <- fit$cost;
|
|
94 g <- fit$gamma;
|
|
95 }
|
|
96 }
|
|
97 )
|
|
98 # do evaluation
|
|
99 if(evaluation) {
|
|
100 prediction <- NULL
|
|
101 # iterate over folds
|
|
102 for(i in 1:length(folds)) {
|
|
103 # prepare indexes for training and test
|
|
104 test <- folds[[i]]
|
|
105 train <- unlist(folds[-i])
|
|
106 # compute model
|
|
107 svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel,
|
|
108 g=g, c=c, coef=coef, d=d)
|
|
109 # predict on test set of the current fold
|
|
110 prediction <- c(prediction, list(predict(svm.fit, genotype[test,])))
|
|
111 }
|
|
112 # save all prediction for further evaluation
|
|
113 saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
|
|
114 } else {
|
|
115 # compute model and save it
|
|
116 switch(kernel,
|
|
117 # sigmoid kernel : need gamma, cost and coef
|
|
118 sigmoid={
|
|
119 model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g,
|
|
120 cost =c, coef0=coef)
|
|
121 },
|
|
122 # linear kernel, only cost is required
|
|
123 linear={
|
|
124 model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c)
|
|
125 },
|
|
126 # polynomial kernel, use degree, gamma, cost and coef as parameters
|
|
127 polynomial={
|
|
128 model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c,
|
|
129 coef0=coef, degree =d)
|
|
130 },
|
|
131 # default : radial kernel, use gamma and cost as parameters
|
|
132 { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c)
|
|
133 })
|
|
134 saveRDS(model, file=paste(outFile, ".rds", sep = ""))
|
|
135 }
|
|
136 }
|
|
137
|
|
138 ############################ main #############################
|
|
139 # load argument
|
|
140 cmd <- commandArgs(T)
|
|
141 source(cmd[1])
|
|
142 # check for svm paramater, especially to know if optimization is requiered
|
|
143 if(as.numeric(g) == -1) {g <- NULL}
|
|
144 if(as.numeric(c) == -1) {c <- NULL}
|
|
145 if(as.numeric(coef) == -1) {coef <- NULL}
|
|
146 if(as.numeric(d) == -1) {d <- NULL}
|
|
147 # check if evaluation is required
|
|
148 evaluation <- F
|
|
149 if(as.integer(doEvaluation) == 1) {
|
|
150 evaluation <- T
|
|
151 con = file(folds)
|
|
152 folds <- readLines(con = con, n = 1, ok=T)
|
|
153 close(con)
|
|
154 folds <- readRDS(folds)
|
|
155 }
|
|
156 # load genotype and phenotype
|
|
157 con = file(genotype)
|
|
158 genotype <- readLines(con = con, n = 1, ok=T)
|
|
159 close(con)
|
|
160 genotype <- read.table(genotype, sep="\t", h=T)
|
|
161 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
|
|
162 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
|
|
163 # run !
|
|
164 svmClassifier(genotype = genotype, phenotype = phenotype,
|
|
165 evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
|
|
166 # retunr path of the result file to galaxy
|
|
167 cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))
|