changeset 16:f9d2d5058395 draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 06:30:02 -0400
parents 9178c17023aa
children 58b58d95b0e0
files svm.R
diffstat 1 files changed, 91 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/svm.R	Fri Oct 21 06:30:02 2016 -0400
@@ -0,0 +1,91 @@
+########################################################
+#
+# creation date : 07/01/16
+# last modification : 03/07/16
+# author : Dr Nicolas Beaume
+# owner : IRRI
+#
+########################################################
+log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt")
+sink(file = log, type="message")
+library("e1071")
+############################ helper functions #######################
+svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
+  # tuning parameters then train
+  model <- NULL
+  if(is.null(g)){g <- 10^(-6:0)}
+  if(is.null(c)){c <- 10^(0:2)}
+  switch(kernel,
+         sigmoid={
+           tune <-  tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid");
+           g <- tune$best.parameters[[1]];
+           c <- tune$best.parameters[[2]];
+           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
+         linear={
+           tune <-  tune.svm(train, target, cost = c, kernel="linear");
+           c <- tune$best.parameters[[2]];
+           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
+         polynomial={
+           if(is.null(coef)){coef <- 0:3};
+           if(is.null(d)){d <- 0:4};
+           tune <-  tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
+           d <- tune$best.parameters[[1]];
+           g <- tune$best.parameters[[2]];
+           coef <- tune$best.parameters[[3]];
+           c <- tune$best.parameters[[4]];
+           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
+         {
+           tune <-  tune.svm(train, target, gamma = g, cost = c, kernel="radial");
+           g <- tune$best.parameters[[1]];
+           c <- tune$best.parameters[[2]];
+           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
+  )
+  return(model)
+}
+################################## main function ###########################
+svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
+  # build model
+  labelIndex <- match("label", colnames(genotype))
+  if(evaluation) {
+    prediction <- NULL
+    for(i in 1:length(folds)) {
+      test <- folds[[i]]
+      train <- unlist(folds[-i])
+      svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d)
+      prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex])))
+    }
+    saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
+  } else {
+    model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d)
+    saveRDS(model, file=paste(outFile, ".rds", sep = ""))
+  }
+}
+
+############################ main #############################
+
+cmd <- commandArgs(T)
+source(cmd[1])
+if(as.numeric(g) == -1) {g <- NULL}
+if(as.numeric(c) == -1) {c <- NULL}
+if(as.numeric(coef) == -1) {coef <- NULL}
+if(as.numeric(d) == -1) {d <- NULL}
+# check if evaluation is required
+evaluation <- F
+if(as.integer(doEvaluation) == 1) {
+  evaluation <- T
+  con = file(folds)
+  folds <- readLines(con = con, n = 1, ok=T)
+  close(con)
+  folds <- readRDS(folds)
+}
+# load genotype and phenotype
+con = file(genotype)
+genotype <- readLines(con = con, n = 1, ok=T)
+close(con)
+genotype <- read.table(genotype, sep="\t", h=T)
+# phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
+phenotype <- read.table(phenotype, sep="\t", h=T)[,1] 
+# run !
+svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F),
+             evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
+cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))