view svm.R @ 103:e7115e44d8d8 draft default tip

Uploaded
author nicolas
date Mon, 31 Oct 2016 07:20:49 -0400
parents c2efdf0c23a1
children
line wrap: on
line source

########################################################
#
# creation date : 07/01/16
# last modification : 03/07/16
# author : Dr Nicolas Beaume
# owner : IRRI
#
########################################################
library("e1071")
options(warn=-1)
############################ helper functions #######################
# optimize svm parameters
optimizeSVM <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # tuning parameters then train
  model <- NULL
  if(is.null(g)){g <- 10^(-6:0)}
  if(is.null(c)){c <- 10^(-1:0)}
  # optimize parameter for the kernel in use
  switch(kernel,
         # sigmoid kernel : need gamma, cost and coef
         sigmoid={
           if(is.null(coef)){coef <- 0:3};
           # optimize then extract best parameters
           tune <-  tune.svm(train, target, gamma = g, cost = 10^(0:2), kernel="sigmoid", coef0 = coef);
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[3]];
           coef <- tune$best.parameters[[2]];
           # compute model
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
         # linear kernel, only cost is required
         linear={
           # optimize then extract best parameters
           tune <-  tune.svm(train, target, cost = c, kernel="linear");
           c <- tune$best.parameters[[1]];
           # compute model
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
         # polynomial kernel, use degree, gamma, cost and coef as parameters
         polynomial={
           # optimize and extract best parameters
           if(is.null(coef)){coef <- 0:3};
           if(is.null(d)){d <- 0:4};
           tune <-  tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
           d <- tune$best.parameters[[1]];
           g <- tune$best.parameters[[2]];
           coef <- tune$best.parameters[[3]];
           c <- tune$best.parameters[[4]];
           # compute model
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
         # default : radial kernel, use gamma and cost as parameters
         {
           # optimize and extract parameters
           tune <-  tune.svm(train, target, gamma = g, cost = c, kernel="radial");
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[2]];
           # compute model
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
  )
  return(model)
}
################################## main function ###########################
svmClassifier <- function(genotype, phenotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # optimize classifier if any parameter is NULL
  switch(kernel,
         # sigmoid kernel : need gamma, cost and coef
         sigmoid={
           if(any(is.null(c(coef, c, g)))){
             fit <- optimizeSVM(genotype, phenotype, kernel = "sigmoid",
                                                              g = g, c=c, coef = coef);
              c <- fit$cost;
              g <- fit$gamma;
              coef <- fit$coef0;
            }
           },
         # linear kernel, only cost is required
         linear={
           if(is.null(c)){fit <- optimizeSVM(genotype, phenotype, kernel = "linear", c=c);
            c <- fit$cost;
           }
         },
         # polynomial kernel, use degree, gamma, cost and coef as parameters
         polynomial={
           if(any(is.null(c(coef, c, g, d)))){fit <- optimizeSVM(genotype, phenotype, kernel = "polynomial",
                                                              g = g, c=c, coef = coef, d = d);
              c <- fit$cost;
              g <- fit$gamma;
              coef <- fit$coef0;
              d <- fit$degree
           }
         },
         # default : radial kernel, use gamma and cost as parameters
         {if(any(is.null(c(c, g)))){fit <- optimizeSVM(genotype, phenotype, kernel = "radial",
                                                             g = g, c=c);
            c <- fit$cost;
            g <- fit$gamma;
          }
         }
  )
  # do evaluation
  if(evaluation) {
    prediction <- NULL
    # iterate over folds
    for(i in 1:length(folds)) {
      # prepare indexes for training and test
      test <- folds[[i]]
      train <- unlist(folds[-i])
      # compute model
      svm.fit <- optimizeSVM(train = genotype[train,], target = phenotype[train], kernel=kernel,
                          g=g, c=c, coef=coef, d=d)
      # predict on test set of the current fold
      prediction <- c(prediction, list(predict(svm.fit, genotype[test,])))
    }
    # save all prediction for further evaluation
    saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
  } else {
    # compute model and save it
    switch(kernel,
           # sigmoid kernel : need gamma, cost and coef
           sigmoid={
             model <- svm(x = genotype, y = phenotype, kernel="sigmoid", gamma =g,
                          cost =c, coef0=coef)
           },
           # linear kernel, only cost is required
           linear={
             model <- svm(x = genotype, y = phenotype, kernel="linear", cost =c)
           },
           # polynomial kernel, use degree, gamma, cost and coef as parameters
           polynomial={
             model <- svm(x = genotype, y = phenotype, kernel="polynomial", gamma =g, cost =c,
                          coef0=coef, degree =d)
           },
           # default : radial kernel, use gamma and cost as parameters
           { model <- svm(x = genotype, y = phenotype, kernel="radial", gamma =g, cost =c)
           })
    saveRDS(model, file=paste(outFile, ".rds", sep = ""))
  }
}

############################ main #############################
# load argument
cmd <- commandArgs(T)
source(cmd[1])
# check for svm paramater, especially to know if optimization is requiered
if(as.numeric(g) == -1) {g <- NULL}
if(as.numeric(c) == -1) {c <- NULL}
if(as.numeric(coef) == -1) {coef <- NULL}
if(as.numeric(d) == -1) {d <- NULL}
# check if evaluation is required
evaluation <- F
if(as.integer(doEvaluation) == 1) {
  evaluation <- T
  con = file(folds)
  folds <- readLines(con = con, n = 1, ok=T)
  close(con)
  folds <- readRDS(folds)
}
# load genotype and phenotype
con = file(genotype)
genotype <- readLines(con = con, n = 1, ok=T)
close(con)
genotype <- read.table(genotype, sep="\t", h=T)
# phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
phenotype <- read.table(phenotype, sep="\t", h=T)[,1] 
# run !
svmClassifier(genotype = genotype, phenotype = phenotype,
             evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
# retunr path of the result file to galaxy
cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))