view svm.R @ 37:6b5c0c7b4585 draft

Uploaded
author nicolas
date Tue, 25 Oct 2016 14:41:18 -0400
parents f9d2d5058395
children 8cdeaa91ebc3
line wrap: on
line source

########################################################
#
# creation date : 07/01/16
# last modification : 03/07/16
# author : Dr Nicolas Beaume
# owner : IRRI
#
########################################################
log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt")
sink(file = log, type="message")
library("e1071")
############################ helper functions #######################
svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # tuning parameters then train
  model <- NULL
  if(is.null(g)){g <- 10^(-6:0)}
  if(is.null(c)){c <- 10^(0:2)}
  switch(kernel,
         sigmoid={
           tune <-  tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid");
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[2]];
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")},
         linear={
           tune <-  tune.svm(train, target, cost = c, kernel="linear");
           c <- tune$best.parameters[[2]];
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")},
         polynomial={
           if(is.null(coef)){coef <- 0:3};
           if(is.null(d)){d <- 0:4};
           tune <-  tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial");
           d <- tune$best.parameters[[1]];
           g <- tune$best.parameters[[2]];
           coef <- tune$best.parameters[[3]];
           c <- tune$best.parameters[[4]];
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)},
         {
           tune <-  tune.svm(train, target, gamma = g, cost = c, kernel="radial");
           g <- tune$best.parameters[[1]];
           c <- tune$best.parameters[[2]];
           model <-  svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")}
  )
  return(model)
}
################################## main function ###########################
svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) {
  # build model
  labelIndex <- match("label", colnames(genotype))
  if(evaluation) {
    prediction <- NULL
    for(i in 1:length(folds)) {
      test <- folds[[i]]
      train <- unlist(folds[-i])
      svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d)
      prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex])))
    }
    saveRDS(prediction, file=paste(outFile, ".rds", sep = ""))
  } else {
    model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d)
    saveRDS(model, file=paste(outFile, ".rds", sep = ""))
  }
}

############################ main #############################

cmd <- commandArgs(T)
source(cmd[1])
if(as.numeric(g) == -1) {g <- NULL}
if(as.numeric(c) == -1) {c <- NULL}
if(as.numeric(coef) == -1) {coef <- NULL}
if(as.numeric(d) == -1) {d <- NULL}
# check if evaluation is required
evaluation <- F
if(as.integer(doEvaluation) == 1) {
  evaluation <- T
  con = file(folds)
  folds <- readLines(con = con, n = 1, ok=T)
  close(con)
  folds <- readRDS(folds)
}
# load genotype and phenotype
con = file(genotype)
genotype <- readLines(con = con, n = 1, ok=T)
close(con)
genotype <- read.table(genotype, sep="\t", h=T)
# phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
phenotype <- read.table(phenotype, sep="\t", h=T)[,1] 
# run !
svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F),
             evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel)
cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))