# HG changeset patch # User nicolas # Date 1477045802 14400 # Node ID f9d2d5058395d632c915f55eb410a669b390280b # Parent 9178c17023aad35cf51221c625862b31506d78ab Uploaded diff -r 9178c17023aa -r f9d2d5058395 svm.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/svm.R Fri Oct 21 06:30:02 2016 -0400 @@ -0,0 +1,91 @@ +######################################################## +# +# creation date : 07/01/16 +# last modification : 03/07/16 +# author : Dr Nicolas Beaume +# owner : IRRI +# +######################################################## +log <- file(paste(getwd(), "log_SVM.txt", sep="/"), open = "wt") +sink(file = log, type="message") +library("e1071") +############################ helper functions ####################### +svmModel <- function(train, target, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { + # tuning parameters then train + model <- NULL + if(is.null(g)){g <- 10^(-6:0)} + if(is.null(c)){c <- 10^(0:2)} + switch(kernel, + sigmoid={ + tune <- tune.svm(train, target, gamma = , cost = 10^(0:2), kernel="sigmoid"); + g <- tune$best.parameters[[1]]; + c <- tune$best.parameters[[2]]; + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "sigmoid")}, + linear={ + tune <- tune.svm(train, target, cost = c, kernel="linear"); + c <- tune$best.parameters[[2]]; + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "linear")}, + polynomial={ + if(is.null(coef)){coef <- 0:3}; + if(is.null(d)){d <- 0:4}; + tune <- tune.svm(train, target, gamma = g, cost = c, degree = d, coef0 = coef, kernel="polynomial"); + d <- tune$best.parameters[[1]]; + g <- tune$best.parameters[[2]]; + coef <- tune$best.parameters[[3]]; + c <- tune$best.parameters[[4]]; + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "polynomial", degree = d, coef0 = coef)}, + { + tune <- tune.svm(train, target, gamma = g, cost = c, kernel="radial"); + g <- tune$best.parameters[[1]]; + c <- tune$best.parameters[[2]]; + model <- svm(x=train, y=target, gamma = g, cost = c, kernel = "radial")} + ) + return(model) +} +################################## main function ########################### +svmSelection <- function(genotype, evaluation = T, outFile, folds, kernel="radial", g=NULL, c=NULL, coef=NULL, d=NULL) { + # build model + labelIndex <- match("label", colnames(genotype)) + if(evaluation) { + prediction <- NULL + for(i in 1:length(folds)) { + test <- folds[[i]] + train <- unlist(folds[-i]) + svm.fit <- svmModel(train = genotype[train,-labelIndex], target = genotype[train,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) + prediction <- c(prediction, list(predict(svm.fit, genotype[test,-labelIndex]))) + } + saveRDS(prediction, file=paste(outFile, ".rds", sep = "")) + } else { + model <- svmModel(train = genotype[,-labelIndex], target = genotype[,labelIndex], kernel=kernel, g=g, c=c, coef=coef, d=d) + saveRDS(model, file=paste(outFile, ".rds", sep = "")) + } +} + +############################ main ############################# + +cmd <- commandArgs(T) +source(cmd[1]) +if(as.numeric(g) == -1) {g <- NULL} +if(as.numeric(c) == -1) {c <- NULL} +if(as.numeric(coef) == -1) {coef <- NULL} +if(as.numeric(d) == -1) {d <- NULL} +# check if evaluation is required +evaluation <- F +if(as.integer(doEvaluation) == 1) { + evaluation <- T + con = file(folds) + folds <- readLines(con = con, n = 1, ok=T) + close(con) + folds <- readRDS(folds) +} +# load genotype and phenotype +con = file(genotype) +genotype <- readLines(con = con, n = 1, ok=T) +close(con) +genotype <- read.table(genotype, sep="\t", h=T) +# phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve +phenotype <- read.table(phenotype, sep="\t", h=T)[,1] +# run ! +svmSelection(genotype = data.frame(genotype, label=phenotype, check.names = F, stringsAsFactors = F), + evaluation = evaluation, outFile = out, folds = folds, g=g, c=c, coef=coef, d=d, kernel=kernel) +cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))