annotate prediction.R @ 83:eab9bce19e04 draft

Uploaded
author nicolas
date Fri, 28 Oct 2016 08:47:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
83
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
2 #
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 26/01/16
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 02/06/16
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
6 # owner : IRRI
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
7 #
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
8 ########################################################
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
9
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
10 library(rrBLUP)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
11 suppressWarnings(suppressMessages(library(randomForest)))
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
12 library(e1071)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
13 suppressWarnings(suppressMessages(library(glmnet)))
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
14 library(methods)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
15
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
16
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
17 ############################ main #############################
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
18 classifierNames <- c("list", "randomForest", "svm", "glmnet")
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
19 # load argument
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
20 cmd <- commandArgs(trailingOnly = T)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
21 source(cmd[1])
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
22 # load data
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
23 con = file(genotype)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
24 genotype <- readLines(con = con, n = 1, ok=T)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
25 close(con)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
26 genotype <- read.table(genotype, sep="\t", h=T)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
27 con = file(model)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
28 model <- readLines(con = con, n = 1, ok=T)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
29 close(con)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
30 model <- readRDS(model)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
31 # check if the classifier name is valid
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
32 if(all(is.na(match(class(model), classifierNames)))) {
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
33 stop(paste(class(model), "is not recognized as a valid model. Valid models are : ", classifierNames))
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
34 }
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
35 # run prediction according to the classifier
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
36 # rrBLUP prediction
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
37 if(any(class(model) == "list")) {
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
38 predictions <- as.matrix(genotype) %*% as.matrix(model$u)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
39 predictions <- predictions[,1]+model$beta
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
40 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
41 # LASSO prediction
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
42 } else if(any(class(model) == "glmnet")) {
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
43 predictions <- predict(model, as.matrix(genotype), type = "response")
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
44 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
45 # SVM or RandomForest prediction (predict is a wrapper for many machine learning function)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
46 } else {
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
47 predictions <- predict(model, genotype)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
48 predictions <- data.frame(lines=names(predictions), predictions=predictions)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
49 }
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
50 # save results
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
51 write.table(predictions, file=out, sep="\t", row.names = F)
eab9bce19e04 Uploaded
nicolas
parents:
diff changeset
52