22
|
1 ########################################################
|
|
2 #
|
|
3 # creation date : 26/01/16
|
|
4 # last modification : 02/06/16
|
|
5 # author : Dr Nicolas Beaume
|
|
6 # owner : IRRI
|
|
7 #
|
|
8 ########################################################
|
|
9 log <- file(paste(getwd(), "log_prediction.txt", sep="/"), open = "wt")
|
|
10 sink(file = log, type="message")
|
|
11
|
|
12 library(rrBLUP)
|
|
13 library(randomForest)
|
|
14 library(e1071)
|
|
15 library(glmnet)
|
|
16 library(methods)
|
|
17 ############################ helper functions #######################
|
|
18
|
|
19 ################################## main function ###########################
|
|
20
|
|
21
|
|
22 ############################ main #############################
|
|
23 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
|
|
24 # predict.sh -i genotype_file -m model_file -n name -o path_to_result_directory
|
|
25 ## -i : path to the file that contains the genotypes.
|
|
26 # please note that the table must be called "genotype" when your datafile is saved into .rda (automatic if encode.R is used)
|
|
27
|
|
28 ## -m : model build through any methods
|
|
29 # please note that the table must be called "model" when your datafile is saved into .rda
|
|
30 # (automatic if classifiers from this pipeline were used)
|
|
31
|
|
32 ## -n : prefix of the names of all result files
|
|
33
|
|
34 ## -o : path to the directory where the evaluation results are stored.
|
|
35
|
|
36 classifierNames <- c("list", "randomForest", "svm", "glmnet")
|
|
37 cmd <- commandArgs(trailingOnly = T)
|
|
38 source(cmd[1])
|
|
39 # load data
|
|
40 con = file(genotype)
|
|
41 genotype <- readLines(con = con, n = 1, ok=T)
|
|
42 close(con)
|
|
43 genotype <- read.table(genotype, sep="\t", h=T)
|
|
44 con = file(model)
|
|
45 model <- readLines(con = con, n = 1, ok=T)
|
|
46 close(con)
|
|
47 model <- readRDS(model)
|
|
48 # check if the classifier name is valid
|
|
49 if(all(is.na(match(class(model), classifierNames)))) {
|
|
50 stop(paste(class(model), "is not recognized as a valid model. Valid models are : ", classifierNames))
|
|
51 }
|
|
52 # run prediction according to the classifier
|
|
53 if(any(class(model) == "list")) {
|
|
54 predictions <- as.matrix(genotype) %*% as.matrix(model$u)
|
|
55 predictions <- predictions[,1]+model$beta
|
|
56 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
|
|
57 } else if(any(class(model) == "glmnet")) {
|
|
58 predictions <- predict(model, as.matrix(genotype), type = "response")
|
|
59 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
|
|
60 } else {
|
|
61 predictions <- predict(model, genotype)
|
|
62 predictions <- data.frame(lines=names(predictions), predictions=predictions)
|
|
63 }
|
|
64 # save results
|
|
65 write.table(predictions, file=out, sep="\t", row.names = F)
|
|
66
|