annotate prediction.R @ 22:f0d89ff35ad2 draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 10:35:13 -0400
parents
children 8112bc642858
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
22
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
1 ########################################################
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
2 #
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
3 # creation date : 26/01/16
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
4 # last modification : 02/06/16
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
6 # owner : IRRI
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
7 #
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
8 ########################################################
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
9 log <- file(paste(getwd(), "log_prediction.txt", sep="/"), open = "wt")
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
10 sink(file = log, type="message")
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
11
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
12 library(rrBLUP)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
13 library(randomForest)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
14 library(e1071)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
15 library(glmnet)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
16 library(methods)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
17 ############################ helper functions #######################
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
18
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
19 ################################## main function ###########################
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
20
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
21
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
22 ############################ main #############################
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
23 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
24 # predict.sh -i genotype_file -m model_file -n name -o path_to_result_directory
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
25 ## -i : path to the file that contains the genotypes.
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
26 # please note that the table must be called "genotype" when your datafile is saved into .rda (automatic if encode.R is used)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
27
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
28 ## -m : model build through any methods
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
29 # please note that the table must be called "model" when your datafile is saved into .rda
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
30 # (automatic if classifiers from this pipeline were used)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
31
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
32 ## -n : prefix of the names of all result files
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
33
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
34 ## -o : path to the directory where the evaluation results are stored.
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
35
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
36 classifierNames <- c("list", "randomForest", "svm", "glmnet")
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
37 cmd <- commandArgs(trailingOnly = T)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
38 source(cmd[1])
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
39 # load data
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
40 con = file(genotype)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
41 genotype <- readLines(con = con, n = 1, ok=T)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
42 close(con)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
43 genotype <- read.table(genotype, sep="\t", h=T)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
44 con = file(model)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
45 model <- readLines(con = con, n = 1, ok=T)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
46 close(con)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
47 model <- readRDS(model)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
48 # check if the classifier name is valid
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
49 if(all(is.na(match(class(model), classifierNames)))) {
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
50 stop(paste(class(model), "is not recognized as a valid model. Valid models are : ", classifierNames))
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
51 }
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
52 # run prediction according to the classifier
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
53 if(any(class(model) == "list")) {
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
54 predictions <- as.matrix(genotype) %*% as.matrix(model$u)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
55 predictions <- predictions[,1]+model$beta
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
56 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
57 } else if(any(class(model) == "glmnet")) {
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
58 predictions <- predict(model, as.matrix(genotype), type = "response")
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
59 predictions <- data.frame(lines=rownames(genotype), predictions=predictions)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
60 } else {
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
61 predictions <- predict(model, genotype)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
62 predictions <- data.frame(lines=names(predictions), predictions=predictions)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
63 }
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
64 # save results
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
65 write.table(predictions, file=out, sep="\t", row.names = F)
f0d89ff35ad2 Uploaded
nicolas
parents:
diff changeset
66