4
|
1
|
|
2 ########################################################
|
|
3 #
|
|
4 # creation date : 08/01/16
|
|
5 # last modification : 23/06/16
|
|
6 # author : Dr Nicolas Beaume
|
|
7 # owner : IRRI
|
|
8 #
|
|
9 ########################################################
|
|
10
|
|
11 log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt")
|
|
12 sink(file = log, type="message")
|
|
13
|
|
14 library("pROC")
|
|
15 library("randomForest")
|
|
16 library("miscTools")
|
|
17
|
|
18 predictionCorrelation <- function(prediction, obs) {
|
|
19 return(cor(prediction, obs, method = "pearson"))
|
|
20 }
|
|
21
|
|
22 r2.plot <- function(true, predicted, scatterplot=T) {
|
|
23 if(scatterplot) {
|
|
24 plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16,
|
|
25 ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))))
|
|
26 lines(true, true, col="red")
|
|
27 } else {
|
|
28 plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))),
|
|
29 xlab="individual index", ylab="traitValue", type="l", main="")
|
|
30 lines(predicted, col="red")
|
|
31 legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1))
|
|
32 }
|
|
33 }
|
|
34
|
|
35 r2 <- function(target, prediction) {
|
|
36 sst <- sum((target-mean(target))^2)
|
|
37 ssr <- sum((target-prediction)^2)
|
|
38 return(1-ssr/sst)
|
|
39 }
|
|
40
|
|
41 ################################## main function ###########################
|
|
42
|
|
43 evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F,
|
|
44 prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") {
|
|
45 eval <- NULL
|
|
46 if(doR2) {
|
|
47 eval <- c(eval, list(r2=NULL))
|
|
48 }
|
|
49 if(doCor) {
|
|
50 eval <- c(eval, list(cor=NULL))
|
|
51 }
|
|
52 for (i in 1:length(folds)) {
|
|
53 train <- unlist(folds[-i])
|
|
54 test <- folds[[i]]
|
|
55 if(doR2) {
|
|
56 if(!is.null(traitValue)) {
|
|
57 jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep=""))
|
|
58 eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]]))
|
|
59 r2.plot(traitValue[test], prediction[[i]])
|
|
60 dev.off()
|
|
61 } else {
|
|
62 eval$r2 <- c(eval$r2, NA)
|
|
63 }
|
|
64 }
|
|
65 if(doCor) {
|
|
66 if(!is.null(traitValue)) {
|
|
67 eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]]))
|
|
68 } else {
|
|
69 eval$cor <- c(eval$cor, NA)
|
|
70 }
|
|
71 }
|
|
72 }
|
|
73 print(eval)
|
|
74 write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F)
|
|
75 }
|
|
76 ############################ main #############################
|
|
77 # evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory
|
|
78 ## -i : path to the file that contains the genotypes.
|
|
79 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used)
|
|
80
|
|
81 ## -p : prediction made through any methods
|
|
82 # please note that the table must be called "prediction" when your datafile is saved into .rda
|
|
83 # (automatic if prediction methods from this pipeline were used)
|
|
84
|
|
85 ## -f : index of the folds to which belong each individual
|
|
86 # please note that the list must be called "folds" (automatic if folds.R was used)
|
|
87
|
|
88 ## -t : phenotype of each individual
|
|
89 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
|
|
90
|
|
91 ## -r : flag to run a R2 evaluation
|
|
92
|
|
93 ## -c : flag to run a correlation evaluation
|
|
94
|
|
95 ## -n : prefix of the names of all result files
|
|
96
|
|
97 ## -o : path to the directory where the evaluation results are stored.
|
|
98 cmd <- commandArgs(trailingOnly = T)
|
|
99 source(cmd[1])
|
|
100 # set which evaluation are used
|
|
101 if(as.integer(doR2) == 1) {
|
|
102 doR2 <- T
|
|
103 }
|
|
104 if(as.integer(doCor) == 1) {
|
|
105 doCor <- T
|
|
106 }
|
|
107 # load genotype & phenotype
|
|
108 con = file(genotype)
|
|
109 genotype <- readLines(con = con, n = 1, ok=T)
|
|
110 close(con)
|
|
111 genotype <- read.table(genotype, sep="\t",h=T)
|
|
112 phenotype <- read.table(phenotype, sep="\t",h=T)[,1]
|
|
113 # load prediction
|
|
114 con = file(prediction)
|
|
115 prediction <- readLines(con = con, n = 1, ok=T)
|
|
116 close(con)
|
|
117 prediction <- readRDS(prediction)
|
|
118 # load folds
|
|
119 con = file(folds)
|
|
120 folds <- readLines(con = con, n = 1, ok=T)
|
|
121 close(con)
|
|
122 folds <- readRDS(folds)
|
|
123 evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2,
|
|
124 prefix=name, folds=folds, doCor=doCor, path=out) |