view evaluation.R @ 4:62e7a8d66b1f draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 06:25:28 -0400
parents
children 4f017b111d6c
line wrap: on
line source


########################################################
#
# creation date : 08/01/16
# last modification : 23/06/16
# author : Dr Nicolas Beaume
# owner : IRRI
#
########################################################

log <- file(paste(getwd(), "log_evaluation.txt", sep="/"), open = "wt")
sink(file = log, type="message")

library("pROC")
library("randomForest")
library("miscTools")

predictionCorrelation <- function(prediction, obs) {
  return(cor(prediction, obs, method = "pearson"))
}

r2.plot <- function(true, predicted, scatterplot=T) {
  if(scatterplot) {
    plot(true, predicted, xlab="trait value", ylab="predicted value", main="", pch=16, 
         ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))))
    lines(true, true, col="red")
  } else {
    plot(true, ylim=c(min(min(true), min(predicted)), max(max(true), max(predicted))), 
         xlab="individual index", ylab="traitValue", type="l", main="")
    lines(predicted, col="red")
    legend("topright", legend = c("pedicted", "traitValue"), col = c("red", "black"), lty=c(1,1))
  }
}

r2 <- function(target, prediction) {
  sst <- sum((target-mean(target))^2)
  ssr <- sum((target-prediction)^2)
  return(1-ssr/sst)
}

################################## main function ###########################

evaluatePredictions <- function(data, prediction=NULL, traitValue=NULL, doR2=F, 
                                prefix="data", folds=NULL, classes=NULL, doCor=F, path=".") {
  eval <- NULL
  if(doR2) {
    eval <- c(eval, list(r2=NULL))
  }
  if(doCor) {
    eval <- c(eval, list(cor=NULL))
  }
  for (i in 1:length(folds)) {
    train <- unlist(folds[-i])
    test <- folds[[i]]
    if(doR2) {
      if(!is.null(traitValue)) {
        jpeg(paste(path, "/", prefix,"fold_",i,"_scatterPlot.jpeg", sep=""))
        eval$r2 <- c(eval$r2, r2(traitValue[test], prediction[[i]]))
        r2.plot(traitValue[test], prediction[[i]])
        dev.off()
      } else {
        eval$r2 <- c(eval$r2, NA)
      }
    }
    if(doCor) {
      if(!is.null(traitValue)) {
        eval$cor <- c(eval$cor, cor(traitValue[test], prediction[[i]]))
      } else {
        eval$cor <- c(eval$cor, NA)
      }
    }
  }
  print(eval)
  write.table(eval, file = paste(path,"/",prefix,"_evaluation.csv", sep=""), sep="\t", row.names = F)
}
############################ main #############################
# evaluation.sh -i path_to_data -p prediction -f folds -t phenotype -r -c -a -n name -o path_to_result_directory 
## -i : path to the file that contains the genotypes.
# please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encode.R is used)

## -p : prediction made through any methods
# please note that the table must be called "prediction" when your datafile is saved into .rda
# (automatic if prediction methods from this pipeline were used)

## -f : index of the folds to which belong each individual
# please note that the list must be called "folds" (automatic if folds.R was used)

## -t : phenotype of each individual
# please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)

## -r : flag to run a R2 evaluation

## -c : flag to run a correlation evaluation

## -n : prefix of the names of all result files

## -o : path to the directory where the evaluation results are stored.
cmd <- commandArgs(trailingOnly = T)
source(cmd[1])
# set which evaluation are used
if(as.integer(doR2) == 1) {
  doR2 <- T
}
if(as.integer(doCor) == 1) {
  doCor <- T
}
# load genotype & phenotype
con = file(genotype)
genotype <- readLines(con = con, n = 1, ok=T)
close(con)
genotype <- read.table(genotype, sep="\t",h=T)
phenotype <- read.table(phenotype, sep="\t",h=T)[,1]
# load prediction
con = file(prediction)
prediction <- readLines(con = con, n = 1, ok=T)
close(con)
prediction <- readRDS(prediction)
# load folds
con = file(folds)
folds <- readLines(con = con, n = 1, ok=T)
close(con)
folds <- readRDS(folds)
evaluatePredictions(data=genotype, prediction=prediction, traitValue=phenotype, doR2=doR2, 
                    prefix=name, folds=folds, doCor=doCor, path=out)