annotate rrBLUP.R @ 17:58b58d95b0e0 draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 06:30:17 -0400
parents 4d21b6806e19
children fcfd1cbeb5a9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
1
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
2 ########################################################
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
3 #
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
4 # creation date : 05/01/16
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
5 # last modification : 02/09/16
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
6 # author : Dr Nicolas Beaume
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
7 #
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
8 ########################################################
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
9
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
10
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
11 #log <- file(paste(getwd(), "log_rrBLUP.txt", sep="/"), open = "wt")
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
12 #sink(file = log, type="message")
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
13 library(rrBLUP)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
14 ############################ helper functions #######################
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
15
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
16
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
17 ################################## main function ###########################
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
18
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
19 rrBLUP <- function(genotype, phenotype, outFile, evaluation=F, folds) {
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
20 if(evaluation) {
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
21 prediction <- NULL
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
22 for(i in 1:length(folds)) {
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
23 train <- genotype[-folds[[i]],]
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
24 test <- genotype[folds[[i]],]
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
25 phenoTrain <- phenotype[-folds[[i]]]
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
26 phenoTest <- phenotype[folds[[i]]]
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
27 model <-mixed.solve(phenoTrain, Z=genoTrain,K=NULL, SE=F,return.Hinv = F)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
28 pred <- as.matrix(genoTest) %*% as.matrix(model$u)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
29 pred <- pred[,1]+model$beta
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
30 prediction <- c(prediction, list(pred))
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
31 }
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
32 saveRDS(prediction, file=paste(outFile,".rds", sep=""))
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
33 # just create a model
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
34 } else {
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
35 model <-mixed.solve(phenotype, Z=genotype,K=NULL, SE=F,return.Hinv = F)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
36 saveRDS(model, file = paste(outFile, ".rds", sep = ""))
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
37 }
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
38 }
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
39
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
40
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
41 ############################ main #############################
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
42 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
43 # rrBLUP.sh -i path_to_genotype -p phenotype_file -f folds -e -f fold_file -o out_file
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
44 ## -i : path to the file that contains the genotypes, must be a .rda file (as outputed by loadGenotype).
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
45 # please note that the table must be called "encoded" when your datafile is saved into .rda (automatic if encoded.R was used)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
46
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
47 ## -p : file that contains the phenotype must be a .rda file (as outputed by loadGenotype.R).
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
48 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
49
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
50 ## -e : do evaluation instead of producing a model
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
51
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
52 ## -f : index of the folds to which belong each individual
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
53 # please note that the list must be called "folds" (automatic if folds.R was used)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
54
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
55 ## -o : path to the output folder and generic name of the analysis. The extension related to each type of
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
56 # files are automatically added
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
57
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
58 cmd <- commandArgs(T)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
59 source(cmd[1])
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
60 # load genotype and phenotype
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
61 con = file(genotype)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
62 genotype <- readLines(con = con, n = 1, ok=T)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
63 close(con)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
64 genotype <- read.table(genotype, sep="\t", h=T)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
65 # phenotype is written as a table (in columns) but it must be sent as a vector for mixed.solve
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
66 phenotype <- read.table(phenotype, sep="\t", h=T)[,1]
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
67 # run !
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
68 rrBLUP(genotype = genotype, phenotype = phenotype, outFile = out)
4d21b6806e19 Uploaded
nicolas
parents:
diff changeset
69 cat(paste(paste(out, ".rds", sep = ""), "\n", sep=""))