6
|
1 ########################################################
|
|
2 #
|
|
3 # creation date : 05/01/16
|
|
4 # last modification : 27/06/16
|
|
5 # author : Dr Nicolas Beaume
|
|
6 # owner : IRRI
|
|
7 #
|
|
8 ########################################################
|
|
9
|
|
10
|
|
11 ############################ helper function ####################
|
|
12
|
|
13 ############################ main function #######################
|
|
14
|
|
# Randomly assign observations to cross-validation folds.
#
# Each observation index in 1..nbObs is drawn independently and uniformly
# into one of n folds, so fold sizes are only approximately equal and a fold
# may be empty when nbObs is small relative to n.
#
# Args:
#   nbObs: number of observations to distribute (single number >= 0).
#   n:     number of folds (single number >= 1; a fractional value is
#          truncated, as `1:n` would do).
#
# Returns:
#   An unnamed list of length n; element i holds the integer indices of the
#   observations assigned to fold i.
createFolds <- function(nbObs, n) {
  # 1:n silently becomes c(1, 0) when n is 0, so reject bad sizes up front
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("n must be a single number >= 1", call. = FALSE)
  }
  if (!is.numeric(nbObs) || length(nbObs) != 1L || is.na(nbObs) || nbObs < 0) {
    stop("nbObs must be a single non-negative number", call. = FALSE)
  }
  fold_ids <- seq_len(as.integer(n))
  # spell out TRUE: T is an ordinary variable that can be reassigned
  index <- sample(fold_ids, size = nbObs, replace = TRUE)
  # one list element per fold, built in a single pass instead of growing a
  # list with c() inside a loop
  lapply(fold_ids, function(i) which(index == i))
}
|
|
23
|
|
24 ############################ main #############################
|
|
25 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
|
|
26 # folds.sh -i path_to_data [-n nfold] -p phenotype_file [-k nb_classes] -o output_file
|
|
## -i : path to the file that contains the genotypes, must be a .rda file (as output by loadGenotype.R).
|
|
28 # please note that the table must be called "genotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
|
|
29
|
|
30 ## -k : [optional] number of classes of phenotype. This information is used to equilibrate the folds
|
|
31 # if omitted, 2 classes are assumed
|
|
32
|
|
## -p : file that contains the phenotype; must be a .rda file (as output by loadGenotype.R).
|
|
34 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
|
|
35
|
|
## -n : [optional] number of folds for cross validation. If omitted, n will be set to 10
|
|
37
|
|
## -o : path to the output file in which the folds are saved. The .rds extension is automatically added
|
|
# Script entry point: build random cross-validation folds and save them.
#
# The first trailing command-line argument is an R file that is sourced here.
# NOTE(review): it is presumably generated by the calling wrapper and is
# expected to define `genotype`, `n` and `out`, since those variables are
# used below without being set in this script -- confirm against the wrapper.
cmd <- commandArgs(trailingOnly = TRUE)
if (length(cmd) < 1L) {
  stop("a configuration R file must be given as first argument",
       call. = FALSE)
}
source(cmd[1])
# `genotype` first names a file whose first line is the path of the
# tab-separated genotype table; resolve that indirection
con <- file(genotype)
genotype <- tryCatch(
  readLines(con = con, n = 1, ok = TRUE),
  # release the connection even when reading fails
  finally = close(con)
)
if (length(genotype) != 1L) {
  stop("the genotype pointer file is empty", call. = FALSE)
}
# only the number of rows (observations) of the table is needed
nObs <- nrow(read.table(genotype, sep = "\t", header = TRUE))
folds <- createFolds(nObs, as.numeric(n))
# the .rds extension is appended to the requested output path
out <- paste0(out, ".rds")
saveRDS(folds, file = out)
# print the final output path on stdout
cat(paste0(out, "\n"))