annotate folds.R @ 18:27fb6c2a98a3 draft

Uploaded
author nicolas
date Fri, 21 Oct 2016 06:30:52 -0400
parents ec8f372ec8de
children ff82ecfb509e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
1 ########################################################
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
2 #
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
3 # creation date : 05/01/16
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
4 # last modification : 27/06/16
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
5 # author : Dr Nicolas Beaume
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
6 # owner : IRRI
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
7 #
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
8 ########################################################
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
9
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
10
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
11 ############################ helper function ####################
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
12
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
13 ############################ main function #######################
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
14
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
# Randomly partition observation indices into n cross-validation folds.
#
# Arguments:
#   nbObs : number of observations to distribute (single non-negative number).
#   n     : number of folds (single number >= 1).
# Returns:
#   an unnamed list of length n; element i holds, in increasing order, the
#   indices (between 1 and nbObs) of the observations assigned to fold i.
#   Every index appears in exactly one fold.
createFolds <- function(nbObs, n) {
  if (length(n) != 1L || is.na(n) || n < 1) {
    stop("n must be a single number >= 1", call. = FALSE)
  }
  if (length(nbObs) != 1L || is.na(nbObs) || nbObs < 0) {
    stop("nbObs must be a single non-negative number", call. = FALSE)
  }
  # Recycle the fold labels 1..n over the observations, then shuffle them, so
  # that fold sizes differ by at most one. Drawing each label independently
  # with replacement could leave some folds empty or strongly unbalanced.
  labels <- rep_len(seq_len(n), nbObs)
  # Index with sample.int() rather than calling sample(labels): sample() treats
  # a length-one numeric vector as an upper bound, not as a set of values.
  index <- labels[sample.int(length(labels))]
  lapply(seq_len(n), function(i) which(index == i))
}
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
23
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
24 ############################ main #############################
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
25 # running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
26 # folds.sh -i path_to_data [-n nfold] -p phenotype_file [-k nb_classes] -o output_file
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
27 ## -i : path to the file that contains the genotypes, must be a .rda file (as output by loadGenotype.R).
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
28 # please note that the table must be called "genotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
29
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
30 ## -k : [optional] number of classes of phenotype. This information is used to equilibrate the folds
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
31 # if omitted, 2 classes are assumed
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
32
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
33 ## -p : file that contains the phenotype; must be a .rda file (as output by loadGenotype.R).
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
34 # please note that the table must be called "phenotype" when your datafile is saved into .rda (automatic if loadGenotype.R was used)
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
35
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
36 ## -n : [optional] number of folds for cross validation. If omitted, n is set to 10
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
37
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
38 ## -o : path to the output file (the folds). The .rds extension is automatically added
ec8f372ec8de Uploaded
nicolas
parents:
diff changeset
# Command-line entry point.
# The single trailing argument is the path to an R script which, once sourced,
# is expected to define `genotype`, `n` and `out` (assumption: none of these is
# assigned in this file before being used -- confirm against the wrapper that
# generates the parameter script).
cmd <- commandArgs(trailingOnly = TRUE)
if (length(cmd) < 1L) {
  stop("missing argument: path to the R parameter file", call. = FALSE)
}
source(cmd[1])
# `genotype` first names a file whose first line is the path of the actual
# tab-separated genotype table; resolve that indirection. Passing the path
# directly lets readLines() open and close the connection itself.
genotype <- readLines(genotype, n = 1, ok = TRUE)
# Only the number of observations (rows of the table) is needed for the folds.
nObs <- nrow(read.table(genotype, sep = "\t", header = TRUE))
folds <- createFolds(nObs, as.numeric(n))
# The folds are serialised with saveRDS(), hence the ".rds" suffix.
out <- paste0(out, ".rds")
saveRDS(folds, file = out)
# Print the path of the file that was written.
cat(paste0(out, "\n"))