Mercurial > repos > nicolas > oghma
view folds.R @ 18:27fb6c2a98a3 draft
Uploaded
author | nicolas |
---|---|
date | Fri, 21 Oct 2016 06:30:52 -0400 |
parents | ec8f372ec8de |
children | ff82ecfb509e |
line wrap: on
line source
########################################################
#
# creation date : 05/01/16
# last modification : 27/06/16
# author : Dr Nicolas Beaume
# owner : IRRI
#
########################################################

############################ helper function ####################

############################ main function #######################

# Randomly assign observations to cross-validation folds.
#
# Every observation index in 1..nbObs independently draws a fold label from
# 1..n (sampling with replacement), so fold sizes are random: the folds are
# not balanced and some of them may be empty.
#
# Args:
#   nbObs: number of observations to distribute (single value >= 0).
#   n:     number of folds (single value >= 1). It is coerced to integer, so
#          a fractional value is truncated, as `1:n` used to do.
#
# Returns:
#   An unnamed list of length n; element i is the integer vector of the
#   observation indices assigned to fold i.
#
# Raises:
#   An error when nbObs or n is missing, NA or out of range.
createFolds <- function(nbObs, n) {
  n <- as.integer(n)
  if (length(n) != 1L || is.na(n) || n < 1L) {
    stop("'n' must be a single number of folds >= 1", call. = FALSE)
  }
  if (length(nbObs) != 1L || is.na(nbObs) || nbObs < 0) {
    stop("'nbObs' must be a single number of observations >= 0",
         call. = FALSE)
  }
  # seq_len() instead of 1:n, which would yield c(1, 0) for n == 0
  index <- sample(seq_len(n), size = nbObs, replace = TRUE)
  # build the list in one pass rather than growing it with c() in a loop
  lapply(seq_len(n), function(i) which(index == i))
}

############################ main #############################
# running from terminal (supposing the OghmaGalaxy/bin directory is in your path) :
# folds.sh -i path_to_data [-n nfold] -p phenotype_file [-k nb_classes] -o output_file
## -i : path to the file that contains the genotypes, must be a .rda file
#       (as output by loadGenotype.R).
#       please note that the table must be called "genotype" when your
#       datafile is saved into .rda (automatic if loadGenotype.R was used)
## -k : [optional] number of classes of phenotype. This information is used
#       to equilibrate the folds. if omitted, 2 classes are assumed
## -p : file that contains the phenotype, must be a .rda file
#       (as output by loadGenotype.R).
#       please note that the table must be called "phenotype" when your
#       datafile is saved into .rda (automatic if loadGenotype.R was used)
## -n : [optional] number of folds for cross validation.
#       if omitted, n will be set to 10
## -o : path to the output file. the .rds extension is automatically added
#
# NOTE(review): the options above describe the folds.sh wrapper, which is not
# part of this file. This script itself takes a single argument, the path of
# an R file that is sourced and is expected to define `genotype`, `n` and
# `out`. The phenotype file (-p) and the number of classes (-k) are not used
# by the code below, and the genotype table is read as tab-separated text,
# not as .rda -- confirm against the wrapper.

cmd <- commandArgs(trailingOnly = TRUE)
if (length(cmd) < 1L) {
  stop("missing argument: path to the R configuration file", call. = FALSE)
}
source(cmd[1])

# `genotype` names a file whose first line is the path of the genotype table.
# Handing the path to readLines() lets it open and close the connection
# itself, so no connection is left open if the read fails.
genotype <- readLines(genotype, n = 1L, ok = TRUE)
if (length(genotype) != 1L || !nzchar(genotype)) {
  stop("the genotype file does not contain a path on its first line",
       call. = FALSE)
}

# one observation per row of the tab-separated genotype table
nObs <- nrow(read.table(genotype, sep = "\t", header = TRUE))
folds <- createFolds(nObs, as.numeric(n))

# save the folds and print the path of the written file on stdout
out <- paste0(out, ".rds")
saveRDS(folds, file = out)
cat(paste0(out, "\n"))