Mercurial > repos > nicolas > oghma
view qualityControl.R @ 103:e7115e44d8d8 draft default tip
Uploaded
author | nicolas |
---|---|
date | Mon, 31 Oct 2016 07:20:49 -0400 |
parents | f818e787d0c0 |
children |
line wrap: on
line source
######################################################## # # creation date : 09/08/16 # last modification : 16/08/16 # author : Dr Nicolas Beaume # owner : IRRI # ######################################################## ####################################################### dataStats.nbNA <- function(x) { return(length(x[is.na(x)])) } dataStats.matrixSize <- function(data) { nbMarker <- nrow(data) nbSample <- ncol(data) return(list(nbMarker=nbMarker, nbSample=nbSample)) } parseFrq <- function(freqFile) { data <- read.table(freqFile, h=T) return(data.frame(chr=data$CHR, pos=data$SNP, maf=data$MAF)) } parseHWE <- function(hweFile) { data <- read.table(hweFile, h=T) return(data.frame(chr=data$CHR, pos=data$SNP, pvalue=data$P)) } ########################## main function ################## createReport <- function(genoFile, freqFile, hweFile, out="report.txt") { # get basic statistics (nb markers, nb samples, nb NA) data <- read.table(genoFile, sep="\t", h=T) missingData <- dataStats.nbNA(data)/length(data) dataDimension <- dataStats.matrixSize(data) # get MAF info freq <- parseFrq(freqFile) # get Hardy-Weinberg equilibrium hwe <- parseHWE(hweFile) # merge info info <- data.frame(freq, hwe=hwe[,3]) # write report write(paste("number of marker :", dataDimension$nbMarker), file=out) write(paste("number of sample :", dataDimension$nbSample), file=out, append = T) write(paste("ratio of missing data :", missingData), file=out, append = T) suppressWarnings(suppressMessages(write.table(info, file=out, append = T))) } ############################ main ########################## cmd <- commandArgs(T) source(cmd[1]) createReport(genotype, plinkFreq, plinkHWE, out) # # get basic statistics (nb markers, nb samples, nb NA) # data <- read.table(genotype, sep="\t", h=T) # dataDimension <- dataStats.matrixSize(data) # # get MAF info # freq <- parseFrq(freqFile) # # get Hardy-Weinberg equilibrium # hwe <- parseHWE(hweFile) # # merge info # info <- data.frame(freq, hwe=hwe[,3]) # # write report # cat(paste("number of marker : ", dataDimension$nbMarker, "\n", sep="")) # cat(paste("number of sample : ", dataDimension$nbSample, "\n", sep="")) # print(info) # cat("\n")