Mercurial > repos > nicolas > oghma
changeset 12:87f772c49a20 draft
Uploaded
author | nicolas |
---|---|
date | Fri, 21 Oct 2016 06:28:49 -0400 |
parents | 502fbb316d2d |
children | 377a34a001b0 |
files | qualityControl.R |
diffstat | 1 files changed, 64 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qualityControl.R Fri Oct 21 06:28:49 2016 -0400 @@ -0,0 +1,64 @@ +######################################################## +# +# creation date : 09/08/16 +# last modification : 16/08/16 +# author : Dr Nicolas Beaume +# owner : IRRI +# +######################################################## +log <- file(paste(getwd(), "log_QC.txt", sep="/"), open = "wt") +sink(file = log, type="message") +####################################################### +dataStats.nbNA <- function(x) { + return(length(x[is.na(x)])) +} +dataStats.matrixSize <- function(data) { + nbMarker <- nrow(data) + nbSample <- ncol(data) + return(list(nbMarker=nbMarker, nbSample=nbSample)) +} + +parseFrq <- function(freqFile) { + data <- read.table(freqFile, h=T) + return(data.frame(chr=data$CHR, pos=data$SNP, maf=data$MAF)) +} + +parseHWE <- function(hweFile) { + data <- read.table(hweFile, h=T) + return(data.frame(chr=data$CHR, pos=data$SNP, pvalue=data$P)) +} + +########################## main function ################## +createReport <- function(genoFile, freqFile, hweFile, out="report.txt") { + # get basic statistics (nb markers, nb samples, nb NA) + data <- read.table(genoFile, sep="\t", h=T) + dataDimension <- dataStats.matrixSize(data) + # get MAF info + freq <- parseFrq(freqFile) + # get Hardy-Weinberg equilibrium + hwe <- parseHWE(hweFile) + # merge info + info <- data.frame(freq, hwe=hwe[,3]) + # write report + write(paste("number of marker :", dataDimension$nbMarker), file=out) + write(paste("number of sample :", dataDimension$nbSample), file=out, append = T) + write.table(info, file=out, append = T) +} +############################ main ########################## +cmd <- commandArgs(T) +source(cmd[1]) +createReport(genotype, plinkFreq, plinkHWE, out) +# # get basic statistics (nb markers, nb samples, nb NA) +# data <- read.table(genotype, sep="\t", h=T) +# dataDimension <- dataStats.matrixSize(data) +# # get MAF info +# freq <- parseFrq(freqFile) +# # get Hardy-Weinberg equilibrium +# hwe <- parseHWE(hweFile) +# # merge info +# info <- data.frame(freq, hwe=hwe[,3]) +# # write report +# cat(paste("number of marker : ", dataDimension$nbMarker, "\n", sep="")) +# cat(paste("number of sample : ", dataDimension$nbSample, "\n", sep="")) +# print(info) +# cat("\n")