view qualityControl.R @ 67:99e8e055ddd6 draft

Deleted selected files
author nicolas
date Wed, 26 Oct 2016 19:15:52 -0400
parents f818e787d0c0
children
line wrap: on
line source

########################################################
#
# creation date : 09/08/16
# last modification : 16/08/16
# author : Dr Nicolas Beaume
# owner : IRRI
#
########################################################

#######################################################
dataStats.nbNA <- function(x) {
  return(length(x[is.na(x)]))
}
dataStats.matrixSize <- function(data) {
  nbMarker <- nrow(data)
  nbSample <- ncol(data)
  return(list(nbMarker=nbMarker, nbSample=nbSample))
}

parseFrq <- function(freqFile) {
  data <- read.table(freqFile, h=T)
  return(data.frame(chr=data$CHR, pos=data$SNP, maf=data$MAF))
}

parseHWE <- function(hweFile) {
  data <- read.table(hweFile, h=T)
  return(data.frame(chr=data$CHR, pos=data$SNP, pvalue=data$P))
}

########################## main function ##################
createReport <- function(genoFile, freqFile, hweFile, out="report.txt") {
  # get basic statistics (nb markers, nb samples, nb NA)
  data <- read.table(genoFile, sep="\t", h=T)
  missingData <- dataStats.nbNA(data)/length(data)
  dataDimension <- dataStats.matrixSize(data)
  # get MAF info
  freq <- parseFrq(freqFile)
  # get Hardy-Weinberg equilibrium
  hwe <- parseHWE(hweFile)
  # merge info
  info <- data.frame(freq, hwe=hwe[,3])
  # write report
  write(paste("number of marker :", dataDimension$nbMarker), file=out)
  write(paste("number of sample :", dataDimension$nbSample), file=out, append = T)
  write(paste("ratio of missing data :", missingData), file=out, append = T)
  suppressWarnings(suppressMessages(write.table(info, file=out, append = T)))
}
############################ main ##########################
cmd <- commandArgs(T)
source(cmd[1])
createReport(genotype, plinkFreq, plinkHWE, out)
# # get basic statistics (nb markers, nb samples, nb NA)
# data <- read.table(genotype, sep="\t", h=T)
# dataDimension <- dataStats.matrixSize(data)
# # get MAF info
# freq <- parseFrq(freqFile)
# # get Hardy-Weinberg equilibrium
# hwe <- parseHWE(hweFile)
# # merge info
# info <- data.frame(freq, hwe=hwe[,3])
# # write report
# cat(paste("number of marker : ", dataDimension$nbMarker, "\n", sep=""))
# cat(paste("number of sample : ", dataDimension$nbSample, "\n", sep=""))
# print(info)
# cat("\n")