Mercurial > repos > jasonxu > matrixeqtl
view MatrixEQTL/MatrixEQTL/demo/sample.cis.r @ 1:2d1118ddc31d draft
Uploaded
author | jasonxu |
---|---|
date | Fri, 12 Mar 2021 08:14:04 +0000 |
parents | |
children |
line wrap: on
line source
# Matrix eQTL by Andrey A. Shabalin # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/ # # Be sure to use an up to date version of R and Matrix eQTL. # source("Matrix_eQTL_R/Matrix_eQTL_engine.r"); library(MatrixEQTL) ## Location of the package with the data files. base.dir = find.package('MatrixEQTL'); # base.dir = '.'; ## Settings # Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS # Genotype file name SNP_file_name = paste(base.dir, "/data/SNP.txt", sep=""); snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep=""); # Gene expression file name expression_file_name = paste(base.dir, "/data/GE.txt", sep=""); gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep=""); # Covariates file name # Set to character() for no covariates covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep=""); # Output file name output_file_name_cis = tempfile(); output_file_name_tra = tempfile(); # Only associations significant at this level will be saved pvOutputThreshold_cis = 2e-2; pvOutputThreshold_tra = 1e-2; # Error covariance matrix # Set to numeric() for identity. errorCovariance = numeric(); # errorCovariance = read.table("Sample_Data/errorCovariance.txt"); # Distance for local gene-SNP pairs cisDist = 1e6; ## Load genotype data snps = SlicedData$new(); snps$fileDelimiter = "\t"; # the TAB character snps$fileOmitCharacters = "NA"; # denote missing values; snps$fileSkipRows = 1; # one row of column labels snps$fileSkipColumns = 1; # one column of row labels snps$fileSliceSize = 2000; # read file in slices of 2,000 rows snps$LoadFile(SNP_file_name); ## Load gene expression data gene = SlicedData$new(); gene$fileDelimiter = "\t"; # the TAB character gene$fileOmitCharacters = "NA"; # denote missing values; gene$fileSkipRows = 1; # one row of column labels gene$fileSkipColumns = 1; # one column of row labels gene$fileSliceSize = 2000; # read file in slices of 2,000 rows gene$LoadFile(expression_file_name); ## Load covariates cvrt = SlicedData$new(); cvrt$fileDelimiter = "\t"; # the TAB character cvrt$fileOmitCharacters = "NA"; # denote missing values; cvrt$fileSkipRows = 1; # one row of column labels cvrt$fileSkipColumns = 1; # one column of row labels if(length(covariates_file_name)>0) { cvrt$LoadFile(covariates_file_name); } ## Run the analysis snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE); genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE); me = Matrix_eQTL_main( snps = snps, gene = gene, cvrt = cvrt, output_file_name = output_file_name_tra, pvOutputThreshold = pvOutputThreshold_tra, useModel = useModel, errorCovariance = errorCovariance, verbose = TRUE, output_file_name.cis = output_file_name_cis, pvOutputThreshold.cis = pvOutputThreshold_cis, snpspos = snpspos, genepos = genepos, cisDist = cisDist, pvalue.hist = TRUE, min.pv.by.genesnp = TRUE, noFDRsaveMemory = FALSE); unlink(output_file_name_tra); unlink(output_file_name_cis); ## Results: cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n'); cat('Detected local eQTLs:', '\n'); show(me$cis$eqtls) cat('Detected distant eQTLs:', '\n'); show(me$trans$eqtls) ## Plot the histogram of local and distant p-values plot(me)