Mercurial > repos > jasonxu > matrixeqtl
diff MatrixEQTL/demo/sample.cis.r @ 3:ae74f8fb3aef draft
Uploaded
author | jasonxu |
---|---|
date | Fri, 12 Mar 2021 08:20:57 +0000 |
parents | cd4c8e4a4b5b |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MatrixEQTL/demo/sample.cis.r Fri Mar 12 08:20:57 2021 +0000 @@ -0,0 +1,112 @@ +# Matrix eQTL by Andrey A. Shabalin +# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/ +# +# Be sure to use an up to date version of R and Matrix eQTL. + +# source("Matrix_eQTL_R/Matrix_eQTL_engine.r"); +library(MatrixEQTL) + +## Location of the package with the data files. +base.dir = find.package('MatrixEQTL'); +# base.dir = '.'; + +## Settings + +# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS +useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS + +# Genotype file name +SNP_file_name = paste(base.dir, "/data/SNP.txt", sep=""); +snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep=""); + +# Gene expression file name +expression_file_name = paste(base.dir, "/data/GE.txt", sep=""); +gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep=""); + +# Covariates file name +# Set to character() for no covariates +covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep=""); + +# Output file name +output_file_name_cis = tempfile(); +output_file_name_tra = tempfile(); + +# Only associations significant at this level will be saved +pvOutputThreshold_cis = 2e-2; +pvOutputThreshold_tra = 1e-2; + +# Error covariance matrix +# Set to numeric() for identity. +errorCovariance = numeric(); +# errorCovariance = read.table("Sample_Data/errorCovariance.txt"); + +# Distance for local gene-SNP pairs +cisDist = 1e6; + +## Load genotype data + +snps = SlicedData$new(); +snps$fileDelimiter = "\t"; # the TAB character +snps$fileOmitCharacters = "NA"; # denote missing values; +snps$fileSkipRows = 1; # one row of column labels +snps$fileSkipColumns = 1; # one column of row labels +snps$fileSliceSize = 2000; # read file in slices of 2,000 rows +snps$LoadFile(SNP_file_name); + +## Load gene expression data + +gene = SlicedData$new(); +gene$fileDelimiter = "\t"; # the TAB character +gene$fileOmitCharacters = "NA"; # denote missing values; +gene$fileSkipRows = 1; # one row of column labels +gene$fileSkipColumns = 1; # one column of row labels +gene$fileSliceSize = 2000; # read file in slices of 2,000 rows +gene$LoadFile(expression_file_name); + +## Load covariates + +cvrt = SlicedData$new(); +cvrt$fileDelimiter = "\t"; # the TAB character +cvrt$fileOmitCharacters = "NA"; # denote missing values; +cvrt$fileSkipRows = 1; # one row of column labels +cvrt$fileSkipColumns = 1; # one column of row labels +if(length(covariates_file_name)>0) { + cvrt$LoadFile(covariates_file_name); +} + +## Run the analysis +snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE); +genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE); + +me = Matrix_eQTL_main( + snps = snps, + gene = gene, + cvrt = cvrt, + output_file_name = output_file_name_tra, + pvOutputThreshold = pvOutputThreshold_tra, + useModel = useModel, + errorCovariance = errorCovariance, + verbose = TRUE, + output_file_name.cis = output_file_name_cis, + pvOutputThreshold.cis = pvOutputThreshold_cis, + snpspos = snpspos, + genepos = genepos, + cisDist = cisDist, + pvalue.hist = TRUE, + min.pv.by.genesnp = TRUE, + noFDRsaveMemory = FALSE); + +unlink(output_file_name_tra); +unlink(output_file_name_cis); + +## Results: + +cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n'); +cat('Detected local eQTLs:', '\n'); +show(me$cis$eqtls) +cat('Detected distant eQTLs:', '\n'); +show(me$trans$eqtls) + +## Plot the histogram of local and distant p-values + +plot(me)