0
|
1 \name{MatrixEQTL_cis_code}
|
|
2 \alias{MatrixEQTL_cis_code}
|
|
3
|
|
4 \title{Sample code for cis/trans-eQTL analysis with Matrix eQTL}
|
|
5 \description{
|
|
6 The following code is the best starting point for those who want to perform cis-/trans-eQTL analysis with Matrix eQTL.
|
|
7 }
|
|
8 \references{
|
|
9 The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
|
|
10 }
|
|
11 \seealso{
|
|
12 See \code{\link{Matrix_eQTL_engine}} for reference and other sample code.
|
|
13 }
|
|
14 \author{
|
|
15 Andrey Shabalin \email{ashabalin@vcu.edu}
|
|
16 }
|
|
17 \examples{
|
|
18 # Matrix eQTL by Andrey A. Shabalin
|
|
19 # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
|
|
20 #
|
|
21 # Be sure to use an up to date version of R and Matrix eQTL.
|
|
22
|
|
23 # source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
|
|
24 library(MatrixEQTL)
|
|
25
|
|
26 ## Location of the package with the data files.
|
|
27 base.dir = find.package('MatrixEQTL');
|
|
28
|
|
29 ## Settings
|
|
30
|
|
31 # Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
|
|
32 useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
|
|
33
|
|
34 # Genotype file name
|
|
35 SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
|
|
36 snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep="");
|
|
37
|
|
38 # Gene expression file name
|
|
39 expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
|
|
40 gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep="");
|
|
41
|
|
42 # Covariates file name
|
|
43 # Set to character() for no covariates
|
|
44 covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
|
|
45
|
|
46 # Output file name
|
|
47 output_file_name_cis = tempfile();
|
|
48 output_file_name_tra = tempfile();
|
|
49
|
|
50 # Only associations significant at this level will be saved
|
|
51 pvOutputThreshold_cis = 2e-2;
|
|
52 pvOutputThreshold_tra = 1e-2;
|
|
53
|
|
54 # Error covariance matrix
|
|
55 # Set to numeric() for identity.
|
|
56 errorCovariance = numeric();
|
|
57 # errorCovariance = read.table("Sample_Data/errorCovariance.txt");
|
|
58
|
|
59 # Distance for local gene-SNP pairs
|
|
60 cisDist = 1e6;
|
|
61
|
|
62 ## Load genotype data
|
|
63
|
|
64 snps = SlicedData$new();
|
|
65 snps$fileDelimiter = "\t"; # the TAB character
|
|
66 snps$fileOmitCharacters = "NA"; # denote missing values;
|
|
67 snps$fileSkipRows = 1; # one row of column labels
|
|
68 snps$fileSkipColumns = 1; # one column of row labels
|
|
69 snps$fileSliceSize = 2000; # read file in slices of 2,000 rows
|
|
70 snps$LoadFile(SNP_file_name);
|
|
71
|
|
72 ## Load gene expression data
|
|
73
|
|
74 gene = SlicedData$new();
|
|
75 gene$fileDelimiter = "\t"; # the TAB character
|
|
76 gene$fileOmitCharacters = "NA"; # denote missing values;
|
|
77 gene$fileSkipRows = 1; # one row of column labels
|
|
78 gene$fileSkipColumns = 1; # one column of row labels
|
|
79 gene$fileSliceSize = 2000; # read file in slices of 2,000 rows
|
|
80 gene$LoadFile(expression_file_name);
|
|
81
|
|
82 ## Load covariates
|
|
83
|
|
84 cvrt = SlicedData$new();
|
|
85 cvrt$fileDelimiter = "\t"; # the TAB character
|
|
86 cvrt$fileOmitCharacters = "NA"; # denote missing values;
|
|
87 cvrt$fileSkipRows = 1; # one row of column labels
|
|
88 cvrt$fileSkipColumns = 1; # one column of row labels
|
|
89 if(length(covariates_file_name)>0) {
|
|
90 cvrt$LoadFile(covariates_file_name);
|
|
91 }
|
|
92
|
|
93 ## Run the analysis
|
|
94 snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
|
|
95 genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
|
|
96
|
|
97 me = Matrix_eQTL_main(
|
|
98 snps = snps,
|
|
99 gene = gene,
|
|
100 cvrt = cvrt,
|
|
101 output_file_name = output_file_name_tra,
|
|
102 pvOutputThreshold = pvOutputThreshold_tra,
|
|
103 useModel = useModel,
|
|
104 errorCovariance = errorCovariance,
|
|
105 verbose = TRUE,
|
|
106 output_file_name.cis = output_file_name_cis,
|
|
107 pvOutputThreshold.cis = pvOutputThreshold_cis,
|
|
108 snpspos = snpspos,
|
|
109 genepos = genepos,
|
|
110 cisDist = cisDist,
|
|
111 pvalue.hist = TRUE,
|
|
112 min.pv.by.genesnp = FALSE,
|
|
113 noFDRsaveMemory = FALSE);
|
|
114
|
|
115 unlink(output_file_name_tra);
|
|
116 unlink(output_file_name_cis);
|
|
117
|
|
118 ## Results:
|
|
119
|
|
120 cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
|
|
121 cat('Detected local eQTLs:', '\n');
|
|
122 show(me$cis$eqtls)
|
|
123 cat('Detected distant eQTLs:', '\n');
|
|
124 show(me$trans$eqtls)
|
|
125
|
|
126 ## Make the histogram of local and distant p-values
|
|
127
|
|
128 plot(me)
|
|
129 }
|