0
|
1 # Matrix eQTL by Andrey A. Shabalin
|
|
2 # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
|
|
3 #
|
|
4 # Be sure to use an up to date version of R and Matrix eQTL.
|
|
5
|
|
6 # source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
|
|
7 library(MatrixEQTL)
|
|
8
|
|
9 ## Location of the package with the data files.
|
|
10 base.dir = find.package('MatrixEQTL');
|
|
11 # base.dir = '.';
|
|
12
|
|
13 ## Settings
|
|
14
|
|
15 # Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
|
|
16 useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
|
|
17
|
|
18 # Genotype file name
|
|
19 SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
|
|
20
|
|
21 # Gene expression file name
|
|
22 expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
|
|
23
|
|
24 # Covariates file name
|
|
25 # Set to character() for no covariates
|
|
26 covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
|
|
27
|
|
28 # Output file name
|
|
29 output_file_name = tempfile();
|
|
30
|
|
31 # Only associations significant at this level will be saved
|
|
32 pvOutputThreshold = 1e-2;
|
|
33
|
|
34 # Error covariance matrix
|
|
35 # Set to numeric() for identity.
|
|
36 errorCovariance = numeric();
|
|
37 # errorCovariance = read.table("Sample_Data/errorCovariance.txt");
|
|
38
|
|
39
|
|
40 ## Load genotype data
|
|
41
|
|
42 snps = SlicedData$new();
|
|
43 snps$fileDelimiter = "\t"; # the TAB character
|
|
44 snps$fileOmitCharacters = "NA"; # denote missing values;
|
|
45 snps$fileSkipRows = 1; # one row of column labels
|
|
46 snps$fileSkipColumns = 1; # one column of row labels
|
|
47 snps$fileSliceSize = 2000; # read file in slices of 2,000 rows
|
|
48 snps$LoadFile(SNP_file_name);
|
|
49
|
|
50 ## Load gene expression data
|
|
51
|
|
52 gene = SlicedData$new();
|
|
53 gene$fileDelimiter = "\t"; # the TAB character
|
|
54 gene$fileOmitCharacters = "NA"; # denote missing values;
|
|
55 gene$fileSkipRows = 1; # one row of column labels
|
|
56 gene$fileSkipColumns = 1; # one column of row labels
|
|
57 gene$fileSliceSize = 2000; # read file in slices of 2,000 rows
|
|
58 gene$LoadFile(expression_file_name);
|
|
59
|
|
60 ## Load covariates
|
|
61
|
|
62 cvrt = SlicedData$new();
|
|
63 cvrt$fileDelimiter = "\t"; # the TAB character
|
|
64 cvrt$fileOmitCharacters = "NA"; # denote missing values;
|
|
65 cvrt$fileSkipRows = 1; # one row of column labels
|
|
66 cvrt$fileSkipColumns = 1; # one column of row labels
|
|
67 if(length(covariates_file_name)>0) {
|
|
68 cvrt$LoadFile(covariates_file_name);
|
|
69 }
|
|
70
|
|
71 ## Run the analysis
|
|
72
|
|
73 me = Matrix_eQTL_engine(
|
|
74 snps = snps,
|
|
75 gene = gene,
|
|
76 cvrt = cvrt,
|
|
77 output_file_name = output_file_name,
|
|
78 pvOutputThreshold = pvOutputThreshold,
|
|
79 useModel = useModel,
|
|
80 errorCovariance = errorCovariance,
|
|
81 verbose = TRUE,
|
|
82 pvalue.hist = TRUE,
|
|
83 min.pv.by.genesnp = FALSE,
|
|
84 noFDRsaveMemory = FALSE);
|
|
85
|
|
86 unlink(output_file_name);
|
|
87
|
|
88 ## Results:
|
|
89
|
|
90 cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
|
|
91 cat('Detected eQTLs:', '\n');
|
|
92 show(me$all$eqtls)
|
|
93
|
|
94 ## Plot the histogram of all p-values
|
|
95
|
|
96 plot(me)
|