comparison MatrixEQTL/demo/sample.cis.r @ 3:ae74f8fb3aef draft

Uploaded
author jasonxu
date Fri, 12 Mar 2021 08:20:57 +0000
parents cd4c8e4a4b5b
children
comparison
equal deleted inserted replaced
2:5fafba5359eb 3:ae74f8fb3aef
1 # Matrix eQTL by Andrey A. Shabalin
2 # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
3 #
4 # Be sure to use an up to date version of R and Matrix eQTL.
5
6 # source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
7 library(MatrixEQTL)
8
9 ## Location of the package with the data files.
10 base.dir = find.package('MatrixEQTL');
11 # base.dir = '.';
12
13 ## Settings
14
15 # Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
16 useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
17
18 # Genotype file name
19 SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
20 snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep="");
21
22 # Gene expression file name
23 expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
24 gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep="");
25
26 # Covariates file name
27 # Set to character() for no covariates
28 covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
29
30 # Output file name
31 output_file_name_cis = tempfile();
32 output_file_name_tra = tempfile();
33
34 # Only associations significant at this level will be saved
35 pvOutputThreshold_cis = 2e-2;
36 pvOutputThreshold_tra = 1e-2;
37
38 # Error covariance matrix
39 # Set to numeric() for identity.
40 errorCovariance = numeric();
41 # errorCovariance = read.table("Sample_Data/errorCovariance.txt");
42
43 # Distance for local gene-SNP pairs
44 cisDist = 1e6;
45
46 ## Load genotype data
47
48 snps = SlicedData$new();
49 snps$fileDelimiter = "\t"; # the TAB character
50 snps$fileOmitCharacters = "NA"; # denote missing values;
51 snps$fileSkipRows = 1; # one row of column labels
52 snps$fileSkipColumns = 1; # one column of row labels
53 snps$fileSliceSize = 2000; # read file in slices of 2,000 rows
54 snps$LoadFile(SNP_file_name);
55
56 ## Load gene expression data
57
58 gene = SlicedData$new();
59 gene$fileDelimiter = "\t"; # the TAB character
60 gene$fileOmitCharacters = "NA"; # denote missing values;
61 gene$fileSkipRows = 1; # one row of column labels
62 gene$fileSkipColumns = 1; # one column of row labels
63 gene$fileSliceSize = 2000; # read file in slices of 2,000 rows
64 gene$LoadFile(expression_file_name);
65
66 ## Load covariates
67
68 cvrt = SlicedData$new();
69 cvrt$fileDelimiter = "\t"; # the TAB character
70 cvrt$fileOmitCharacters = "NA"; # denote missing values;
71 cvrt$fileSkipRows = 1; # one row of column labels
72 cvrt$fileSkipColumns = 1; # one column of row labels
73 if(length(covariates_file_name)>0) {
74 cvrt$LoadFile(covariates_file_name);
75 }
76
77 ## Run the analysis
78 snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
79 genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
80
81 me = Matrix_eQTL_main(
82 snps = snps,
83 gene = gene,
84 cvrt = cvrt,
85 output_file_name = output_file_name_tra,
86 pvOutputThreshold = pvOutputThreshold_tra,
87 useModel = useModel,
88 errorCovariance = errorCovariance,
89 verbose = TRUE,
90 output_file_name.cis = output_file_name_cis,
91 pvOutputThreshold.cis = pvOutputThreshold_cis,
92 snpspos = snpspos,
93 genepos = genepos,
94 cisDist = cisDist,
95 pvalue.hist = TRUE,
96 min.pv.by.genesnp = TRUE,
97 noFDRsaveMemory = FALSE);
98
99 unlink(output_file_name_tra);
100 unlink(output_file_name_cis);
101
102 ## Results:
103
104 cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
105 cat('Detected local eQTLs:', '\n');
106 show(me$cis$eqtls)
107 cat('Detected distant eQTLs:', '\n');
108 show(me$trans$eqtls)
109
110 ## Plot the histogram of local and distant p-values
111
112 plot(me)