Mercurial > repos > mora-lab > gsar
comparison GSAR.R @ 1:8ff053661ae2 draft default tip
Uploaded
author | mora-lab |
---|---|
date | Thu, 20 May 2021 08:22:56 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:f0cad4d3a301 | 1:8ff053661ae2 |
---|---|
1 ############################################################################### | |
2 # title: Gene set analysis in R | |
3 # author: Xiaowei | |
4 # time: Mar.31 2021 | |
5 ############################################################################### | |
6 #================================================================= | |
7 #how to pass parameters | |
8 #================================================================= | |
9 spec <- matrix(c("expr_file",'E', 1, 'character', 'Gene expression data which is an CSV file of expression values where rows correspond to genes and columns correspond to samples.', | |
10 "geneSet_file", 'G', 1, 'character', 'Gene set', | |
11 "design_file", 'D', 1, 'character', 'Design for the samples of expression data', | |
12 "min_size", 'I',1, 'numeric','a numeric value indicating the minimum allowed gene set size. Default value is 10.', | |
13 "max_size", 'A',1, 'numeric','a numeric value indicating the maximum allowed gene set size. Default value is 500.', | |
14 "test_method", 'T',1, 'character', "a character parameter indicating which statistical method to use for testing the gene sets. Must be one of 'GSNCAtest', 'WWtest', 'KStest', 'MDtest', 'RKStest', 'RMDtest'.", | |
15 "nperm_number", 'N', 1, 'numeric',"number of permutations used to estimate the null distribution of the test statistic. If not given, a default value 1000 is used.", | |
16 "cor_method", 'M', 1, 'character',"a character string indicating which correlation coefficient is to be computed. Possible values are 'pearson' (default), 'spearman' and 'kendall'. Default value is 'pearson'.", | |
17 "threshold_value", "V", 1, 'numeric', 'Threshold value for setting significant geneSet.', | |
18 "GSAR_output_p_value", 'R', 1, 'character',"P-value table", | |
19 "GSAR_output_plot", 'P', 1, 'character',"Plot genes relationships of significant pathway" | |
20 | |
21 ), | |
22 byrow = TRUE, ncol = 5) | |
23 | |
24 | |
25 if (!requireNamespace("getopt", quietly = TRUE)) | |
26 install.packages("getopt") | |
27 | |
28 opt <- getopt::getopt(spec) | |
29 | |
30 #---------------- | |
31 #整理参数 | |
32 #---------------- | |
33 # expr_file | |
34 # geneSet_file | |
35 # design_file | |
36 if (is.null(opt$min_size)){min_size = 10}else{min_size = opt$min_size} | |
37 if (is.null(opt$max_size)){max_size = 500}else{max_size = opt$max_size} | |
38 if (is.null(opt$test_method)){test_method = "GSNCAtest"}else{test_method = opt$test_method} | |
39 if (is.null(opt$nperm_number)){nperm_number = 1000}else{nperm_number = opt$nperm_number} | |
40 if (is.null(opt$cor_method)){cor_method = "pearson"}else{cor_method = opt$cor_method} | |
41 if (is.null(opt$threshold_value)){threshold_value = 0.05}else{threshold_value = opt$threshold_value} | |
42 | |
43 | |
44 #================================================================ | |
45 #run codes | |
46 #================================================================ | |
47 | |
48 #--- load package ------------------ | |
49 | |
50 suppressPackageStartupMessages(library(GSAR)) | |
51 suppressPackageStartupMessages(library(GSEABase)) | |
52 options(stringsAsFactors = FALSE) | |
53 #---input -------------------------- | |
54 # expr | |
55 data <- as.matrix(read.csv(opt$expr_file, row.names = 1)) | |
56 | |
57 # design | |
58 design <- read.csv(opt$design_file, row.names = 1) | |
59 group <- design$group | |
60 label <- design$label | |
61 | |
62 # geneSet | |
63 load(opt$geneSet_file) | |
64 geneSetlist <- lapply(geneSet, geneIds) | |
65 names(geneSetlist) <- names(geneSet) | |
66 | |
67 #-------GSAR ------------------------- | |
68 # test.method <- c("GSNCAtest", "WWtest", "KStest", "MDtest", "RKStest", "RMDtest") | |
69 # “GSNCAtest”, “WWtest”, “KStest”, “MDtest”, “RKStest”, “RMDtest” | |
70 results <- TestGeneSets(object=data, group=group, | |
71 geneSets=geneSetlist, min.size=min_size, | |
72 max.size=max_size, test=test_method, | |
73 nperm = nperm_number) | |
74 | |
75 | |
76 | |
77 #================================================================ | |
78 #output | |
79 #================================================================ | |
80 | |
81 | |
82 # output p-value---------------------------------------------------- | |
83 resutlts1 <- as.data.frame(t(as.data.frame(results))) | |
84 colnames(resutlts1) = "P_value" | |
85 resutlts1$geneSet <- rownames(resutlts1) | |
86 resutlts1 <- resutlts1[,c("geneSet", "P_value")] | |
87 resutlts1 <- resutlts1[order(resutlts1$P_value),] | |
88 write.csv(resutlts1, file = opt$GSAR_output_p_value, row.names = FALSE) | |
89 #------------------------------------------------------------------- | |
90 | |
91 | |
92 | |
93 #plot ------------------------------------------------------------- | |
94 sig.paths <- names(results[results <= threshold_value]) | |
95 | |
96 | |
97 group1 <- unique(design[design$group == 1, "label"]) | |
98 group2 <- unique(design[design$group == 2, "label"]) | |
99 allgenes <- rownames(data) | |
100 | |
101 pdf(file = opt$GSAR_output_plot, width = 10.92) | |
102 if (length(sig.paths) > 0){ | |
103 for (sig.path in sig.paths){ | |
104 path.index <- allgenes %in% geneSetlist[[sig.path]] | |
105 ## Plot MST2 for a pathway in two conditions | |
106 plotMST2.pathway(object=data[path.index,], | |
107 group=group, name=sig.path, | |
108 legend.size=1.2, #leg.x=-1.2, leg.y=2, | |
109 label.size=1, label.dist=0.8, cor.method=cor_method, group1.name = group1, group2.name = group2) | |
110 | |
111 rm(sig.path, path.index) | |
112 } | |
113 | |
114 } | |
115 dev.off() | |
116 | |
117 |