annotate GSVA.R @ 1:acd8a43b0973 draft

GSVA.R --2021.4.01
author xiaowei
date Thu, 01 Apr 2021 10:19:31 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
1 #=================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
2 #how to pass parameters
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
3 #=================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
4 spec <- matrix(c("expr", 'E', 1, 'character', 'Gene expression data which is an CSV file of expression values where rows correspond to genes and columns correspond to samples.',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
5 "geneSet", 'G', 1, 'character', 'Gene set',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
6 'gene_Identifier_class','C', 1, 'character', 'Gene Identifier class of GeneSet',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
7 'method', "M", 1,'character', 'One of gsva, ssgsea, zscore, plage',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
8 'img_file', 'I', 1,'character', 'img_file',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
9 'img_type', 'T', 1,'character', 'PDF, PNG, JPEG',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
10 'img_width', 'W', 1, 'integer', 'the img file width',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
11 'img_height', 'H', 1, 'integer', 'the img file height',
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
12 'GSVA_result', 'R', 1, 'character', 'Result of GSVA, an CSV file.'
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
13
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
14 ),
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
15 byrow = TRUE, ncol = 5)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
16
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
17
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
18 if (!requireNamespace("getopt", quietly = TRUE))
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
19 install.packages("getopt")
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
20
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
21 opt <- getopt::getopt(spec)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
22
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
23 #----------------
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
24 #整理参数
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
25 #----------------
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
26
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
27 if(is.null(opt$gene_Identifier_class)){gene_Identifier_class = 'Symbol'}else{gene_Identifier_class = opt$gene_Identifier_class }
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
28 if(is.null(opt$method)){opt$method = 'gsva'}
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
29 if(is.null(opt$img_type)){opt$img_type = 'PNG'}
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
30 if(is.null(opt$img_width)){img_width = 900}else{img_width = opt$img_width}
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
31 if(is.null(opt$img_height)){img_height = 900}else{img_height = opt$img_height}
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
32
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
33 #================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
34 #run codes
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
35 #================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
36 gsva_input_data <- read.csv(opt$expr, row.names = 1)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
37
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
38 # if (gene_Identifier_class == 'Symbol'){
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
39 # geneset <- GSEABase::getGmt(opt$geneSet,
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
40 # geneIdType = GSEABase::SymbolIdentifier())
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
41 # }else{
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
42 # geneset <- GSEABase::getGmt(opt$geneSet,
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
43 # geneIdType = GSEABase::EntrezIdentifier())
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
44 # }
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
45
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
46 load(opt$geneSet)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
47
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
48 result <- GSVA::gsva(as.matrix(gsva_input_data), geneSet, mx.diff=FALSE,
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
49 verbose=FALSE, parallel.sz=2, method = opt$method)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
50 #================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
51 #output
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
52 #================================================================
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
53
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
54 write.csv(result, file = opt$GSVA_result)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
55
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
56 if (opt$img_type == 'PNG'){
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
57 png(filename = opt$img_file, width = img_width, height = img_height)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
58 }else if(opt$img_type == 'JPG'){
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
59 jpeg(filename = opt$img_file, width = img_width, height = img_height)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
60 }else{
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
61 pdf(file = opt$img_file, width = img_width, height = img_height)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
62 }
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
63
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
64 pheatmap::pheatmap(result, scale = "row", main = "heatmap", show_colnames=T)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
65 dev.off()
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
66
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
67
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
68
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
69
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
70
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
71
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
72
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
73
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
74
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
75
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
76
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
77
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
78
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
79
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
80
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
81
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
82
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
83
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
84
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
85
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
86
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
87
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
88 #参考:
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
89 #https://nbviewer.jupyter.org/github/mora-lab/benchmarks/blob/master/single-sample/workflows/Tutorial%20of%20GSVA%20using%20data%20GSE10245.ipynb
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
90
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
91 #<requirement type="package" version="1.0.12">r-pheatmap</requirement>
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
92
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
93 # library(GSVA)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
94 #
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
95 # p <- 20000 ## number of genes
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
96 # n <- 30 ## number of samples
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
97 # nGS <- 100 ## number of gene sets
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
98 # min.sz <- 10 ## minimum gene set size
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
99 # max.sz <- 100 ## maximum gene set size
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
100 # X <- matrix(rnorm(p*n), nrow=p, dimnames=list(1:p, 1:n))
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
101 # dim(X)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
102 # gs <- as.list(sample(min.sz:max.sz, size=nGS, replace=TRUE)) ## sample gene set sizes
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
103 # gs <- lapply(gs, function(n, p) sample(1:p, size=n, replace=FALSE), p) ## sample gene sets
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
104 # es.max <- gsva(X, gs, mx.diff=FALSE, verbose=FALSE, parallel.sz=1)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
105 # es.dif <- gsva(X, gs, mx.diff=TRUE, verbose=FALSE, parallel.sz=1)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
106
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
107 #boss说只需要输出行为pathway列为sample的矩阵和其heatmap就可以了
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
108
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
109 #
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
110 # gsva(expr, #基因表达数据,可以是SummarizedExperiment或ExpressionSet对象,也可以是行为基因列为样本的矩阵
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
111 # gset.idx.list, #基因集,list或GeneSetCollection 对象
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
112 # annotation, #基因表达数据为SummarizedExperiment对象时,用来挑选矩阵中的分子数据; 当基因表达数据为matrix,基因集为GeneSetCollection对象时,annotation参数用来注释矩阵的注释信息。 当表达数据是ExpressionSet时,该参数忽略。
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
113 # method=c("gsva", "ssgsea", "zscore", "plage"), #用于估计每个样本的基因集富集分数的方法
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
114 # kcdf=c("Gaussian", "Poisson", "none"), #表示样本间表达式水平累积分布函数非参数估计时使用的内核字符串。当基因表达值是连续时(对数),使用"Gaussian", 当基因表达值是整数计数时(非对数),使用"Poisson"
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
115 # abs.ranking=FALSE, #当mx.diff=TRUE时才会用到该参数。 abs.ranking=FALSE(默认值),使用修正的Kuiper统计量来计算富集分数,取最大的正随机游走偏差和负随机游走偏差之间的量级差。 当abs.ranking=TRUE时,使用最大的正的和负的随机游走偏差之和的原始Kuiper统计量。
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
116 # min.sz=1, #gene set的最小基因数
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
117 # max.sz=Inf, #gene set的最大基因数
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
118 # parallel.sz=1L, #并行计算时要使用的执行线程数。 参数BPPARAM允许设置并行后端并优化其配置。
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
119 # mx.diff=TRUE, #提出从KS随机游走统计量计算富集统计量的两种方法。 mx.diff=FALSE, 计算为随机游走到0的最大距离。 TRUE(默认)时,计算最大的正的和负的随机游走偏差之间的幅度差
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
120 # tau=switch(method, gsva=1, ssgsea=0.25, NA), #定义由gsva和ssgsea方法执行的随机游走中尾部权重的指数。
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
121 # ssgsea.norm=TRUE, #当为TRUE时,运行ssgsea时,通过最小值和最大值之间的绝对差对分数进行标准化。否则忽略这步标准化
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
122 # verbose=TRUE, 是否打印输出每一步计算,默认是false
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
123 # BPPARAM=SerialParam(progressbar=verbose)) #BiocParallelParam类的一个对象,指定与此函数内的一些任务和计算的并行执行相关的参数。
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
124
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
125
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
126 ###########################
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
127 #Input
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
128 ###########################
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
129 #表达数据
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
130 #gene set, kegg的那些
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
131 #method "gsva", "ssgsea", "zscore", "plage" 之一
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
132
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
133 ######################################################
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
134 #boss要求输出-- 矩阵数据,热图, limma做的通路差异表达
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
135 ######################################################
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
136
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
137
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
138 #要求第一列是基因名称,第二列开始是样本
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
139 # gsva_input_data <- read.csv("GSVA_input.csv", row.names = 1)
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
140 #
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
141 # geneset <- GSEABase::getGmt("example_pathway.gmt",
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
142 # geneIdType = SymbolIdentifier())
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
143 #
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
144 # result <- gsva(as.matrix(gsva_input_data), geneset, mx.diff=FALSE,
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
145 # verbose=FALSE, parallel.sz=2, method = "plage")
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
146 #
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
147 # write.csv(result, file = "GSVA_result.csv")
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
148
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
149
acd8a43b0973 GSVA.R --2021.4.01
xiaowei
parents:
diff changeset
150