annotate SPIA.R @ 1:401181d40d7a draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:52:23 +0000
parents 42c80b0324fc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
1 ###############################################################################
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
2 # title: SPIA
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
3 # author: Xiaowei
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
4 # time: Mar.31 2021
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
5 ###############################################################################
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
6
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
7 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
8 #input arguments
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
9 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
10 #spiadata An CSV file included columns ENTREZ, logFC, and adj.P.Val
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
11 #pvalue Set a threshold value to define different expression genes
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
12 #organism A three letter character designating the organism. See a full list at ftp://ftp.genome.jp/pub/kegg/xml/organisms
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
13 #nB Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000.
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
14 #plot If set to TRUE, the function plots the gene perturbation accumulation vs log2 fold change for every gene on each pathway. The null distribution of the total net accumulations from which PPERT is computed, is plotted as well. The figures are sent to the SPIAPerturbationPlots.pdf file in the current directory.
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
15 #combine Method used to combine the two types of p-values. If set to "fisher" it will use Fisher's method. If set to "norminv" it will use the normal inversion method.
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
16 #pathid A character vector with the names of the pathways to be analyzed. If left NULL all pathways available will be tested.
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
17 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
18 #output arguments
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
19 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
20 #result_of_SPIA CSV file of SPIA
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
21 #perturbation_Plot if plot argument set TRUE, the gene perturbation accumulation vs log2 fold change for every gene on each pathway
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
22
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
23 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
24 #how to pass parameters
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
25 #=================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
26 spec <- matrix(c("spiadata", "D", 1, "character", "An CSV file included columns ENTREZ, logFC, and adj.P.Val",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
27 "pvalue", "P", 1, "numeric", "Set a threshold value to define different expression genes",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
28 "organism", "O", 1, "character", "A three letter character designating the organism. See a full list at ftp://ftp.genome.jp/pub/kegg/xml/organisms",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
29 "nB", "N", 1, "integer", "Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000.",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
30 "combine", "C", 1, "character", "Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
31 "pathid", "I", 1, "character", "A character vector with the names of the pathways to be analyzed. If left NULL all pathways available will be tested.",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
32 "result_of_SPIA", "R", 1, "character", "CSV file of SPIA",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
33 "plot", "W", 0, "logical", "if plot argument set TRUE, the gene perturbation accumulation vs log2 fold change for every gene on each pathway",
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
34 "perturbation_Plot", "L", 1, "character", "An pdf file for the gene perturbation accumulation vs log2 fold change for every gene on each pathway "),
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
35 byrow = TRUE, ncol = 5)
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
36
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
37
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
38 if (!requireNamespace("getopt", quietly = TRUE))
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
39 install.packages("getopt")
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
40
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
41 opt <- getopt::getopt(spec)
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
42
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
43 #----------------
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
44 #整理参数
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
45 #----------------
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
46
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
47 spiadata = opt$spiadata
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
48
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
49 if (!is.null(opt$pvalue)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
50 pvalue = opt$pvalue
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
51 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
52 pvalue = 0.05
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
53 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
54
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
55 if (!is.null(opt$organism)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
56 organism = opt$organism
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
57 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
58 organism = "hsa"
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
59 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
60
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
61 if (!is.null(opt$nB)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
62 nB = opt$nB
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
63 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
64 nB = 2000
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
65 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
66
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
67 if (!is.null(opt$combine)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
68 combine = opt$combine
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
69 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
70 combine = "fisher"
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
71 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
72
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
73 if (!is.null(opt$pathid)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
74 opt$pathid = gsub(" ","", opt$pathid) #remove all blank in opt$pathid
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
75 pathid = strsplit(opt$pathid,",")[[1]]
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
76 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
77 pathid = NULL
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
78 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
79
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
80 if (!is.null(opt$result_of_SPIA)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
81 result_of_SPIA = opt$result_of_SPIA
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
82 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
83 result_of_SPIA = "result_SPIA.csv"
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
84 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
85
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
86 if (!is.null(opt$plot)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
87 plots = opt$plot
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
88 }else{
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
89 plots = FALSE
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
90 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
91
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
92
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
93 #================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
94 #run codes
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
95 #================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
96 suppressPackageStartupMessages(library(SPIA))
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
97 top<- read.csv(spiadata)
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
98
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
99 tg1 <- top[top$adj.P.Val < pvalue,] #chose DE genes
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
100 DE_Colorectal = tg1$logFC #chose logFC
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
101 names(DE_Colorectal) <- as.vector(tg1$ENTREZ) #set its name as ENTREZ
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
102 ALL_Colorectal = top$ENTREZ #Get the whole ENTREZ genes in the files input
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
103 #run spia
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
104 # pathway analysis based on combined evidence; # use nB=2000 or more for more accurate results
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
105 res=spia(de=DE_Colorectal,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
106 all=ALL_Colorectal,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
107 organism=organism,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
108 nB=nB,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
109 plots=plots,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
110 pathids = pathid,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
111 beta=NULL,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
112 combine=combine,
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
113 verbose=FALSE)
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
114 #================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
115 #result of SPIA
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
116 #================================================================
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
117 write.csv(res, file = result_of_SPIA )
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
118 if(!is.null(opt$perturbation_Plot)){
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
119 file.rename(from = 'SPIAPerturbationPlots.pdf', to=opt$perturbation_Plot)
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
120 }
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
121
42c80b0324fc Uploaded
mora-lab
parents:
diff changeset
122