comparison SPIA.R @ 0:42c80b0324fc draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:52:08 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:42c80b0324fc
1 ###############################################################################
2 # title: SPIA
3 # author: Xiaowei
4 # time: Mar.31 2021
5 ###############################################################################
6
7 #=================================================================
8 #input arguments
9 #=================================================================
10 #spiadata An CSV file included columns ENTREZ, logFC, and adj.P.Val
11 #pvalue Set a threshold value to define different expression genes
12 #organism A three letter character designating the organism. See a full list at ftp://ftp.genome.jp/pub/kegg/xml/organisms
13 #nB Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000.
14 #plot If set to TRUE, the function plots the gene perturbation accumulation vs log2 fold change for every gene on each pathway. The null distribution of the total net accumulations from which PPERT is computed, is plotted as well. The figures are sent to the SPIAPerturbationPlots.pdf file in the current directory.
15 #combine Method used to combine the two types of p-values. If set to "fisher" it will use Fisher's method. If set to "norminv" it will use the normal inversion method.
16 #pathid A character vector with the names of the pathways to be analyzed. If left NULL all pathways available will be tested.
17 #=================================================================
18 #output arguments
19 #=================================================================
20 #result_of_SPIA CSV file of SPIA
21 #perturbation_Plot if plot argument set TRUE, the gene perturbation accumulation vs log2 fold change for every gene on each pathway
22
23 #=================================================================
24 #how to pass parameters
25 #=================================================================
26 spec <- matrix(c("spiadata", "D", 1, "character", "An CSV file included columns ENTREZ, logFC, and adj.P.Val",
27 "pvalue", "P", 1, "numeric", "Set a threshold value to define different expression genes",
28 "organism", "O", 1, "character", "A three letter character designating the organism. See a full list at ftp://ftp.genome.jp/pub/kegg/xml/organisms",
29 "nB", "N", 1, "integer", "Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000.",
30 "combine", "C", 1, "character", "Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.",
31 "pathid", "I", 1, "character", "A character vector with the names of the pathways to be analyzed. If left NULL all pathways available will be tested.",
32 "result_of_SPIA", "R", 1, "character", "CSV file of SPIA",
33 "plot", "W", 0, "logical", "if plot argument set TRUE, the gene perturbation accumulation vs log2 fold change for every gene on each pathway",
34 "perturbation_Plot", "L", 1, "character", "An pdf file for the gene perturbation accumulation vs log2 fold change for every gene on each pathway "),
35 byrow = TRUE, ncol = 5)
36
37
38 if (!requireNamespace("getopt", quietly = TRUE))
39 install.packages("getopt")
40
41 opt <- getopt::getopt(spec)
42
43 #----------------
44 #整理参数
45 #----------------
46
47 spiadata = opt$spiadata
48
49 if (!is.null(opt$pvalue)){
50 pvalue = opt$pvalue
51 }else{
52 pvalue = 0.05
53 }
54
55 if (!is.null(opt$organism)){
56 organism = opt$organism
57 }else{
58 organism = "hsa"
59 }
60
61 if (!is.null(opt$nB)){
62 nB = opt$nB
63 }else{
64 nB = 2000
65 }
66
67 if (!is.null(opt$combine)){
68 combine = opt$combine
69 }else{
70 combine = "fisher"
71 }
72
73 if (!is.null(opt$pathid)){
74 opt$pathid = gsub(" ","", opt$pathid) #remove all blank in opt$pathid
75 pathid = strsplit(opt$pathid,",")[[1]]
76 }else{
77 pathid = NULL
78 }
79
80 if (!is.null(opt$result_of_SPIA)){
81 result_of_SPIA = opt$result_of_SPIA
82 }else{
83 result_of_SPIA = "result_SPIA.csv"
84 }
85
86 if (!is.null(opt$plot)){
87 plots = opt$plot
88 }else{
89 plots = FALSE
90 }
91
92
93 #================================================================
94 #run codes
95 #================================================================
96 suppressPackageStartupMessages(library(SPIA))
97 top<- read.csv(spiadata)
98
99 tg1 <- top[top$adj.P.Val < pvalue,] #chose DE genes
100 DE_Colorectal = tg1$logFC #chose logFC
101 names(DE_Colorectal) <- as.vector(tg1$ENTREZ) #set its name as ENTREZ
102 ALL_Colorectal = top$ENTREZ #Get the whole ENTREZ genes in the files input
103 #run spia
104 # pathway analysis based on combined evidence; # use nB=2000 or more for more accurate results
105 res=spia(de=DE_Colorectal,
106 all=ALL_Colorectal,
107 organism=organism,
108 nB=nB,
109 plots=plots,
110 pathids = pathid,
111 beta=NULL,
112 combine=combine,
113 verbose=FALSE)
114 #================================================================
115 #result of SPIA
116 #================================================================
117 write.csv(res, file = result_of_SPIA )
118 if(!is.null(opt$perturbation_Plot)){
119 file.rename(from = 'SPIAPerturbationPlots.pdf', to=opt$perturbation_Plot)
120 }
121
122