| 
1
 | 
     1 ###############################################################################
 | 
| 
 | 
     2 #title: Reactome Pathway enrich Analysis of a gene set
 | 
| 
 | 
     3 #author: xiaowei
 | 
| 
 | 
     4 # time: Mar.31 2021
 | 
| 
 | 
     5 ###############################################################################
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 
 | 
| 
 | 
     8 #####################
 | 
| 
 | 
     9 #Input argument for this function
 | 
| 
 | 
    10 #####################
 | 
| 
 | 
    11 # title
 | 
| 
 | 
    12   # Reactome Pathway enrich Analysis of a gene set
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 # description
 | 
| 
 | 
    15   #Input a vector of all different expression gene and Output the enriched Significant Reactome Pathways
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 # basic input: 
 | 
| 
 | 
    18   # genelist, g, 1, character, an csv file contained all different expression genes with entrez gene id, and has one column as DEgenes
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 # Optional:
 | 
| 
 | 
    21   # pvalueCutoff  , p , 1, numeric  , Cutoff value of p-value,
 | 
| 
 | 
    22   # organism      , o , 1, character, one of 'human', 'rat', 'mouse', 'celegans', 'yeast', 'zebrafish' and 'fly',
 | 
| 
 | 
    23   # pAdjustMethod , w , 1, character, one of 'holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none',
 | 
| 
 | 
    24   # minGSSize     , i , 1, integer  , minimal size of genes annotated by Ontology term for testing,
 | 
| 
 | 
    25   # maxGSSize     , a , 1, integer  , maximal size of each geneSet for analyzing,
 | 
| 
 | 
    26   # convertId     , c , 0, logical  , whether papping gene ID to gene Name,
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 
 | 
| 
 | 
    29 # OUT:
 | 
| 
 | 
    30   # sigP, s, 1, character, output csv file name
 | 
| 
 | 
    31 
 | 
| 
 | 
    32 ##############################################################################
 | 
| 
 | 
    33 #Input argument
 | 
| 
 | 
    34 ###############################################################################
 | 
| 
 | 
    35 #args <- commandArgs(trailingOnly = TRUE)
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 spec <- matrix(c("genelist", "g", 1, "character", "an csv file contained all different expression genes with entrez gene id",
 | 
| 
 | 
    38                  "pvalueCutoff", "p", 1, "numeric", "Cutoff value of p-value",
 | 
| 
 | 
    39                  "organism", "o", 1, "character", "one of 'human', 'rat', 'mouse', 'celegans', 'yeast', 'zebrafish' and 'fly'",
 | 
| 
 | 
    40                  "pAdjustMethod","w", 1, "character", "one of 'holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none'",
 | 
| 
 | 
    41                  "minGSSize", "i",1, "integer", "minimal size of genes annotated by Ontology term for testing",
 | 
| 
 | 
    42                  "maxGSSize", "a",1, "integer", "maximal size of each geneSet for analyzing",
 | 
| 
 | 
    43                  "convertId", "c",0, "logical", "whether papping gene ID to gene Name",
 | 
| 
 | 
    44                  "sigP", "s", 1, "character", "output csv file name"),
 | 
| 
 | 
    45                byrow = TRUE, ncol = 5)
 | 
| 
 | 
    46 
 | 
| 
 | 
    47 
 | 
| 
 | 
    48 if (!requireNamespace("getopt", quietly = TRUE))
 | 
| 
 | 
    49   install.packages("getopt")
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 
 | 
| 
 | 
    52 opt <- getopt::getopt(spec)
 | 
| 
 | 
    53 
 | 
| 
 | 
    54 #-------整理输入的参数----------------
 | 
| 
 | 
    55 de <- opt$genelist 
 | 
| 
 | 
    56 if(is.null(opt$pvalueCutoff)){
 | 
| 
 | 
    57   pvalueCutoff = 0.05
 | 
| 
 | 
    58 }else{
 | 
| 
 | 
    59   pvalueCutoff <- opt$pvalueCutoff
 | 
| 
 | 
    60 }
 | 
| 
 | 
    61 
 | 
| 
 | 
    62 if(is.null(opt$organism)){
 | 
| 
 | 
    63   organism <- "human"
 | 
| 
 | 
    64 }else{
 | 
| 
 | 
    65   organism <- opt$organism
 | 
| 
 | 
    66 }
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 if(is.null(opt$pAdjustMethod)){
 | 
| 
 | 
    69   pAdjustMethod <- "BH"
 | 
| 
 | 
    70 }else{
 | 
| 
 | 
    71   pAdjustMethod <- opt$pAdjustMethod  
 | 
| 
 | 
    72 }
 | 
| 
 | 
    73 
 | 
| 
 | 
    74 if(is.null(opt$minGSSize)){
 | 
| 
 | 
    75   minGSSize <- 10
 | 
| 
 | 
    76 }else{
 | 
| 
 | 
    77   minGSSize <- opt$minGSSize  
 | 
| 
 | 
    78 }
 | 
| 
 | 
    79 
 | 
| 
 | 
    80 if(is.null(opt$maxGSSize)){
 | 
| 
 | 
    81   maxGSSize <- 500
 | 
| 
 | 
    82 }else{
 | 
| 
 | 
    83   maxGSSize <- opt$maxGSSize  
 | 
| 
 | 
    84 }
 | 
| 
 | 
    85 
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 if(is.null(opt$convertId)){
 | 
| 
 | 
    88   convertId <- FALSE
 | 
| 
 | 
    89 }else{
 | 
| 
 | 
    90   convertId <- opt$convertId
 | 
| 
 | 
    91 }
 | 
| 
 | 
    92 
 | 
| 
 | 
    93 if(is.null(opt$sigP)){
 | 
| 
 | 
    94   opt$sigP <- "Significant_Reactome_Pathway_result.csv"
 | 
| 
 | 
    95 }
 | 
| 
 | 
    96 
 | 
| 
 | 
    97 ###############################################################################
 | 
| 
 | 
    98 #运行代码
 | 
| 
 | 
    99 ###############################################################################
 | 
| 
 | 
   100 de <- read.csv(de)
 | 
| 
 | 
   101 de <- de$DEgenes
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 suppressPackageStartupMessages(
 | 
| 
 | 
   104   if (!requireNamespace("ReactomePA", quietly = TRUE)){
 | 
| 
 | 
   105     if (!requireNamespace("BiocManager", quietly = TRUE))
 | 
| 
 | 
   106       install.packages("BiocManager")
 | 
| 
 | 
   107     BiocManager::install("ReactomePA")
 | 
| 
 | 
   108   })
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 #suppressPackageStartupMessages(library(ReactomePA))
 | 
| 
 | 
   111 
 | 
| 
 | 
   112 #Pathway Enrichment Analysis of a gene set
 | 
| 
 | 
   113 result_sigP <- ReactomePA::enrichPathway(gene=de, #entrez id 的基因 向量
 | 
| 
 | 
   114                              pvalueCutoff= pvalueCutoff,  #p值阈值
 | 
| 
 | 
   115                              organism = organism, #有机体是"human"、"rat"、"mouse"、"celegans"、"yeast"、"zebrafish"、"fly"其中一个
 | 
| 
 | 
   116                              pAdjustMethod = pAdjustMethod, #"holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" 其中一个
 | 
| 
 | 
   117                              #unverse, #背景基因
 | 
| 
 | 
   118                              minGSSize = minGSSize, #检验本体术语时,限制最少基因个数
 | 
| 
 | 
   119                              maxGSSize = maxGSSize, #分析基因集时,限制最大基因个数
 | 
| 
 | 
   120                              readable=convertId) #是否将geneID映射到基因名称
 | 
| 
 | 
   121 
 | 
| 
 | 
   122 result_sigP <- as.data.frame(result_sigP)
 | 
| 
 | 
   123 
 | 
| 
 | 
   124 
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 ###############################################################################
 | 
| 
 | 
   127 #输出
 | 
| 
 | 
   128 ###############################################################################
 | 
| 
 | 
   129 write.csv(result_sigP, file = opt$sigP, row.names = FALSE)
 | 
| 
 | 
   130 
 | 
| 
 | 
   131 
 | 
| 
 | 
   132 
 |