comparison multiGSEA.R @ 1:e48b10ce08b8 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 945cc63f011002e3f61d7e848d556b647e9c8878
author iuc
date Wed, 21 Feb 2024 15:41:52 +0000
parents 28e29a3d0eda
children
comparison
equal deleted inserted replaced
0:28e29a3d0eda 1:e48b10ce08b8
1 library(multiGSEA, 1 library(multiGSEA,
2 quietly = TRUE, 2 quietly = TRUE,
3 warn.conflicts = FALSE) 3 warn.conflicts = FALSE
4 )
4 library(argparse, quietly = TRUE, warn.conflicts = FALSE) 5 library(argparse, quietly = TRUE, warn.conflicts = FALSE)
5 6
6 ################################################################################ 7 ################################################################################
7 ### Input Processing 8 ### Input Processing
8 ################################################################################ 9 ################################################################################
9 10
10 11
11 # Collect arguments from command line 12 # Collect arguments from command line
12 parser <- ArgumentParser(description = "multiGSEA R script") 13 parser <- ArgumentParser(description = "multiGSEA R script")
13 14
14 parser$add_argument("--transcriptomics", required = FALSE, 15 parser$add_argument("--transcriptomics",
15 help = "Transcriptomics data") 16 required = FALSE,
17 help = "Transcriptomics data"
18 )
16 parser$add_argument( 19 parser$add_argument(
17 "--transcriptome_ids", 20 "--transcriptome_ids",
18 required = FALSE, 21 required = FALSE,
19 help = "Transcriptomics ids", 22 help = "Transcriptomics ids",
20 default = "SYMBOL" 23 default = "SYMBOL"
21 ) 24 )
22 parser$add_argument("--proteomics", required = FALSE, 25 parser$add_argument("--proteomics",
23 help = "Proteomics data") 26 required = FALSE,
27 help = "Proteomics data"
28 )
24 parser$add_argument( 29 parser$add_argument(
25 "--proteome_ids", 30 "--proteome_ids",
26 required = FALSE, 31 required = FALSE,
27 help = "Proteomics ids", 32 help = "Proteomics ids",
28 default = "SYMBOL" 33 default = "SYMBOL"
29 ) 34 )
30 parser$add_argument("--metabolomics", required = FALSE, 35 parser$add_argument("--metabolomics",
31 help = "Metabolomics data") 36 required = FALSE,
37 help = "Metabolomics data"
38 )
32 parser$add_argument( 39 parser$add_argument(
33 "--metabolome_ids", 40 "--metabolome_ids",
34 required = FALSE, 41 required = FALSE,
35 help = "Metabolomics ids", 42 help = "Metabolomics ids",
36 default = "HMDB" 43 default = "HMDB"
37 ) 44 )
38 parser$add_argument("--organism", required = TRUE, 45 parser$add_argument("--organism",
39 help = "Organism") 46 required = TRUE,
40 parser$add_argument("--combine_pvalues", required = TRUE, 47 help = "Organism"
41 help = "Combine p-values method") 48 )
42 parser$add_argument("--padj_method", required = TRUE, 49 parser$add_argument("--combine_pvalues",
43 help = "P-adjustment method") 50 required = TRUE,
51 help = "Combine p-values method"
52 )
53 parser$add_argument("--padj_method",
54 required = TRUE,
55 help = "P-adjustment method"
56 )
44 parser$add_argument("--databases", 57 parser$add_argument("--databases",
45 required = TRUE, 58 required = TRUE,
46 help = "Pathway databases") 59 help = "Pathway databases"
60 )
47 61
48 args <- parser$parse_args() 62 args <- parser$parse_args()
49 63
50 ## ----Load library------------------------------------------------------------- 64 ## ----Load library-------------------------------------------------------------
51 65
52 organism_mapping <- c( 66 organism_mapping <- c(
53 "hsapiens" = "org.Hs.eg.db", 67 "hsapiens" = "org.Hs.eg.db",
54 "mmusculus" = "org.Mm.eg.db", 68 "mmusculus" = "org.Mm.eg.db",
55 "rnorvegicus" = "org.Rn.eg.db", 69 "rnorvegicus" = "org.Rn.eg.db",
56 "cfamiliaris" = "org.Cf.eg.db", 70 "cfamiliaris" = "org.Cf.eg.db",
57 "btaurus" = "org.Bt.eg.db", 71 "btaurus" = "org.Bt.eg.db",
58 "sscrofa" = "org.Ss.eg.db", 72 "sscrofa" = "org.Ss.eg.db",
59 "ggallus" = "org.Gg.eg.db", 73 "ggallus" = "org.Gg.eg.db",
60 "drerio" = "org.Xl.eg.db", 74 "drerio" = "org.Xl.eg.db",
61 "xlaevis" = "org.Dr.eg.db", 75 "xlaevis" = "org.Dr.eg.db",
62 "dmelanogaster" = "org.Dm.eg.db", 76 "dmelanogaster" = "org.Dm.eg.db",
63 "celegans" = "org.Ce.eg.db" 77 "celegans" = "org.Ce.eg.db"
64 ) 78 )
65 79
66 library(organism_mapping[args$organism], character.only = TRUE) 80 library(organism_mapping[args$organism], character.only = TRUE)
67 81
68 82
69 ## ----Load omics data---------------------------------------------------------- 83 ## ----Load omics data----------------------------------------------------------
70 84
71 layer <- c() 85 layer <- c()
72 86
73 if (!is.null(args$transcriptomics)) { 87 if (!is.null(args$transcriptomics)) {
74 transcriptome <- read.csv( 88 transcriptome <- read.csv(
75 args$transcriptomics, 89 args$transcriptomics,
76 header = TRUE, 90 header = TRUE,
77 sep = "\t", 91 sep = "\t",
78 dec = "." 92 dec = "."
79 ) 93 )
80 layer <- append(layer, "transcriptome") 94 layer <- append(layer, "transcriptome")
81 } 95 }
82 96
83 if (!is.null(args$proteomics)) { 97 if (!is.null(args$proteomics)) {
84 proteome <- read.csv(args$proteomics, 98 proteome <- read.csv(args$proteomics,
85 header = TRUE, 99 header = TRUE,
86 sep = "\t", 100 sep = "\t",
87 dec = ".") 101 dec = "."
88 layer <- append(layer, "proteome") 102 )
103 layer <- append(layer, "proteome")
89 } 104 }
90 105
91 if (!is.null(args$metabolomics)) { 106 if (!is.null(args$metabolomics)) {
92 metabolome <- read.csv(args$metabolomics, 107 metabolome <- read.csv(args$metabolomics,
93 header = TRUE, 108 header = TRUE,
94 sep = "\t", 109 sep = "\t",
95 dec = ".") 110 dec = "."
96 layer <- append(layer, "metabolome") 111 )
112 layer <- append(layer, "metabolome")
97 } 113 }
98 114
99 ## ----rank_features------------------------------------------------------------ 115 ## ----rank_features------------------------------------------------------------
100 116
101 # create data structure 117 # create data structure
102 omics_data <- initOmicsDataStructure(layer) 118 omics_data <- initOmicsDataStructure(layer)
103 119
104 ## add transcriptome layer 120 ## add transcriptome layer
105 if (!is.null(args$transcriptomics)) { 121 if (!is.null(args$transcriptomics)) {
106 omics_data$transcriptome <- rankFeatures(transcriptome$logFC, 122 omics_data$transcriptome <- rankFeatures(
107 transcriptome$pValue) 123 transcriptome$logFC,
108 names(omics_data$transcriptome) <- transcriptome$Symbol 124 transcriptome$pValue
125 )
126 names(omics_data$transcriptome) <- transcriptome$Symbol
109 } 127 }
110 128
111 ## add proteome layer 129 ## add proteome layer
112 if (!is.null(args$proteomics)) { 130 if (!is.null(args$proteomics)) {
113 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) 131 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue)
114 names(omics_data$proteome) <- proteome$Symbol 132 names(omics_data$proteome) <- proteome$Symbol
115 } 133 }
116 134
117 ## add metabolome layer 135 ## add metabolome layer
118 ## HMDB features have to be updated to the new HMDB format 136 ## HMDB features have to be updated to the new HMDB format
119 if (!is.null(args$metabolomics)) { 137 if (!is.null(args$metabolomics)) {
120 omics_data$metabolome <- 138 omics_data$metabolome <-
121 rankFeatures(metabolome$logFC, metabolome$pValue) 139 rankFeatures(metabolome$logFC, metabolome$pValue)
122 names(omics_data$metabolome) <- metabolome$HMDB 140 names(omics_data$metabolome) <- metabolome$HMDB
123 names(omics_data$metabolome) <- gsub("HMDB", "HMDB00", 141 names(omics_data$metabolome) <- gsub(
124 names(omics_data$metabolome)) 142 "HMDB", "HMDB00",
143 names(omics_data$metabolome)
144 )
125 } 145 }
126 146
127 147
128 ## remove NA's and sort feature ranks 148 ## remove NA's and sort feature ranks
129 omics_data <- lapply(omics_data, function(vec) { 149 omics_data <- lapply(omics_data, function(vec) {
130 sort(vec[!is.na(vec)]) 150 sort(vec[!is.na(vec)])
131 }) 151 })
132 152
133 ## ----Pathway definitions------------------------------------------------------ 153 ## ----Pathway definitions------------------------------------------------------
134 154
135 pathways <- 155 pathways <-
136 getMultiOmicsFeatures( 156 getMultiOmicsFeatures(
137 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), 157 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)),
138 layer = layer, 158 layer = layer,
139 returnTranscriptome = args$transcriptome_ids, 159 returnTranscriptome = args$transcriptome_ids,
140 returnProteome = args$proteome_ids, 160 returnProteome = args$proteome_ids,
141 returnMetabolome = args$metabolome_ids, 161 returnMetabolome = args$metabolome_ids,
142 organism = args$organism, 162 organism = args$organism,
143 useLocal = FALSE 163 useLocal = FALSE
144 ) 164 )
145 165
146 ## ----calculate enrichment----------------------------------------------------- 166 ## ----calculate enrichment-----------------------------------------------------
147 167
148 enrichment_scores <- 168 enrichment_scores <-
149 multiGSEA(pathways, omics_data) 169 multiGSEA(pathways, omics_data)
150 170
151 ## ----combine_pvalues---------------------------------------------------------- 171 ## ----combine_pvalues----------------------------------------------------------
152 172
153 df <- extractPvalues(enrichmentScores = enrichment_scores, 173 df <- extractPvalues(
154 pathwayNames = names(pathways[[1]])) 174 enrichmentScores = enrichment_scores,
175 pathwayNames = names(pathways[[1]])
176 )
155 177
156 df$combined_pval <- 178 df$combined_pval <-
157 combinePvalues(df, method = args$combine_pvalues) 179 combinePvalues(df, method = args$combine_pvalues)
158 df$combined_padj <- 180 df$combined_padj <-
159 p.adjust(df$combined_pval, method = args$padj_method) 181 p.adjust(df$combined_pval, method = args$padj_method)
160 182
161 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) 183 df <- cbind(data.frame(pathway = names(pathways[[1]])), df)
162 184
163 ## ----Write output------------------------------------------------------------- 185 ## ----Write output-------------------------------------------------------------
164 186
165 write.table( 187 write.table(
166 df, 188 df,
167 file = "results.tsv", 189 file = "results.tsv",
168 quote = FALSE, 190 quote = FALSE,
169 sep = "\t", 191 sep = "\t",
170 col.names = TRUE, 192 col.names = TRUE,
171 row.names = FALSE 193 row.names = FALSE
172 ) 194 )