Mercurial > repos > iuc > multigsea
comparison multiGSEA.R @ 1:e48b10ce08b8 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/multigsea commit 945cc63f011002e3f61d7e848d556b647e9c8878
author | iuc |
---|---|
date | Wed, 21 Feb 2024 15:41:52 +0000 |
parents | 28e29a3d0eda |
children |
comparison
equal
deleted
inserted
replaced
0:28e29a3d0eda | 1:e48b10ce08b8 |
---|---|
1 library(multiGSEA, | 1 library(multiGSEA, |
2 quietly = TRUE, | 2 quietly = TRUE, |
3 warn.conflicts = FALSE) | 3 warn.conflicts = FALSE |
4 ) | |
4 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | 5 library(argparse, quietly = TRUE, warn.conflicts = FALSE) |
5 | 6 |
6 ################################################################################ | 7 ################################################################################ |
7 ### Input Processing | 8 ### Input Processing |
8 ################################################################################ | 9 ################################################################################ |
9 | 10 |
10 | 11 |
11 # Collect arguments from command line | 12 # Collect arguments from command line |
12 parser <- ArgumentParser(description = "multiGSEA R script") | 13 parser <- ArgumentParser(description = "multiGSEA R script") |
13 | 14 |
14 parser$add_argument("--transcriptomics", required = FALSE, | 15 parser$add_argument("--transcriptomics", |
15 help = "Transcriptomics data") | 16 required = FALSE, |
17 help = "Transcriptomics data" | |
18 ) | |
16 parser$add_argument( | 19 parser$add_argument( |
17 "--transcriptome_ids", | 20 "--transcriptome_ids", |
18 required = FALSE, | 21 required = FALSE, |
19 help = "Transcriptomics ids", | 22 help = "Transcriptomics ids", |
20 default = "SYMBOL" | 23 default = "SYMBOL" |
21 ) | 24 ) |
22 parser$add_argument("--proteomics", required = FALSE, | 25 parser$add_argument("--proteomics", |
23 help = "Proteomics data") | 26 required = FALSE, |
27 help = "Proteomics data" | |
28 ) | |
24 parser$add_argument( | 29 parser$add_argument( |
25 "--proteome_ids", | 30 "--proteome_ids", |
26 required = FALSE, | 31 required = FALSE, |
27 help = "Proteomics ids", | 32 help = "Proteomics ids", |
28 default = "SYMBOL" | 33 default = "SYMBOL" |
29 ) | 34 ) |
30 parser$add_argument("--metabolomics", required = FALSE, | 35 parser$add_argument("--metabolomics", |
31 help = "Metabolomics data") | 36 required = FALSE, |
37 help = "Metabolomics data" | |
38 ) | |
32 parser$add_argument( | 39 parser$add_argument( |
33 "--metabolome_ids", | 40 "--metabolome_ids", |
34 required = FALSE, | 41 required = FALSE, |
35 help = "Metabolomics ids", | 42 help = "Metabolomics ids", |
36 default = "HMDB" | 43 default = "HMDB" |
37 ) | 44 ) |
38 parser$add_argument("--organism", required = TRUE, | 45 parser$add_argument("--organism", |
39 help = "Organism") | 46 required = TRUE, |
40 parser$add_argument("--combine_pvalues", required = TRUE, | 47 help = "Organism" |
41 help = "Combine p-values method") | 48 ) |
42 parser$add_argument("--padj_method", required = TRUE, | 49 parser$add_argument("--combine_pvalues", |
43 help = "P-adjustment method") | 50 required = TRUE, |
51 help = "Combine p-values method" | |
52 ) | |
53 parser$add_argument("--padj_method", | |
54 required = TRUE, | |
55 help = "P-adjustment method" | |
56 ) | |
44 parser$add_argument("--databases", | 57 parser$add_argument("--databases", |
45 required = TRUE, | 58 required = TRUE, |
46 help = "Pathway databases") | 59 help = "Pathway databases" |
60 ) | |
47 | 61 |
48 args <- parser$parse_args() | 62 args <- parser$parse_args() |
49 | 63 |
50 ## ----Load library------------------------------------------------------------- | 64 ## ----Load library------------------------------------------------------------- |
51 | 65 |
52 organism_mapping <- c( | 66 organism_mapping <- c( |
53 "hsapiens" = "org.Hs.eg.db", | 67 "hsapiens" = "org.Hs.eg.db", |
54 "mmusculus" = "org.Mm.eg.db", | 68 "mmusculus" = "org.Mm.eg.db", |
55 "rnorvegicus" = "org.Rn.eg.db", | 69 "rnorvegicus" = "org.Rn.eg.db", |
56 "cfamiliaris" = "org.Cf.eg.db", | 70 "cfamiliaris" = "org.Cf.eg.db", |
57 "btaurus" = "org.Bt.eg.db", | 71 "btaurus" = "org.Bt.eg.db", |
58 "sscrofa" = "org.Ss.eg.db", | 72 "sscrofa" = "org.Ss.eg.db", |
59 "ggallus" = "org.Gg.eg.db", | 73 "ggallus" = "org.Gg.eg.db", |
60 "drerio" = "org.Xl.eg.db", | 74 "drerio" = "org.Xl.eg.db", |
61 "xlaevis" = "org.Dr.eg.db", | 75 "xlaevis" = "org.Dr.eg.db", |
62 "dmelanogaster" = "org.Dm.eg.db", | 76 "dmelanogaster" = "org.Dm.eg.db", |
63 "celegans" = "org.Ce.eg.db" | 77 "celegans" = "org.Ce.eg.db" |
64 ) | 78 ) |
65 | 79 |
66 library(organism_mapping[args$organism], character.only = TRUE) | 80 library(organism_mapping[args$organism], character.only = TRUE) |
67 | 81 |
68 | 82 |
69 ## ----Load omics data---------------------------------------------------------- | 83 ## ----Load omics data---------------------------------------------------------- |
70 | 84 |
71 layer <- c() | 85 layer <- c() |
72 | 86 |
73 if (!is.null(args$transcriptomics)) { | 87 if (!is.null(args$transcriptomics)) { |
74 transcriptome <- read.csv( | 88 transcriptome <- read.csv( |
75 args$transcriptomics, | 89 args$transcriptomics, |
76 header = TRUE, | 90 header = TRUE, |
77 sep = "\t", | 91 sep = "\t", |
78 dec = "." | 92 dec = "." |
79 ) | 93 ) |
80 layer <- append(layer, "transcriptome") | 94 layer <- append(layer, "transcriptome") |
81 } | 95 } |
82 | 96 |
83 if (!is.null(args$proteomics)) { | 97 if (!is.null(args$proteomics)) { |
84 proteome <- read.csv(args$proteomics, | 98 proteome <- read.csv(args$proteomics, |
85 header = TRUE, | 99 header = TRUE, |
86 sep = "\t", | 100 sep = "\t", |
87 dec = ".") | 101 dec = "." |
88 layer <- append(layer, "proteome") | 102 ) |
103 layer <- append(layer, "proteome") | |
89 } | 104 } |
90 | 105 |
91 if (!is.null(args$metabolomics)) { | 106 if (!is.null(args$metabolomics)) { |
92 metabolome <- read.csv(args$metabolomics, | 107 metabolome <- read.csv(args$metabolomics, |
93 header = TRUE, | 108 header = TRUE, |
94 sep = "\t", | 109 sep = "\t", |
95 dec = ".") | 110 dec = "." |
96 layer <- append(layer, "metabolome") | 111 ) |
112 layer <- append(layer, "metabolome") | |
97 } | 113 } |
98 | 114 |
99 ## ----rank_features------------------------------------------------------------ | 115 ## ----rank_features------------------------------------------------------------ |
100 | 116 |
101 # create data structure | 117 # create data structure |
102 omics_data <- initOmicsDataStructure(layer) | 118 omics_data <- initOmicsDataStructure(layer) |
103 | 119 |
104 ## add transcriptome layer | 120 ## add transcriptome layer |
105 if (!is.null(args$transcriptomics)) { | 121 if (!is.null(args$transcriptomics)) { |
106 omics_data$transcriptome <- rankFeatures(transcriptome$logFC, | 122 omics_data$transcriptome <- rankFeatures( |
107 transcriptome$pValue) | 123 transcriptome$logFC, |
108 names(omics_data$transcriptome) <- transcriptome$Symbol | 124 transcriptome$pValue |
125 ) | |
126 names(omics_data$transcriptome) <- transcriptome$Symbol | |
109 } | 127 } |
110 | 128 |
111 ## add proteome layer | 129 ## add proteome layer |
112 if (!is.null(args$proteomics)) { | 130 if (!is.null(args$proteomics)) { |
113 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) | 131 omics_data$proteome <- rankFeatures(proteome$logFC, proteome$pValue) |
114 names(omics_data$proteome) <- proteome$Symbol | 132 names(omics_data$proteome) <- proteome$Symbol |
115 } | 133 } |
116 | 134 |
117 ## add metabolome layer | 135 ## add metabolome layer |
118 ## HMDB features have to be updated to the new HMDB format | 136 ## HMDB features have to be updated to the new HMDB format |
119 if (!is.null(args$metabolomics)) { | 137 if (!is.null(args$metabolomics)) { |
120 omics_data$metabolome <- | 138 omics_data$metabolome <- |
121 rankFeatures(metabolome$logFC, metabolome$pValue) | 139 rankFeatures(metabolome$logFC, metabolome$pValue) |
122 names(omics_data$metabolome) <- metabolome$HMDB | 140 names(omics_data$metabolome) <- metabolome$HMDB |
123 names(omics_data$metabolome) <- gsub("HMDB", "HMDB00", | 141 names(omics_data$metabolome) <- gsub( |
124 names(omics_data$metabolome)) | 142 "HMDB", "HMDB00", |
143 names(omics_data$metabolome) | |
144 ) | |
125 } | 145 } |
126 | 146 |
127 | 147 |
128 ## remove NA's and sort feature ranks | 148 ## remove NA's and sort feature ranks |
129 omics_data <- lapply(omics_data, function(vec) { | 149 omics_data <- lapply(omics_data, function(vec) { |
130 sort(vec[!is.na(vec)]) | 150 sort(vec[!is.na(vec)]) |
131 }) | 151 }) |
132 | 152 |
133 ## ----Pathway definitions------------------------------------------------------ | 153 ## ----Pathway definitions------------------------------------------------------ |
134 | 154 |
135 pathways <- | 155 pathways <- |
136 getMultiOmicsFeatures( | 156 getMultiOmicsFeatures( |
137 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), | 157 dbs = unlist(strsplit(args$databases, ",", fixed = TRUE)), |
138 layer = layer, | 158 layer = layer, |
139 returnTranscriptome = args$transcriptome_ids, | 159 returnTranscriptome = args$transcriptome_ids, |
140 returnProteome = args$proteome_ids, | 160 returnProteome = args$proteome_ids, |
141 returnMetabolome = args$metabolome_ids, | 161 returnMetabolome = args$metabolome_ids, |
142 organism = args$organism, | 162 organism = args$organism, |
143 useLocal = FALSE | 163 useLocal = FALSE |
144 ) | 164 ) |
145 | 165 |
146 ## ----calculate enrichment----------------------------------------------------- | 166 ## ----calculate enrichment----------------------------------------------------- |
147 | 167 |
148 enrichment_scores <- | 168 enrichment_scores <- |
149 multiGSEA(pathways, omics_data) | 169 multiGSEA(pathways, omics_data) |
150 | 170 |
151 ## ----combine_pvalues---------------------------------------------------------- | 171 ## ----combine_pvalues---------------------------------------------------------- |
152 | 172 |
153 df <- extractPvalues(enrichmentScores = enrichment_scores, | 173 df <- extractPvalues( |
154 pathwayNames = names(pathways[[1]])) | 174 enrichmentScores = enrichment_scores, |
175 pathwayNames = names(pathways[[1]]) | |
176 ) | |
155 | 177 |
156 df$combined_pval <- | 178 df$combined_pval <- |
157 combinePvalues(df, method = args$combine_pvalues) | 179 combinePvalues(df, method = args$combine_pvalues) |
158 df$combined_padj <- | 180 df$combined_padj <- |
159 p.adjust(df$combined_pval, method = args$padj_method) | 181 p.adjust(df$combined_pval, method = args$padj_method) |
160 | 182 |
161 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) | 183 df <- cbind(data.frame(pathway = names(pathways[[1]])), df) |
162 | 184 |
163 ## ----Write output------------------------------------------------------------- | 185 ## ----Write output------------------------------------------------------------- |
164 | 186 |
165 write.table( | 187 write.table( |
166 df, | 188 df, |
167 file = "results.tsv", | 189 file = "results.tsv", |
168 quote = FALSE, | 190 quote = FALSE, |
169 sep = "\t", | 191 sep = "\t", |
170 col.names = TRUE, | 192 col.names = TRUE, |
171 row.names = FALSE | 193 row.names = FALSE |
172 ) | 194 ) |