comparison XSeekerPreparator.R @ 19:2937e72e5891 draft

" master branch Updating"
author lain
date Tue, 18 Oct 2022 12:57:28 +0000
parents 2c7e7fd1f740
children ce94e7a141bb
comparison
equal deleted inserted replaced
18:2c7e7fd1f740 19:2937e72e5891
1 1
2 2
3 TOOL_NAME <- "XSeekerPreparator" 3 assign("TOOL_NAME", "XSeekerPreparator", envir = globalenv())
4 VERSION <- "1.2.4" 4 lockBinding("TOOL_NAME", globalenv())
5 5 assign("VERSION", "1.3.0", envir = globalenv())
6 DEBUG_FAST <- FALSE 6 lockBinding("VERSION", globalenv())
7 DEBUG_FAST_IGNORE_SLOW_OP <- DEBUG_FAST 7 assign("DEBUG_FAST", FALSE, envir = globalenv())
8 PROCESS_SMOL_BATCH <- DEBUG_FAST 8 lockBinding("DEBUG_FAST", globalenv())
9 FAST_FEATURE_RATIO <- 10 9 assign("DEBUG_FAST_IGNORE_SLOW_OP", DEBUG_FAST, envir = globalenv())
10 10 lockBinding("DEBUG_FAST_IGNORE_SLOW_OP", globalenv())
11 OUTPUT_SPECIFIC_TOOL <- "XSeeker_Galaxy" 11 assign("PROCESS_SMOL_BATCH", DEBUG_FAST, envir = globalenv())
12 12 lockBinding("PROCESS_SMOL_BATCH", globalenv())
13 ENRICHED_RDATA_VERSION <- paste("1.2.4", OUTPUT_SPECIFIC_TOOL, sep="-") 13 assign("FAST_FEATURE_RATIO", 10, envir = globalenv())
14 ENRICHED_RDATA_DOC <- sprintf(" 14 lockBinding("FAST_FEATURE_RATIO", globalenv())
15 assign("OUTPUT_SPECIFIC_TOOL", "XSeeker_Galaxy", envir = globalenv())
16 lockBinding("OUTPUT_SPECIFIC_TOOL", globalenv())
17
18 assign(
19 "ENRICHED_RDATA_VERSION",
20 paste(VERSION, OUTPUT_SPECIFIC_TOOL, sep = "-"),
21 envir = globalenv()
22 )
23 lockBinding("ENRICHED_RDATA_VERSION", globalenv())
24 assign("ENRICHED_RDATA_DOC", sprintf("
15 Welcome to the enriched <Version %s> of the output of CAMERA/xcms. 25 Welcome to the enriched <Version %s> of the output of CAMERA/xcms.
16 This doc was generated by the tool: %s - Version %s 26 This doc was generated by the tool: %s - Version %s
17 To show the different variables contained in this rdata, type: 27 To show the different variables contained in this rdata, type:
18 - `load('this_rdata.rdata', rdata_env <- new.env())` 28 - `load('this_rdata.rdata', rdata_env <- new.env())`
19 - `names(rdata_env)` 29 - `names(rdata_env)`
39 - Retrieval method: enriched_rdata <- TRUE 49 - Retrieval method: enriched_rdata <- TRUE
40 50
41 - enriched_rdata_version: 51 - enriched_rdata_version:
42 - Description: A flag created by that tool to tell which version of 52 - Description: A flag created by that tool to tell which version of
43 this tool has enriched the rdata. 53 this tool has enriched the rdata.
44 - Retrieval method: enriched_rdata_version <- sprintf(\"%s\", ENRICHED_RDATA_VERSION) 54 - Retrieval method:
55 enriched_rdata_version <- sprintf(
56 \"%s\",
57 ENRICHED_RDATA_VERSION
58 )
45 59
46 - enriched_rdata_doc: 60 - enriched_rdata_doc:
47 - Description: Contains the documentation string. 61 - Description: Contains the documentation string.
48 62
49 Data from original mzxml file 63 Data from original mzxml file
79 - xcms version: 2.0 93 - xcms version: 2.0
80 94
81 - polarity: 95 - polarity:
82 - Description: Those are the polarity values from the original mzxml 96 - Description: Those are the polarity values from the original mzxml
83 file, extracted using xcms 2. 97 file, extracted using xcms 2.
84 - Retrieval method: as.character(xcms::xcmsRaw('original_file.mzxml')@polarity[[1]]) 98 - Retrieval method:
99 as.character(xcms::xcmsRaw(
100 'original_file.mzxml'
101 )@polarity[[1]])
85 - xcms version: 2.0 102 - xcms version: 2.0
86 103
87 Data taken from incoming rdata 104 Data taken from incoming rdata
88 ------ 105 ------
89 - variableMetadata: 106 - variableMetadata:
101 - Retrieval method: 118 - Retrieval method:
102 ## just he same list, but simplified 119 ## just he same list, but simplified
103 process_params <- list() 120 process_params <- list()
104 for (list_name in names(rdata_file$listOFlistArguments)) { 121 for (list_name in names(rdata_file$listOFlistArguments)) {
105 param_list <- list() 122 param_list <- list()
106 for (param_name in names(rdata_file$listOFlistArguments[[list_name]])) { 123 for (param_name in names(
107 param_list[[param_name]] <- rdata_file$listOFlistArguments[[list_name]][[param_name]] 124 rdata_file$listOFlistArguments[[list_name]]
125 )) {
126 param_list[[param_name]] <- rdata_file$listOFlistArguments[[
127 list_name
128 ]][[param_name]]
108 } 129 }
109 process_params[[length(process_params)+1]] <- param_list 130 process_params[[length(process_params)+1]] <- param_list
110 } 131 }
111 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION) 132 ", ENRICHED_RDATA_VERSION, TOOL_NAME, VERSION, ENRICHED_RDATA_VERSION),
133 envir = globalenv())
134 lockBinding("ENRICHED_RDATA_DOC", globalenv())
112 135
113 136
114 137
115 get_models <- function(path) { 138 get_models <- function(path) {
116 if (is.null(path)) { 139 if (is.null(path)) {
118 } else { 141 } else {
119 message(sprintf("Loading models from %s", path)) 142 message(sprintf("Loading models from %s", path))
120 } 143 }
121 ## galaxy mangles the "@" to a "__at__" 144 ## galaxy mangles the "@" to a "__at__"
122 if (substr(path, 1, 9) == "git__at__") { 145 if (substr(path, 1, 9) == "git__at__") {
123 path <- sub("^git__at__", "git@", path, perl=TRUE) 146 path <- sub("^git__at__", "git@", path, perl = TRUE)
124 } 147 }
125 if ( 148 if (
126 substr(path, 1, 4) == "git@" 149 substr(path, 1, 4) == "git@"
127 || substr(path, length(path)-4, 4) == ".git" 150 || substr(path, length(path) - 4, 4) == ".git"
128 ) { 151 ) {
129 return (get_models_from_git(path)) 152 return(get_models_from_git(path))
130 } 153 }
131 if (substr(path, 1, 4) == "http") { 154 if (substr(path, 1, 4) == "http") {
132 return (get_models_from_url(path)) 155 return(get_models_from_url(path))
133 } 156 }
134 return (source(path)$value) 157 return(source(path)$value)
135 } 158 }
136 159
137 get_models_from_git <- function (url, target_file="models.R", rm=TRUE) { 160 get_models_from_git <- function(url, target_file = "models.R", rm = TRUE) {
138 tmp <- tempdir() 161 tmp <- tempdir()
139 message(sprintf("Cloning %s", url)) 162 message(sprintf("Cloning %s", url))
140 system2("git", c("clone", url, tmp)) 163 system2("git", c("clone", url, tmp))
141 result <- search_tree(file.path(tmp, dir), target_file) 164 result <- search_tree(file.path(tmp, dir), target_file)
142 if (!is.null(result)) { 165 if (!is.null(result)) {
143 models <- source(result)$value 166 models <- source(result)$value
144 if (rm) { 167 if (rm) {
145 unlink(tmp, recursive=TRUE) 168 unlink(tmp, recursive = TRUE)
146 } 169 }
147 return (models) 170 return(models)
148 } 171 }
149 if (rm) { 172 if (rm) {
150 unlink(tmp, recursive=TRUE) 173 unlink(tmp, recursive = TRUE)
151 } 174 }
152 stop(sprintf( 175 stop(sprintf(
153 "Could not find any file named \"%s\" in this repo", 176 "Could not find any file named \"%s\" in this repo",
154 target_file 177 target_file
155 )) 178 ))
156 } 179 }
157 180
158 get_models_from_url <- function (url, target_file="models.R", rm=TRUE) { 181 get_models_from_url <- function(url, target_file = "models.R", rm = TRUE) {
159 tmp <- tempdir() 182 tmp <- tempdir()
160 message(sprintf("Downloading %s", url)) 183 message(sprintf("Downloading %s", url))
161 result <- file.path(tmp, target_file) 184 result <- file.path(tmp, target_file)
162 if (download.file(url, destfile=result) == 0) { 185 if (download.file(url, destfile = result) == 0) {
163 models <- source(result)$value 186 models <- source(result)$value
164 if (rm) { 187 if (rm) {
165 unlink(tmp, recursive=TRUE) 188 unlink(tmp, recursive = TRUE)
166 } 189 }
167 return (models) 190 return(models)
168 } 191 }
169 if (rm) { 192 if (rm) {
170 unlink(tmp, recursive=TRUE) 193 unlink(tmp, recursive = TRUE)
171 } 194 }
172 stop("Could not download any file at this adress.") 195 stop("Could not download any file at this adress.")
173 } 196 }
174 197
175 search_tree <- function(path, target) { 198 search_tree <- function(path, target) {
176 target <- tolower(target) 199 target <- tolower(target)
177 for (file in list.files(path)) { 200 for (file in list.files(path)) {
178 if (is.dir(file)) { 201 if (is.dir(file)) {
179 result <- search_tree(file.path(path, file), target) 202 result <- search_tree(file.path(path, file), target)
180 if (!is.null(result)) { 203 if (!is.null(result)) {
181 return (result) 204 return(result)
182 } 205 }
183 } else if (tolower(file) == target) { 206 } else if (tolower(file) == target) {
184 return (file.path(path, file)) 207 return(file.path(path, file))
185 } 208 }
186 } 209 }
187 return (NULL) 210 return(NULL)
188 } 211 }
189 212
190 create_database <- function(orm) { 213 create_database <- function(orm) {
191 orm$recreate_database(no_exists=FALSE) 214 orm$recreate_database(no_exists = FALSE)
192 set_database_version(orm, "created") 215 set_database_version(orm, "created")
193 } 216 }
194 217
195 insert_adducts <- function(orm) { 218 insert_adducts <- function(orm) {
196 message("Creating adducts...") 219 message("Creating adducts...")
197 adducts <- list( 220 adducts <- list(
198 list("[M-H2O-H]-",1,-1,-48.992020312000001069,1,0,0.5,"H0","H1O3"), 221 list("[M-H2O-H]-", 1, -1, -48.992020312000001069, 1, 0, 0.5, "H0", "H1O3"),
199 list("[M-H-Cl+O]-",1,-1,-19.981214542000000022,2,0,0.5,"O1","H1Cl1"), 222 list("[M-H-Cl+O]-", 1, -1, -19.981214542000000022, 2, 0, 0.5, "O1", "H1Cl1"),
200 list("[M-Cl+O]-",1,-1,-18.973389510000000512,3,0,0.5,"O1","Cl1"), 223 list("[M-Cl+O]-", 1, -1, -18.973389510000000512, 3, 0, 0.5, "O1", "Cl1"),
201 list("[M-3H]3-",1,-3,-3.0218293560000000219,4,0,1.0,"H0","H3"), 224 list("[M-3H]3-", 1, -3, -3.0218293560000000219, 4, 0, 1.0, "H0", "H3"),
202 list("[2M-3H]3-",2,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), 225 list("[2M-3H]3-", 2, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
203 list("[3M-3H]3-",3,-3,-3.0218293560000000219,4,0,0.5,"H0","H3"), 226 list("[3M-3H]3-", 3, -3, -3.0218293560000000219, 4, 0, 0.5, "H0", "H3"),
204 list("[M-2H]2-",1,-2,-2.0145529039999998666,5,0,1.0,"H0","H2"), 227 list("[M-2H]2-", 1, -2, -2.0145529039999998666, 5, 0, 1.0, "H0", "H2"),
205 list("[2M-2H]2-",2,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), 228 list("[2M-2H]2-", 2, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
206 list("[3M-2H]2-",3,-2,-2.0145529039999998666,5,0,0.5,"H0","H2"), 229 list("[3M-2H]2-", 3, -2, -2.0145529039999998666, 5, 0, 0.5, "H0", "H2"),
207 list("[M-H]-",1,-1,-1.0072764519999999333,6,1,1.0,"H0","H1"), 230 list("[M-H]-", 1, -1, -1.0072764519999999333, 6, 1, 1.0, "H0", "H1"),
208 list("[2M-H]-",2,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), 231 list("[2M-H]-", 2, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
209 list("[3M-H]-",3,-1,-1.0072764519999999333,6,0,0.5,"H0","H1"), 232 list("[3M-H]-", 3, -1, -1.0072764519999999333, 6, 0, 0.5, "H0", "H1"),
210 list("[M]+",1,1,-0.00054858000000000000945,7,1,1.0,"H0","H0"), 233 list("[M]+", 1, 1, -0.00054858000000000000945, 7, 1, 1.0, "H0", "H0"),
211 list("[M]-",1,-1,0.00054858000000000000945,8,1,1.0,"H0","H0"), 234 list("[M]-", 1, -1, 0.00054858000000000000945, 8, 1, 1.0, "H0", "H0"),
212 list("[M+H]+",1,1,1.0072764519999999333,9,1,1.0,"H1","H0"), 235 list("[M+H]+", 1, 1, 1.0072764519999999333, 9, 1, 1.0, "H1", "H0"),
213 list("[2M+H]+",2,1,1.0072764519999999333,9,0,0.5,"H1","H0"), 236 list("[2M+H]+", 2, 1, 1.0072764519999999333, 9, 0, 0.5, "H1", "H0"),
214 list("[3M+H]+",3,1,1.0072764519999999333,9,0,0.25,"H1","H0"), 237 list("[3M+H]+", 3, 1, 1.0072764519999999333, 9, 0, 0.25, "H1", "H0"),
215 list("[M+2H]2+",1,2,2.0145529039999998666,10,0,0.75,"H2","H0"), 238 list("[M+2H]2+", 1, 2, 2.0145529039999998666, 10, 0, 0.75, "H2", "H0"),
216 list("[2M+2H]2+",2,2,2.0145529039999998666,10,0,0.5,"H2","H0"), 239 list("[2M+2H]2+", 2, 2, 2.0145529039999998666, 10, 0, 0.5, "H2", "H0"),
217 list("[3M+2H]2+",3,2,2.0145529039999998666,10,0,0.25,"H2","H0"), 240 list("[3M+2H]2+", 3, 2, 2.0145529039999998666, 10, 0, 0.25, "H2", "H0"),
218 list("[M+3H]3+",1,3,3.0218293560000000219,11,0,0.75,"H3","H0"), 241 list("[M+3H]3+", 1, 3, 3.0218293560000000219, 11, 0, 0.75, "H3", "H0"),
219 list("[2M+3H]3+",2,3,3.0218293560000000219,11,0,0.5,"H3","H0"), 242 list("[2M+3H]3+", 2, 3, 3.0218293560000000219, 11, 0, 0.5, "H3", "H0"),
220 list("[3M+3H]3+",3,3,3.0218293560000000219,11,0,0.25,"H3","H0"), 243 list("[3M+3H]3+", 3, 3, 3.0218293560000000219, 11, 0, 0.25, "H3", "H0"),
221 list("[M-2H+NH4]-",1,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), 244 list("[M-2H+NH4]-", 1, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
222 list("[2M-2H+NH4]-",2,-1,16.019272654000001665,12,0,0.0,"N1H4","H2"), 245 list("[2M-2H+NH4]-", 2, -1, 16.019272654000001665, 12, 0, 0.0, "N1H4", "H2"),
223 list("[3M-2H+NH4]-",3,-1,16.019272654000001665,12,0,0.25,"N1H4","H2"), 246 list("[3M-2H+NH4]-", 3, -1, 16.019272654000001665, 12, 0, 0.25, "N1H4", "H2"),
224 list("[M+NH4]+",1,1,18.033825558000000199,13,1,1.0,"N1H4","H0"), 247 list("[M+NH4]+", 1, 1, 18.033825558000000199, 13, 1, 1.0, "N1H4", "H0"),
225 list("[2M+NH4]+",2,1,18.033825558000000199,13,0,0.5,"N1H4","H0"), 248 list("[2M+NH4]+", 2, 1, 18.033825558000000199, 13, 0, 0.5, "N1H4", "H0"),
226 list("[3M+NH4]+",3,1,18.033825558000000199,13,0,0.25,"N1H4","H0"), 249 list("[3M+NH4]+", 3, 1, 18.033825558000000199, 13, 0, 0.25, "N1H4", "H0"),
227 list("[M+H+NH4]2+",1,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), 250 list("[M+H+NH4]2+", 1, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
228 list("[2M+H+NH4]2+",2,2,19.041102009999999467,14,0,0.5,"N1H5","H0"), 251 list("[2M+H+NH4]2+", 2, 2, 19.041102009999999467, 14, 0, 0.5, "N1H5", "H0"),
229 list("[3M+H+NH4]2+",3,2,19.041102009999999467,14,0,0.25,"N1H5","H0"), 252 list("[3M+H+NH4]2+", 3, 2, 19.041102009999999467, 14, 0, 0.25, "N1H5", "H0"),
230 list("[M+Na-2H]-",1,-1,20.974668176000001551,15,0,0.75,"Na1","H2"), 253 list("[M+Na-2H]-", 1, -1, 20.974668176000001551, 15, 0, 0.75, "Na1", "H2"),
231 list("[2M-2H+Na]-",2,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), 254 list("[2M-2H+Na]-", 2, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
232 list("[3M-2H+Na]-",3,-1,20.974668176000001551,15,0,0.25,"Na1","H2"), 255 list("[3M-2H+Na]-", 3, -1, 20.974668176000001551, 15, 0, 0.25, "Na1", "H2"),
233 list("[M+Na]+",1,1,22.989221080000000086,16,1,1.0,"Na1","H0"), 256 list("[M+Na]+", 1, 1, 22.989221080000000086, 16, 1, 1.0, "Na1", "H0"),
234 list("[2M+Na]+",2,1,22.989221080000000086,16,0,0.5,"Na1","H0"), 257 list("[2M+Na]+", 2, 1, 22.989221080000000086, 16, 0, 0.5, "Na1", "H0"),
235 list("[3M+Na]+",3,1,22.989221080000000086,16,0,0.25,"Na1","H0"), 258 list("[3M+Na]+", 3, 1, 22.989221080000000086, 16, 0, 0.25, "Na1", "H0"),
236 list("[M+H+Na]2+",1,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), 259 list("[M+H+Na]2+", 1, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
237 list("[2M+H+Na]2+",2,2,23.996497531999999353,17,0,0.5,"Na1H1","H0"), 260 list("[2M+H+Na]2+", 2, 2, 23.996497531999999353, 17, 0, 0.5, "Na1H1", "H0"),
238 list("[3M+H+Na]2+",3,2,23.996497531999999353,17,0,0.25,"Na1H1","H0"), 261 list("[3M+H+Na]2+", 3, 2, 23.996497531999999353, 17, 0, 0.25, "Na1H1", "H0"),
239 list("[M+2H+Na]3+",1,3,25.003773983999998619,18,0,0.25,"H2Na1","H0"), 262 list("[M+2H+Na]3+", 1, 3, 25.003773983999998619, 18, 0, 0.25, "H2Na1", "H0"),
240 list("[M+CH3OH+H]+",1,1,33.033491200000000276,19,0,0.25,"C1O1H5","H0"), 263 list("[M+CH3OH+H]+", 1, 1, 33.033491200000000276, 19, 0, 0.25, "C1O1H5", "H0"),
241 list("[M-H+Cl]2-",1,-2,33.962124838000001148,20,0,1.0,"Cl1","H1"), 264 list("[M-H+Cl]2-", 1, -2, 33.962124838000001148, 20, 0, 1.0, "Cl1", "H1"),
242 list("[2M-H+Cl]2-",2,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), 265 list("[2M-H+Cl]2-", 2, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
243 list("[3M-H+Cl]2-",3,-2,33.962124838000001148,20,0,0.5,"Cl1","H1"), 266 list("[3M-H+Cl]2-", 3, -2, 33.962124838000001148, 20, 0, 0.5, "Cl1", "H1"),
244 list("[M+Cl]-",1,-1,34.969401290000000416,21,1,1.0,"Cl1","H0"), 267 list("[M+Cl]-", 1, -1, 34.969401290000000416, 21, 1, 1.0, "Cl1", "H0"),
245 list("[2M+Cl]-",2,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), 268 list("[2M+Cl]-", 2, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
246 list("[3M+Cl]-",3,-1,34.969401290000000416,21,0,0.5,"Cl1","H0"), 269 list("[3M+Cl]-", 3, -1, 34.969401290000000416, 21, 0, 0.5, "Cl1", "H0"),
247 list("[M+K-2H]-",1,-1,36.948605415999999479,22,0,0.5,"K1","H2"), 270 list("[M+K-2H]-", 1, -1, 36.948605415999999479, 22, 0, 0.5, "K1", "H2"),
248 list("[2M-2H+K]-",2,-1,36.948605415999999479,22,0,0.0,"K1","H2"), 271 list("[2M-2H+K]-", 2, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
249 list("[3M-2H+K]-",3,-1,36.948605415999999479,22,0,0.0,"K1","H2"), 272 list("[3M-2H+K]-", 3, -1, 36.948605415999999479, 22, 0, 0.0, "K1", "H2"),
250 list("[M+K]+",1,1,38.963158319999998013,23,1,1.0,"K1","H0"), 273 list("[M+K]+", 1, 1, 38.963158319999998013, 23, 1, 1.0, "K1", "H0"),
251 list("[2M+K]+",2,1,38.963158319999998013,23,0,0.5,"K1","H0"), 274 list("[2M+K]+", 2, 1, 38.963158319999998013, 23, 0, 0.5, "K1", "H0"),
252 list("[3M+K]+",3,1,38.963158319999998013,23,0,0.25,"K1","H0"), 275 list("[3M+K]+", 3, 1, 38.963158319999998013, 23, 0, 0.25, "K1", "H0"),
253 list("[M+H+K]2+",1,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), 276 list("[M+H+K]2+", 1, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
254 list("[2M+H+K]2+",2,2,39.970434771999997281,24,0,0.5,"K1H1","H0"), 277 list("[2M+H+K]2+", 2, 2, 39.970434771999997281, 24, 0, 0.5, "K1H1", "H0"),
255 list("[3M+H+K]2+",3,2,39.970434771999997281,24,0,0.25,"K1H1","H0"), 278 list("[3M+H+K]2+", 3, 2, 39.970434771999997281, 24, 0, 0.25, "K1H1", "H0"),
256 list("[M+ACN+H]+",1,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), 279 list("[M+ACN+H]+", 1, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
257 list("[2M+ACN+H]+",2,1,42.033825557999996646,25,0,0.25,"C2H4N1","H0"), 280 list("[2M+ACN+H]+", 2, 1, 42.033825557999996646, 25, 0, 0.25, "C2H4N1", "H0"),
258 list("[M+2Na-H]+",1,1,44.971165708000000902,26,0,0.5,"Na2","H1"), 281 list("[M+2Na-H]+", 1, 1, 44.971165708000000902, 26, 0, 0.5, "Na2", "H1"),
259 list("[2M+2Na-H]+",2,1,44.971165708000000902,26,0,0.25,"Na2","H1"), 282 list("[2M+2Na-H]+", 2, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
260 list("[3M+2Na-H]+",3,1,44.971165708000000902,26,0,0.25,"Na2","H1"), 283 list("[3M+2Na-H]+", 3, 1, 44.971165708000000902, 26, 0, 0.25, "Na2", "H1"),
261 list("[2M+FA-H]-",2,-1,44.998202851999998586,27,0,0.25,"C1O2H2","H1"), 284 list("[2M+FA-H]-", 2, -1, 44.998202851999998586, 27, 0, 0.25, "C1O2H2", "H1"),
262 list("[M+FA-H]-",1,-1,44.998202851999998586,27,0,0.5,"C1O2H2","H1"), 285 list("[M+FA-H]-", 1, -1, 44.998202851999998586, 27, 0, 0.5, "C1O2H2", "H1"),
263 list("[M+2Na]2+",1,2,45.978442160000000172,28,0,0.5,"Na2","H0"), 286 list("[M+2Na]2+", 1, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
264 list("[2M+2Na]2+",2,2,45.978442160000000172,28,0,0.5,"Na2","H0"), 287 list("[2M+2Na]2+", 2, 2, 45.978442160000000172, 28, 0, 0.5, "Na2", "H0"),
265 list("[3M+2Na]2+",3,2,45.978442160000000172,28,0,0.25,"Na2","H0"), 288 list("[3M+2Na]2+", 3, 2, 45.978442160000000172, 28, 0, 0.25, "Na2", "H0"),
266 list("[M+H+2Na]3+",1,3,46.985718611999999438,29,0,0.25,"H1Na2","H0"), 289 list("[M+H+2Na]3+", 1, 3, 46.985718611999999438, 29, 0, 0.25, "H1Na2", "H0"),
267 list("[M+H+FA]+",1,1,47.012755755999997122,30,0,0.25,"C1O2H3","H0"), 290 list("[M+H+FA]+", 1, 1, 47.012755755999997122, 30, 0, 0.25, "C1O2H3", "H0"),
268 list("[M+Hac-H]-",1,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), 291 list("[M+Hac-H]-", 1, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
269 list("[2M+Hac-H]-",2,-1,59.013852915999997607,31,0,0.25,"C2O2H4","H1"), 292 list("[2M+Hac-H]-", 2, -1, 59.013852915999997607, 31, 0, 0.25, "C2O2H4", "H1"),
270 list("[M+IsoProp+H]+",1,1,61.064791327999998317,32,0,0.25,"C3H9O1","H0"), 293 list("[M+IsoProp+H]+", 1, 1, 61.064791327999998317, 32, 0, 0.25, "C3H9O1", "H0"),
271 list("[M+Na+K]2+",1,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), 294 list("[M+Na+K]2+", 1, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
272 list("[2M+Na+K]2+",2,2,61.9523793999999981,33,0,0.5,"Na1K1","H0"), 295 list("[2M+Na+K]2+", 2, 2, 61.9523793999999981, 33, 0, 0.5, "Na1K1", "H0"),
273 list("[3M+Na+K]2+",3,2,61.9523793999999981,33,0,0.25,"Na1K1","H0"), 296 list("[3M+Na+K]2+", 3, 2, 61.9523793999999981, 33, 0, 0.25, "Na1K1", "H0"),
274 list("[M+NO3]-",1,-1,61.988366450000000895,34,0,0.5,"N1O3","H0"), 297 list("[M+NO3]-", 1, -1, 61.988366450000000895, 34, 0, 0.5, "N1O3", "H0"),
275 list("[M+ACN+Na]+",1,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), 298 list("[M+ACN+Na]+", 1, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
276 list("[2M+ACN+Na]+",2,1,64.015770185999997464,35,0,0.25,"C2H3N1Na1","H0"), 299 list("[2M+ACN+Na]+", 2, 1, 64.015770185999997464, 35, 0, 0.25, "C2H3N1Na1", "H0"),
277 list("[M+NH4+FA]+",1,1,64.039304861999994502,36,0,0.25,"N1C1O2H6","H0"), 300 list("[M+NH4+FA]+", 1, 1, 64.039304861999994502, 36, 0, 0.25, "N1C1O2H6", "H0"),
278 list("[M-2H+Na+FA]-",1,-1,66.980147479999999405,37,0,0.5,"NaC1O2H2","H2"), 301 list("[M-2H+Na+FA]-", 1, -1, 66.980147479999999405, 37, 0, 0.5, "NaC1O2H2", "H2"),
279 list("[M+3Na]3+",1,3,68.967663239999993153,38,0,0.25,"Na3","H0"), 302 list("[M+3Na]3+", 1, 3, 68.967663239999993153, 38, 0, 0.25, "Na3", "H0"),
280 list("[M+Na+FA]+",1,1,68.99470038399999794,39,0,0.25,"Na1C1O2H2","H0"), 303 list("[M+Na+FA]+", 1, 1, 68.99470038399999794, 39, 0, 0.25, "Na1C1O2H2", "H0"),
281 list("[M+2Cl]2-",1,-2,69.938802580000000832,40,0,1.0,"Cl2","H0"), 304 list("[M+2Cl]2-", 1, -2, 69.938802580000000832, 40, 0, 1.0, "Cl2", "H0"),
282 list("[2M+2Cl]2-",2,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), 305 list("[2M+2Cl]2-", 2, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
283 list("[3M+2Cl]2-",3,-2,69.938802580000000832,40,0,0.5,"Cl2","H0"), 306 list("[3M+2Cl]2-", 3, -2, 69.938802580000000832, 40, 0, 0.5, "Cl2", "H0"),
284 list("[M+2K-H]+",1,1,76.919040187999996758,41,0,0.5,"K2","H1"), 307 list("[M+2K-H]+", 1, 1, 76.919040187999996758, 41, 0, 0.5, "K2", "H1"),
285 list("[2M+2K-H]+",2,1,76.919040187999996758,41,0,0.25,"K2","H1"), 308 list("[2M+2K-H]+", 2, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
286 list("[3M+2K-H]+",3,1,76.919040187999996758,41,0,0.25,"K2","H1"), 309 list("[3M+2K-H]+", 3, 1, 76.919040187999996758, 41, 0, 0.25, "K2", "H1"),
287 list("[M+2K]2+",1,2,77.926316639999996028,42,0,0.5,"K2","H0"), 310 list("[M+2K]2+", 1, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
288 list("[2M+2K]2+",2,2,77.926316639999996028,42,0,0.5,"K2","H0"), 311 list("[2M+2K]2+", 2, 2, 77.926316639999996028, 42, 0, 0.5, "K2", "H0"),
289 list("[3M+2K]2+",3,2,77.926316639999996028,42,0,0.25,"K2","H0"), 312 list("[3M+2K]2+", 3, 2, 77.926316639999996028, 42, 0, 0.25, "K2", "H0"),
290 list("[M+Br]-",1,-1,78.918886479999997619,43,1,1.0,"Br1","H0"), 313 list("[M+Br]-", 1, -1, 78.918886479999997619, 43, 1, 1.0, "Br1", "H0"),
291 list("[M+Cl+FA]-",1,-1,80.974880593999998268,44,0,0.5,"Cl1C1O2H2","H0"), 314 list("[M+Cl+FA]-", 1, -1, 80.974880593999998268, 44, 0, 0.5, "Cl1C1O2H2", "H0"),
292 list("[M+AcNa-H]-",1,-1,80.995797543999998426,45,0,0.25,"C2H3Na1O2","H1"), 315 list("[M+AcNa-H]-", 1, -1, 80.995797543999998426, 45, 0, 0.25, "C2H3Na1O2", "H1"),
293 list("[M+2ACN+2H]2+",1,2,84.067651115999993292,46,0,0.25,"C4H8N2","H0"), 316 list("[M+2ACN+2H]2+", 1, 2, 84.067651115999993292, 46, 0, 0.25, "C4H8N2", "H0"),
294 list("[M+K+FA]+",1,1,84.968637623999995868,47,0,0.25,"K1C1O2H2","H0"), 317 list("[M+K+FA]+", 1, 1, 84.968637623999995868, 47, 0, 0.25, "K1C1O2H2", "H0"),
295 list("[M+Cl+Na+FA-H]-",1,-1,102.95682522200000619,48,0,0.5,"Cl1Na1C1O2H2","H1"), 318 list("[M+Cl+Na+FA-H]-", 1, -1, 102.95682522200000619, 48, 0, 0.5, "Cl1Na1C1O2H2", "H1"),
296 list("[2M+3H2O+2H]+",2,1,104.03153939599999944,49,0,0.25,"H8O6","H0"), 319 list("[2M+3H2O+2H]+", 2, 1, 104.03153939599999944, 49, 0, 0.25, "H8O6", "H0"),
297 list("[M+TFA-H]-",1,-1,112.98558742000000165,50,0,0.5,"C2F3O2H1","H1"), 320 list("[M+TFA-H]-", 1, -1, 112.98558742000000165, 50, 0, 0.5, "C2F3O2H1", "H1"),
298 list("[M+H+TFA]+",1,1,115.00014032400000019,51,0,0.25,"C2F3O2H2","H0"), 321 list("[M+H+TFA]+", 1, 1, 115.00014032400000019, 51, 0, 0.25, "C2F3O2H2", "H0"),
299 list("[M+3ACN+2H]2+",1,2,125.09420022199999778,52,0,0.25,"C6H11N3","H0"), 322 list("[M+3ACN+2H]2+", 1, 2, 125.09420022199999778, 52, 0, 0.25, "C6H11N3", "H0"),
300 list("[M+NH4+TFA]+",1,1,132.02668943000000468,53,0,0.25,"N1C2F3O2H5","H0"), 323 list("[M+NH4+TFA]+", 1, 1, 132.02668943000000468, 53, 0, 0.25, "N1C2F3O2H5", "H0"),
301 list("[M+Na+TFA]+",1,1,136.98208495200000811,54,0,0.25,"Na1C2F3O2H1","H0"), 324 list("[M+Na+TFA]+", 1, 1, 136.98208495200000811, 54, 0, 0.25, "Na1C2F3O2H1", "H0"),
302 list("[M+Cl+TFA]-",1,-1,148.96226516199999423,55,0,0.5,"Cl1C2F3O2H1","H0"), 325 list("[M+Cl+TFA]-", 1, -1, 148.96226516199999423, 55, 0, 0.5, "Cl1C2F3O2H1", "H0"),
303 list("[M+K+TFA]+",1,1,152.95602219200000604,56,0,0.25,"K1C2F3O2H1","H0") 326 list("[M+K+TFA]+", 1, 1, 152.95602219200000604, 56, 0, 0.25, "K1C2F3O2H1","H0")
304 ) 327 )
305 dummy_adduct <- orm$adduct() 328 dummy_adduct <- orm$adduct()
306 for (adduct in adducts) { 329 for (adduct in adducts) {
307 i <- 0 330 i <- 0
308 dummy_adduct$set_name(adduct[[i <- i+1]]) 331 dummy_adduct$set_name(adduct[[i <- i + 1]])
309 dummy_adduct$set_multi(adduct[[i <- i+1]]) 332 dummy_adduct$set_multi(adduct[[i <- i + 1]])
310 dummy_adduct$set_charge(adduct[[i <- i+1]]) 333 dummy_adduct$set_charge(adduct[[i <- i + 1]])
311 dummy_adduct$set_mass(adduct[[i <- i+1]]) 334 dummy_adduct$set_mass(adduct[[i <- i + 1]])
312 dummy_adduct$set_oidscore(adduct[[i <- i+1]]) 335 dummy_adduct$set_oidscore(adduct[[i <- i + 1]])
313 dummy_adduct$set_quasi(adduct[[i <- i+1]]) 336 dummy_adduct$set_quasi(adduct[[i <- i + 1]])
314 dummy_adduct$set_ips(adduct[[i <- i+1]]) 337 dummy_adduct$set_ips(adduct[[i <- i + 1]])
315 dummy_adduct$set_formula_add(adduct[[i <- i+1]]) 338 dummy_adduct$set_formula_add(adduct[[i <- i + 1]])
316 dummy_adduct$set_formula_ded(adduct[[i <- i+1]]) 339 dummy_adduct$set_formula_ded(adduct[[i <- i + 1]])
317 invisible(dummy_adduct$save()) 340 invisible(dummy_adduct$save())
318 dummy_adduct$clear(unset_id=TRUE) 341 dummy_adduct$clear(unset_id = TRUE)
319 } 342 }
320 message("Adducts created") 343 message("Adducts created")
321 } 344 }
322 345
323 insert_base_data <- function(orm, path, archetype=FALSE) { 346 insert_base_data <- function(orm, path, archetype = FALSE) {
324 if (archetype) { 347 if (archetype) {
325 ## not implemented yet 348 ## not implemented yet
326 return () 349 return()
327 } 350 }
328 base_data <- readLines(path) 351 base_data <- readLines(path)
329 for (sql in strsplit(paste(base_data, collapse=" "), ";")[[1]]) { 352 for (sql in strsplit(paste(base_data, collapse = " "), ";")[[1]]) {
330 orm$execute(sql) 353 orm$execute(sql)
331 } 354 }
332 set_database_version(orm, "enriched") 355 set_database_version(orm, "enriched")
333 } 356 }
334 357
335 insert_compounds <- function(orm, compounds_path) { 358 insert_compounds <- function(orm, compounds_path) {
336 compounds <- read.csv(file=compounds_path, sep="\t") 359 compounds <- read.csv(file = compounds_path, sep = "\t")
337 if (is.null(compounds <- translate_compounds(compounds))) { 360 if (is.null(compounds <- translate_compounds(compounds))) {
338 stop("Could not find asked compound's attributes in csv file.") 361 stop("Could not find asked compound's attributes in csv file.")
339 } 362 }
340 dummy_compound <- orm$compound() 363 dummy_compound <- orm$compound()
341 compound_list <- list() 364 compound_list <- list()
342 for (i in seq_len(nrow(compounds))) { 365 for (i in seq_len(nrow(compounds))) {
343 dummy_compound$set_mz(compounds[i, "mz"]) 366 dummy_compound$set_mz(compounds[i, "mz"])
344 dummy_compound$set_name(compounds[i, "name"]) 367 dummy_compound$set_name(compounds[i, "name"])
345 dummy_compound$set_common_name(compounds[i, "common_name"]) 368 dummy_compound$set_common_name(compounds[i, "common_name"])
346 dummy_compound$set_formula(compounds[i, "formula"]) 369 dummy_compound$set_formula(compounds[i, "formula"])
347 compound_list[[length(compound_list)+1]] <- as.list( 370 compound_list[[length(compound_list) + 1]] <- as.list(
348 dummy_compound, 371 dummy_compound,
349 c("mz", "name", "common_name", "formula") 372 c("mz", "name", "common_name", "formula")
350 ) 373 )
351 dummy_compound$clear(unset_id=TRUE) 374 dummy_compound$clear(unset_id = TRUE)
352 } 375 }
353 invisible(dummy_compound$save(bulk=compound_list)) 376 invisible(dummy_compound$save(bulk = compound_list))
354 } 377 }
355 378
356 translate_compounds <- function(compounds) { 379 translate_compounds <- function(compounds) {
357 recognized_headers <- list( 380 recognized_headers <- list(
358 c("HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1", "MetName", "ChemFormula", "INChIkey") 381 c(
382 "HMDB_ID", "MzBank", "X.M.H..", "X.M.H...1",
383 "MetName", "ChemFormula", "INChIkey"
384 )
359 ) 385 )
360 header_translators <- list( 386 header_translators <- list(
361 hmdb_header_translator 387 hmdb_header_translator
362 ) 388 )
363 for (index in seq_along(recognized_headers)) { 389 for (index in seq_along(recognized_headers)) {
364 headers <- recognized_headers[[index]] 390 headers <- recognized_headers[[index]]
365 if (identical(colnames(compounds), headers)) { 391 if (identical(colnames(compounds), headers)) {
366 return (header_translators[[index]](compounds)) 392 return(header_translators[[index]](compounds))
367 } 393 }
368 } 394 }
369 if (is.null(translator <- guess_translator(colnames(compounds)))) { 395 if (is.null(translator <- guess_translator(colnames(compounds)))) {
370 return (NULL) 396 return(NULL)
371 } 397 }
372 return (csv_header_translator(translator, compounds)) 398 return(csv_header_translator(translator, compounds))
373 } 399 }
374 400
375 guess_translator <- function(header) { 401 guess_translator <- function(header) {
376 result <- list( 402 result <- list(
377 # HMDB_ID=NULL, 403 # HMDB_ID = NULL,
378 mz=NULL, 404 mz = NULL,
379 name=NULL, 405 name = NULL,
380 common_name=NULL, 406 common_name = NULL,
381 formula=NULL, 407 formula = NULL,
382 # inchi_key=NULL 408 # inchi_key = NULL
383 ) 409 )
384 asked_cols <- names(result) 410 asked_cols <- names(result)
385 for (asked_col in asked_cols) { 411 for (asked_col in asked_cols) {
386 for (col in header) { 412 for (col in header) {
387 if ((twisted <- tolower(col)) == asked_col 413 if ((twisted <- tolower(col)) == asked_col
393 next 419 next
394 } 420 }
395 } 421 }
396 } 422 }
397 if (any(mapply(is.null, result))) { 423 if (any(mapply(is.null, result))) {
398 return (NULL) 424 return(NULL)
399 } 425 }
400 return (result) 426 return(result)
401 } 427 }
402 428
403 hmdb_header_translator <- function(compounds) { 429 hmdb_header_translator <- function(compounds) {
404 return (csv_header_translator( 430 return(csv_header_translator(
405 list( 431 list(
406 HMDB_ID="HMDB_ID", 432 HMDB_ID = "HMDB_ID",
407 mz="MzBank", 433 mz = "MzBank",
408 name="MetName", 434 name = "MetName",
409 common_name="MetName", 435 common_name = "MetName",
410 formula="ChemFormula", 436 formula = "ChemFormula",
411 inchi_key="INChIkey" 437 inchi_key = "INChIkey"
412 ), compounds 438 ), compounds
413 )) 439 ))
414 } 440 }
415 441
416 csv_header_translator <- function(translation_table, csv) { 442 csv_header_translator <- function(translation_table, csv) {
417 header_names <- names(translation_table) 443 header_names <- names(translation_table)
418 result <- data.frame(1:nrow(csv)) 444 result <- data.frame(seq_len(nrow(csv)))
419 for (i in seq_along(header_names)) { 445 for (i in seq_along(header_names)) {
420 result[, header_names[[i]]] <- csv[, translation_table[[i]]] 446 result[, header_names[[i]]] <- csv[, translation_table[[i]]]
421 } 447 }
422 result[, "mz"] <- as.numeric(result[, "mz"]) 448 result[, "mz"] <- as.numeric(result[, "mz"])
423 return (result) 449 return(result)
424 } 450 }
425 451
426 set_database_version <- function(orm, version) { 452 set_database_version <- function(orm, version) {
427 orm$set_tag( 453 orm$set_tag(
428 version, 454 version,
429 tag_name="database_version", 455 tag_name = "database_version",
430 tag_table_name="XSeeker_tagging_table" 456 tag_table_name = "XSeeker_tagging_table"
431 ) 457 )
432 } 458 }
433 459
434 process_rdata <- function(orm, rdata, options) { 460 process_rdata <- function(orm, rdata, options) {
435 mzml_tmp_dir <- gather_mzml_files(rdata) 461 mzml_tmp_dir <- gather_mzml_files(rdata)
442 || options$`not-show-percent` == FALSE 468 || options$`not-show-percent` == FALSE
443 ) 469 )
444 error <- tryCatch({ 470 error <- tryCatch({
445 process_sample_list( 471 process_sample_list(
446 orm, rdata, samples, 472 orm, rdata, samples,
447 show_percent=show_percent 473 show_percent = show_percent,
474 file_grouping_var = options$class
448 ) 475 )
449 NULL 476 NULL
450 }, error=function(e) { 477 }, error = function(e) {
451 message(e) 478 message(e)
452 e 479 e
453 }) 480 })
454 if (!is.null(mzml_tmp_dir)) { 481 if (!is.null(mzml_tmp_dir)) {
455 unlink(mzml_tmp_dir, recursive=TRUE) 482 unlink(mzml_tmp_dir, recursive = TRUE)
456 } 483 }
457 if (!is.null(error)) { 484 if (!is.null(error)) {
458 stop(error) 485 stop(error)
459 } 486 }
460 } 487 }
461 488
462 gather_mzml_files <- function(rdata) { 489 gather_mzml_files <- function(rdata) {
463 if (is.null(rdata$singlefile)) { 490 if (is.null(rdata$singlefile)) {
464 message("Extracting mxml files") 491 message("Extracting mxml files")
465 tmp <- tempdir() 492 tmp <- tempdir()
466 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir=tmp) 493 rdata$singlefile <- utils::unzip(rdata$zipfile, exdir = tmp)
467 names(rdata$singlefile) <- tools::file_path_sans_ext(basename(rdata$singlefile)) 494 names(rdata$singlefile) <- tools::file_path_sans_ext(
495 basename(rdata$singlefile)
496 )
468 message("Extracted") 497 message("Extracted")
469 return (tmp) 498 return(tmp)
470 } else { 499 } else {
471 message(sprintf("Not a zip file, loading files directly from path: %s", paste(rdata$singlefile, collapse=" ; "))) 500 message(sprintf(
472 } 501 "Not a zip file, loading files directly from path: %s",
473 return (NULL) 502 paste(rdata$singlefile, collapse = " ; ")
474 } 503 ))
475 504 }
476 process_sample_list <- function(orm, radta, sample_names, show_percent) { 505 return(NULL)
477 file_grouping_var <- find_grouping_var(rdata$variableMetadata) 506 }
507
508 process_sample_list <- function(
509 orm,
510 rdata,
511 sample_names,
512 show_percent,
513 file_grouping_var = NULL
514 ) {
515 if (is.null(file_grouping_var)) {
516 file_grouping_var <- find_grouping_var(rdata$variableMetadata)
517 if (is.null(file_grouping_var)) {
518 stop("Malformed variableMetada.")
519 }
520 }
521 tryCatch({
522 headers <- colnames(rdata$variableMetadata)
523 file_grouping_var <- headers[[as.numeric(file_grouping_var)]]
524 }, error = function(e) NULL)
525 if (
526 is.null(file_grouping_var)
527 || !(file_grouping_var %in% colnames(rdata$variableMetadata))
528 ) {
529 stop(sprintf(
530 "Could not find grouping variable %s in var meta file.",
531 file_grouping_var
532 ))
533 }
478 message("Processing samples.") 534 message("Processing samples.")
479 message(sprintf("File grouping variable: %s", file_grouping_var)) 535 message(sprintf("File grouping variable: %s", file_grouping_var))
480 if(is.null(file_grouping_var)) {
481 stop("Malformed variableMetada.")
482 }
483 536
484 context <- new.env() 537 context <- new.env()
485 context$samples <- list() 538 context$samples <- list()
486 context$peaks <- rdata$xa@xcmsSet@peaks 539 context$peaks <- rdata$xa@xcmsSet@peaks
487 context$groupidx <- rdata$xa@xcmsSet@groupidx 540 context$groupidx <- rdata$xa@xcmsSet@groupidx
490 process_arg_list <- rdata$listOFlistArguments 543 process_arg_list <- rdata$listOFlistArguments
491 var_meta <- rdata$variableMetadata 544 var_meta <- rdata$variableMetadata
492 545
493 process_params <- list() 546 process_params <- list()
494 if (is.null(process_arg_list)) { 547 if (is.null(process_arg_list)) {
495 histories <- list()
496 for (history in xcms_set@.processHistory) { 548 for (history in xcms_set@.processHistory) {
497 if ( 549 if (
498 class(history@param) == "CentWaveParam" 550 class(history@param) == "CentWaveParam"
499 && history@type == "Peak detection" 551 && history@type == "Peak detection"
500 ) { 552 ) {
501 params <- history@param 553 params <- history@param
502 process_params <- list(list( 554 process_params <- list(list(
503 xfunction="annotatediff", 555 xfunction = "annotatediff",
504 ppm=params@ppm, 556 ppm = params@ppm,
505 peakwidth=sprintf("%s - %s", params@peakwidth[[1]], params@peakwidth[[2]]), 557 peakwidth = sprintf(
506 snthresh=params@snthresh, 558 "%s - %s",
507 prefilterStep=params@prefilter[[1]], 559 params@peakwidth[[1]],
508 prefilterLevel=params@prefilter[[2]], 560 params@peakwidth[[2]]
509 mzdiff=params@mzdiff, 561 ),
510 fitgauss=params@fitgauss, 562 snthresh = params@snthresh,
511 noise=params@noise, 563 prefilterStep = params@prefilter[[1]],
512 mzCenterFun=params@mzCenterFun, 564 prefilterLevel = params@prefilter[[2]],
513 integrate=params@integrate, 565 mzdiff = params@mzdiff,
514 firstBaselineCheck=params@firstBaselineCheck, 566 fitgauss = params@fitgauss,
515 snthreshIsoROIs=!identical(params@roiScales, numeric(0)) 567 noise = params@noise,
568 mzCenterFun = params@mzCenterFun,
569 integrate = params@integrate,
570 firstBaselineCheck = params@firstBaselineCheck,
571 snthreshIsoROIs = !identical(params@roiScales, numeric(0))
516 )) 572 ))
517 break 573 break
518 } 574 }
519 } 575 }
520 } else { 576 } else {
521 for (list_name in names(process_arg_list)) { 577 for (list_name in names(process_arg_list)) {
522 param_list <- list() 578 param_list <- list()
523 for (param_name in names(process_arg_list[[list_name]])) { 579 for (param_name in names(process_arg_list[[list_name]])) {
524 param_list[[param_name]] <- process_arg_list[[list_name]][[param_name]] 580 param_list[[param_name]] <- process_arg_list[[
581 list_name
582 ]][[param_name]]
525 } 583 }
526 process_params[[length(process_params)+1]] <- param_list 584 process_params[[length(process_params) + 1]] <- param_list
527 } 585 }
528 } 586 }
529 587
530 message("Parameters from previous processes extracted.") 588 message("Parameters from previous processes extracted.")
531 589
532 590
533 indices <- as.numeric(unique(var_meta[, file_grouping_var])) 591 indices <- as.numeric(unique(var_meta[, file_grouping_var]))
592 if (any(is.null(names(singlefile)[indices]))) {
593 stop(sprintf(
594 paste(
595 "Indices defined by grouping variable %s are not all present",
596 "in singlefile names (%s).\nCannot continue. Indices: %s"
597 ),
598 file_grouping_var,
599 paste(names(singlefile), collapse = ", "),
600 paste(indices, collapse = ", ")
601 ))
602 }
534 smol_xcms_set <- orm$smol_xcms_set() 603 smol_xcms_set <- orm$smol_xcms_set()
535 mz_tab_info <- new.env() 604 mz_tab_info <- new.env()
536 g <- xcms::groups(xcms_set) 605 g <- xcms::groups(xcms_set)
537 mz_tab_info$group_length <- nrow(g) 606 mz_tab_info$group_length <- nrow(g)
538 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) 607 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
539 mz_tab_info$sampnames <- xcms::sampnames(xcms_set) 608 mz_tab_info$sampnames <- xcms::sampnames(xcms_set)
540 mz_tab_info$sampclass <- xcms::sampclass(xcms_set) 609 mz_tab_info$sampclass <- xcms::sampclass(xcms_set)
541 mz_tab_info$rtmed <- g[,"rtmed"] 610 mz_tab_info$rtmed <- g[, "rtmed"]
542 mz_tab_info$mzmed <- g[,"mzmed"] 611 mz_tab_info$mzmed <- g[, "mzmed"]
543 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(xcms_set, value="into") 612 mz_tab_info$smallmolecule_abundance_assay <- xcms::groupval(
544 blogified <- blob::blob(fst::compress_fst(serialize(mz_tab_info, NULL), compression=100)) 613 xcms_set,
614 value = "into"
615 )
616 blogified <- blob::blob(fst::compress_fst(
617 serialize(mz_tab_info, NULL),
618 compression = 100
619 ))
545 rm(mz_tab_info) 620 rm(mz_tab_info)
546 621
547 invisible(smol_xcms_set$set_raw(blogified)$save()) 622 invisible(smol_xcms_set$set_raw(blogified)$save())
548 smol_xcms_set_id <- smol_xcms_set$get_id() 623 smol_xcms_set_id <- smol_xcms_set$get_id()
549 rm(smol_xcms_set) 624 rm(smol_xcms_set)
580 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION 655 env$enriched_rdata_version <- ENRICHED_RDATA_VERSION
581 env$tool_name <- TOOL_NAME 656 env$tool_name <- TOOL_NAME
582 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC 657 env$enriched_rdata_doc <- ENRICHED_RDATA_DOC
583 658
584 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id) 659 sample <- add_sample_to_database(orm, env, context, smol_xcms_set_id)
585 rm (env) 660 rm(env)
586 context$samples[no] <- sample$get_id() 661 context$samples[no] <- sample$get_id()
587 rm (sample) 662 rm(sample)
588 } 663 }
589 context$clusters <- list() 664 context$clusters <- list()
590 context$show_percent <- show_percent 665 context$show_percent <- show_percent
591 context$cluster_mean_rt_abundance <- list() 666 context$cluster_mean_rt_abundance <- list()
592 context$central_feature <- list() 667 context$central_feature <- list()
595 clusters <- context$clusters 670 clusters <- context$clusters
596 rm(context) 671 rm(context)
597 message("Features enrichment") 672 message("Features enrichment")
598 complete_features(orm, clusters, show_percent) 673 complete_features(orm, clusters, show_percent)
599 message("Features enrichment done.") 674 message("Features enrichment done.")
600 return (NULL) 675 return(NULL)
601 } 676 }
602 677
603 find_grouping_var <- function(var_meta) { 678 find_grouping_var <- function(var_meta) {
604 known_colnames = c( 679 known_colnames <- c(
605 "name", "namecustom", "mz", "mzmin", "mzmax", 680 "name", "namecustom", "mz", "mzmin", "mzmax",
606 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct", "pcgroup" 681 "rt", "rtmin", "rtmax", "npeaks", "isotopes", "adduct",
682 "pcgroup", "ms_level"
607 ) 683 )
608 col_names <- colnames(var_meta) 684 col_names <- colnames(var_meta)
609 classes = list() 685 classes <- list()
610 for (name in col_names) { 686 for (name in col_names) {
611 if (!(name %in% known_colnames)) { 687 if (!(name %in% known_colnames)) {
612 classes[[length(classes)+1]] = name 688 classes[[length(classes) + 1]] <- name
613 } 689 }
614 } 690 }
615 if (length(classes) > 1) { 691 if (length(classes) > 1) {
616 stop(sprintf("Only one class expected in the variable metadata. Found %d .", length(classes))) 692 stop(sprintf(
693 "Only one class expected in the variable metadata. Found %d .",
694 length(classes)
695 ))
617 } 696 }
618 if (length(classes) == 0) { 697 if (length(classes) == 0) {
619 stop("Could not find any class column in your variableMetadata.") 698 stop("Could not find any class column in your variableMetadata.")
620 } 699 }
621 return (classes[[1]]) 700 return(classes[[1]])
622 } 701 }
623 702
624 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) { 703 add_sample_to_database <- function(orm, env, context, smol_xcms_set_id) {
625 message(sprintf("Processing sample %s", env$sample_name)) 704 message(sprintf("Processing sample %s", env$sample_name))
626 sample <- ( 705 sample <- (
627 orm$sample() 706 orm$sample()
628 $set_name(env$sample_name) 707 $set_name(env$sample_name)
629 $set_path(env$dataset_path) 708 $set_path(env$dataset_path)
630 $set_kind("enriched_rdata") 709 $set_kind("enriched_rdata")
631 $set_polarity( 710 $set_polarity(
632 if (is.null(env$polarity) || identical(env$polarity, character(0))) "" 711 if (
712 is.null(env$polarity)
713 || identical(env$polarity, character(0))
714 ) ""
633 else env$polarity 715 else env$polarity
634 ) 716 )
635 $set_raw(blob::blob(fst::compress_fst( 717 $set_raw(blob::blob(fst::compress_fst(
636 serialize(env, NULL), 718 serialize(env, NULL),
637 compression=100 719 compression = 100
638 ))) 720 )))
639 ) 721 )
640 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id 722 sample[["smol_xcms_set_id"]] <- smol_xcms_set_id
641 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id 723 sample$modified__[["smol_xcms_set_id"]] <- smol_xcms_set_id
642 sample <- sample$save() 724 sample <- sample$save()
643 load_process_params(orm, sample, env$process_params) 725 load_process_params(orm, sample, env$process_params)
644 message(sprintf("Sample %s inserted.", env$sample_name)) 726 message(sprintf("Sample %s inserted.", env$sample_name))
645 return (sample) 727 return(sample)
646 } 728 }
647 729
648 730
649 load_variable_metadata <- function(orm, var_meta, context) { 731 load_variable_metadata <- function(orm, var_meta, context) {
650 all_clusters <- orm$cluster()$all() 732 all_clusters <- orm$cluster()$all()
658 orm, var_meta, context, 740 orm, var_meta, context,
659 next_feature_id, next_cluster_id, 741 next_feature_id, next_cluster_id,
660 next_pc_group, next_align_group 742 next_pc_group, next_align_group
661 )) 743 ))
662 message("Extracting features done.") 744 message("Extracting features done.")
663 return (NULL) 745 return(NULL)
664 } 746 }
665 747
666 get_next_id <- function(models, attribute) { 748 get_next_id <- function(models, attribute) {
667 if ((id <- models$max(attribute)) == Inf || id == -Inf) { 749 if ((id <- models$max(attribute)) == Inf || id == -Inf) {
668 return (0) 750 return(0)
669 } 751 }
670 return (id) 752 return(id)
671 } 753 }
672 754
673 create_features <- function( 755 create_features <- function(
674 orm, var_meta, context, 756 orm, var_meta, context,
675 next_feature_id, next_cluster_id, 757 next_feature_id, next_cluster_id,
676 next_pc_group, next_align_group 758 next_pc_group, next_align_group
677 ) { 759 ) {
678 field_names <- as.list(names(orm$feature()$fields__)) 760 field_names <- as.list(names(orm$feature()$fields__))
679 field_names[field_names=="id"] <- NULL 761 field_names[field_names == "id"] <- NULL
680 762
681 features <- list() 763 features <- list()
682 dummy_feature <- orm$feature() 764 dummy_feature <- orm$feature()
683 765
684 if (show_percent <- context$show_percent) { 766 if (show_percent <- context$show_percent) {
686 total <- nrow(var_meta) 768 total <- nrow(var_meta)
687 } 769 }
688 rows <- seq_len(nrow(var_meta)) 770 rows <- seq_len(nrow(var_meta))
689 if (PROCESS_SMOL_BATCH) { 771 if (PROCESS_SMOL_BATCH) {
690 772
691 rows <- rows[1:as.integer(FAST_FEATURE_RATIO/100.0 * length(rows))] 773 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
692 } 774 }
693 cluster_row <- list() 775 cluster_row <- list()
694 for (row in rows) { 776 for (row in rows) {
695 if (show_percent && (row / total) * 100 > percent) { 777 if (show_percent && (row / total) * 100 > percent) {
696 percent <- percent + 1 778 percent <- percent + 1
697 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) 779 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
698 } 780 }
699 781
700 dummy_feature$set_featureID(next_feature_id) 782 dummy_feature$set_featureID(next_feature_id)
701 next_feature_id <- next_feature_id + 1 783 next_feature_id <- next_feature_id + 1
702 784
708 context$clusterID <- clusterID 790 context$clusterID <- clusterID
709 dummy_feature$set_iso(iso) 791 dummy_feature$set_iso(iso)
710 792
711 peak_list <- context$peaks[context$groupidx[[row]], ] 793 peak_list <- context$peaks[context$groupidx[[row]], ]
712 if (! ("matrix" %in% class(peak_list))) { 794 if (! ("matrix" %in% class(peak_list))) {
713 peak_list <- matrix(peak_list, nrow=1, ncol=length(peak_list), dimnames=list(c(), names(peak_list))) 795 peak_list <- matrix(
796 peak_list,
797 nrow = 1,
798 ncol = length(peak_list),
799 dimnames = list(c(), names(peak_list))
800 )
714 } 801 }
715 802
716 clusterID <- as.character(clusterID) 803 clusterID <- as.character(clusterID)
717 if (is.null(context$central_feature[[clusterID]])) { 804 if (is.null(context$central_feature[[clusterID]])) {
718 int_o <- extract_peak_var(peak_list, "into") 805 int_o <- extract_peak_var(peak_list, "into")
719 context$central_feature[[clusterID]] <- ( 806 context$central_feature[[clusterID]] <- (
720 peak_list[peak_list[, "into"] == int_o,]["sample"] 807 peak_list[peak_list[, "into"] == int_o, ]["sample"]
721 ) 808 )
722 } 809 }
723 810
724 if (!DEBUG_FAST_IGNORE_SLOW_OP) { 811 if (!DEBUG_FAST_IGNORE_SLOW_OP) {
725 sample_peak_list <- peak_list[as.integer(peak_list[, "sample"]) == context$central_feature[[clusterID]], , drop=FALSE] 812 central_feature <- context$central_feature[[clusterID]]
726 if (!identical(sample_peak_list, numeric(0)) && !is.null(nrow(sample_peak_list)) && nrow(sample_peak_list) != 0) { 813 sample_peak_list <- peak_list[
727 if (!is.na(int_o <- extract_peak_var(sample_peak_list, "into"))) { 814 as.integer(peak_list[, "sample"]) == central_feature,
815 ,
816 drop = FALSE
817 ]
818 if (
819 !identical(sample_peak_list, numeric(0))
820 && !is.null(nrow(sample_peak_list))
821 && nrow(sample_peak_list) != 0
822 ) {
823 int_o <- extract_peak_var(sample_peak_list, "into")
824 if (!is.na(int_o)) {
728 dummy_feature$set_int_o(int_o) 825 dummy_feature$set_int_o(int_o)
729 } 826 }
730 if (!is.na(int_b <- extract_peak_var(sample_peak_list, "intb"))) { 827 int_b <- extract_peak_var(sample_peak_list, "intb")
828 if (!is.na(int_b)) {
731 dummy_feature$set_int_b(int_b) 829 dummy_feature$set_int_b(int_b)
732 } 830 }
733 if (!is.na(max_o <- extract_peak_var(sample_peak_list, "maxo"))) { 831 max_o <- extract_peak_var(sample_peak_list, "maxo")
832 if (!is.na(max_o)) {
734 dummy_feature$set_max_o(max_o) 833 dummy_feature$set_max_o(max_o)
735 } 834 }
736 } 835 }
737 } 836 }
738 837
742 dummy_feature, clusterID, 841 dummy_feature, clusterID,
743 context, curent_var_meta, next_pc_group, 842 context, curent_var_meta, next_pc_group,
744 next_align_group 843 next_align_group
745 ) 844 )
746 next_align_group <- next_align_group + 1 845 next_align_group <- next_align_group + 1
747 features[[length(features)+1]] <- as.list(dummy_feature, field_names) 846 features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
748 dummy_feature$clear() 847 dummy_feature$clear()
749 } 848 }
750 rm(var_meta) 849 rm(var_meta)
751 message("") 850 message("")
752 message("Saving features") 851 message("Saving features")
753 invisible(dummy_feature$save(bulk=features)) 852 invisible(dummy_feature$save(bulk = features))
754 853
755 ## We link manually clusters to the sample they're in. 854 ## We link manually clusters to the sample they're in.
756 link_cache <- list() 855 link_cache <- list()
757 for (row in rows) { 856 for (row in rows) {
758 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"]) 857 sample_nos <- unique(context$peaks[context$groupidx[[row]], "sample"])
759 for (sample_id in context$samples[sample_nos]) { 858 for (sample_id in context$samples[sample_nos]) {
760 cluster_id <- cluster_row[[row]]$get_id() 859 cluster_id <- cluster_row[[row]]$get_id()
761 if (is.null(link_cache[[id <- paste(sample_id, cluster_id, sep=";")]])) { 860 id <- paste(sample_id, cluster_id, sep = ";")
861 if (is.null(link_cache[[id]])) {
762 link_cache[[id]] <- 1 862 link_cache[[id]] <- 1
763 orm$cluster_sample( 863 orm$cluster_sample(
764 sample_id=sample_id, 864 sample_id = sample_id,
765 cluster_id=cluster_id 865 cluster_id = cluster_id
766 )$save() 866 )$save()
767 } 867 }
768 } 868 }
769 } 869 }
770 870
771 message("Saved.") 871 message("Saved.")
772 return (context$clusters) 872 return(context$clusters)
773 } 873 }
774 874
775 extract_peak_var <- function(peak_list, var_name, selector=max) { 875 extract_peak_var <- function(peak_list, var_name, selector = max) {
776 value <- peak_list[, var_name] 876 value <- peak_list[, var_name]
777 names(value) <- NULL 877 names(value) <- NULL
778 return (selector(value)) 878 return(selector(value))
779 } 879 }
780 880
781 set_feature_fields_from_var_meta <- function(feature, var_meta) { 881 set_feature_fields_from_var_meta <- function(feature, var_meta) {
782 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) { 882 if (!is.null(mz <- var_meta[["mz"]]) && !is.na(mz)) {
783 feature$set_mz(mz) 883 feature$set_mz(mz)
798 feature$set_rt_max(rtmax) 898 feature$set_rt_max(rtmax)
799 } 899 }
800 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) { 900 if (!is.null(isotopes <- var_meta[["isotopes"]]) && !is.na(isotopes)) {
801 feature$set_iso(isotopes) 901 feature$set_iso(isotopes)
802 } 902 }
803 return (feature) 903 return(feature)
804 } 904 }
805 905
806 extract_iso <- function(weird_data) { 906 extract_iso <- function(weird_data) {
807 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { 907 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
808 return (sub("^\\[\\d+\\]", "", weird_data, perl=TRUE)) 908 return(sub("^\\[\\d+\\]", "", weird_data, perl = TRUE))
809 } 909 }
810 return (weird_data) 910 return(weird_data)
811 } 911 }
812 912
813 extract_clusterID <- function(weird_data, next_cluster_id){ 913 extract_clusterID <- function(weird_data, next_cluster_id) {
814 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) { 914 if (grepl("^\\[\\d+\\]", weird_data)[[1]]) {
815 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]") 915 clusterID <- stringr::str_extract(weird_data, "^\\[\\d+\\]")
816 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+")) 916 clusterID <- as.numeric(stringr::str_extract(clusterID, "\\d+"))
817 } else { 917 } else {
818 clusterID <- 0 918 clusterID <- 0
819 } 919 }
820 return (clusterID + next_cluster_id) 920 return(clusterID + next_cluster_id)
821 } 921 }
822 922
823 create_associated_cluster <- function( 923 create_associated_cluster <- function(
824 orm, 924 orm,
825 main_sample_id, feature, clusterID, 925 main_sample_id, feature, clusterID,
829 if (is.null(cluster <- context$clusters[[clusterID]])) { 929 if (is.null(cluster <- context$clusters[[clusterID]])) {
830 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]]) 930 pcgroup <- as.numeric(curent_var_meta[["pcgroup"]])
831 adduct_name <- as.character(curent_var_meta[["adduct"]]) 931 adduct_name <- as.character(curent_var_meta[["adduct"]])
832 annotation <- curent_var_meta[["isotopes"]] 932 annotation <- curent_var_meta[["isotopes"]]
833 cluster <- context$clusters[[clusterID]] <- orm$cluster( 933 cluster <- context$clusters[[clusterID]] <- orm$cluster(
834 pc_group=pcgroup + next_pc_group, 934 pc_group = pcgroup + next_pc_group,
835 # adduct=adduct, 935 # adduct=adduct,
836 align_group=next_align_group, 936 align_group = next_align_group,
837 # curent_group=curent_group, 937 # curent_group=curent_group,
838 clusterID=context$clusterID, 938 clusterID = context$clusterID,
839 annotation=annotation 939 annotation = annotation
840 ) 940 )
841 if (is.null(adduct <- context$adducts[[adduct_name]])) { 941 if (is.null(adduct <- context$adducts[[adduct_name]])) {
842 context$adducts[[adduct_name]] <- orm$adduct()$load_by(name=adduct_name)$first() 942 context$adducts[[adduct_name]] <- orm$adduct()$load_by(
943 name = adduct_name
944 )$first()
843 if (is.null(adduct <- context$adducts[[adduct_name]])) { 945 if (is.null(adduct <- context$adducts[[adduct_name]])) {
844 adduct <- context$adducts[[adduct_name]] <- orm$adduct(name=adduct_name, charge=0) 946 adduct <- context$adducts[[adduct_name]] <- orm$adduct(
947 name = adduct_name,
948 charge = 0
949 )
845 adduct$save() 950 adduct$save()
846 } 951 }
847 } 952 }
848 cluster$set_adduct(adduct) 953 cluster$set_adduct(adduct)
849 ## Crappy hack to assign sample id to cluster without loading the sample. 954 ## Crappy hack to assign sample id to cluster without loading the
850 ## Samples are too big (their sample$env) and slows the process, and eat all the menory 955 ## sample. Samples are too big (their sample$env) and slows the
851 ## so we dont't want to load them. 956 ## process, and eat all the menory so we dont't want to load them.
852 cluster[["sample_id"]] <- main_sample_id 957 cluster[["sample_id"]] <- main_sample_id
853 cluster$modified__[["sample_id"]] <- main_sample_id 958 cluster$modified__[["sample_id"]] <- main_sample_id
854 } else { 959 } else {
855 if (context$clusterID != 0 && cluster$get_clusterID() == 0) { 960 if (context$clusterID != 0 && cluster$get_clusterID() == 0) {
856 cluster$set_clusterID(context$clusterID) 961 cluster$set_clusterID(context$clusterID)
857 } 962 }
858 } 963 }
859 cluster$save() 964 cluster$save()
860 feature$set_cluster(cluster) 965 feature$set_cluster(cluster)
861 return (cluster) 966 return(cluster)
862 } 967 }
863 968
864 complete_features <- function(orm, clusters, show_percent) { 969 complete_features <- function(orm, clusters, show_percent) {
865 total <- length(clusters) 970 total <- length(clusters)
866 percent <- -1 971 percent <- -1
867 i <- 0 972 i <- 0
868 for (cluster in clusters) { 973 for (cluster in clusters) {
869 i <- i+1 974 i <- i + 1
870 if (show_percent && (i / total) * 100 > percent) { 975 if (show_percent && (i / total) * 100 > percent) {
871 percent <- percent + 1 976 percent <- percent + 1
872 message("\r", sprintf("\r%d %%", percent), appendLF=FALSE) 977 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
873 } 978 }
874 features <- orm$feature()$load_by(cluster_id=cluster$get_id()) 979 features <- orm$feature()$load_by(cluster_id = cluster$get_id())
875 if (features$any()) { 980 if (features$any()) {
876 if (!is.null(rt <- features$mean("rt"))) { 981 if (!is.null(rt <- features$mean("rt"))) {
877 cluster$set_mean_rt(rt)$save() 982 cluster$set_mean_rt(rt)$save()
878 } 983 }
879 features_df <- as.data.frame(features) 984 features_df <- as.data.frame(features)
880 central_feature <- features_df[grepl("^\\[M\\]", features_df[, "iso"]), ] 985 central_feature <- features_df[
986 grepl("^\\[M\\]", features_df[, "iso"]),
987 ]
881 central_feature_into <- central_feature[["int_o"]] 988 central_feature_into <- central_feature[["int_o"]]
882 if (!identical(central_feature_into, numeric(0)) && central_feature_into != 0) { 989 if (
990 !identical(central_feature_into, numeric(0))
991 && central_feature_into != 0
992 ) {
883 for (feature in as.vector(features)) { 993 for (feature in as.vector(features)) {
884 feature$set_abundance( 994 feature$set_abundance(
885 feature$get_int_o() / central_feature_into * 100 995 feature$get_int_o() / central_feature_into * 100
886 )$save() 996 )$save()
887 } 997 }
888 } 998 }
889 } 999 }
890 } 1000 }
891 return (NULL) 1001 return(NULL)
892 } 1002 }
893 1003
894 load_process_params <- function(orm, sample, params) { 1004 load_process_params <- function(orm, sample, params) {
895 for (param_list in params) { 1005 for (param_list in params) {
896 if (is.null(param_list[["xfunction"]])) { 1006 if (is.null(param_list[["xfunction"]])) {
898 } 1008 }
899 if (param_list[["xfunction"]] == "annotatediff") { 1009 if (param_list[["xfunction"]] == "annotatediff") {
900 load_process_params_peak_picking(orm, sample, param_list) 1010 load_process_params_peak_picking(orm, sample, param_list)
901 } 1011 }
902 } 1012 }
903 return (sample) 1013 return(sample)
904 } 1014 }
905 1015
906 load_process_params_peak_picking <- function(orm, sample, peak_picking_params) { 1016 load_process_params_peak_picking <- function(
907 return (add_sample_process_parameters( 1017 orm,
908 params=peak_picking_params, 1018 sample,
909 params_translation=list( 1019 peak_picking_params
910 ppm="ppm", 1020 ) {
911 maxcharge="maxCharge", 1021 return(add_sample_process_parameters(
912 maxiso="maxIso" 1022 params = peak_picking_params,
1023 params_translation = list(
1024 ppm = "ppm",
1025 maxcharge = "maxCharge",
1026 maxiso = "maxIso"
913 ), 1027 ),
914 param_model_generator=orm$peak_picking_parameters, 1028 param_model_generator = orm$peak_picking_parameters,
915 sample_param_setter=sample$set_peak_picking_parameters 1029 sample_param_setter = sample$set_peak_picking_parameters
916 )) 1030 ))
917 } 1031 }
918 1032
919 add_sample_process_parameters <- function( 1033 add_sample_process_parameters <- function(
920 params, 1034 params,
935 params_model <- params_models$first() 1049 params_model <- params_models$first()
936 } else { 1050 } else {
937 params_model <- do.call(param_model_generator, model_params) 1051 params_model <- do.call(param_model_generator, model_params)
938 params_model$save() 1052 params_model$save()
939 } 1053 }
940 return (sample_param_setter(params_model)$save()) 1054 return(sample_param_setter(params_model)$save())
941 } 1055 }
942 1056
943 1057
944 library(optparse) 1058 library(optparse)
945 1059
946 option_list <- list( 1060 option_list <- list(
947 optparse::make_option( 1061 optparse::make_option(
948 c("-v", "--version"), 1062 c("-v", "--version"),
949 action="store_true", 1063 action = "store_true",
950 help="Display this tool's version and exits" 1064 help = "Display this tool's version and exits"
951 ), 1065 ),
952 optparse::make_option( 1066 optparse::make_option(
953 c("-i", "--input"), 1067 c("-i", "--input"),
954 type="character", 1068 type = "character",
955 help="The rdata path to import in XSeeker" 1069 help = "The rdata path to import in XSeeker"
956 ), 1070 ),
957 optparse::make_option( 1071 optparse::make_option(
958 c("-s", "--samples"), 1072 c("-s", "--samples"),
959 type="character", 1073 type = "character",
960 help="Samples to visualise in XSeeker" 1074 help = "Samples to visualise in XSeeker"
961 ), 1075 ),
962 optparse::make_option( 1076 optparse::make_option(
963 c("-B", "--archetype"), 1077 c("-B", "--archetype"),
964 type="character", 1078 type = "character",
965 help="The name of the base database" 1079 help = "The name of the base database"
966 ), 1080 ),
967 optparse::make_option( 1081 optparse::make_option(
968 c("-b", "--database"), 1082 c("-b", "--database"),
969 type="character", 1083 type = "character",
970 help="The base database's path" 1084 help = "The base database's path"
971 ), 1085 ),
972 optparse::make_option( 1086 optparse::make_option(
973 c("-c", "--compounds-csv"), 1087 c("-c", "--compounds-csv"),
974 type="character", 1088 type = "character",
975 help="The csv containing compounds" 1089 help = "The csv containing compounds"
976 ), 1090 ),
977 optparse::make_option( 1091 optparse::make_option(
978 c("-m", "--models"), 1092 c("-m", "--models"),
979 type="character", 1093 type = "character",
980 help="The path or url (must begin with http[s]:// or git@) to the database's models" 1094 help = paste(
1095 "The path or url (must begin with http[s]:// or git@) to",
1096 "the database's models"
1097 )
981 ), 1098 ),
982 optparse::make_option( 1099 optparse::make_option(
1100 c("-k", "--class"),
1101 type = "character",
1102 help = "The name of the column containing the classes"
1103 ),
1104 optparse::make_option(
983 c("-o", "--output"), 1105 c("-o", "--output"),
984 type="character", 1106 type = "character",
985 help="The path where to output sqlite" 1107 help = "The path where to output sqlite"
986 ), 1108 ),
987 optparse::make_option( 1109 optparse::make_option(
988 c("-P", "--not-show-percent"), 1110 c("-P", "--not-show-percent"),
989 action="store_true", 1111 action = "store_true",
990 help="Flag not to show the percents", 1112 help = "Flag not to show the percents",
991 default=FALSE 1113 default = FALSE
992 ) 1114 )
993 ) 1115 )
994 1116
995 options(error=function(){traceback(3)}) 1117 options(error = function(){traceback(3)})
996 1118
997 parser <- OptionParser(usage="%prog [options] file", option_list=option_list) 1119 parser <- OptionParser(
998 args <- parse_args(parser, positional_arguments=0) 1120 usage = "%prog [options] file",
1121 option_list = option_list
1122 )
1123 args <- parse_args(parser, positional_arguments = 0)
999 1124
1000 err_code <- 0 1125 err_code <- 0
1001 1126
1002 if (!is.null(args$options$version)) { 1127 if (!is.null(args$options$version)) {
1003 message(sprintf("%s %s", TOOL_NAME, VERSION)) 1128 message(sprintf("%s %s", TOOL_NAME, VERSION))
1004 quit() 1129 quit()
1005 } 1130 }
1006 1131
1007 models <- get_models(args$options$models) 1132 models <- get_models(args$options$models)
1008 orm <- DBModelR::ORM( 1133 orm <- DBModelR::ORM(
1009 connection_params=list(dbname=args$options$output), 1134 connection_params = list(dbname=args$options$output),
1010 dbms="SQLite" 1135 dbms = "SQLite"
1011 ) 1136 )
1012 1137
1013 invisible(orm$models(models)) 1138 invisible(orm$models(models))
1014 invisible(create_database(orm)) 1139 invisible(create_database(orm))
1015 1140
1021 insert_base_data(orm, args$options$database) 1146 insert_base_data(orm, args$options$database)
1022 } 1147 }
1023 message(sprintf("Base data inserted using %s.", args$options$database)) 1148 message(sprintf("Base data inserted using %s.", args$options$database))
1024 1149
1025 if (!is.null(args$options$archetype)) { 1150 if (!is.null(args$options$archetype)) {
1026 insert_base_data(orm, args$options$archetype, archetype=TRUE) 1151 insert_base_data(orm, args$options$archetype, archetype = TRUE)
1027 } 1152 }
1028 if (!is.null(args$options$`compounds-csv`)) { 1153 if (!is.null(args$options$`compounds-csv`)) {
1029 insert_compounds(orm, args$options$`compounds-csv`) 1154 insert_compounds(orm, args$options$`compounds-csv`)
1030 } 1155 }
1031 1156
1036 1161
1037 load(args$options$input, rdata <- new.env()) 1162 load(args$options$input, rdata <- new.env())
1038 1163
1039 process_rdata(orm, rdata, args$options) 1164 process_rdata(orm, rdata, args$options)
1040 1165
1041 quit(status=err_code) 1166 quit(status = err_code)
1042
1043