comparison MS2snoop.R @ 5:78d5a12406c2 draft

planemo upload commit a5f94dac9b268629399dc22c5d6ac48c5a85adc3
author workflow4metabolomics
date Fri, 05 Aug 2022 17:25:45 +0000
parents 856001213966
children 77abacd33c31
comparison
equal deleted inserted replaced
4:856001213966 5:78d5a12406c2
13 #' 13 #'
14 #' @import optparse 14 #' @import optparse
15 #' 15 #'
16 16
17 17
18 assign("MS2SNOOP_VERSION", "2.0.0") 18 get_version <- function() {
19 lockBinding("MS2SNOOP_VERSION", globalenv()) 19 cmd <- commandArgs(trailingOnly = FALSE)
20 20 root <- dirname(gsub("--file=", "", cmd[grep("--file=", cmd)]))
21 assign("MISSING_PARAMETER_ERROR", 1) 21 readme <- readLines(file.path(root, "README.md"))
22 lockBinding("MISSING_PARAMETER_ERROR", globalenv()) 22 version_line <- readme[grepl(" * **@version**: ", readme, fixed = TRUE)]
23 23 return(gsub(".*: ", "", version_line))
24 assign("BAD_PARAMETER_VALUE_ERROR", 2) 24 }
25 lockBinding("BAD_PARAMETER_VALUE_ERROR", globalenv()) 25
26 26 defaults <- list(
27 assign("MISSING_INPUT_FILE_ERROR", 3) 27 MS2SNOOP_VERSION = get_version(),
28 lockBinding("MISSING_INPUT_FILE_ERROR", globalenv()) 28 MISSING_PARAMETER_ERROR = 1,
29 29 BAD_PARAMETER_VALUE_ERROR = 2,
30 assign("NO_ANY_RESULT_ERROR", 255) 30 MISSING_INPUT_FILE_ERROR = 3,
31 lockBinding("NO_ANY_RESULT_ERROR", globalenv()) 31 NO_ANY_RESULT_ERROR = 255,
32 32 DEFAULT_PRECURSOR_PATH = NULL,
33 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") 33 DEFAULT_FRAGMENTS_PATH = NULL,
34 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") 34 DEFAULT_COMPOUNDS_PATH = NULL,
35 assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt") 35 DEFAULT_OUTPUT_PATH = "compound_fragments_result.txt",
36 assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt") 36 DEFAULT_TOLMZ = 0.01,
37 assign("DEFAULT_TOLMZ", 0.01) 37 DEFAULT_TOLRT = 20,
38 assign("DEFAULT_TOLRT", 20) 38 DEFAULT_MZDECIMAL = 3,
39 assign("DEFAULT_MZDECIMAL", 0) 39 DEFAULT_R_THRESHOLD = 0.85,
40 assign("DEFAULT_R_THRESHOLD", 0.85) 40 DEFAULT_MINNUMBERSCAN = 8,
41 assign("DEFAULT_MINNUMBERSCAN", 8) 41 DEFAULT_SEUIL_RA = 0.05,
42 assign("DEFAULT_SEUIL_RA", 0.5) 42 DEFAULT_FRAGMENTS_MATCH_DELTA = 10,
43 lockBinding("DEFAULT_PRECURSOR_PATH", globalenv()) 43 DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT = "ppm",
44 lockBinding("DEFAULT_FRAGMENTS_PATH", globalenv()) 44 DEFAULT_PDF_PATH = ""
45 lockBinding("DEFAULT_COMPOUNDS_PATH", globalenv()) 45 )
46 lockBinding("DEFAULT_OUTPUT_PATH", globalenv()) 46 env <- globalenv()
47 lockBinding("DEFAULT_TOLMZ", globalenv()) 47 for (default in names(defaults)) {
48 lockBinding("DEFAULT_TOLRT", globalenv()) 48 assign(default, defaults[[default]], envir = env)
49 lockBinding("DEFAULT_MZDECIMAL", globalenv()) 49 lockBinding(default, env)
50 lockBinding("DEFAULT_R_THRESHOLD", globalenv()) 50 }
51 lockBinding("DEFAULT_MINNUMBERSCAN", globalenv())
52 lockBinding("DEFAULT_SEUIL_RA", globalenv())
53
54 assign("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", 0.85)
55 assign("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", 0.1)
56 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", 0.01)
57 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60)
58 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv())
59 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv())
60 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv())
61 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv())
62
63 51
64 ######################################################################## 52 ########################################################################
53
54 get_formulas <- function(
55 mzref,
56 spectra,
57 nominal_mz_list,
58 processing_parameters,
59 background = !TRUE
60 ) {
61 if (is.vector(mzref) && length(mzref) > 1) {
62 return(lapply(
63 mzref,
64 function(mz) {
65 return(get_formulas(
66 mzref = mz,
67 spectra = spectra,
68 nominal_mz_list = nominal_mz_list,
69 processing_parameters = processing_parameters,
70 background = background
71 ))
72 }
73 ))
74 }
75 input <- sprintf(
76 "%s-%s.ms",
77 gsub("[[:space:]]", "_", processing_parameters$c_name),
78 mzref
79 )
80 create_ms_file(input, mzref, spectra, processing_parameters)
81 output <- sprintf(
82 "out/%s-%s.out",
83 gsub("[[:space:]]", "_", processing_parameters$c_name),
84 mzref
85 )
86 command <- sprintf(
87 paste(
88 "sirius",
89 "--noCite",
90 "--noSummaries",
91 "--loglevel=WARNING",
92 "-i='%s'",
93 "-o='%s'",
94 "tree",
95 ## loglevel is not working taken into account during
96 ## sirius startup, so we filter outputs...
97 "2>&1 | grep '^(WARNING|SEVERE)'"
98 ),
99 input,
100 output
101 )
102 verbose_catf(
103 ">> Sirius is running %swith the command: %s\n",
104 if (background) "in the background " else "",
105 command
106 )
107 system(
108 command,
109 wait = !background,
110 ignore.stdout = background,
111 ignore.stderr = background
112 )
113 return(extract_sirius_results(output, spectra$mz, processing_parameters))
114 }
115
116 create_ms_file <- function(
117 path,
118 mzref,
119 spectra,
120 processing_parameters
121 ) {
122 file_content <- paste(
123 sprintf(">compound %s", processing_parameters$c_name),
124 sprintf(">ionization %s", processing_parameters$ionization),
125 sprintf(">parentmass %s", mzref),
126 sprintf(">formula %s", processing_parameters$elemcomposition),
127 sep = "\n"
128 )
129 displayed_file_content <- sprintf(
130 "%s\n>collision\n%s",
131 file_content,
132 paste(
133 sprintf(
134 "%s %s",
135 spectra[1:3, "mz"],
136 spectra[1:3, "intensities"]
137 ),
138 collapse = "\n"
139 )
140 )
141 if (nrow(spectra) > 3) {
142 displayed_file_content <- sprintf(
143 "%s\n[... %s more rows of mz and intensities ...]",
144 displayed_file_content,
145 nrow(spectra) - 3
146 )
147 }
148 catf(
149 ">> MS file created for %s with content:\n%s\n",
150 processing_parameters$c_name,
151 displayed_file_content
152 )
153 file_content <- sprintf(
154 "%s\n\n>collision\n%s",
155 file_content,
156 paste(
157 sprintf("%s %s", spectra$mz, spectra$intensities),
158 collapse = "\n"
159 )
160 )
161 cat(file_content, file = path, append = FALSE)
162 }
163
164 extract_sirius_results <- function(
165 output,
166 mz_list,
167 processing_parameters
168 ) {
169
170 delta <- processing_parameters$fragment_match_delta
171 delta_unit <- tolower(processing_parameters$fragment_match_delta_unit)
172
173 output <- list.dirs(output, recursive = FALSE)[[1]]
174
175 spectra_out_dir <- sprintf("%s/spectra", output)
176 spectra_filename <- sprintf(
177 "%s/%s",
178 spectra_out_dir,
179 list.files(spectra_out_dir)[[1]]
180 )
181
182 trees_out_dir <- sprintf("%s/trees", output)
183 trees_filename <- sprintf(
184 "%s/%s",
185 trees_out_dir,
186 list.files(trees_out_dir)[[1]]
187 )
188
189 if (!is.null(spectra_filename)) {
190 sirius_results <- get_csv_or_tsv(spectra_filename)
191 } else {
192 return(rep(NA, length(mz_list)))
193 }
194 if (!is.null(trees_filename)) {
195 sirius_results <- cbind(sirius_results, extract_sirius_ppm(trees_filename))
196 } else {
197 return(rep(NA, length(mz_list)))
198 }
199
200 fragment_matchings <- data.frame(
201 formula = NA,
202 ppm = NA,
203 mz = mz_list,
204 error = NA
205 )
206
207 sirius_results <- filter_sirius_with_delta(
208 sirius_results = sirius_results,
209 original_mz = fragment_matchings$mz,
210 delta = delta,
211 delta_unit = delta_unit
212 )
213
214 for (index in seq_len(nrow(sirius_results))) {
215 result <- sirius_results[index, ]
216 filter <- which(order(abs(fragment_matchings$mz - result$mz)) == 1)
217 fragment_matchings[filter, "formula"] <- result$formula
218 fragment_matchings[filter, "ppm"] <- result$ppm
219 catf(
220 "[OK] Fragment with m/z=%s matches %s with a difference of %s ppm\n",
221 fragment_matchings[filter, "mz"], result$formula, result$ppm
222 )
223 }
224 return(fragment_matchings)
225 }
226
227 filter_sirius_with_delta <- function(
228 sirius_results,
229 original_mz,
230 delta,
231 delta_unit
232 ) {
233 if (is.numeric(delta) && !is.na(delta) && delta > 0) {
234 if (delta_unit == "ppm") {
235 filter <- abs(sirius_results$ppm) <= delta
236 fine <- which(filter)
237 not_fine <- which(!filter)
238 catf(
239 paste("[KO] fragment %s (m/z=%s) eleminated because ppm=%s is greater",
240 "than delta=%s\n"
241 ),
242 sirius_results[not_fine, ]$formula,
243 sirius_results[not_fine, ]$mz,
244 sirius_results[not_fine, ]$ppm,
245 delta
246 )
247 sirius_results <- sirius_results[fine, ]
248 } else if (delta_unit == "mz") {
249 differences <- sapply(
250 sirius_results$mz,
251 function(mz) min(abs(original_mz - mz))
252 )
253 fine <- which(sapply(
254 sirius_results$mz,
255 function(mz) any(abs(original_mz - mz) <= delta)
256 ))
257 not_fine <- which(sapply(
258 sirius_results$mz,
259 function(mz) all(abs(original_mz - mz) > delta)
260 ))
261 catf(
262 paste(
263 "[KO] fragment %s eleminated because mz difference=%s is",
264 "greater than delta=%s\n"
265 ),
266 sirius_results[not_fine, ]$formula,
267 differences[not_fine],
268 delta
269 )
270 sirius_results <- sirius_results[fine, ]
271 }
272 }
273 return(sirius_results)
274 }
275
276 extract_sirius_ppm <- function(path) {
277 json <- file(path, "r")
278 suppressWarnings(json_lines <- readLines(json))
279 close(json)
280 json_lines <- json_lines[
281 grepl("\\s+\"(massDeviation|recalibratedMass)\" :", json_lines)
282 ]
283 json_lines <- gsub("^\\s+\"[^\"]+\" : \"?", "", json_lines)
284 ppms <- json_lines[seq(1, length(json_lines), 2)]
285 mz <- json_lines[seq(2, length(json_lines), 2)]
286 ppms <- as.numeric(gsub(" ppm .*", "", ppms))
287 mz <- as.numeric(gsub(",$", "", mz))
288 ordered <- order(mz)
289 return(list(ppm = ppms[ordered], recalibrated_mz = mz[ordered]))
290 }
65 291
66 #' @title plot_pseudo_spectra 292 #' @title plot_pseudo_spectra
67 #' @param x 293 #' @param x
68 #' @param r_threshold
69 #' @param fid 294 #' @param fid
70 #' @param sum_int 295 #' @param sum_int
71 #' @param vmz 296 #' @param vmz
72 #' @param cor_abs_int 297 #' @param cor_abs_int
73 #' @param refcol 298 #' @param refcol
79 #' x dataframe scan X fragments with scans number in the 1st column and 304 #' x dataframe scan X fragments with scans number in the 1st column and
80 #' ions in next with intensities 305 #' ions in next with intensities
81 #' fid file id when several a precursor has been detected in several files 306 #' fid file id when several a precursor has been detected in several files
82 plot_pseudo_spectra <- function( 307 plot_pseudo_spectra <- function(
83 x, 308 x,
84 r_threshold,
85 fid, 309 fid,
86 sum_int, 310 sum_int,
87 vmz, 311 vmz,
88 cor_abs_int, 312 cor_abs_int,
89 refcol, 313 refcol,
90 c_name, 314 meaned_mz,
91 inchikey, 315 processing_parameters
92 elemcomposition
93 ) { 316 ) {
94 ## du fait de la difference de nombre de colonne entre la dataframe qui 317 ## du fait de la difference de nombre de colonne entre la dataframe qui
95 ## inclue les scans en 1ere col, mzRef se decale de 1 318 ## inclue les scans en 1ere col, mzRef se decale de 1
96 refcol <- refcol - 1 319 refcol <- refcol - 1
97 ## compute relative intensities max=100% 320 ## compute relative intensities max=100%
98 rel_int <- sum_int[-1] 321 rel_int <- sum_int[-1]
99 rel_int <- rel_int / max(rel_int) 322 rel_int <- rel_int / max(rel_int)
100 323
101 ## define max value on vertical axis (need to increase in order to plot the 324 if (processing_parameters$do_pdf) {
102 ## label of fragments) 325 ## define max value on vertical axis (need to increase in order to plot the
103 ymax <- max(rel_int) + 0.2 * max(rel_int) 326 ## label of fragments)
104 327 ymax <- max(rel_int) + 0.2 * max(rel_int)
105 par(mfrow = c(2, 1)) 328
106 plot(vmz, rel_int, type = "h", ylim = c(0, ymax), main = c_name) 329 par(mfrow = c(2, 1))
107 ## low correl coef. will be display in grey 330 plot(vmz, rel_int, type = "h", ylim = c(0, ymax),
108 cor_low <- which(round(cor_abs_int, 2) < r_threshold) 331 main = processing_parameters$c_name
109 332 )
110 lbmzcor <- sprintf("%s(r=%s)", vmz, round(cor_abs_int, 2)) 333 ## low correl coef. will be display in grey
111 334 cor_low <- which(round(cor_abs_int, 2) < processing_parameters$r_threshold)
112 if (length(cor_low) > 0) { 335
113 text( 336 lbmzcor <- sprintf("%s(r=%s)", vmz, round(cor_abs_int, 2))
114 vmz[cor_low], 337
115 rel_int[cor_low], 338 if (length(cor_low) > 0) {
116 lbmzcor[cor_low],
117 cex = 0.5,
118 col = "grey",
119 srt = 90,
120 adj = 0
121 )
122 if (length(vmz) - length(cor_low) > 1) {
123 text( 339 text(
124 vmz[-c(refcol, cor_low)], 340 vmz[cor_low],
125 rel_int[-c(refcol, cor_low)], 341 rel_int[cor_low],
126 lbmzcor[-c(refcol, cor_low)], 342 lbmzcor[cor_low],
127 cex = 0.6, 343 cex = 0.5,
128 col = 1, 344 col = "grey",
129 srt = 90, 345 srt = 90,
130 adj = 0 346 adj = 0
131 ) 347 )
348 if (length(vmz) - length(cor_low) > 1) {
349 text(
350 vmz[-c(refcol, cor_low)],
351 rel_int[-c(refcol, cor_low)],
352 lbmzcor[-c(refcol, cor_low)],
353 cex = 0.6,
354 col = 1,
355 srt = 90,
356 adj = 0
357 )
358 }
359 } else {
360 if (length(vmz) > 1) {
361 text(
362 vmz[-c(refcol)],
363 rel_int[-c(refcol)],
364 lbmzcor[-c(refcol)],
365 cex = 0.6,
366 col = 1,
367 srt = 90,
368 adj = 0
369 )
370 }
371 }
372
373 text(
374 vmz[refcol],
375 rel_int[refcol],
376 lbmzcor[refcol],
377 cex = 0.8,
378 col = 2,
379 srt = 90,
380 adj = 0
381 )
382 }
383
384 ## prepare result file
385 cor_valid <- (round(cor_abs_int, 2) >= processing_parameters$r_threshold)
386
387 do_sirius <- TRUE
388 verbose_catf("Checking sirius parameters...\n")
389 if (is.null(processing_parameters$ionization)) {
390 do_sirius <- FALSE
391 verbose_catf("[KO] No ionization passed in parameter.\n")
392 } else {
393 verbose_catf("[OK] Ionization=%s.\n", processing_parameters$ionization)
394 }
395 if (is.na(processing_parameters$elemcomposition)) {
396 do_sirius <- FALSE
397 verbose_catf("[KO] Elemental composition is NA.\n")
398 } else if (length(processing_parameters$elemcomposition) < 1) {
399 do_sirius <- FALSE
400 verbose_catf("[KO] No elemental composition is provided.\n")
401 } else if (processing_parameters$elemcomposition == "") {
402 do_sirius <- FALSE
403 verbose_catf("[KO] Elemental composition is an empty string.\n")
404 } else {
405 verbose_catf(
406 "[OK] Elemental composition=%s.\n",
407 processing_parameters$elemcomposition
408 )
409 }
410
411 cp_res_length <- length(vmz)
412 ppm <- rep(NA, cp_res_length)
413 formulas <- rep(NA, cp_res_length)
414 if (do_sirius) {
415 verbose_catf("Everything is ok, preparing for sirius.\n")
416 formulas <- get_formulas(
417 mzref = processing_parameters$mzref,
418 spectra = data.frame(mz = meaned_mz, intensities = sum_int[-1]),
419 nominal_mz_list = vmz,
420 processing_parameters = processing_parameters
421 )
422 if (nrow(formulas) == 0) {
423 catf("No formula found.\n")
424 } else {
425 ppm <- formulas$ppm
426 formulas <- formulas$formula
427 catf(
428 "Found %s formula for %s fragments\n",
429 length(formulas[which(!(is.na(formulas)))]),
430 cp_res_length
431 )
132 } 432 }
133 } else { 433 } else {
134 if (length(vmz) > 1) { 434 verbose_catf("Sirius cannot be run.\n")
135 text( 435 }
136 vmz[-c(refcol)],
137 rel_int[-c(refcol)],
138 lbmzcor[-c(refcol)],
139 cex = 0.6,
140 col = 1,
141 srt = 90,
142 adj = 0
143 )
144 }
145 }
146
147 text(
148 vmz[refcol],
149 rel_int[refcol],
150 lbmzcor[refcol],
151 cex = 0.8,
152 col = 2,
153 srt = 90,
154 adj = 0
155 )
156
157 ## prepare result file
158 corValid <- (round(cor_abs_int, 2) >= r_threshold) ##nolint object_name_linter
159 cp_res <- data.frame( 436 cp_res <- data.frame(
160 rep(c_name, length(vmz)), 437 rep(processing_parameters$c_name, cp_res_length),
161 rep(inchikey, length(vmz)), 438 rep(processing_parameters$inchikey, cp_res_length),
162 rep(elemcomposition, length(vmz)), 439 rep(processing_parameters$elemcomposition, cp_res_length),
163 rep(fid, length(vmz)), 440 formulas,
164 vmz, 441 vmz,
442 ppm,
443 rep(fid, cp_res_length),
165 cor_abs_int, 444 cor_abs_int,
166 sum_int[-1], 445 sum_int[-1],
167 rel_int, 446 rel_int,
168 corValid 447 cor_valid
169 ) 448 )
170 449
171 colnames(cp_res) <- c( 450 colnames(cp_res) <- c(
172 "compoundName", 451 "compoundName",
173 "inchikey", 452 "inchikey",
174 "elemcomposition", 453 "elemcomposition",
454 "fragment",
455 "fragment_mz",
456 "ppm",
175 "fileid", 457 "fileid",
176 "fragments_mz",
177 "CorWithPrecursor", 458 "CorWithPrecursor",
178 "AbsoluteIntensity", 459 "AbsoluteIntensity",
179 "relativeIntensity", 460 "relativeIntensity",
180 "corValid" 461 "corValid"
181 ) 462 )
182 return(cp_res) 463 return(cp_res)
183
184 } 464 }
185 465
186 #' 466 #'
187 #' @title extract_fragments 467 #' @title extract_fragments
188 #' 468 #'
189 #' @param precursors the precursor list from mspurity 469 #' @param precursors the precursor list from mspurity
190 #' @param fragments the fragments list from ms purity 470 #' @param fragments the fragments list from ms purity
191 #' @param mzref 471 # ' @param mzref
192 #' @param rtref 472 # ' @param rtref
193 #' @param c_name 473 # ' @param c_name
194 #' @param r_threshold default = DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD 474 # ' @param inchikey
195 #' @param seuil_ra default = DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA 475 # ' @param elemcomposition
196 #' @param tolmz default = DEFAULT_EXTRACT_FRAGMENTS_TOLMZ 476 #' @param processing_parameters
197 #' @param tolrt default = DEFAULT_EXTRACT_FRAGMENTS_TOLRT
198 #' @returns 477 #' @returns
199 #' 478 #'
200 #' @description 479 #' @description
201 #' function for extraction of fragments corresponding to precursors 480 #' function for extraction of fragments corresponding to precursors
202 #' detected by MSPurity 481 #' detected by MSPurity
203 extract_fragments <- function( ## nolint cyclocomp_linter 482 extract_fragments <- function( ## nolint cyclocomp_linter
204 precursors, 483 precursors,
205 fragments, 484 fragments,
206 mzref, 485 processing_parameters
207 rtref,
208 c_name,
209 inchikey,
210 elemcomposition,
211 min_number_scan,
212 mzdecimal,
213 r_threshold = DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD,
214 seuil_ra = DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA,
215 tolmz = DEFAULT_EXTRACT_FRAGMENTS_TOLMZ,
216 tolrt = DEFAULT_EXTRACT_FRAGMENTS_TOLRT
217 ) { 486 ) {
218 ## filter precursor in the precursors file based on mz and rt in the 487 ## filter precursor in the precursors file based on mz and rt in the
219 ## compound list 488 ## compound list
220 cat("processing ", c_name, "\n") 489 catf("processing %s\n", processing_parameters$c_name)
490 verbose_catf("===\n")
491 param <- processing_parameters
221 selected_precursors <- which( 492 selected_precursors <- which(
222 (abs(precursors$precurMtchMZ - mzref) <= tolmz) 493 (abs(precursors$precurMtchMZ - param$mzref) <= param$tolmz)
223 & (abs(precursors$precurMtchRT - rtref) <= tolrt) 494 & (abs(precursors$precurMtchRT - param$rtref) <= param$tolrt)
495 )
496 rm(param)
497
498 verbose_catf(
499 "> %s precursors selected with mz=%s±%s and rt=%s±%s\n",
500 length(selected_precursors),
501 processing_parameters$mzref,
502 processing_parameters$tolmz,
503 processing_parameters$rtref,
504 processing_parameters$tolrt
224 ) 505 )
225 506
226 ## check if there is the precursor in the file 507 ## check if there is the precursor in the file
227 if (length(selected_precursors) > 0) { 508
228 509 if (length(selected_precursors) < 1) {
229 sprecini <- precursors[selected_precursors, ] 510 cat("> non detected in precursor file\n")
230 511 show_end_processing()
231 ## check if fragments corresponding to precursor are found in several 512 return(NULL)
232 ## files (collision energy) 513 }
233 ## this lead to a processing for each fileid 514
234 mf <- levels(as.factor(sprecini$fileid)) 515 precursors <- precursors[selected_precursors, ]
235 if (length(mf) > 1 && global_verbose) { 516
236 cat(" several files detected for this compounds :\n") 517 ## check if fragments corresponding to precursor are found in several
237 } 518 ## files (collision energy)
238 519 ## this lead to a processing for each fileid
239 for (f in seq_along(mf)) { 520 file_ids <- as.character(sort(unique(precursors$fileid)))
240 521 if (length(file_ids) > 1) {
241 sprec <- sprecini[sprecini$fileid == mf[f], ] 522 catf("> several files detected for this compounds :\n")
242 523 } else if (length(file_ids) < 1 || nrow(precursors) < 1) {
243 ## selection of fragment in the fragments file with the grpid common in 524 return(data.frame())
244 ## both fragments and precursors 525 }
245 selfrgt <- levels(as.factor(sprec$grpid)) 526
246 sfrgt <- fragments[ 527 res_comp <- data.frame()
247 fragments$grpid %in% selfrgt 528 for (curent_file_id in file_ids) {
248 & fragments$fileid == mf[f], 529 curent_precursors <- precursors[precursors$fileid == curent_file_id, ]
249 ] 530 selected_fragments <- fragments[
250 531 fragments$grpid %in% as.character(curent_precursors$grpid)
251 ## filter fragments on relative intensity seuil_ra = user defined 532 & fragments$fileid == curent_file_id,
252 ## parameter (MSpurity flags could be used here) 533 ]
253 sfrgtfil <- sfrgt[sfrgt$ra > seuil_ra, ] 534 filtered_fragments <- selected_fragments[
254 535 selected_fragments$ra > processing_parameters$seuil_ra,
255 mznominal <- round(x = sfrgtfil$mz, mzdecimal) 536 ]
256 sfrgtfil <- data.frame(sfrgtfil, mznominal) 537 if (nrow(filtered_fragments) != 0) {
257 538 res_comp_by_file <- process_file(
258 ## creation of cross table row=scan col=mz X=ra 539 curent_file_id = curent_file_id,
259 vmz <- levels(as.factor(sfrgtfil$mznominal)) 540 precursor_mz = curent_precursors$mz,
260 541 filtered_fragments = filtered_fragments,
261 if (global_verbose) { 542 processing_parameters = processing_parameters
262 cat(" fragments :", vmz)
263 cat("\n")
264 }
265
266 ## mz of precursor in data precursor to check correlation with
267 mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal))
268
269 for (m in seq_along(vmz)) {
270
271 ## absolute intensity
272 cln <- c(
273 which(colnames(sfrgtfil) == "acquisitionNum"),
274 which(colnames(sfrgtfil) == "i")
275 )
276 int_mz <- sfrgtfil[sfrgtfil$mznominal == vmz[m], cln]
277 colnames(int_mz)[2] <- paste0("mz", vmz[m])
278
279 ## average intensities of mass in duplicate scans
280 comp_scans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
281 int_mz <- comp_scans[, -1]
282
283 if (m == 1) {
284 ds_abs_int <- int_mz
285 } else {
286 ds_abs_int <- merge(
287 x = ds_abs_int,
288 y = int_mz,
289 by.x = 1,
290 by.y = 1,
291 all.x = TRUE,
292 all.y = TRUE
293 )
294 }
295 }
296 if (global_debug) {
297 print(ds_abs_int)
298 write.table(
299 x = ds_abs_int,
300 file = paste0(c_name, "ds_abs_int.txt"),
301 row.names = FALSE,
302 sep = "\t"
303 )
304 }
305
306 ## elimination of mz with less than min_number_scan scans (user defined
307 ## parameter)
308 xmz <- rep(NA, ncol(ds_abs_int) - 1)
309 sum_int <- rep(NA, ncol(ds_abs_int))
310 nbxmz <- 0
311 nb_scan_check <- min(nrow(ds_abs_int), min_number_scan)
312
313 for (j in 2:ncol(ds_abs_int)) {
314 sum_int[j] <- sum(ds_abs_int[j], na.rm = TRUE)
315 if (sum(!is.na(ds_abs_int[[j]])) < nb_scan_check) {
316 nbxmz <- nbxmz + 1
317 xmz[nbxmz] <- j
318 }
319 }
320
321 xmz <- xmz[-which(is.na(xmz))]
322 if (length(xmz) > 0) {
323 ds_abs_int <- ds_abs_int[, -c(xmz)]
324 sum_int <- sum_int[-c(xmz)]
325 ## liste des mz keeped decale de 1 avec ds_abs_int
326 vmz <- as.numeric(vmz[-c(xmz - 1)])
327 }
328
329 ## reference ion for correlation computing = precursor OR maximum
330 ## intensity ion in precursor is not present
331 refcol <- which(colnames(ds_abs_int) == mz_prec)
332 if (length(refcol) == 0) {
333 refcol <- which(sum_int == max(sum_int, na.rm = TRUE))
334 }
335 pdf(
336 file = sprintf("%s_processing_file%s.pdf", c_name, mf[f]),
337 width = 8,
338 height = 11
339 ) 543 )
340 par(mfrow = c(3, 2))
341
342 ## Pearson correlations between absolute intensities computing
343 cor_abs_int <- rep(NA, length(vmz))
344
345 if (length(refcol) > 0) {
346 for (i in 2:length(ds_abs_int)) {
347 cor_abs_int[i - 1] <- cor(
348 x = ds_abs_int[[refcol]],
349 y = ds_abs_int[[i]],
350 use = "pairwise.complete.obs",
351 method = "pearson"
352 )
353 plot(
354 ds_abs_int[[refcol]],
355 ds_abs_int[[i]],
356 xlab = colnames(ds_abs_int)[refcol],
357 ylab = colnames(ds_abs_int)[i],
358 main = sprintf(
359 "%s corr coeff r=%s", c_name, round(cor_abs_int[i - 1], 2)
360 )
361 )
362 }
363 ## plot pseudo spectra
364 res_comp_by_file <- plot_pseudo_spectra(
365 x = ds_abs_int,
366 r_threshold = r_threshold,
367 fid = mf[f],
368 sum_int = sum_int,
369 vmz = vmz,
370 cor_abs_int = cor_abs_int,
371 refcol = refcol,
372 c_name = c_name,
373 inchikey = inchikey,
374 elemcomposition = elemcomposition
375 )
376 if (f == 1) {
377 res_comp <- res_comp_by_file
378 }
379 } else {
380 res_comp_by_file <- NULL
381 cat(" non detected in fragments file \n")
382 }
383 if (!is.null(res_comp_by_file)) { 544 if (!is.null(res_comp_by_file)) {
384 res_comp <- rbind(res_comp, res_comp_by_file) 545 res_comp <- rbind(res_comp, res_comp_by_file)
385 } 546 }
386 dev.off() 547 } else {
548 catf("No fragment found for in fragment file\n")
387 } 549 }
550 }
551 return(unique(res_comp))
552 }
553
554 process_file <- function(
555 curent_file_id,
556 precursor_mz,
557 filtered_fragments,
558 processing_parameters
559 ) {
560 mznominal <- round(x = filtered_fragments$mz, digits = 0)
561 meaned_mz <- round(
562 aggregate(
563 data.frame(
564 mz = filtered_fragments$mz,
565 mznominal = mznominal
566 ),
567 list(mznominal),
568 FUN = mean
569 )$mz,
570 digits = processing_parameters$mzdecimal
571 )
572 filtered_fragments <- data.frame(filtered_fragments, mznominal)
573
574 ## creation of cross table row=scan col=mz X=ra
575
576 vmz <- as.character(sort(unique(filtered_fragments$mznominal)))
577
578 ds_abs_int <- create_ds_abs_int(vmz, filtered_fragments)
579
580 if (global_debug) {
581 print(ds_abs_int)
582 }
583
584 ## elimination of mz with less than min_number_scan scans (user defined
585 ## parameter)
586 xmz <- rep(NA, ncol(ds_abs_int) - 1)
587 sum_int <- rep(NA, ncol(ds_abs_int))
588 nbxmz <- 0
589 nb_scan_check <- min(nrow(ds_abs_int), processing_parameters$min_number_scan)
590
591 for (j in 2:ncol(ds_abs_int)) {
592 sum_int[j] <- sum(ds_abs_int[j], na.rm = TRUE)
593 if (sum(!is.na(ds_abs_int[[j]])) < nb_scan_check) {
594 nbxmz <- nbxmz + 1
595 xmz[nbxmz] <- j
596 }
597 }
598
599 xmz <- xmz[-which(is.na(xmz))]
600 if (length(xmz) > 0) {
601 ds_abs_int <- ds_abs_int[, -c(xmz)]
602 sum_int <- sum_int[-c(xmz)]
603 ## liste des mz keeped decale de 1 avec ds_abs_int
604 vmz <- as.numeric(vmz[-c(xmz - 1)])
605 meaned_mz <- meaned_mz[-c(xmz - 1)]
606 }
607
608 ## mz of precursor in data precursor to check correlation with
609 mz_prec <- paste0(
610 "mz",
611 round(mean(precursor_mz), processing_parameters$mzdecimal)
612 )
613 ## reference ion for correlation computing = precursor OR maximum
614 ## intensity ion in precursor is not present
615 refcol <- which(colnames(ds_abs_int) == mz_prec)
616 if (length(refcol) == 0) {
617 refcol <- which(sum_int == max(sum_int, na.rm = TRUE))
618 }
619
620 if (processing_parameters$do_pdf) {
621 start_pdf(processing_parameters, curent_file_id)
622 }
623
624 ## Pearson correlations between absolute intensities computing
625 cor_abs_int <- rep(NA, length(vmz))
626
627 if (length(refcol) > 0) {
628 for (i in 2:length(ds_abs_int)) {
629 cor_abs_int[i - 1] <- stats::cor(
630 x = ds_abs_int[[refcol]],
631 y = ds_abs_int[[i]],
632 use = "pairwise.complete.obs",
633 method = "pearson"
634 )
635 debug_catf(
636 "Correlation between %s and %s: %s\n",
637 paste(ds_abs_int[[refcol]], collapse = ";"),
638 paste(ds_abs_int[[i]], collapse = ";"),
639 paste(cor_abs_int[i - 1], collapse = ";")
640 )
641 if (processing_parameters$do_pdf) {
642 pdf_plot_ds_abs_int(
643 processing_parameters$c_name,
644 ds_abs_int,
645 refcol,
646 i,
647 round(cor_abs_int[i - 1], 2)
648 )
649 }
650 }
651 ## plot pseudo spectra
652 res_comp_by_file <- plot_pseudo_spectra(
653 x = ds_abs_int,
654 fid = curent_file_id,
655 sum_int = sum_int,
656 vmz = vmz,
657 cor_abs_int = cor_abs_int,
658 refcol = refcol,
659 meaned_mz = meaned_mz,
660 processing_parameters = processing_parameters
661 )
662 catf(
663 "%s has been processed and %s fragments have been found.\n",
664 processing_parameters$c_name,
665 nrow(res_comp_by_file)
666 )
388 } else { 667 } else {
389 res_comp <- NULL 668 res_comp_by_file <- NULL
390 cat(" non detected in precursor file \n") 669 cat(">> non detected in fragments file \n")
391 } 670 }
392 return(res_comp) 671 show_end_processing()
672 if (processing_parameters$do_pdf) {
673 end_pdf()
674 }
675 return(res_comp_by_file)
676 }
677
678 create_ds_abs_int <- function(vmz, filtered_fragments) {
679 verbose_catf(
680 ">> fragments: %s\n",
681 paste(vmz, collapse = " ")
682 )
683 ds_abs_int <- create_int_mz(vmz[1], filtered_fragments)
684 for (mz in vmz[-1]) {
685 int_mz <- create_int_mz(mz, filtered_fragments)
686 ds_abs_int <- merge(
687 x = ds_abs_int,
688 y = int_mz,
689 by.x = 1,
690 by.y = 1,
691 all.x = TRUE,
692 all.y = TRUE
693 )
694 }
695 return(ds_abs_int)
696 }
697
698 create_int_mz <- function(mz, filtered_fragments) {
699 ## absolute intensity
700 int_mz <- filtered_fragments[
701 filtered_fragments$mznominal == mz,
702 c("acquisitionNum", "i")
703 ]
704 colnames(int_mz)[2] <- paste0("mz", mz)
705 ## average intensities of mass in duplicate scans
706 comp_scans <- aggregate(x = int_mz, by = list(int_mz[[1]]), FUN = mean)
707 return(comp_scans[, -1])
708 }
709
710 show_end_processing <- function() {
711 verbose_catf("==========\n")
712 cat("\n")
713 }
714
715 start_pdf <- function(processing_parameters, curent_file_id) {
716 if (!dir.exists(processing_parameters$pdf_path)) {
717 dir.create(processing_parameters$pdf_path, recursive = TRUE)
718 }
719 pdf(
720 file = sprintf(
721 "%s/%s_processing_file%s.pdf",
722 processing_parameters$pdf_path,
723 processing_parameters$c_name,
724 curent_file_id
725 ),
726 width = 8,
727 height = 11
728 )
729 par(mfrow = c(3, 2))
730 }
731
732 pdf_plot_ds_abs_int <- function(c_name, ds_abs_int, refcol, i, r_coef) {
733 plot(
734 ds_abs_int[[refcol]],
735 ds_abs_int[[i]],
736 xlab = colnames(ds_abs_int)[refcol],
737 ylab = colnames(ds_abs_int)[i],
738 main = sprintf(
739 "%s corr coeff r=%s", c_name, r_coef
740 )
741 )
742 }
743 end_pdf <- function() {
744 dev.off()
393 } 745 }
394 746
395 set_global <- function(var, value) { 747 set_global <- function(var, value) {
396 assign(var, value, envir = globalenv()) 748 assign(var, value, envir = globalenv())
397 } 749 }
408 set_global("global_verbose", TRUE) 760 set_global("global_verbose", TRUE)
409 } 761 }
410 762
411 unset_verbose <- function() { 763 unset_verbose <- function() {
412 set_global("global_verbose", FALSE) 764 set_global("global_verbose", FALSE)
765 }
766
767 verbose_catf <- function(...) {
768 if (global_verbose) {
769 cat(sprintf(...), sep = "")
770 }
771 }
772
773
774 debug_catf <- function(...) {
775 if (global_debug) {
776 cat(sprintf(...), sep = "")
777 }
778 }
779
780 catf <- function(...) {
781 cat(sprintf(...), sep = "")
413 } 782 }
414 783
415 create_parser <- function() { 784 create_parser <- function() {
416 parser <- optparse::OptionParser() 785 parser <- optparse::OptionParser()
417 parser <- optparse::add_option( 786 parser <- optparse::add_option(
545 "Fragments are kept if there are found in a minimum number", 914 "Fragments are kept if there are found in a minimum number",
546 "of min_number_scan scans" 915 "of min_number_scan scans"
547 ), 916 ),
548 metavar = "number" 917 metavar = "number"
549 ) 918 )
919 parser <- optparse::add_option(
920 parser,
921 c("--pdf_path"),
922 type = "character",
923 default = DEFAULT_PDF_PATH,
924 help = paste(
925 "[default %default]",
926 "PDF files output path"
927 )
928 )
929 parser <- optparse::add_option(
930 parser,
931 c("--ionization"),
932 type = "character",
933 action = "store",
934 default = "None",
935 help = paste(
936 "[default %default]",
937 "Which ionization to use for sirius"
938 ),
939 metavar = "character"
940 )
941 parser <- optparse::add_option(
942 parser,
943 c("--fragment_match_delta"),
944 type = "numeric",
945 action = "store",
946 default = DEFAULT_FRAGMENTS_MATCH_DELTA,
947 help = paste(
948 "[default %default]",
949 "Fragment match delta"
950 ),
951 metavar = "numeric"
952 )
953 parser <- optparse::add_option(
954 parser,
955 c("--fragment_match_delta_unit"),
956 type = "character",
957 action = "store",
958 default = DEFAULT_FRAGMENTS_MATCH_DELTA_UNIT,
959 help = paste(
960 "[default %default]",
961 "Fragment match delta"
962 ),
963 metavar = "character"
964 )
550 return(parser) 965 return(parser)
551 } 966 }
552 967
553 stop_with_status <- function(msg, status) { 968 stop_with_status <- function(msg, status) {
554 sink(stderr()) 969 sink(stderr())
557 sink(NULL) 972 sink(NULL)
558 base::quit(status = status) 973 base::quit(status = status)
559 } 974 }
560 975
561 check_args_validity <- function(args) { ## nolint cyclocomp_linter 976 check_args_validity <- function(args) { ## nolint cyclocomp_linter
562 sysvars <- Sys.getenv()
563 sysvarnames <- names(sysvars)
564 if (length(args$output) == 0 || nchar(args$output[1]) == 0) { 977 if (length(args$output) == 0 || nchar(args$output[1]) == 0) {
565 stop_with_status( 978 stop_with_status(
566 "Missing output parameters. Please set it with --output.", 979 "Missing output parameters. Please set it with --output.",
567 MISSING_PARAMETER_ERROR 980 MISSING_PARAMETER_ERROR
568 ) 981 )
610 args$compounds 1023 args$compounds
611 ), 1024 ),
612 MISSING_INPUT_FILE_ERROR 1025 MISSING_INPUT_FILE_ERROR
613 ) 1026 )
614 } 1027 }
615 if ( 1028 if (in_galaxy_env()) {
1029 check_galaxy_args_validity(args)
1030 }
1031 }
1032
1033 in_galaxy_env <- function() {
1034 sysvars <- Sys.getenv()
1035 sysvarnames <- names(sysvars)
1036 return(
616 "_GALAXY_JOB_HOME_DIR" %in% sysvarnames 1037 "_GALAXY_JOB_HOME_DIR" %in% sysvarnames
617 || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames 1038 || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames
618 || "GALAXY_MEMORY_MB" %in% sysvarnames 1039 || "GALAXY_MEMORY_MB" %in% sysvarnames
619 || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames 1040 || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames
620 || "GALAXY_SLOTS" %in% sysvarnames 1041 || "GALAXY_SLOTS" %in% sysvarnames
621 ) { 1042 )
622 check_galaxy_args_validity(args)
623 }
624 } 1043 }
625 1044
626 check_galaxy_args_validity <- function(args) { 1045 check_galaxy_args_validity <- function(args) {
627 if (!file.exists(args$output)) { 1046 if (!file.exists(args$output)) {
628 stop_with_status( 1047 stop_with_status(
636 } 1055 }
637 1056
638 get_csv_or_tsv <- function( 1057 get_csv_or_tsv <- function(
639 path, 1058 path,
640 sep_stack = c("\t", ",", ";"), 1059 sep_stack = c("\t", ",", ";"),
1060 sep_names = c("tab", "comma", "semicolon"),
641 header = TRUE, 1061 header = TRUE,
642 quote = "\"" 1062 quote = "\""
643 ) { 1063 ) {
644 sep <- sep_stack[1] 1064 sep <- determine_csv_or_tsv_sep(
645 result <- tryCatch({ 1065 path = path,
646 read.table( 1066 sep_stack = sep_stack,
647 file = path, 1067 header = header,
648 sep = sep, 1068 quote = quote
649 header = header, 1069 )
650 quote = quote 1070 verbose_catf(
651 ) 1071 "%s separator has been determined for %s.\n",
652 }, error = function(e) { 1072 sep_names[sep_stack == sep],
653 return(data.frame()) 1073 path
654 }) 1074 )
655 if (length(sep_stack) == 1) { 1075 return(read.table(
656 return(result) 1076 file = path,
657 } 1077 sep = sep,
658 # if ( 1078 header = header,
659 # ncol(result) == 0 || ## failed 1079 quote = quote
660 # ncol(result) == 1 ## only one row, suspicious, possible fail # nolint 1080 ))
661 # ) { 1081 }
662 new_result <- get_csv_or_tsv( 1082
663 path, 1083 determine_csv_or_tsv_sep <- function(
664 sep_stack = sep_stack[-1], 1084 path,
665 header = header, 1085 sep_stack = c("\t", ",", ";"),
666 quote = quote 1086 header = TRUE,
667 ) 1087 quote = "\""
668 if (ncol(new_result) > ncol(result)) { 1088 ) {
669 return(new_result) 1089 count <- -1
670 } 1090 best_sep <- sep_stack[1]
671 # } 1091 for (sep in sep_stack) {
672 return(result) 1092 tryCatch({
1093 table <- read.table(
1094 file = path,
1095 sep = sep,
1096 header = header,
1097 quote = quote,
1098 nrows = 1
1099 )
1100 if (ncol(table) > count) {
1101 count <- ncol(table)
1102 best_sep <- sep
1103 }
1104 })
1105 }
1106 return(best_sep)
673 } 1107 }
674 1108
675 uniformize_columns <- function(df) { 1109 uniformize_columns <- function(df) {
676 cols <- colnames(df) 1110 cols <- colnames(df)
677 for (func in c(tolower)) { 1111 for (func in c(tolower)) {
679 } 1113 }
680 colnames(df) <- cols 1114 colnames(df) <- cols
681 return(df) 1115 return(df)
682 } 1116 }
683 1117
1118 handle_galaxy_param <- function(args) {
1119 for (param in names(args)) {
1120 if (is.character(args[[param]])) {
1121 args[[param]] <- gsub("__ob__", "[", args[[param]])
1122 args[[param]] <- gsub("__cb__", "]", args[[param]])
1123 }
1124 }
1125 return(args)
1126 }
1127
1128 zip_pdfs <- function(processing_parameters) {
1129 if (processing_parameters$do_pdf) {
1130 if (zip <- Sys.getenv("R_ZIPCMD", "zip") == "") {
1131 catf("R could not fin the zip executable. Trying luck: zip = \"zip\"")
1132 zip <- "zip"
1133 } else {
1134 catf("Found zip executable at %s .", zip)
1135 }
1136 utils::zip(
1137 processing_parameters$pdf_zip_path,
1138 processing_parameters$pdf_path,
1139 zip = zip
1140 )
1141 }
1142 }
1143
684 main <- function(args) { 1144 main <- function(args) {
685 if (args$version) { 1145 if (args$version) {
686 cat(sprintf("%s\n", MS2SNOOP_VERSION)) 1146 catf("%s\n", MS2SNOOP_VERSION)
687 base::quit(status = 0) 1147 base::quit(status = 0)
688 } 1148 }
689 sessionInfo() 1149 if (in_galaxy_env()) {
1150 print(sessionInfo())
1151 cat("\n\n")
1152 }
690 check_args_validity(args) 1153 check_args_validity(args)
1154 args <- handle_galaxy_param(args)
1155 if (args$ionization == "None") {
1156 args$ionization <- NULL
1157 }
691 if (args$debug) { 1158 if (args$debug) {
692 set_debug() 1159 set_debug()
693 } 1160 }
694 if (args$verbose) { 1161 if (args$verbose) {
695 set_verbose() 1162 set_verbose()
696 } 1163 }
697 ## MSpurity precursors file
698 precursors <- get_csv_or_tsv(args$precursors) 1164 precursors <- get_csv_or_tsv(args$precursors)
699 ## MSpurity fragments file
700 fragments <- get_csv_or_tsv(args$fragments) 1165 fragments <- get_csv_or_tsv(args$fragments)
701 ## list of compounds : col1=Name of molecule, col2=m/z, col3=retention time
702 compounds <- get_csv_or_tsv(args$compounds) 1166 compounds <- get_csv_or_tsv(args$compounds)
703 1167
704 compounds <- uniformize_columns(compounds) 1168 compounds <- uniformize_columns(compounds)
705 mandatory_columns <- c( 1169 mandatory_columns <- c(
706 "compound_name", 1170 "compound_name",
717 ), 1181 ),
718 BAD_PARAMETER_VALUE_ERROR 1182 BAD_PARAMETER_VALUE_ERROR
719 ) 1183 )
720 } 1184 }
721 1185
722 res_all <- NULL 1186 res_all <- data.frame()
1187 processing_parameters <- list(
1188 min_number_scan = args$min_number_scan,
1189 mzdecimal = args$mzdecimal,
1190 r_threshold = args$r_threshold,
1191 seuil_ra = args$seuil_ra,
1192 tolmz = args$tolmz,
1193 tolrt = args$tolrt,
1194 ionization = args$ionization,
1195 do_pdf = nchar(args$pdf_path) > 0,
1196 pdf_zip_path = args$pdf_path,
1197 pdf_path = tempdir(),
1198 fragment_match_delta = args$fragment_match_delta,
1199 fragment_match_delta_unit = args$fragment_match_delta_unit
1200 )
723 for (i in seq_len(nrow(compounds))) { 1201 for (i in seq_len(nrow(compounds))) {
724 ## loop execution for all compounds in the compounds file 1202 processing_parameters$mzref <- compounds[["mz"]][i]
725 res_cor <- NULL 1203 processing_parameters$rtref <- compounds[["rtsec"]][i]
1204 processing_parameters$c_name <- compounds[["compound_name"]][i]
1205 processing_parameters$inchikey <- compounds[["inchikey"]][i]
1206 processing_parameters$elemcomposition <- compounds[["elemcomposition"]][i]
726 res_cor <- extract_fragments( 1207 res_cor <- extract_fragments(
727 precursors = precursors, 1208 precursors = precursors,
728 fragments = fragments, 1209 fragments = fragments,
729 mzref = compounds[["mz"]][i], 1210 processing_parameters = processing_parameters
730 rtref = compounds[["rtsec"]][i],
731 c_name = compounds[["compound_name"]][i],
732 inchikey = compounds[["inchikey"]][i],
733 elemcomposition = compounds[["elemcomposition"]][i],
734 min_number_scan = args$min_number_scan,
735 mzdecimal = args$mzdecimal,
736 r_threshold = args$r_threshold,
737 seuil_ra = args$seuil_ra,
738 tolmz = args$tolmz,
739 tolrt = args$tolrt
740 ) 1211 )
741 if (!is.null(res_cor)) { 1212 if (!is.null(res_cor)) {
742 if (is.null(res_all)) { 1213 res_all <- rbind(res_all, res_cor)
743 res_all <- res_cor
744 } else {
745 res_all <- rbind(res_all, res_cor)
746 }
747 } 1214 }
748 } 1215 }
749 1216
750 if (is.null(res_all)) { 1217 if (nrow(res_all) == 0) {
751 stop_with_status("No result at all!", NO_ANY_RESULT_ERROR) 1218 stop_with_status("No result at all!", NO_ANY_RESULT_ERROR)
752 } 1219 }
1220
753 write.table( 1221 write.table(
754 x = res_all, 1222 x = res_all,
755 file = args$output, 1223 file = args$output,
756 sep = "\t", 1224 sep = "\t",
757 row.names = FALSE 1225 row.names = FALSE
758 ) 1226 )
1227 zip_pdfs(processing_parameters)
1228 unlink(processing_parameters$pdf_path, recursive = TRUE)
759 } 1229 }
760 1230
761 global_debug <- FALSE 1231 global_debug <- FALSE
762 global_verbose <- FALSE 1232 global_verbose <- FALSE
763 args <- optparse::parse_args(create_parser()) 1233 args <- optparse::parse_args(create_parser())