Mercurial > repos > workflow4metabolomics > ms2snoop
comparison MS2snoop.R @ 1:df2672c37732 draft
planemo upload commit 42359ca78388ce5221bc88905a78c996c758aa43
author | workflow4metabolomics |
---|---|
date | Tue, 24 May 2022 18:14:49 +0000 |
parents | 91a3242fd67f |
children | c68c94865667 |
comparison
equal
deleted
inserted
replaced
0:91a3242fd67f | 1:df2672c37732 |
---|---|
11 #' tested against data from other labs. | 11 #' tested against data from other labs. |
12 #' new maintainer: Lain Pavot - lain.pavot@inrae.fr | 12 #' new maintainer: Lain Pavot - lain.pavot@inrae.fr |
13 #' | 13 #' |
14 #' @import optparse | 14 #' @import optparse |
15 #' | 15 #' |
16 NULL | 16 |
17 | 17 |
18 assign("MS2SNOOP_VERSION", "1.0.1") | |
19 lockBinding("MS2SNOOP_VERSION", globalenv()) | |
20 | |
21 assign("MISSING_PARAMETER_ERROR", 1) | |
22 lockBinding("MISSING_PARAMETER_ERROR", globalenv()) | |
23 | |
24 assign("BAD_PARAMETER_VALUE_ERROR", 2) | |
25 lockBinding("BAD_PARAMETER_VALUE_ERROR", globalenv()) | |
26 | |
27 assign("MISSING_INPUT_FILE_ERROR", 3) | |
28 lockBinding("MISSING_INPUT_FILE_ERROR", globalenv()) | |
29 | |
30 assign("NO_ANY_RESULT_ERROR", 255) | |
31 lockBinding("NO_ANY_RESULT_ERROR", globalenv()) | |
18 | 32 |
19 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") | 33 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") |
20 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") | 34 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") |
21 assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt") | 35 assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt") |
22 assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt") | 36 assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt") |
43 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60) | 57 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60) |
44 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv()) | 58 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv()) |
45 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv()) | 59 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv()) |
46 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv()) | 60 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv()) |
47 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv()) | 61 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv()) |
48 | |
49 | |
50 debug <- FALSE | |
51 | 62 |
52 | 63 |
53 ######################################################################## | 64 ######################################################################## |
54 | 65 |
55 #' @title plot_pseudo_spectra | 66 #' @title plot_pseudo_spectra |
211 | 222 |
212 ## check if fragments corresponding to precursor are found in several | 223 ## check if fragments corresponding to precursor are found in several |
213 ## files (collision energy) | 224 ## files (collision energy) |
214 ## this lead to a processing for each fileid | 225 ## this lead to a processing for each fileid |
215 mf <- levels(as.factor(sprecini$fileid)) | 226 mf <- levels(as.factor(sprecini$fileid)) |
216 if (length(mf) > 1) { | 227 if (length(mf) > 1 && global_verbose) { |
217 cat(" several files detected for this compounds :\n") | 228 cat(" several files detected for this compounds :\n") |
218 } | 229 } |
219 | 230 |
220 for (f in seq_along(mf)) { | 231 for (f in seq_along(mf)) { |
221 | 232 |
237 sfrgtfil <- data.frame(sfrgtfil, mznominal) | 248 sfrgtfil <- data.frame(sfrgtfil, mznominal) |
238 | 249 |
239 ## creation of cross table row=scan col=mz X=ra | 250 ## creation of cross table row=scan col=mz X=ra |
240 vmz <- levels(as.factor(sfrgtfil$mznominal)) | 251 vmz <- levels(as.factor(sfrgtfil$mznominal)) |
241 | 252 |
242 cat(" fragments :", vmz) | 253 if (global_verbose) { |
254 cat(" fragments :", vmz) | |
255 } | |
243 | 256 |
244 ## mz of precursor in data precursor to check correlation with | 257 ## mz of precursor in data precursor to check correlation with |
245 mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal)) | 258 mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal)) |
246 | 259 |
247 for (m in seq_along(vmz)) { | 260 for (m in seq_along(vmz)) { |
269 all.x = TRUE, | 282 all.x = TRUE, |
270 all.y = TRUE | 283 all.y = TRUE |
271 ) | 284 ) |
272 } | 285 } |
273 } | 286 } |
274 if (debug) { | 287 if (global_debug) { |
288 print(ds_abs_int) | |
275 write.table( | 289 write.table( |
276 x = ds_abs_int, | 290 x = ds_abs_int, |
277 file = paste0(c_name, "ds_abs_int.txt"), | 291 file = paste0(c_name, "ds_abs_int.txt"), |
278 row.names = FALSE, | 292 row.names = FALSE, |
279 sep = "\t" | 293 sep = "\t" |
356 cat(" non detected in fragments file \n") | 370 cat(" non detected in fragments file \n") |
357 } | 371 } |
358 if (!is.null(res_comp_by_file)) { | 372 if (!is.null(res_comp_by_file)) { |
359 res_comp <- rbind(res_comp, res_comp_by_file) | 373 res_comp <- rbind(res_comp, res_comp_by_file) |
360 } | 374 } |
361 cat("\n") | 375 if (global_verbose) { |
376 cat("\n") | |
377 } | |
362 dev.off() | 378 dev.off() |
363 } | 379 } |
364 } else { | 380 } else { |
365 res_comp <- NULL | 381 res_comp <- NULL |
366 cat(" non detected in precursor file \n") | 382 cat(" non detected in precursor file \n") |
367 } | 383 } |
368 return(res_comp) | 384 return(res_comp) |
369 } | 385 } |
370 | 386 |
387 set_global <- function(var, value) { | |
388 assign(var, value, envir = globalenv()) | |
389 } | |
390 | |
391 set_debug <- function() { | |
392 set_global("global_debug", TRUE) | |
393 } | |
394 | |
395 unset_debug <- function() { | |
396 set_global("global_debug", FALSE) | |
397 } | |
398 | |
399 set_verbose <- function() { | |
400 set_global("global_verbose", TRUE) | |
401 } | |
402 | |
403 unset_verbose <- function() { | |
404 set_global("global_verbose", FALSE) | |
405 } | |
371 | 406 |
372 create_parser <- function() { | 407 create_parser <- function() { |
373 parser <- optparse::OptionParser() | 408 parser <- optparse::OptionParser() |
374 parser <- optparse::add_option( | 409 parser <- optparse::add_option( |
375 parser, | 410 parser, |
376 c("-v", "--verbose"), | 411 c("-v", "--verbose"), |
377 action = "store_true", | 412 action = "store_true", |
378 default = FALSE, | 413 default = FALSE, |
379 help = "Print extra output [default %default]" | 414 help = paste( |
415 "[default %default]", | |
416 "Print extra output" | |
417 ) | |
418 ) | |
419 parser <- optparse::add_option( | |
420 parser, | |
421 c("-V", "--version"), | |
422 action = "store_true", | |
423 default = FALSE, | |
424 help = "Prints version and exits" | |
425 ) | |
426 parser <- optparse::add_option( | |
427 parser, | |
428 c("-d", "--debug"), | |
429 action = "store_true", | |
430 default = FALSE, | |
431 help = paste( | |
432 "[default %default]", | |
433 "Print debug outputs" | |
434 ) | |
380 ) | 435 ) |
381 parser <- optparse::add_option( | 436 parser <- optparse::add_option( |
382 parser, | 437 parser, |
383 c("-o", "--output"), | 438 c("-o", "--output"), |
384 type = "character", | 439 type = "character", |
414 parser, | 469 parser, |
415 c("--tolmz"), | 470 c("--tolmz"), |
416 type = "numeric", | 471 type = "numeric", |
417 action = "store", | 472 action = "store", |
418 default = DEFAULT_TOLMZ, | 473 default = DEFAULT_TOLMZ, |
419 metavar = "number" | 474 metavar = "number", |
475 help = paste( | |
476 "[default %default]", | |
477 "Tolerance for MZ (in Dalton) to match the standard in the compounds" | |
478 ) | |
420 ) | 479 ) |
421 parser <- optparse::add_option( | 480 parser <- optparse::add_option( |
422 parser, | 481 parser, |
423 c("--tolrt"), | 482 c("--tolrt"), |
424 type = "integer", | 483 type = "integer", |
425 action = "store", | 484 action = "store", |
426 default = DEFAULT_TOLRT, | 485 default = DEFAULT_TOLRT, |
427 metavar = "number" | 486 metavar = "number", |
487 help = paste( | |
488 "[default %default]", | |
489 "RT (in seconds) to match the standard in the compounds" | |
490 ) | |
428 ) | 491 ) |
429 parser <- optparse::add_option( | 492 parser <- optparse::add_option( |
430 parser, | 493 parser, |
431 c("--seuil_ra"), | 494 c("--seuil_ra"), |
432 type = "numeric", | 495 type = "numeric", |
433 action = "store", | 496 action = "store", |
434 help = "relative intensity threshold", | |
435 default = DEFAULT_SEUIL_RA, | 497 default = DEFAULT_SEUIL_RA, |
436 metavar = "number" | 498 metavar = "number", |
499 help = paste( | |
500 "[default %default]", | |
501 "relative intensity threshold" | |
502 ), | |
437 ) | 503 ) |
438 parser <- optparse::add_option( | 504 parser <- optparse::add_option( |
439 parser, | 505 parser, |
440 c("--mzdecimal"), | 506 c("--mzdecimal"), |
441 type = "integer", | 507 type = "integer", |
442 default = DEFAULT_MZDECIMAL, | 508 default = DEFAULT_MZDECIMAL, |
443 action = "store", | 509 action = "store", |
444 help = "nb decimal for mz", | 510 help = paste( |
511 "[default %default]", | |
512 "Number of decimal to write for MZ" | |
513 ), | |
445 metavar = "number" | 514 metavar = "number" |
446 ) | 515 ) |
447 parser <- optparse::add_option( | 516 parser <- optparse::add_option( |
448 parser, | 517 parser, |
449 c("--r_threshold"), | 518 c("--r_threshold"), |
450 type = "integer", | 519 type = "integer", |
451 default = DEFAULT_R_THRESHOLD, | 520 default = DEFAULT_R_THRESHOLD, |
452 action = "store", | 521 action = "store", |
453 help = paste0( | 522 help = paste( |
454 "r pearson correlation threshold between precursor and fragment ", | 523 "[default %default]", |
524 "R-Pearson correlation threshold between precursor and fragment", | |
455 "absolute intensity" | 525 "absolute intensity" |
456 ), | 526 ), |
457 metavar = "number" | 527 metavar = "number" |
458 ) | 528 ) |
459 parser <- optparse::add_option( | 529 parser <- optparse::add_option( |
460 parser, | 530 parser, |
461 c("--min_number_scan"), | 531 c("--min_number_scan"), |
462 type = "numeric", | 532 type = "numeric", |
463 action = "store", | 533 action = "store", |
464 default = DEFAULT_MINNUMBERSCAN, | 534 default = DEFAULT_MINNUMBERSCAN, |
465 help = paste0( | 535 help = paste( |
466 "fragments are kept if there are found in a minimum number ", | 536 "[default %default]", |
467 "of scans" | 537 "Fragments are kept if there are found in a minimum number", |
538 "of min_number_scan scans" | |
468 ), | 539 ), |
469 metavar = "number" | 540 metavar = "number" |
470 ) | 541 ) |
471 return(parser) | 542 return(parser) |
472 } | 543 } |
473 | 544 |
545 stop_with_status <- function(msg, status) { | |
546 message(sprintf("Error: %s", msg)) | |
547 message(sprintf("Error code: %s", status)) | |
548 base::quit(status = status) | |
549 } | |
550 | |
551 check_args_validity <- function(args) { ## nolint cyclocomp_linter | |
552 sysvars <- Sys.getenv() | |
553 sysvarnames <- names(sysvars) | |
554 if (length(args$output) == 0 || nchar(args$output[1]) == 0) { | |
555 stop_with_status( | |
556 "Missing output parameters. Please set it with --output.", | |
557 MISSING_PARAMETER_ERROR | |
558 ) | |
559 } | |
560 if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) { | |
561 stop_with_status( | |
562 "Missing precursors parameters. Please set it with --precursors.", | |
563 MISSING_PARAMETER_ERROR | |
564 ) | |
565 } | |
566 if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) { | |
567 stop_with_status( | |
568 "Missing fragments parameters. Please set it with --fragments.", | |
569 MISSING_PARAMETER_ERROR | |
570 ) | |
571 } | |
572 if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) { | |
573 stop_with_status( | |
574 "Missing compounds parameters. Please set it with --compounds.", | |
575 MISSING_PARAMETER_ERROR | |
576 ) | |
577 } | |
578 if (!file.exists(args$precursors)) { | |
579 stop_with_status( | |
580 sprintf( | |
581 "Precursors file %s does not exist or cannot be accessed.", | |
582 args$precursors | |
583 ), | |
584 MISSING_INPUT_FILE_ERROR | |
585 ) | |
586 } | |
587 if (!file.exists(args$fragments)) { | |
588 stop_with_status( | |
589 sprintf( | |
590 "Fragments file %s does not exist or cannot be accessed.", | |
591 args$fragments | |
592 ), | |
593 MISSING_INPUT_FILE_ERROR | |
594 ) | |
595 } | |
596 if (!file.exists(args$compounds)) { | |
597 stop_with_status( | |
598 sprintf( | |
599 "Compounds file %s does not exist or cannot be accessed.", | |
600 args$compounds | |
601 ), | |
602 MISSING_INPUT_FILE_ERROR | |
603 ) | |
604 } | |
605 if ( | |
606 "_GALAXY_JOB_HOME_DIR" %in% sysvarnames | |
607 || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames | |
608 || "GALAXY_MEMORY_MB" %in% sysvarnames | |
609 || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames | |
610 || "GALAXY_SLOTS" %in% sysvarnames | |
611 ) { | |
612 check_galaxy_args_validity(args) | |
613 } | |
614 } | |
615 | |
616 check_galaxy_args_validity <- function(args) { | |
617 if (!file.exists(args$output)) { | |
618 stop_with_status( | |
619 sprintf( | |
620 "Output file %s does not exist or cannot be accessed.", | |
621 args$output | |
622 ), | |
623 MISSING_INPUT_FILE_ERROR | |
624 ) | |
625 } | |
626 } | |
627 | |
474 main <- function(args) { | 628 main <- function(args) { |
475 ## FOLDER AND FILES | 629 if (args$version) { |
630 cat(sprintf("%s\n", MS2SNOOP_VERSION)) | |
631 base::quit(status = 0) | |
632 } | |
633 sessionInfo() | |
634 check_args_validity(args) | |
635 if (args$debug) { | |
636 set_debug() | |
637 } | |
638 if (args$verbose) { | |
639 set_verbose() | |
640 } | |
476 ## MSpurity precursors file | 641 ## MSpurity precursors file |
477 precursors <- read.table( | 642 precursors <- read.table( |
478 file = args$precursors, | 643 file = args$precursors, |
479 header = TRUE, | 644 header = TRUE, |
480 sep = "\t", | 645 sep = "\t", |
492 file = args$compounds, | 657 file = args$compounds, |
493 sep = "\t", | 658 sep = "\t", |
494 quote = "\"", | 659 quote = "\"", |
495 header = TRUE | 660 header = TRUE |
496 ) | 661 ) |
497 ## PARAMETERS | 662 |
498 ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds | 663 res_all <- NULL |
499 ## list with the precursor MSpurity file | |
500 tolmz <- args$tolmz | |
501 tolrt <- args$tolrt | |
502 | |
503 ## relative intensity threshold | |
504 seuil_ra <- args$seuil_ra | |
505 ## nb decimal for mz | |
506 mzdecimal <- args$mzdecimal | |
507 ## r pearson correlation threshold between precursor and | |
508 # #fragment absolute intensity | |
509 r_threshold <- args$r_threshold | |
510 ## fragments are kept if there are found in a minimum number of scans | |
511 min_number_scan <- args$min_number_scan | |
512 | |
513 for (i in seq_len(nrow(compounds))) { | 664 for (i in seq_len(nrow(compounds))) { |
514 ## loop execution for all compounds in the compounds file | 665 ## loop execution for all compounds in the compounds file |
515 res_cor <- NULL | 666 res_cor <- NULL |
516 res_cor <- extract_fragments( | 667 res_cor <- extract_fragments( |
517 precursors = precursors, | 668 precursors = precursors, |
518 fragments = fragments, | 669 fragments = fragments, |
519 mzref = compounds[[2]][i], | 670 mzref = compounds[[2]][i], |
520 rtref = compounds[[3]][i], | 671 rtref = compounds[[3]][i], |
521 c_name = compounds[[1]][i], | 672 c_name = compounds[[1]][i], |
522 min_number_scan = min_number_scan, | 673 min_number_scan = args$min_number_scan, |
523 mzdecimal = mzdecimal, | 674 mzdecimal = args$mzdecimal, |
524 r_threshold = r_threshold, | 675 r_threshold = args$r_threshold, |
525 seuil_ra = seuil_ra, | 676 seuil_ra = args$seuil_ra, |
526 tolmz = tolmz, | 677 tolmz = args$tolmz, |
527 tolrt = tolrt | 678 tolrt = args$tolrt |
528 ) | 679 ) |
529 if (i == 1 & !is.null(res_cor)) { | 680 if (!is.null(res_cor)) { |
530 res_all <- res_cor | 681 if (is.null(res_all)) { |
531 } else if (!is.null(res_cor)) { | 682 res_all <- res_cor |
532 res_all <- rbind(res_all, res_cor) | 683 } else { |
684 res_all <- rbind(res_all, res_cor) | |
685 } | |
533 } | 686 } |
534 } | 687 } |
535 | 688 |
536 if (is.null(res_all)) { | 689 if (is.null(res_all)) { |
537 stop("No result at all!") | 690 stop_with_status("No result at all!", NO_ANY_RESULT_ERROR) |
538 } | 691 } |
539 write.table( | 692 write.table( |
540 x = res_all, | 693 x = res_all, |
541 file = args$output, | 694 file = args$output, |
542 sep = "\t", | 695 sep = "\t", |
543 row.names = FALSE | 696 row.names = FALSE |
544 ) | 697 ) |
545 } | 698 } |
546 | 699 |
700 unset_debug() | |
701 unset_verbose() | |
547 args <- optparse::parse_args(create_parser()) | 702 args <- optparse::parse_args(create_parser()) |
548 sessionInfo() | |
549 main(args) | 703 main(args) |
550 | 704 |
551 warnings() | 705 warnings() |