Mercurial > repos > workflow4metabolomics > ms2snoop
comparison MS2snoop.R @ 1:df2672c37732 draft
planemo upload commit 42359ca78388ce5221bc88905a78c996c758aa43
| author | workflow4metabolomics | 
|---|---|
| date | Tue, 24 May 2022 18:14:49 +0000 | 
| parents | 91a3242fd67f | 
| children | c68c94865667 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 0:91a3242fd67f | 1:df2672c37732 | 
|---|---|
| 11 #' tested against data from other labs. | 11 #' tested against data from other labs. | 
| 12 #' new maintainer: Lain Pavot - lain.pavot@inrae.fr | 12 #' new maintainer: Lain Pavot - lain.pavot@inrae.fr | 
| 13 #' | 13 #' | 
| 14 #' @import optparse | 14 #' @import optparse | 
| 15 #' | 15 #' | 
| 16 NULL | 16 | 
| 17 | 17 | 
| 18 assign("MS2SNOOP_VERSION", "1.0.1") | |
| 19 lockBinding("MS2SNOOP_VERSION", globalenv()) | |
| 20 | |
| 21 assign("MISSING_PARAMETER_ERROR", 1) | |
| 22 lockBinding("MISSING_PARAMETER_ERROR", globalenv()) | |
| 23 | |
| 24 assign("BAD_PARAMETER_VALUE_ERROR", 2) | |
| 25 lockBinding("BAD_PARAMETER_VALUE_ERROR", globalenv()) | |
| 26 | |
| 27 assign("MISSING_INPUT_FILE_ERROR", 3) | |
| 28 lockBinding("MISSING_INPUT_FILE_ERROR", globalenv()) | |
| 29 | |
| 30 assign("NO_ANY_RESULT_ERROR", 255) | |
| 31 lockBinding("NO_ANY_RESULT_ERROR", globalenv()) | |
| 18 | 32 | 
| 19 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") | 33 assign("DEFAULT_PRECURSOR_PATH", "peaklist_precursors.tsv") | 
| 20 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") | 34 assign("DEFAULT_FRAGMENTS_PATH", "peaklist_fragments.tsv") | 
| 21 assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt") | 35 assign("DEFAULT_COMPOUNDS_PATH", "compounds_pos.txt") | 
| 22 assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt") | 36 assign("DEFAULT_OUTPUT_PATH", "compound_fragments_result.txt") | 
| 43 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60) | 57 assign("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", 60) | 
| 44 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv()) | 58 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_R_THRESHOLD", globalenv()) | 
| 45 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv()) | 59 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_SEUIL_RA", globalenv()) | 
| 46 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv()) | 60 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLMZ", globalenv()) | 
| 47 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv()) | 61 lockBinding("DEFAULT_EXTRACT_FRAGMENTS_TOLRT", globalenv()) | 
| 48 | |
| 49 | |
| 50 debug <- FALSE | |
| 51 | 62 | 
| 52 | 63 | 
| 53 ######################################################################## | 64 ######################################################################## | 
| 54 | 65 | 
| 55 #' @title plot_pseudo_spectra | 66 #' @title plot_pseudo_spectra | 
| 211 | 222 | 
| 212 ## check if fragments corresponding to precursor are found in several | 223 ## check if fragments corresponding to precursor are found in several | 
| 213 ## files (collision energy) | 224 ## files (collision energy) | 
| 214 ## this lead to a processing for each fileid | 225 ## this lead to a processing for each fileid | 
| 215 mf <- levels(as.factor(sprecini$fileid)) | 226 mf <- levels(as.factor(sprecini$fileid)) | 
| 216 if (length(mf) > 1) { | 227 if (length(mf) > 1 && global_verbose) { | 
| 217 cat(" several files detected for this compounds :\n") | 228 cat(" several files detected for this compounds :\n") | 
| 218 } | 229 } | 
| 219 | 230 | 
| 220 for (f in seq_along(mf)) { | 231 for (f in seq_along(mf)) { | 
| 221 | 232 | 
| 237 sfrgtfil <- data.frame(sfrgtfil, mznominal) | 248 sfrgtfil <- data.frame(sfrgtfil, mznominal) | 
| 238 | 249 | 
| 239 ## creation of cross table row=scan col=mz X=ra | 250 ## creation of cross table row=scan col=mz X=ra | 
| 240 vmz <- levels(as.factor(sfrgtfil$mznominal)) | 251 vmz <- levels(as.factor(sfrgtfil$mznominal)) | 
| 241 | 252 | 
| 242 cat(" fragments :", vmz) | 253 if (global_verbose) { | 
| 254 cat(" fragments :", vmz) | |
| 255 } | |
| 243 | 256 | 
| 244 ## mz of precursor in data precursor to check correlation with | 257 ## mz of precursor in data precursor to check correlation with | 
| 245 mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal)) | 258 mz_prec <- paste0("mz", round(mean(sprec$mz), mzdecimal)) | 
| 246 | 259 | 
| 247 for (m in seq_along(vmz)) { | 260 for (m in seq_along(vmz)) { | 
| 269 all.x = TRUE, | 282 all.x = TRUE, | 
| 270 all.y = TRUE | 283 all.y = TRUE | 
| 271 ) | 284 ) | 
| 272 } | 285 } | 
| 273 } | 286 } | 
| 274 if (debug) { | 287 if (global_debug) { | 
| 288 print(ds_abs_int) | |
| 275 write.table( | 289 write.table( | 
| 276 x = ds_abs_int, | 290 x = ds_abs_int, | 
| 277 file = paste0(c_name, "ds_abs_int.txt"), | 291 file = paste0(c_name, "ds_abs_int.txt"), | 
| 278 row.names = FALSE, | 292 row.names = FALSE, | 
| 279 sep = "\t" | 293 sep = "\t" | 
| 356 cat(" non detected in fragments file \n") | 370 cat(" non detected in fragments file \n") | 
| 357 } | 371 } | 
| 358 if (!is.null(res_comp_by_file)) { | 372 if (!is.null(res_comp_by_file)) { | 
| 359 res_comp <- rbind(res_comp, res_comp_by_file) | 373 res_comp <- rbind(res_comp, res_comp_by_file) | 
| 360 } | 374 } | 
| 361 cat("\n") | 375 if (global_verbose) { | 
| 376 cat("\n") | |
| 377 } | |
| 362 dev.off() | 378 dev.off() | 
| 363 } | 379 } | 
| 364 } else { | 380 } else { | 
| 365 res_comp <- NULL | 381 res_comp <- NULL | 
| 366 cat(" non detected in precursor file \n") | 382 cat(" non detected in precursor file \n") | 
| 367 } | 383 } | 
| 368 return(res_comp) | 384 return(res_comp) | 
| 369 } | 385 } | 
| 370 | 386 | 
| 387 set_global <- function(var, value) { | |
| 388 assign(var, value, envir = globalenv()) | |
| 389 } | |
| 390 | |
| 391 set_debug <- function() { | |
| 392 set_global("global_debug", TRUE) | |
| 393 } | |
| 394 | |
| 395 unset_debug <- function() { | |
| 396 set_global("global_debug", FALSE) | |
| 397 } | |
| 398 | |
| 399 set_verbose <- function() { | |
| 400 set_global("global_verbose", TRUE) | |
| 401 } | |
| 402 | |
| 403 unset_verbose <- function() { | |
| 404 set_global("global_verbose", FALSE) | |
| 405 } | |
| 371 | 406 | 
| 372 create_parser <- function() { | 407 create_parser <- function() { | 
| 373 parser <- optparse::OptionParser() | 408 parser <- optparse::OptionParser() | 
| 374 parser <- optparse::add_option( | 409 parser <- optparse::add_option( | 
| 375 parser, | 410 parser, | 
| 376 c("-v", "--verbose"), | 411 c("-v", "--verbose"), | 
| 377 action = "store_true", | 412 action = "store_true", | 
| 378 default = FALSE, | 413 default = FALSE, | 
| 379 help = "Print extra output [default %default]" | 414 help = paste( | 
| 415 "[default %default]", | |
| 416 "Print extra output" | |
| 417 ) | |
| 418 ) | |
| 419 parser <- optparse::add_option( | |
| 420 parser, | |
| 421 c("-V", "--version"), | |
| 422 action = "store_true", | |
| 423 default = FALSE, | |
| 424 help = "Prints version and exits" | |
| 425 ) | |
| 426 parser <- optparse::add_option( | |
| 427 parser, | |
| 428 c("-d", "--debug"), | |
| 429 action = "store_true", | |
| 430 default = FALSE, | |
| 431 help = paste( | |
| 432 "[default %default]", | |
| 433 "Print debug outputs" | |
| 434 ) | |
| 380 ) | 435 ) | 
| 381 parser <- optparse::add_option( | 436 parser <- optparse::add_option( | 
| 382 parser, | 437 parser, | 
| 383 c("-o", "--output"), | 438 c("-o", "--output"), | 
| 384 type = "character", | 439 type = "character", | 
| 414 parser, | 469 parser, | 
| 415 c("--tolmz"), | 470 c("--tolmz"), | 
| 416 type = "numeric", | 471 type = "numeric", | 
| 417 action = "store", | 472 action = "store", | 
| 418 default = DEFAULT_TOLMZ, | 473 default = DEFAULT_TOLMZ, | 
| 419 metavar = "number" | 474 metavar = "number", | 
| 475 help = paste( | |
| 476 "[default %default]", | |
| 477 "Tolerance for MZ (in Dalton) to match the standard in the compounds" | |
| 478 ) | |
| 420 ) | 479 ) | 
| 421 parser <- optparse::add_option( | 480 parser <- optparse::add_option( | 
| 422 parser, | 481 parser, | 
| 423 c("--tolrt"), | 482 c("--tolrt"), | 
| 424 type = "integer", | 483 type = "integer", | 
| 425 action = "store", | 484 action = "store", | 
| 426 default = DEFAULT_TOLRT, | 485 default = DEFAULT_TOLRT, | 
| 427 metavar = "number" | 486 metavar = "number", | 
| 487 help = paste( | |
| 488 "[default %default]", | |
| 489 "RT (in seconds) to match the standard in the compounds" | |
| 490 ) | |
| 428 ) | 491 ) | 
| 429 parser <- optparse::add_option( | 492 parser <- optparse::add_option( | 
| 430 parser, | 493 parser, | 
| 431 c("--seuil_ra"), | 494 c("--seuil_ra"), | 
| 432 type = "numeric", | 495 type = "numeric", | 
| 433 action = "store", | 496 action = "store", | 
| 434 help = "relative intensity threshold", | |
| 435 default = DEFAULT_SEUIL_RA, | 497 default = DEFAULT_SEUIL_RA, | 
| 436 metavar = "number" | 498 metavar = "number", | 
| 499 help = paste( | |
| 500 "[default %default]", | |
| 501 "relative intensity threshold" | |
| 502 ), | |
| 437 ) | 503 ) | 
| 438 parser <- optparse::add_option( | 504 parser <- optparse::add_option( | 
| 439 parser, | 505 parser, | 
| 440 c("--mzdecimal"), | 506 c("--mzdecimal"), | 
| 441 type = "integer", | 507 type = "integer", | 
| 442 default = DEFAULT_MZDECIMAL, | 508 default = DEFAULT_MZDECIMAL, | 
| 443 action = "store", | 509 action = "store", | 
| 444 help = "nb decimal for mz", | 510 help = paste( | 
| 511 "[default %default]", | |
| 512 "Number of decimal to write for MZ" | |
| 513 ), | |
| 445 metavar = "number" | 514 metavar = "number" | 
| 446 ) | 515 ) | 
| 447 parser <- optparse::add_option( | 516 parser <- optparse::add_option( | 
| 448 parser, | 517 parser, | 
| 449 c("--r_threshold"), | 518 c("--r_threshold"), | 
| 450 type = "integer", | 519 type = "integer", | 
| 451 default = DEFAULT_R_THRESHOLD, | 520 default = DEFAULT_R_THRESHOLD, | 
| 452 action = "store", | 521 action = "store", | 
| 453 help = paste0( | 522 help = paste( | 
| 454 "r pearson correlation threshold between precursor and fragment ", | 523 "[default %default]", | 
| 524 "R-Pearson correlation threshold between precursor and fragment", | |
| 455 "absolute intensity" | 525 "absolute intensity" | 
| 456 ), | 526 ), | 
| 457 metavar = "number" | 527 metavar = "number" | 
| 458 ) | 528 ) | 
| 459 parser <- optparse::add_option( | 529 parser <- optparse::add_option( | 
| 460 parser, | 530 parser, | 
| 461 c("--min_number_scan"), | 531 c("--min_number_scan"), | 
| 462 type = "numeric", | 532 type = "numeric", | 
| 463 action = "store", | 533 action = "store", | 
| 464 default = DEFAULT_MINNUMBERSCAN, | 534 default = DEFAULT_MINNUMBERSCAN, | 
| 465 help = paste0( | 535 help = paste( | 
| 466 "fragments are kept if there are found in a minimum number ", | 536 "[default %default]", | 
| 467 "of scans" | 537 "Fragments are kept if there are found in a minimum number", | 
| 538 "of min_number_scan scans" | |
| 468 ), | 539 ), | 
| 469 metavar = "number" | 540 metavar = "number" | 
| 470 ) | 541 ) | 
| 471 return(parser) | 542 return(parser) | 
| 472 } | 543 } | 
| 473 | 544 | 
| 545 stop_with_status <- function(msg, status) { | |
| 546 message(sprintf("Error: %s", msg)) | |
| 547 message(sprintf("Error code: %s", status)) | |
| 548 base::quit(status = status) | |
| 549 } | |
| 550 | |
| 551 check_args_validity <- function(args) { ## nolint cyclocomp_linter | |
| 552 sysvars <- Sys.getenv() | |
| 553 sysvarnames <- names(sysvars) | |
| 554 if (length(args$output) == 0 || nchar(args$output[1]) == 0) { | |
| 555 stop_with_status( | |
| 556 "Missing output parameters. Please set it with --output.", | |
| 557 MISSING_PARAMETER_ERROR | |
| 558 ) | |
| 559 } | |
| 560 if (length(args$precursors) == 0 || nchar(args$precursors[1]) == 0) { | |
| 561 stop_with_status( | |
| 562 "Missing precursors parameters. Please set it with --precursors.", | |
| 563 MISSING_PARAMETER_ERROR | |
| 564 ) | |
| 565 } | |
| 566 if (length(args$fragments) == 0 || nchar(args$fragments[1]) == 0) { | |
| 567 stop_with_status( | |
| 568 "Missing fragments parameters. Please set it with --fragments.", | |
| 569 MISSING_PARAMETER_ERROR | |
| 570 ) | |
| 571 } | |
| 572 if (length(args$compounds) == 0 || nchar(args$compounds[1]) == 0) { | |
| 573 stop_with_status( | |
| 574 "Missing compounds parameters. Please set it with --compounds.", | |
| 575 MISSING_PARAMETER_ERROR | |
| 576 ) | |
| 577 } | |
| 578 if (!file.exists(args$precursors)) { | |
| 579 stop_with_status( | |
| 580 sprintf( | |
| 581 "Precursors file %s does not exist or cannot be accessed.", | |
| 582 args$precursors | |
| 583 ), | |
| 584 MISSING_INPUT_FILE_ERROR | |
| 585 ) | |
| 586 } | |
| 587 if (!file.exists(args$fragments)) { | |
| 588 stop_with_status( | |
| 589 sprintf( | |
| 590 "Fragments file %s does not exist or cannot be accessed.", | |
| 591 args$fragments | |
| 592 ), | |
| 593 MISSING_INPUT_FILE_ERROR | |
| 594 ) | |
| 595 } | |
| 596 if (!file.exists(args$compounds)) { | |
| 597 stop_with_status( | |
| 598 sprintf( | |
| 599 "Compounds file %s does not exist or cannot be accessed.", | |
| 600 args$compounds | |
| 601 ), | |
| 602 MISSING_INPUT_FILE_ERROR | |
| 603 ) | |
| 604 } | |
| 605 if ( | |
| 606 "_GALAXY_JOB_HOME_DIR" %in% sysvarnames | |
| 607 || "_GALAXY_JOB_TMP_DIR" %in% sysvarnames | |
| 608 || "GALAXY_MEMORY_MB" %in% sysvarnames | |
| 609 || "GALAXY_MEMORY_MB_PER_SLOT" %in% sysvarnames | |
| 610 || "GALAXY_SLOTS" %in% sysvarnames | |
| 611 ) { | |
| 612 check_galaxy_args_validity(args) | |
| 613 } | |
| 614 } | |
| 615 | |
| 616 check_galaxy_args_validity <- function(args) { | |
| 617 if (!file.exists(args$output)) { | |
| 618 stop_with_status( | |
| 619 sprintf( | |
| 620 "Output file %s does not exist or cannot be accessed.", | |
| 621 args$output | |
| 622 ), | |
| 623 MISSING_INPUT_FILE_ERROR | |
| 624 ) | |
| 625 } | |
| 626 } | |
| 627 | |
| 474 main <- function(args) { | 628 main <- function(args) { | 
| 475 ## FOLDER AND FILES | 629 if (args$version) { | 
| 630 cat(sprintf("%s\n", MS2SNOOP_VERSION)) | |
| 631 base::quit(status = 0) | |
| 632 } | |
| 633 sessionInfo() | |
| 634 check_args_validity(args) | |
| 635 if (args$debug) { | |
| 636 set_debug() | |
| 637 } | |
| 638 if (args$verbose) { | |
| 639 set_verbose() | |
| 640 } | |
| 476 ## MSpurity precursors file | 641 ## MSpurity precursors file | 
| 477 precursors <- read.table( | 642 precursors <- read.table( | 
| 478 file = args$precursors, | 643 file = args$precursors, | 
| 479 header = TRUE, | 644 header = TRUE, | 
| 480 sep = "\t", | 645 sep = "\t", | 
| 492 file = args$compounds, | 657 file = args$compounds, | 
| 493 sep = "\t", | 658 sep = "\t", | 
| 494 quote = "\"", | 659 quote = "\"", | 
| 495 header = TRUE | 660 header = TRUE | 
| 496 ) | 661 ) | 
| 497 ## PARAMETERS | 662 | 
| 498 ## tolerance for mz(dalton) rt(seconds) to match the standard in the compounds | 663 res_all <- NULL | 
| 499 ## list with the precursor MSpurity file | |
| 500 tolmz <- args$tolmz | |
| 501 tolrt <- args$tolrt | |
| 502 | |
| 503 ## relative intensity threshold | |
| 504 seuil_ra <- args$seuil_ra | |
| 505 ## nb decimal for mz | |
| 506 mzdecimal <- args$mzdecimal | |
| 507 ## r pearson correlation threshold between precursor and | |
| 508 # #fragment absolute intensity | |
| 509 r_threshold <- args$r_threshold | |
| 510 ## fragments are kept if there are found in a minimum number of scans | |
| 511 min_number_scan <- args$min_number_scan | |
| 512 | |
| 513 for (i in seq_len(nrow(compounds))) { | 664 for (i in seq_len(nrow(compounds))) { | 
| 514 ## loop execution for all compounds in the compounds file | 665 ## loop execution for all compounds in the compounds file | 
| 515 res_cor <- NULL | 666 res_cor <- NULL | 
| 516 res_cor <- extract_fragments( | 667 res_cor <- extract_fragments( | 
| 517 precursors = precursors, | 668 precursors = precursors, | 
| 518 fragments = fragments, | 669 fragments = fragments, | 
| 519 mzref = compounds[[2]][i], | 670 mzref = compounds[[2]][i], | 
| 520 rtref = compounds[[3]][i], | 671 rtref = compounds[[3]][i], | 
| 521 c_name = compounds[[1]][i], | 672 c_name = compounds[[1]][i], | 
| 522 min_number_scan = min_number_scan, | 673 min_number_scan = args$min_number_scan, | 
| 523 mzdecimal = mzdecimal, | 674 mzdecimal = args$mzdecimal, | 
| 524 r_threshold = r_threshold, | 675 r_threshold = args$r_threshold, | 
| 525 seuil_ra = seuil_ra, | 676 seuil_ra = args$seuil_ra, | 
| 526 tolmz = tolmz, | 677 tolmz = args$tolmz, | 
| 527 tolrt = tolrt | 678 tolrt = args$tolrt | 
| 528 ) | 679 ) | 
| 529 if (i == 1 & !is.null(res_cor)) { | 680 if (!is.null(res_cor)) { | 
| 530 res_all <- res_cor | 681 if (is.null(res_all)) { | 
| 531 } else if (!is.null(res_cor)) { | 682 res_all <- res_cor | 
| 532 res_all <- rbind(res_all, res_cor) | 683 } else { | 
| 684 res_all <- rbind(res_all, res_cor) | |
| 685 } | |
| 533 } | 686 } | 
| 534 } | 687 } | 
| 535 | 688 | 
| 536 if (is.null(res_all)) { | 689 if (is.null(res_all)) { | 
| 537 stop("No result at all!") | 690 stop_with_status("No result at all!", NO_ANY_RESULT_ERROR) | 
| 538 } | 691 } | 
| 539 write.table( | 692 write.table( | 
| 540 x = res_all, | 693 x = res_all, | 
| 541 file = args$output, | 694 file = args$output, | 
| 542 sep = "\t", | 695 sep = "\t", | 
| 543 row.names = FALSE | 696 row.names = FALSE | 
| 544 ) | 697 ) | 
| 545 } | 698 } | 
| 546 | 699 | 
| 700 unset_debug() | |
| 701 unset_verbose() | |
| 547 args <- optparse::parse_args(create_parser()) | 702 args <- optparse::parse_args(create_parser()) | 
| 548 sessionInfo() | |
| 549 main(args) | 703 main(args) | 
| 550 | 704 | 
| 551 warnings() | 705 warnings() | 
