comparison XSeekerPreparator.R @ 20:ce94e7a141bb draft default tip

" master branch Updating"
author lain
date Tue, 06 Dec 2022 10:18:10 +0000
parents 2937e72e5891
children
comparison
equal deleted inserted replaced
19:2937e72e5891 20:ce94e7a141bb
196 } 196 }
197 197
198 search_tree <- function(path, target) { 198 search_tree <- function(path, target) {
199 target <- tolower(target) 199 target <- tolower(target)
200 for (file in list.files(path)) { 200 for (file in list.files(path)) {
201 if (is.dir(file)) { 201 if (fs::is.dir(file)) {
202 result <- search_tree(file.path(path, file), target) 202 result <- search_tree(file.path(path, file), target)
203 if (!is.null(result)) { 203 if (!is.null(result)) {
204 return(result) 204 return(result)
205 } 205 }
206 } else if (tolower(file) == target) { 206 } else if (tolower(file) == target) {
398 return(csv_header_translator(translator, compounds)) 398 return(csv_header_translator(translator, compounds))
399 } 399 }
400 400
401 guess_translator <- function(header) { 401 guess_translator <- function(header) {
402 result <- list( 402 result <- list(
403 # HMDB_ID = NULL,
404 mz = NULL, 403 mz = NULL,
405 name = NULL, 404 name = NULL,
406 common_name = NULL, 405 common_name = NULL,
407 formula = NULL, 406 formula = NULL
408 # inchi_key = NULL
409 ) 407 )
410 asked_cols <- names(result) 408 asked_cols <- names(result)
411 for (asked_col in asked_cols) { 409 for (asked_col in asked_cols) {
412 for (col in header) { 410 for (col in header) {
413 if ((twisted <- tolower(col)) == asked_col 411 if ((twisted <- tolower(col)) == asked_col
469 ) 467 )
470 error <- tryCatch({ 468 error <- tryCatch({
471 process_sample_list( 469 process_sample_list(
472 orm, rdata, samples, 470 orm, rdata, samples,
473 show_percent = show_percent, 471 show_percent = show_percent,
474 file_grouping_var = options$class 472 file_grouping_var = options$class,
473 options = options
475 ) 474 )
476 NULL 475 NULL
477 }, error = function(e) { 476 }, error = function(e) {
478 message(e) 477 return(e)
479 e
480 }) 478 })
481 if (!is.null(mzml_tmp_dir)) { 479 if (!is.null(mzml_tmp_dir)) {
482 unlink(mzml_tmp_dir, recursive = TRUE) 480 unlink(mzml_tmp_dir, recursive = TRUE)
483 } 481 }
484 if (!is.null(error)) { 482 if (!is.null(error)) {
485 stop(error) 483 stop(error)
486 } 484 }
485 return(!is.null(error))
487 } 486 }
488 487
489 gather_mzml_files <- function(rdata) { 488 gather_mzml_files <- function(rdata) {
490 if (is.null(rdata$singlefile)) { 489 if (is.null(rdata$singlefile)) {
491 message("Extracting mxml files") 490 message("Extracting mxml files")
508 process_sample_list <- function( 507 process_sample_list <- function(
509 orm, 508 orm,
510 rdata, 509 rdata,
511 sample_names, 510 sample_names,
512 show_percent, 511 show_percent,
513 file_grouping_var = NULL 512 file_grouping_var = NULL,
513 options = list()
514 ) { 514 ) {
515 if (is.null(file_grouping_var)) { 515 if (is.null(file_grouping_var)) {
516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) 516 file_grouping_var <- find_grouping_var(rdata$variableMetadata)
517 if (is.null(file_grouping_var)) { 517 if (is.null(file_grouping_var)) {
518 stop("Malformed variableMetada.") 518 stop("Malformed variableMetada.")
585 } 585 }
586 } 586 }
587 587
588 message("Parameters from previous processes extracted.") 588 message("Parameters from previous processes extracted.")
589 589
590
591 indices <- as.numeric(unique(var_meta[, file_grouping_var]))
592 if (any(is.null(names(singlefile)[indices]))) {
593 stop(sprintf(
594 paste(
595 "Indices defined by grouping variable %s are not all present",
596 "in singlefile names (%s).\nCannot continue. Indices: %s"
597 ),
598 file_grouping_var,
599 paste(names(singlefile), collapse = ", "),
600 paste(indices, collapse = ", ")
601 ))
602 }
603 smol_xcms_set <- orm$smol_xcms_set() 590 smol_xcms_set <- orm$smol_xcms_set()
604 mz_tab_info <- new.env() 591 mz_tab_info <- new.env()
605 g <- xcms::groups(xcms_set) 592 g <- xcms::groups(xcms_set)
606 mz_tab_info$group_length <- nrow(g) 593 mz_tab_info$group_length <- nrow(g)
607 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) 594 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set)
621 608
622 invisible(smol_xcms_set$set_raw(blogified)$save()) 609 invisible(smol_xcms_set$set_raw(blogified)$save())
623 smol_xcms_set_id <- smol_xcms_set$get_id() 610 smol_xcms_set_id <- smol_xcms_set$get_id()
624 rm(smol_xcms_set) 611 rm(smol_xcms_set)
625 612
626 for (no in indices) { 613 for (no in seq_along(names(singlefile))) {
627 sample_name <- names(singlefile)[[no]] 614 sample_name <- names(singlefile)[[no]]
628 sample_path <- singlefile[[no]] 615 sample_path <- singlefile[[no]]
629 if ( 616 if (
630 is.na(no) 617 is.na(no)
631 || is.null(sample_path) 618 || is.null(sample_path)
758 next_pc_group, next_align_group 745 next_pc_group, next_align_group
759 ) { 746 ) {
760 field_names <- as.list(names(orm$feature()$fields__)) 747 field_names <- as.list(names(orm$feature()$fields__))
761 field_names[field_names == "id"] <- NULL 748 field_names[field_names == "id"] <- NULL
762 749
763 features <- list()
764 dummy_feature <- orm$feature() 750 dummy_feature <- orm$feature()
765 751
766 if (show_percent <- context$show_percent) { 752 if (show_percent <- context$show_percent) {
767 percent <- -1 753 percent <- -1
768 total <- nrow(var_meta) 754 total <- nrow(var_meta)
770 rows <- seq_len(nrow(var_meta)) 756 rows <- seq_len(nrow(var_meta))
771 if (PROCESS_SMOL_BATCH) { 757 if (PROCESS_SMOL_BATCH) {
772 758
773 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] 759 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))]
774 } 760 }
775 cluster_row <- list() 761 # features <- list()
762 features <- as.list(rows) ## allocate all memory before processing
763 # cluster_row <- list()
764 cluster_row <- as.list(rows) ## allocate all memory before processing
776 for (row in rows) { 765 for (row in rows) {
777 if (show_percent && (row / total) * 100 > percent) { 766 if (show_percent && (row / total) * 100 > percent) {
778 percent <- percent + 1 767 percent <- percent + 1
779 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) 768 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE)
780 } 769 }
841 dummy_feature, clusterID, 830 dummy_feature, clusterID,
842 context, curent_var_meta, next_pc_group, 831 context, curent_var_meta, next_pc_group,
843 next_align_group 832 next_align_group
844 ) 833 )
845 next_align_group <- next_align_group + 1 834 next_align_group <- next_align_group + 1
846 features[[length(features) + 1]] <- as.list(dummy_feature, field_names) 835 features[[row]] <- as.list(dummy_feature, field_names)
836 # features[[length(features) + 1]] <- as.list(dummy_feature, field_names)
847 dummy_feature$clear() 837 dummy_feature$clear()
848 } 838 }
849 rm(var_meta) 839 rm(var_meta)
850 message("") 840 message("")
851 message("Saving features") 841 message("Saving features")
961 cluster$set_clusterID(context$clusterID) 951 cluster$set_clusterID(context$clusterID)
962 } 952 }
963 } 953 }
964 cluster$save() 954 cluster$save()
965 feature$set_cluster(cluster) 955 feature$set_cluster(cluster)
956 feature$save()
966 return(cluster) 957 return(cluster)
967 } 958 }
968 959
969 complete_features <- function(orm, clusters, show_percent) { 960 complete_features <- function(orm, clusters, show_percent) {
970 total <- length(clusters) 961 total <- length(clusters)
1060 option_list <- list( 1051 option_list <- list(
1061 optparse::make_option( 1052 optparse::make_option(
1062 c("-v", "--version"), 1053 c("-v", "--version"),
1063 action = "store_true", 1054 action = "store_true",
1064 help = "Display this tool's version and exits" 1055 help = "Display this tool's version and exits"
1056 ),
1057 optparse::make_option(
1058 c("-V", "--verbose"),
1059 action = "store_true",
1060 help = "Does more verbose outputs",
1061 default = FALSE
1065 ), 1062 ),
1066 optparse::make_option( 1063 optparse::make_option(
1067 c("-i", "--input"), 1064 c("-i", "--input"),
1068 type = "character", 1065 type = "character",
1069 help = "The rdata path to import in XSeeker" 1066 help = "The rdata path to import in XSeeker"
1159 # } 1156 # }
1160 1157
1161 1158
1162 load(args$options$input, rdata <- new.env()) 1159 load(args$options$input, rdata <- new.env())
1163 1160
1164 process_rdata(orm, rdata, args$options) 1161 args$options$verbose <- (
1162 if (args$options$verbose) {
1163 message("Verbose outputs.")
1164 \(...) {
1165 message(sprintf(...))
1166 }
1167 } else {
1168 \(...) {
1169 }
1170 }
1171 )
1172
1173 err_code <- process_rdata(orm, rdata, args$options)
1165 1174
1166 quit(status = err_code) 1175 quit(status = err_code)