Mercurial > repos > lain > xseekerpreparator
comparison XSeekerPreparator.R @ 20:ce94e7a141bb draft default tip
" master branch Updating"
| author | lain |
|---|---|
| date | Tue, 06 Dec 2022 10:18:10 +0000 |
| parents | 2937e72e5891 |
| children |
comparison
equal
deleted
inserted
replaced
| 19:2937e72e5891 | 20:ce94e7a141bb |
|---|---|
| 196 } | 196 } |
| 197 | 197 |
| 198 search_tree <- function(path, target) { | 198 search_tree <- function(path, target) { |
| 199 target <- tolower(target) | 199 target <- tolower(target) |
| 200 for (file in list.files(path)) { | 200 for (file in list.files(path)) { |
| 201 if (is.dir(file)) { | 201 if (fs::is.dir(file)) { |
| 202 result <- search_tree(file.path(path, file), target) | 202 result <- search_tree(file.path(path, file), target) |
| 203 if (!is.null(result)) { | 203 if (!is.null(result)) { |
| 204 return(result) | 204 return(result) |
| 205 } | 205 } |
| 206 } else if (tolower(file) == target) { | 206 } else if (tolower(file) == target) { |
| 398 return(csv_header_translator(translator, compounds)) | 398 return(csv_header_translator(translator, compounds)) |
| 399 } | 399 } |
| 400 | 400 |
| 401 guess_translator <- function(header) { | 401 guess_translator <- function(header) { |
| 402 result <- list( | 402 result <- list( |
| 403 # HMDB_ID = NULL, | |
| 404 mz = NULL, | 403 mz = NULL, |
| 405 name = NULL, | 404 name = NULL, |
| 406 common_name = NULL, | 405 common_name = NULL, |
| 407 formula = NULL, | 406 formula = NULL |
| 408 # inchi_key = NULL | |
| 409 ) | 407 ) |
| 410 asked_cols <- names(result) | 408 asked_cols <- names(result) |
| 411 for (asked_col in asked_cols) { | 409 for (asked_col in asked_cols) { |
| 412 for (col in header) { | 410 for (col in header) { |
| 413 if ((twisted <- tolower(col)) == asked_col | 411 if ((twisted <- tolower(col)) == asked_col |
| 469 ) | 467 ) |
| 470 error <- tryCatch({ | 468 error <- tryCatch({ |
| 471 process_sample_list( | 469 process_sample_list( |
| 472 orm, rdata, samples, | 470 orm, rdata, samples, |
| 473 show_percent = show_percent, | 471 show_percent = show_percent, |
| 474 file_grouping_var = options$class | 472 file_grouping_var = options$class, |
| 473 options = options | |
| 475 ) | 474 ) |
| 476 NULL | 475 NULL |
| 477 }, error = function(e) { | 476 }, error = function(e) { |
| 478 message(e) | 477 return(e) |
| 479 e | |
| 480 }) | 478 }) |
| 481 if (!is.null(mzml_tmp_dir)) { | 479 if (!is.null(mzml_tmp_dir)) { |
| 482 unlink(mzml_tmp_dir, recursive = TRUE) | 480 unlink(mzml_tmp_dir, recursive = TRUE) |
| 483 } | 481 } |
| 484 if (!is.null(error)) { | 482 if (!is.null(error)) { |
| 485 stop(error) | 483 stop(error) |
| 486 } | 484 } |
| 485 return(!is.null(error)) | |
| 487 } | 486 } |
| 488 | 487 |
| 489 gather_mzml_files <- function(rdata) { | 488 gather_mzml_files <- function(rdata) { |
| 490 if (is.null(rdata$singlefile)) { | 489 if (is.null(rdata$singlefile)) { |
| 491 message("Extracting mxml files") | 490 message("Extracting mxml files") |
| 508 process_sample_list <- function( | 507 process_sample_list <- function( |
| 509 orm, | 508 orm, |
| 510 rdata, | 509 rdata, |
| 511 sample_names, | 510 sample_names, |
| 512 show_percent, | 511 show_percent, |
| 513 file_grouping_var = NULL | 512 file_grouping_var = NULL, |
| 513 options = list() | |
| 514 ) { | 514 ) { |
| 515 if (is.null(file_grouping_var)) { | 515 if (is.null(file_grouping_var)) { |
| 516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) | 516 file_grouping_var <- find_grouping_var(rdata$variableMetadata) |
| 517 if (is.null(file_grouping_var)) { | 517 if (is.null(file_grouping_var)) { |
| 518 stop("Malformed variableMetada.") | 518 stop("Malformed variableMetada.") |
| 585 } | 585 } |
| 586 } | 586 } |
| 587 | 587 |
| 588 message("Parameters from previous processes extracted.") | 588 message("Parameters from previous processes extracted.") |
| 589 | 589 |
| 590 | |
| 591 indices <- as.numeric(unique(var_meta[, file_grouping_var])) | |
| 592 if (any(is.null(names(singlefile)[indices]))) { | |
| 593 stop(sprintf( | |
| 594 paste( | |
| 595 "Indices defined by grouping variable %s are not all present", | |
| 596 "in singlefile names (%s).\nCannot continue. Indices: %s" | |
| 597 ), | |
| 598 file_grouping_var, | |
| 599 paste(names(singlefile), collapse = ", "), | |
| 600 paste(indices, collapse = ", ") | |
| 601 )) | |
| 602 } | |
| 603 smol_xcms_set <- orm$smol_xcms_set() | 590 smol_xcms_set <- orm$smol_xcms_set() |
| 604 mz_tab_info <- new.env() | 591 mz_tab_info <- new.env() |
| 605 g <- xcms::groups(xcms_set) | 592 g <- xcms::groups(xcms_set) |
| 606 mz_tab_info$group_length <- nrow(g) | 593 mz_tab_info$group_length <- nrow(g) |
| 607 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) | 594 mz_tab_info$dataset_path <- xcms::filepaths(xcms_set) |
| 621 | 608 |
| 622 invisible(smol_xcms_set$set_raw(blogified)$save()) | 609 invisible(smol_xcms_set$set_raw(blogified)$save()) |
| 623 smol_xcms_set_id <- smol_xcms_set$get_id() | 610 smol_xcms_set_id <- smol_xcms_set$get_id() |
| 624 rm(smol_xcms_set) | 611 rm(smol_xcms_set) |
| 625 | 612 |
| 626 for (no in indices) { | 613 for (no in seq_along(names(singlefile))) { |
| 627 sample_name <- names(singlefile)[[no]] | 614 sample_name <- names(singlefile)[[no]] |
| 628 sample_path <- singlefile[[no]] | 615 sample_path <- singlefile[[no]] |
| 629 if ( | 616 if ( |
| 630 is.na(no) | 617 is.na(no) |
| 631 || is.null(sample_path) | 618 || is.null(sample_path) |
| 758 next_pc_group, next_align_group | 745 next_pc_group, next_align_group |
| 759 ) { | 746 ) { |
| 760 field_names <- as.list(names(orm$feature()$fields__)) | 747 field_names <- as.list(names(orm$feature()$fields__)) |
| 761 field_names[field_names == "id"] <- NULL | 748 field_names[field_names == "id"] <- NULL |
| 762 | 749 |
| 763 features <- list() | |
| 764 dummy_feature <- orm$feature() | 750 dummy_feature <- orm$feature() |
| 765 | 751 |
| 766 if (show_percent <- context$show_percent) { | 752 if (show_percent <- context$show_percent) { |
| 767 percent <- -1 | 753 percent <- -1 |
| 768 total <- nrow(var_meta) | 754 total <- nrow(var_meta) |
| 770 rows <- seq_len(nrow(var_meta)) | 756 rows <- seq_len(nrow(var_meta)) |
| 771 if (PROCESS_SMOL_BATCH) { | 757 if (PROCESS_SMOL_BATCH) { |
| 772 | 758 |
| 773 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] | 759 rows <- rows[1:as.integer(FAST_FEATURE_RATIO / 100.0 * length(rows))] |
| 774 } | 760 } |
| 775 cluster_row <- list() | 761 # features <- list() |
| 762 features <- as.list(rows) ## allocate all memory before processing | |
| 763 # cluster_row <- list() | |
| 764 cluster_row <- as.list(rows) ## allocate all memory before processing | |
| 776 for (row in rows) { | 765 for (row in rows) { |
| 777 if (show_percent && (row / total) * 100 > percent) { | 766 if (show_percent && (row / total) * 100 > percent) { |
| 778 percent <- percent + 1 | 767 percent <- percent + 1 |
| 779 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) | 768 message("\r", sprintf("\r%d %%", percent), appendLF = FALSE) |
| 780 } | 769 } |
| 841 dummy_feature, clusterID, | 830 dummy_feature, clusterID, |
| 842 context, curent_var_meta, next_pc_group, | 831 context, curent_var_meta, next_pc_group, |
| 843 next_align_group | 832 next_align_group |
| 844 ) | 833 ) |
| 845 next_align_group <- next_align_group + 1 | 834 next_align_group <- next_align_group + 1 |
| 846 features[[length(features) + 1]] <- as.list(dummy_feature, field_names) | 835 features[[row]] <- as.list(dummy_feature, field_names) |
| 836 # features[[length(features) + 1]] <- as.list(dummy_feature, field_names) | |
| 847 dummy_feature$clear() | 837 dummy_feature$clear() |
| 848 } | 838 } |
| 849 rm(var_meta) | 839 rm(var_meta) |
| 850 message("") | 840 message("") |
| 851 message("Saving features") | 841 message("Saving features") |
| 961 cluster$set_clusterID(context$clusterID) | 951 cluster$set_clusterID(context$clusterID) |
| 962 } | 952 } |
| 963 } | 953 } |
| 964 cluster$save() | 954 cluster$save() |
| 965 feature$set_cluster(cluster) | 955 feature$set_cluster(cluster) |
| 956 feature$save() | |
| 966 return(cluster) | 957 return(cluster) |
| 967 } | 958 } |
| 968 | 959 |
| 969 complete_features <- function(orm, clusters, show_percent) { | 960 complete_features <- function(orm, clusters, show_percent) { |
| 970 total <- length(clusters) | 961 total <- length(clusters) |
| 1060 option_list <- list( | 1051 option_list <- list( |
| 1061 optparse::make_option( | 1052 optparse::make_option( |
| 1062 c("-v", "--version"), | 1053 c("-v", "--version"), |
| 1063 action = "store_true", | 1054 action = "store_true", |
| 1064 help = "Display this tool's version and exits" | 1055 help = "Display this tool's version and exits" |
| 1056 ), | |
| 1057 optparse::make_option( | |
| 1058 c("-V", "--verbose"), | |
| 1059 action = "store_true", | |
| 1060 help = "Does more verbose outputs", | |
| 1061 default = FALSE | |
| 1065 ), | 1062 ), |
| 1066 optparse::make_option( | 1063 optparse::make_option( |
| 1067 c("-i", "--input"), | 1064 c("-i", "--input"), |
| 1068 type = "character", | 1065 type = "character", |
| 1069 help = "The rdata path to import in XSeeker" | 1066 help = "The rdata path to import in XSeeker" |
| 1159 # } | 1156 # } |
| 1160 | 1157 |
| 1161 | 1158 |
| 1162 load(args$options$input, rdata <- new.env()) | 1159 load(args$options$input, rdata <- new.env()) |
| 1163 | 1160 |
| 1164 process_rdata(orm, rdata, args$options) | 1161 args$options$verbose <- ( |
| 1162 if (args$options$verbose) { | |
| 1163 message("Verbose outputs.") | |
| 1164 \(...) { | |
| 1165 message(sprintf(...)) | |
| 1166 } | |
| 1167 } else { | |
| 1168 \(...) { | |
| 1169 } | |
| 1170 } | |
| 1171 ) | |
| 1172 | |
| 1173 err_code <- process_rdata(orm, rdata, args$options) | |
| 1165 | 1174 |
| 1166 quit(status = err_code) | 1175 quit(status = err_code) |
