Mercurial > repos > recetox > recetox_aplcms_align_features
diff utils.R @ 0:57c644d3f24c draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 19de0924a65bc65cbbf7c1fc17e9b5348305f95c
author | recetox |
---|---|
date | Fri, 10 Jun 2022 10:17:46 +0000 |
parents | |
children | abe783e0daca |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.R Fri Jun 10 10:17:46 2022 +0000 @@ -0,0 +1,149 @@ +library(recetox.aplcms) + +align_features <- function(sample_names, ...) { + aligned <- feature.align(...) + feature_names <- seq_len(nrow(aligned$pk.times)) + + list( + mz_tolerance = as.numeric(aligned$mz.tol), + rt_tolerance = as.numeric(aligned$chr.tol), + rt_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$pk.times), + int_crosstab = as_feature_crosstab(feature_names, sample_names, aligned$aligned.ftrs) + ) +} + +get_sample_name <- function(filename) { + tools::file_path_sans_ext(basename(filename)) +} + +as_feature_crosstab <- function(feature_names, sample_names, data) { + colnames(data) <- c("mz", "rt", "mz_min", "mz_max", sample_names) + rownames(data) <- feature_names + as.data.frame(data) +} + +as_feature_sample_table <- function(rt_crosstab, int_crosstab) { + feature_names <- rownames(rt_crosstab) + sample_names <- colnames(rt_crosstab)[- (1:4)] + + feature_table <- data.frame( + feature = feature_names, + mz = rt_crosstab[, 1], + rt = rt_crosstab[, 2] + ) + + # series of conversions to produce a table type from data.frame + rt_crosstab <- as.table(as.matrix(rt_crosstab[, - (1:4)])) + int_crosstab <- as.table(as.matrix(int_crosstab[, - (1:4)])) + + crosstab_axes <- list(feature = feature_names, sample = sample_names) + dimnames(rt_crosstab) <- dimnames(int_crosstab) <- crosstab_axes + + x <- as.data.frame(rt_crosstab, responseName = "sample_rt") + y <- as.data.frame(int_crosstab, responseName = "sample_intensity") + + data <- merge(x, y, by = c("feature", "sample")) + data <- merge(feature_table, data, by = "feature") + data +} + +load_features <- function(files) { + files_list <- sort_samples_by_acquisition_number(files) + features <- lapply(files_list, arrow::read_parquet) + features <- lapply(features, as.matrix) + return(features) +} + +save_data_as_parquet_files <- function(data, subdir) { + dir.create(subdir) + for (i in 0:(length(data) - 1)) { + filename <- file.path(subdir, paste0(subdir, "_features_", i, ".parquet")) + arrow::write_parquet(as.data.frame(data[i + 1]), filename) + } +} + +save_aligned_features <- function(aligned, rt_file, int_file, tol_file) { + arrow::write_parquet(as.data.frame(aligned$rt_crosstab), rt_file) + arrow::write_parquet(as.data.frame(aligned$int_crosstab), int_file) + + mz_tolerance <- c(aligned$mz_tolerance) + rt_tolerance <- c(aligned$rt_tolerance) + arrow::write_parquet(data.frame(mz_tolerance, rt_tolerance), tol_file) +} + +load_aligned_features <- function(rt_file, int_file, tol_file) { + rt_cross_table <- arrow::read_parquet(rt_file) + int_cross_table <- arrow::read_parquet(int_file) + tolerances_table <- arrow::read_parquet(tol_file) + + result <- list() + result$mz_tolerance <- tolerances_table$mz_tolerance + result$rt_tolerance <- tolerances_table$rt_tolerance + result$rt_crosstab <- rt_cross_table + result$int_crosstab <- int_cross_table + return(result) +} + +recover_signals <- function(cluster, + filenames, + extracted, + corrected, + aligned, + mz_tol = 1e-05, + mz_range = NA, + rt_range = NA, + use_observed_range = TRUE, + min_bandwidth = NA, + max_bandwidth = NA, + recover_min_count = 3) { + if (!is(cluster, "cluster")) { + cluster <- parallel::makeCluster(cluster) + on.exit(parallel::stopCluster(cluster)) + } + + clusterExport(cluster, c("extracted", "corrected", "aligned", "recover.weaker")) + clusterEvalQ(cluster, library("splines")) + + recovered <- parLapply(cluster, seq_along(filenames), function(i) { + recover.weaker( + loc = i, + filename = filenames[[i]], + this.f1 = extracted[[i]], + this.f2 = corrected[[i]], + pk.times = aligned$rt_crosstab, + aligned.ftrs = aligned$int_crosstab, + orig.tol = mz_tol, + align.mz.tol = aligned$mz_tolerance, + align.chr.tol = aligned$rt_tolerance, + mz.range = mz_range, + chr.range = rt_range, + use.observed.range = use_observed_range, + bandwidth = 0.5, + min.bw = min_bandwidth, + max.bw = max_bandwidth, + recover.min.count = recover_min_count + ) + }) + + feature_table <- aligned$rt_crosstab[, 1:4] + rt_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.times)) + int_crosstab <- cbind(feature_table, sapply(recovered, function(x) x$this.ftrs)) + + feature_names <- rownames(feature_table) + sample_names <- colnames(aligned$rt_crosstab[, - (1:4)]) + + list( + extracted_features = lapply(recovered, function(x) x$this.f1), + corrected_features = lapply(recovered, function(x) x$this.f2), + rt_crosstab = as_feature_crosstab(feature_names, sample_names, rt_crosstab), + int_crosstab = as_feature_crosstab(feature_names, sample_names, int_crosstab) + ) +} + +create_feature_sample_table <- function(features) { + table <- as_feature_sample_table( + rt_crosstab = features$rt_crosstab, + int_crosstab = features$int_crosstab + ) + return(table) +}