diff main.R @ 0:067a308223e3 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 19de0924a65bc65cbbf7c1fc17e9b5348305f95c
author recetox
date Fri, 10 Jun 2022 10:18:24 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main.R	Fri Jun 10 10:18:24 2022 +0000
@@ -0,0 +1,123 @@
+library(recetox.aplcms)
+library(dplyr)
+
+save_extracted_features <- function(df, filename) {
+  df <- as.data.frame(df)
+  columns <- c("mz", "pos", "sd1", "sd2", "area")
+  arrow::write_parquet(df[columns], filename)
+}
+
+save_aligned_feature_table <- function(df, filename) {
+  columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
+  arrow::write_parquet(df[columns], filename)
+}
+
+save_recovered_feature_table <- function(df, filename, out_format) {
+  columns <- c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
+  if (out_format == "recetox") {
+    peak_table <- df[columns]
+    recetox_peak_table <- rcx_aplcms_to_rcx_xmsannotator(peak_table)
+    arrow::write_parquet(recetox_peak_table, filename)
+  } else {
+    arrow::write_parquet(df[columns], filename)
+  }
+}
+
+rcx_aplcms_to_rcx_xmsannotator <- function(peak_table) {
+    col_base <- c("feature", "mz", "rt")
+    output_table <- peak_table %>% distinct(across(any_of(col_base)))
+
+    for (level in levels(factor(peak_table$sample))) {
+        subdata <- peak_table %>%
+            filter(sample == level) %>%
+            select(any_of(c(col_base, "sample_intensity"))) %>%
+            rename(!!level := "sample_intensity")
+        output_table <- inner_join(output_table, subdata, by = col_base)
+    }
+    output_table <- output_table %>% rename(peak = feature)
+    return(output_table)
+}
+
+known_table_columns <- function() {
+  c("chemical_formula", "HMDB_ID", "KEGG_compound_ID", "mass", "ion.type",
+    "m.z", "Number_profiles_processed", "Percent_found", "mz_min", "mz_max",
+    "RT_mean", "RT_sd", "RT_min", "RT_max", "int_mean(log)", "int_sd(log)",
+    "int_min(log)", "int_max(log)")
+}
+
+save_known_table <- function(df, filename) {
+  columns <- known_table_columns()
+  arrow::write_parquet(df[columns], filename)
+}
+
+read_known_table <- function(filename) {
+  arrow::read_parquet(filename, col_select = known_table_columns())
+}
+
+save_pairing <- function(df, filename) {
+  write.table(df, filename, row.names = FALSE, col.names = c("new", "old"))
+}
+
+save_all_extracted_features <- function(dfs, filenames) {
+  filenames <- tools::file_path_sans_ext(basename(filenames))
+  filenames <- paste0(filenames, ".parquet")
+  filenames <- file.path("extracted", filenames)
+  dir.create("extracted")
+  mapply(save_extracted_features, dfs, filenames)
+}
+
+save_all_corrected_features <- function(dfs, filenames) {
+  filenames <- tools::file_path_sans_ext(basename(filenames))
+  filenames <- paste0(filenames, ".parquet")
+  filenames <- file.path("corrected", filenames)
+  dir.create("corrected")
+  mapply(save_extracted_features, dfs, filenames)
+}
+
+unsupervised_main <- function(sample_files, aligned_file, recovered_file, out_format, ...) {
+  sample_files <- sort_samples_by_acquisition_number(sample_files)
+
+  res <- unsupervised(filenames = sample_files, ...)
+
+  save_all_features(res, sample_files)
+  save_all_feature_tables(res$aligned_feature_sample_table, res$recovered_feature_sample_table, aligned_file, recovered_file, out_format)
+}
+
+hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, pairing_file, aligned_file, recovered_file, out_format, ...) {
+  sample_files <- sort_samples_by_acquisition_number(sample_files)
+
+  known <- read_known_table(known_table_file)
+  res <- hybrid(filenames = sample_files, known_table = known, ...)
+
+  save_known_table(res$updated_known_table, updated_known_table_file)
+  save_pairing(res$features_known_table_pairing, pairing_file)
+
+  save_all_features(res, sample_files)
+  save_all_feature_tables(res$aligned_feature_sample_table, res$recovered_feature_sample_table, aligned_file, recovered_file, out_format)
+}
+
+save_all_features <- function(result, sample_files) {
+  save_all_extracted_features(result$extracted_features, sample_files)
+  save_all_corrected_features(result$corrected_features, sample_files)
+}
+
+save_all_feature_tables <- function(aligned_feature_sample_table,
+                                    recovered_feature_sample_table,
+                                    aligned_file,
+                                    recovered_file,
+                                    out_format) {
+  save_aligned_feature_table(aligned_feature_sample_table, aligned_file)
+  save_recovered_feature_table(recovered_feature_sample_table, recovered_file, out_format)
+}
+
+two_step_hybrid_main <- function(sample_files, known_table_file, updated_known_table_file, recovered_file, aligned_file, out_format, metadata, ...) {
+  sample_files <- sort_samples_by_acquisition_number(sample_files)
+  metadata <- read.table(metadata, sep = ",", header = TRUE)
+
+  known_table <- read_known_table(known_table_file)
+  res <- two.step.hybrid(filenames = sample_files, known.table = known_table, work_dir = getwd(), metadata = metadata, ...)
+
+  save_known_table(res$known_table, updated_known_table_file)
+  save_aligned_feature_table(res$aligned_features, aligned_file)
+  save_recovered_feature_table(res$final_features, recovered_file, out_format)
+}