diff waveica_wrapper.R @ 2:6480c6d5fa36 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
author recetox
date Fri, 23 Jun 2023 07:27:29 +0000
parents b77023c41c76
children dbbedb14b44c
line wrap: on
line diff
--- a/waveica_wrapper.R	Thu Sep 29 15:21:04 2022 +0000
+++ b/waveica_wrapper.R	Fri Jun 23 07:27:29 2023 +0000
@@ -1,6 +1,14 @@
-read_file <- function(file, metadata, ft_ext, mt_ext) {
+read_file <- function(file, metadata, ft_ext, mt_ext, transpose) {
   data <- read_data(file, ft_ext)
 
+  if (transpose) {
+    col_names <- c("sampleName", data[[1]])
+    t_data <- data[-1]
+    t_data <- t(t_data)
+    data <- data.frame(rownames(t_data), t_data)
+    colnames(data) <- col_names
+  }
+
   if (!is.na(metadata)) {
     mt_data <- read_data(metadata, mt_ext)
     data <- merge(mt_data, data, by = "sampleName")
@@ -24,6 +32,7 @@
 waveica <- function(file,
                     metadata = NA,
                     ext,
+                    transpose = FALSE,
                     wavelet_filter,
                     wavelet_length,
                     k,
@@ -31,17 +40,19 @@
                     t2,
                     alpha,
                     exclude_blanks) {
-
   # get input from the Galaxy, preprocess data
   ext <- strsplit(x = ext, split = "\\,")[[1]]
 
   ft_ext <- ext[1]
   mt_ext <- ext[2]
 
-  data <- read_file(file, metadata, ft_ext, mt_ext)
+  data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
 
-  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
-  verify_input_dataframe(data, required_columns)
+  required_columns <- c(
+    "sampleName", "class", "sampleType",
+    "injectionOrder", "batch"
+  )
+  data <- verify_input_dataframe(data, required_columns)
 
   data <- sort_by_injection_order(data)
 
@@ -76,28 +87,30 @@
 waveica_singlebatch <- function(file,
                                 metadata = NA,
                                 ext,
+                                transpose = FALSE,
                                 wavelet_filter,
                                 wavelet_length,
                                 k,
                                 alpha,
                                 cutoff,
                                 exclude_blanks) {
-
   # get input from the Galaxy, preprocess data
   ext <- strsplit(x = ext, split = "\\,")[[1]]
 
   ft_ext <- ext[1]
   mt_ext <- ext[2]
 
-  data <- read_file(file, metadata, ft_ext, mt_ext)
+  data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
 
   required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
   optional_columns <- c("batch")
-  verify_input_dataframe(data, required_columns)
+
+  data <- verify_input_dataframe(data, required_columns)
 
   data <- sort_by_injection_order(data)
 
-  feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)]
+  feature_columns <- colnames(data)[!colnames(data) %in%
+    c(required_columns, optional_columns)]
   features <- data[, feature_columns]
   injection_order <- data$injectionOrder
 
@@ -112,7 +125,7 @@
   )
 
   data[, feature_columns] <- features
-
+  group <- enumerate_groups(as.character(data$sampleType))
   # remove blanks from dataset
   if (exclude_blanks) {
     data <- exclude_group(data, group)
@@ -142,9 +155,49 @@
     stop("Error: dataframe cannot contain NULL values!
 Make sure that your dataframe does not contain empty cells")
   } else if (!all(required_columns %in% colnames(data))) {
-    stop("Error: missing metadata!
-Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", "))
+    stop(
+      "Error: missing metadata!
+Make sure that the following columns are present in your dataframe: ",
+      paste(required_columns, collapse = ", ")
+    )
   }
+
+  data <- verify_column_types(data, required_columns)
+
+  return(data)
+}
+
+#' Validate metadata column types and coerce the remaining columns to numeric.
+#'
+#' Columns named in `required_columns` must carry one of the classes listed
+#' for them in `column_types`; otherwise an error is raised. Every other
+#' column is converted with `as.numeric(as.character(...))` unless it is
+#' already of class "numeric".
+#'
+#' @param data Data frame (anything supporting `names()` and `[[`).
+#' @param required_columns Character vector of column names to type-check.
+#' @return `data`, with all non-checked columns converted to numeric.
+verify_column_types <- function(data, required_columns) {
+  # Specify the column names and their expected types
+  column_types <- list(
+    "sampleName" = c("character", "factor"),
+    "class" = c("character", "factor"),
+    "sampleType" = c("character", "factor"),
+    "injectionOrder" = "integer",
+    "batch" = "integer"
+  )
+
+  # Only the columns requested by the caller are type-checked; any column
+  # left out of `required_columns` falls through to the numeric coercion.
+  column_types <- column_types[required_columns]
+
+  for (col_name in names(data)) {
+    # class() can return several classes (e.g. c("ordered", "factor")), so
+    # membership is tested with any()/%in% to keep each condition scalar.
+    actual_types <- class(data[[col_name]])
+    if (col_name %in% names(column_types)) {
+      expected_types <- column_types[[col_name]]
+
+      if (!any(actual_types %in% expected_types)) {
+        stop(
+          "Column ", col_name, " is of type ",
+          paste(actual_types, collapse = "/"),
+          " but expected type is ",
+          paste(expected_types, collapse = " or "), "\n"
+        )
+      }
+    } else if (!"numeric" %in% actual_types) {
+      # Round-trip through character so factors yield their labels rather
+      # than their internal integer codes.
+      data[[col_name]] <- as.numeric(as.character(data[[col_name]]))
+    }
+  }
+  return(data)
+}
 
 
@@ -187,7 +240,10 @@
   if (ext == "csv") {
     write.csv(data, file = output, row.names = FALSE, quote = FALSE)
   } else if (ext == "tsv") {
-    write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE)
+    write.table(data,
+      file = output, sep = "\t",
+      row.names = FALSE, quote = FALSE
+    )
   } else {
     arrow::write_parquet(data, sink = output)
   }