diff waveica_wrapper.R @ 6:071a424241ec draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit bc3445f7c41271b0062c7674108f57708d08dd28
author recetox
date Thu, 30 May 2024 14:54:02 +0000
parents e424fa636281
children 1a2aeb8137bf
line wrap: on
line diff
--- a/waveica_wrapper.R	Mon May 20 09:20:13 2024 +0000
+++ b/waveica_wrapper.R	Thu May 30 14:54:02 2024 +0000
@@ -1,32 +1,32 @@
 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) {
-  data <- read_data(file, ft_ext)
+    data <- read_data(file, ft_ext)
 
-  if (transpose) {
-    col_names <- c("sampleName", data[[1]])
-    t_data <- data[-1]
-    t_data <- t(t_data)
-    data <- data.frame(rownames(t_data), t_data)
-    colnames(data) <- col_names
-  }
+    if (transpose) {
+        col_names <- c("sampleName", data[[1]])
+        t_data <- data[-1]
+        t_data <- t(t_data)
+        data <- data.frame(rownames(t_data), t_data)
+        colnames(data) <- col_names
+    }
 
-  if (!is.na(metadata)) {
-    mt_data <- read_data(metadata, mt_ext)
-    data <- merge(mt_data, data, by = "sampleName")
-  }
+    if (!is.na(metadata)) {
+        mt_data <- read_data(metadata, mt_ext)
+        data <- merge(mt_data, data, by = "sampleName")
+    }
 
-  return(data)
+    return(data)
 }
 
 read_data <- function(file, ext) {
-  if (ext == "csv") {
-    data <- read.csv(file, header = TRUE)
-  } else if (ext == "tsv") {
-    data <- read.csv(file, header = TRUE, sep = "\t")
-  } else {
-    data <- arrow::read_parquet(file)
-  }
+    if (ext == "csv") {
+        data <- read.csv(file, header = TRUE)
+    } else if (ext == "tsv") {
+        data <- read.csv(file, header = TRUE, sep = "\t")
+    } else {
+        data <- arrow::read_parquet(file)
+    }
 
-  return(data)
+    return(data)
 }
 
 waveica <- function(file,
@@ -40,48 +40,48 @@
                     t2,
                     alpha,
                     exclude_blanks) {
-  # get input from the Galaxy, preprocess data
-  ext <- strsplit(x = ext, split = "\\,")[[1]]
+    # get input from the Galaxy, preprocess data
+    ext <- strsplit(x = ext, split = "\\,")[[1]]
 
-  ft_ext <- ext[1]
-  mt_ext <- ext[2]
+    ft_ext <- ext[1]
+    mt_ext <- ext[2]
 
-  data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
+    data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
 
-  required_columns <- c(
-    "sampleName", "class", "sampleType",
-    "injectionOrder", "batch"
-  )
-  data <- verify_input_dataframe(data, required_columns)
+    required_columns <- c(
+        "sampleName", "class", "sampleType",
+        "injectionOrder", "batch"
+    )
+    data <- verify_input_dataframe(data, required_columns)
 
-  data <- sort_by_injection_order(data)
+    data <- sort_by_injection_order(data)
 
-  # separate data into features, batch and group
-  feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
-  features <- data[, feature_columns]
-  group <- enumerate_groups(as.character(data$sampleType))
-  batch <- data$batch
+    # separate data into features, batch and group
+    feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
+    features <- data[, feature_columns]
+    group <- enumerate_groups(as.character(data$sampleType))
+    batch <- data$batch
 
-  # run WaveICA
-  features <- recetox.waveica::waveica(
-    data = features,
-    wf = get_wf(wavelet_filter, wavelet_length),
-    batch = batch,
-    group = group,
-    K = k,
-    t = t,
-    t2 = t2,
-    alpha = alpha
-  )
+    # run WaveICA
+    features <- recetox.waveica::waveica(
+        data = features,
+        wf = get_wf(wavelet_filter, wavelet_length),
+        batch = batch,
+        group = group,
+        K = k,
+        t = t,
+        t2 = t2,
+        alpha = alpha
+    )
 
-  data[, feature_columns] <- features
+    data[, feature_columns] <- features
 
-  # remove blanks from dataset
-  if (exclude_blanks) {
-    data <- exclude_group(data, group)
-  }
+    # remove blanks from dataset
+    if (exclude_blanks) {
+        data <- exclude_group(data, group)
+    }
 
-  return(data)
+    return(data)
 }
 
 waveica_singlebatch <- function(file,
@@ -94,150 +94,150 @@
                                 alpha,
                                 cutoff,
                                 exclude_blanks) {
-  # get input from the Galaxy, preprocess data
-  ext <- strsplit(x = ext, split = "\\,")[[1]]
+    # get input from the Galaxy, preprocess data
+    ext <- strsplit(x = ext, split = "\\,")[[1]]
 
-  ft_ext <- ext[1]
-  mt_ext <- ext[2]
+    ft_ext <- ext[1]
+    mt_ext <- ext[2]
 
-  data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
+    data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
 
-  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
-  optional_columns <- c("batch")
+    required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
+    optional_columns <- c("batch")
 
-  data <- verify_input_dataframe(data, required_columns)
+    data <- verify_input_dataframe(data, required_columns)
 
-  data <- sort_by_injection_order(data)
+    data <- sort_by_injection_order(data)
 
-  feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)]
-  features <- data[, feature_columns]
-  injection_order <- data$injectionOrder
+    feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)]
+    features <- data[, feature_columns]
+    injection_order <- data$injectionOrder
 
-  # run WaveICA
-  features <- recetox.waveica::waveica_nonbatchwise(
-    data = features,
-    wf = get_wf(wavelet_filter, wavelet_length),
-    injection_order = injection_order,
-    K = k,
-    alpha = alpha,
-    cutoff = cutoff
-  )
+    # run WaveICA
+    features <- recetox.waveica::waveica_nonbatchwise(
+        data = features,
+        wf = get_wf(wavelet_filter, wavelet_length),
+        injection_order = injection_order,
+        K = k,
+        alpha = alpha,
+        cutoff = cutoff
+    )
 
-  data[, feature_columns] <- features
-  group <- enumerate_groups(as.character(data$sampleType))
-  # remove blanks from dataset
-  if (exclude_blanks) {
-    data <- exclude_group(data, group)
-  }
+    data[, feature_columns] <- features
+    group <- enumerate_groups(as.character(data$sampleType))
+    # remove blanks from dataset
+    if (exclude_blanks) {
+        data <- exclude_group(data, group)
+    }
 
-  return(data)
+    return(data)
 }
 
 
 sort_by_injection_order <- function(data) {
-  if ("batch" %in% colnames(data)) {
-    data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ]
-  } else {
-    data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ]
-  }
-  return(data)
+    if ("batch" %in% colnames(data)) {
+        data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ]
+    } else {
+        data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ]
+    }
+    return(data)
 }
 
 
 verify_input_dataframe <- function(data, required_columns) {
-  if (anyNA(data)) {
-    stop("Error: dataframe cannot contain NULL values!
+    if (anyNA(data)) {
+        stop("Error: dataframe cannot contain NULL values!
 Make sure that your dataframe does not contain empty cells")
-  } else if (!all(required_columns %in% colnames(data))) {
-    stop(
-      "Error: missing metadata!
+    } else if (!all(required_columns %in% colnames(data))) {
+        stop(
+            "Error: missing metadata!
 Make sure that the following columns are present in your dataframe: ",
-      paste(required_columns, collapse = ", ")
-    )
-  }
+            paste(required_columns, collapse = ", ")
+        )
+    }
 
-  data <- verify_column_types(data, required_columns)
+    data <- verify_column_types(data, required_columns)
 
-  return(data)
+    return(data)
 }
 
 verify_column_types <- function(data, required_columns) {
-  # Specify the column names and their expected types
-  column_types <- list(
-    "sampleName" = c("character", "factor"),
-    "class" = c("character", "factor", "integer"),
-    "sampleType" = c("character", "factor"),
-    "injectionOrder" = "integer",
-    "batch" = "integer"
-  )
+    # Specify the column names and their expected types
+    column_types <- list(
+        "sampleName" = c("character", "factor"),
+        "class" = c("character", "factor", "integer"),
+        "sampleType" = c("character", "factor"),
+        "injectionOrder" = "integer",
+        "batch" = "integer"
+    )
 
-  column_types <- column_types[required_columns]
+    column_types <- column_types[required_columns]
 
-  for (col_name in names(data)) {
-    actual_type <- class(data[[col_name]])
-    if (col_name %in% names(column_types)) {
-      expected_types <- column_types[[col_name]]
+    for (col_name in names(data)) {
+        actual_type <- class(data[[col_name]])
+        if (col_name %in% names(column_types)) {
+            expected_types <- column_types[[col_name]]
 
-      if (!actual_type %in% expected_types) {
-        stop(
-          "Column ", col_name, " is of type ", actual_type,
-          " but expected type is ",
-          paste(expected_types, collapse = " or "), "\n"
-        )
-      }
-    } else {
-      if (actual_type != "numeric") {
-        data[[col_name]] <- as.numeric(as.character(data[[col_name]]))
-      }
+            if (!actual_type %in% expected_types) {
+                stop(
+                    "Column ", col_name, " is of type ", actual_type,
+                    " but expected type is ",
+                    paste(expected_types, collapse = " or "), "\n"
+                )
+            }
+        } else {
+            if (actual_type != "numeric") {
+                data[[col_name]] <- as.numeric(as.character(data[[col_name]]))
+            }
+        }
     }
-  }
-  return(data)
+    return(data)
 }
 
 
 # Match group labels with [blank/sample/qc] and enumerate them
 enumerate_groups <- function(group) {
-  group[grepl("blank", tolower(group))] <- 0
-  group[grepl("sample", tolower(group))] <- 1
-  group[grepl("qc", tolower(group))] <- 2
+    group[grepl("blank", tolower(group))] <- 0
+    group[grepl("sample", tolower(group))] <- 1
+    group[grepl("qc", tolower(group))] <- 2
 
-  return(group)
+    return(group)
 }
 
 
 # Create appropriate input for R wavelets function
 get_wf <- function(wavelet_filter, wavelet_length) {
-  wf <- paste(wavelet_filter, wavelet_length, sep = "")
+    wf <- paste(wavelet_filter, wavelet_length, sep = "")
 
-  # exception to the wavelet function
-  if (wf == "d2") {
-    wf <- "haar"
-  }
+    # exception to the wavelet function
+    if (wf == "d2") {
+        wf <- "haar"
+    }
 
-  return(wf)
+    return(wf)
 }
 
 
 # Exclude blanks from a dataframe
 exclude_group <- function(data, group) {
-  row_idx_to_exclude <- which(group %in% 0)
-  if (length(row_idx_to_exclude) > 0) {
-    data_without_blanks <- data[-c(row_idx_to_exclude), ]
-    cat("Blank samples have been excluded from the dataframe.\n")
-    return(data_without_blanks)
-  } else {
-    return(data)
-  }
+    row_idx_to_exclude <- which(group %in% 0)
+    if (length(row_idx_to_exclude) > 0) {
+        data_without_blanks <- data[-c(row_idx_to_exclude), ]
+        cat("Blank samples have been excluded from the dataframe.\n")
+        return(data_without_blanks)
+    } else {
+        return(data)
+    }
 }
 
 store_data <- function(data, output, ext) {
-  if (ext == "parquet") {
-    arrow::write_parquet(data, output)
-  } else {
-    write.table(data,
-      file = output, sep = "\t",
-      row.names = FALSE, quote = FALSE
-    )
-  }
-  cat("Normalization has been completed.\n")
+    if (ext == "parquet") {
+        arrow::write_parquet(data, output)
+    } else {
+        write.table(data,
+            file = output, sep = "\t",
+            row.names = FALSE, quote = FALSE
+        )
+    }
+    cat("Normalization has been completed.\n")
 }