recetox/waveica: diff of waveica_wrapper.R @ 7:1a2aeb8137bf (draft)
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
author:   recetox
date:     Thu, 06 Jun 2024 12:25:05 +0000
parents:  071a424241ec
children: bf32ae95a06f
--- a/waveica_wrapper.R	Thu May 30 14:54:02 2024 +0000
+++ b/waveica_wrapper.R	Thu Jun 06 12:25:05 2024 +0000
@@ -3,10 +3,7 @@
 
     if (transpose) {
         col_names <- c("sampleName", data[[1]])
-        t_data <- data[-1]
-        t_data <- t(t_data)
-        data <- data.frame(rownames(t_data), t_data)
-        colnames(data) <- col_names
+        data <- tranpose_data(data, col_names)
     }
 
     if (!is.na(metadata)) {
@@ -133,7 +130,6 @@
     return(data)
 }
 
-
 sort_by_injection_order <- function(data) {
     if ("batch" %in% colnames(data)) {
         data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ]
@@ -143,7 +139,6 @@
     return(data)
 }
 
-
 verify_input_dataframe <- function(data, required_columns) {
     if (anyNA(data)) {
         stop("Error: dataframe cannot contain NULL values!
@@ -194,7 +189,6 @@
     return(data)
 }
 
-
 # Match group labels with [blank/sample/qc] and enumerate them
 enumerate_groups <- function(group) {
     group[grepl("blank", tolower(group))] <- 0
@@ -204,7 +198,6 @@
     return(group)
 }
 
-
 # Create appropriate input for R wavelets function
 get_wf <- function(wavelet_filter, wavelet_length) {
     wf <- paste(wavelet_filter, wavelet_length, sep = "")
@@ -217,7 +210,6 @@
     return(wf)
 }
 
-
 # Exclude blanks from a dataframe
 exclude_group <- function(data, group) {
     row_idx_to_exclude <- which(group %in% 0)
@@ -230,14 +222,62 @@
     }
 }
 
-store_data <- function(data, output, ext) {
+store_data <- function(data, feature_output, metadata_output, ext, split_output = FALSE) {
     if (ext == "parquet") {
-        arrow::write_parquet(data, output)
+        if (split_output == TRUE) {
+            split_df <- split_output(data)
+            arrow::write_parquet(split_df$metadata, metadata_output)
+            arrow::write_parquet(split_df$feature_table, feature_output)
+        } else {
+            arrow::write_parquet(data, feature_output)
+        }
     } else {
-        write.table(data,
-            file = output, sep = "\t",
-            row.names = FALSE, quote = FALSE
-        )
+        if (split_output == TRUE) {
+            split_df <- split_output(data)
+            write.table(split_df$metadata,
+                file = metadata_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+            write.table(split_df$feature_table,
+                file = feature_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+        } else {
+            write.table(data,
+                file = feature_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+        }
     }
     cat("Normalization has been completed.\n")
 }
+
+split_output <- function(df) {
+    required_columns_set1 <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
+    required_columns_set2 <- c("sampleName", "class", "sampleType", "injectionOrder")
+
+    if (all(required_columns_set1 %in% colnames(df))) {
+        metadata_df <- df[, required_columns_set1, drop = FALSE]
+        df <- df[, -c(2:5)]
+    } else if (all(required_columns_set2 %in% colnames(df))) {
+        metadata_df <- df[, required_columns_set2, drop = FALSE]
+        df <- df[, -c(2:4)]
+    } else {
+        stop("Neither set of required columns is present in the dataframe.")
+    }
+
+    # Transpose the feature table
+    col_names <- c("id", as.vector(df[[1]]))
+    feature_table <- tranpose_data(df, col_names)
+
+    return(list(metadata = metadata_df, feature_table = feature_table))
+}
+
+tranpose_data <- function(data, column_names) {
+    t_data <- data[-1]
+    t_data <- t(t_data)
+    tranposed_data <- data.frame(rownames(t_data), t_data)
+    colnames(tranposed_data) <- column_names
+
+    return(tranposed_data)
+}
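
For orientation, below is a minimal usage sketch of the revised store_data() interface introduced by this changeset. The input data frame, the ext value, and the output file names are illustrative assumptions; the function signatures, the expected metadata columns, and the helper names (including the tranpose_data spelling) are taken verbatim from the diff above. Any ext other than "parquet" falls through to the tab-separated branch.

# Sketch only: assumes waveica_wrapper.R defines the functions shown above
# and can be sourced without side effects.
source("waveica_wrapper.R")

# Hypothetical normalized table in the layout split_output() expects:
# sampleName, then the metadata columns, then the feature columns.
normalized <- data.frame(
    sampleName = c("S1", "S2"),
    class = c("case", "control"),
    sampleType = c("sample", "sample"),
    injectionOrder = c(1, 2),
    batch = c(1, 1),
    M100T50 = c(123.4, 567.8),
    M200T80 = c(234.5, 678.9)
)

# split_output = TRUE writes the sample metadata and the transposed feature
# table (features as rows, samples as columns) to two separate files;
# split_output = FALSE keeps the previous single-file behaviour, now written
# to feature_output.
store_data(normalized,
    feature_output = "feature_table.tsv",
    metadata_output = "metadata.tsv",
    ext = "tabular",
    split_output = TRUE
)

Because the example table contains a batch column, split_output() keeps all five metadata columns; without it, the four-column set (sampleName, class, sampleType, injectionOrder) is used instead.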