Mercurial > repos > recetox > waveica
diff waveica_wrapper.R @ 6:071a424241ec draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit bc3445f7c41271b0062c7674108f57708d08dd28
author | recetox |
---|---|
date | Thu, 30 May 2024 14:54:02 +0000 |
parents | e424fa636281 |
children | 1a2aeb8137bf |
line wrap: on
line diff
--- a/waveica_wrapper.R Mon May 20 09:20:13 2024 +0000 +++ b/waveica_wrapper.R Thu May 30 14:54:02 2024 +0000 @@ -1,32 +1,32 @@ read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { - data <- read_data(file, ft_ext) + data <- read_data(file, ft_ext) - if (transpose) { - col_names <- c("sampleName", data[[1]]) - t_data <- data[-1] - t_data <- t(t_data) - data <- data.frame(rownames(t_data), t_data) - colnames(data) <- col_names - } + if (transpose) { + col_names <- c("sampleName", data[[1]]) + t_data <- data[-1] + t_data <- t(t_data) + data <- data.frame(rownames(t_data), t_data) + colnames(data) <- col_names + } - if (!is.na(metadata)) { - mt_data <- read_data(metadata, mt_ext) - data <- merge(mt_data, data, by = "sampleName") - } + if (!is.na(metadata)) { + mt_data <- read_data(metadata, mt_ext) + data <- merge(mt_data, data, by = "sampleName") + } - return(data) + return(data) } read_data <- function(file, ext) { - if (ext == "csv") { - data <- read.csv(file, header = TRUE) - } else if (ext == "tsv") { - data <- read.csv(file, header = TRUE, sep = "\t") - } else { - data <- arrow::read_parquet(file) - } + if (ext == "csv") { + data <- read.csv(file, header = TRUE) + } else if (ext == "tsv") { + data <- read.csv(file, header = TRUE, sep = "\t") + } else { + data <- arrow::read_parquet(file) + } - return(data) + return(data) } waveica <- function(file, @@ -40,48 +40,48 @@ t2, alpha, exclude_blanks) { - # get input from the Galaxy, preprocess data - ext <- strsplit(x = ext, split = "\\,")[[1]] + # get input from the Galaxy, preprocess data + ext <- strsplit(x = ext, split = "\\,")[[1]] - ft_ext <- ext[1] - mt_ext <- ext[2] + ft_ext <- ext[1] + mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext, transpose) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) - required_columns <- c( - "sampleName", "class", "sampleType", - "injectionOrder", "batch" - ) - data <- verify_input_dataframe(data, required_columns) + required_columns <- c( + "sampleName", "class", "sampleType", + "injectionOrder", "batch" + ) + data <- verify_input_dataframe(data, required_columns) - data <- sort_by_injection_order(data) + data <- sort_by_injection_order(data) - # separate data into features, batch and group - feature_columns <- colnames(data)[!colnames(data) %in% required_columns] - features <- data[, feature_columns] - group <- enumerate_groups(as.character(data$sampleType)) - batch <- data$batch + # separate data into features, batch and group + feature_columns <- colnames(data)[!colnames(data) %in% required_columns] + features <- data[, feature_columns] + group <- enumerate_groups(as.character(data$sampleType)) + batch <- data$batch - # run WaveICA - features <- recetox.waveica::waveica( - data = features, - wf = get_wf(wavelet_filter, wavelet_length), - batch = batch, - group = group, - K = k, - t = t, - t2 = t2, - alpha = alpha - ) + # run WaveICA + features <- recetox.waveica::waveica( + data = features, + wf = get_wf(wavelet_filter, wavelet_length), + batch = batch, + group = group, + K = k, + t = t, + t2 = t2, + alpha = alpha + ) - data[, feature_columns] <- features + data[, feature_columns] <- features - # remove blanks from dataset - if (exclude_blanks) { - data <- exclude_group(data, group) - } + # remove blanks from dataset + if (exclude_blanks) { + data <- exclude_group(data, group) + } - return(data) + return(data) } waveica_singlebatch <- function(file, @@ -94,150 +94,150 @@ alpha, cutoff, exclude_blanks) { - # get input from the Galaxy, preprocess data - ext <- strsplit(x = ext, split = "\\,")[[1]] + # get input from the Galaxy, preprocess data + ext <- strsplit(x = ext, split = "\\,")[[1]] - ft_ext <- ext[1] - mt_ext <- ext[2] + ft_ext <- ext[1] + mt_ext <- ext[2] - data <- read_file(file, metadata, ft_ext, mt_ext, transpose) + data <- read_file(file, metadata, ft_ext, mt_ext, transpose) - required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") - optional_columns <- c("batch") + required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") + optional_columns <- c("batch") - data <- verify_input_dataframe(data, required_columns) + data <- verify_input_dataframe(data, required_columns) - data <- sort_by_injection_order(data) + data <- sort_by_injection_order(data) - feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] - features <- data[, feature_columns] - injection_order <- data$injectionOrder + feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] + features <- data[, feature_columns] + injection_order <- data$injectionOrder - # run WaveICA - features <- recetox.waveica::waveica_nonbatchwise( - data = features, - wf = get_wf(wavelet_filter, wavelet_length), - injection_order = injection_order, - K = k, - alpha = alpha, - cutoff = cutoff - ) + # run WaveICA + features <- recetox.waveica::waveica_nonbatchwise( + data = features, + wf = get_wf(wavelet_filter, wavelet_length), + injection_order = injection_order, + K = k, + alpha = alpha, + cutoff = cutoff + ) - data[, feature_columns] <- features - group <- enumerate_groups(as.character(data$sampleType)) - # remove blanks from dataset - if (exclude_blanks) { - data <- exclude_group(data, group) - } + data[, feature_columns] <- features + group <- enumerate_groups(as.character(data$sampleType)) + # remove blanks from dataset + if (exclude_blanks) { + data <- exclude_group(data, group) + } - return(data) + return(data) } sort_by_injection_order <- function(data) { - if ("batch" %in% colnames(data)) { - data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ] - } else { - data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ] - } - return(data) + if ("batch" %in% colnames(data)) { + data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ] + } else { + data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ] + } + return(data) } verify_input_dataframe <- function(data, required_columns) { - if (anyNA(data)) { - stop("Error: dataframe cannot contain NULL values! + if (anyNA(data)) { + stop("Error: dataframe cannot contain NULL values! Make sure that your dataframe does not contain empty cells") - } else if (!all(required_columns %in% colnames(data))) { - stop( - "Error: missing metadata! + } else if (!all(required_columns %in% colnames(data))) { + stop( + "Error: missing metadata! Make sure that the following columns are present in your dataframe: ", - paste(required_columns, collapse = ", ") - ) - } + paste(required_columns, collapse = ", ") + ) + } - data <- verify_column_types(data, required_columns) + data <- verify_column_types(data, required_columns) - return(data) + return(data) } verify_column_types <- function(data, required_columns) { - # Specify the column names and their expected types - column_types <- list( - "sampleName" = c("character", "factor"), - "class" = c("character", "factor", "integer"), - "sampleType" = c("character", "factor"), - "injectionOrder" = "integer", - "batch" = "integer" - ) + # Specify the column names and their expected types + column_types <- list( + "sampleName" = c("character", "factor"), + "class" = c("character", "factor", "integer"), + "sampleType" = c("character", "factor"), + "injectionOrder" = "integer", + "batch" = "integer" + ) - column_types <- column_types[required_columns] + column_types <- column_types[required_columns] - for (col_name in names(data)) { - actual_type <- class(data[[col_name]]) - if (col_name %in% names(column_types)) { - expected_types <- column_types[[col_name]] + for (col_name in names(data)) { + actual_type <- class(data[[col_name]]) + if (col_name %in% names(column_types)) { + expected_types <- column_types[[col_name]] - if (!actual_type %in% expected_types) { - stop( - "Column ", col_name, " is of type ", actual_type, - " but expected type is ", - paste(expected_types, collapse = " or "), "\n" - ) - } - } else { - if (actual_type != "numeric") { - data[[col_name]] <- as.numeric(as.character(data[[col_name]])) - } + if (!actual_type %in% expected_types) { + stop( + "Column ", col_name, " is of type ", actual_type, + " but expected type is ", + paste(expected_types, collapse = " or "), "\n" + ) + } + } else { + if (actual_type != "numeric") { + data[[col_name]] <- as.numeric(as.character(data[[col_name]])) + } + } } - } - return(data) + return(data) } # Match group labels with [blank/sample/qc] and enumerate them enumerate_groups <- function(group) { - group[grepl("blank", tolower(group))] <- 0 - group[grepl("sample", tolower(group))] <- 1 - group[grepl("qc", tolower(group))] <- 2 + group[grepl("blank", tolower(group))] <- 0 + group[grepl("sample", tolower(group))] <- 1 + group[grepl("qc", tolower(group))] <- 2 - return(group) + return(group) } # Create appropriate input for R wavelets function get_wf <- function(wavelet_filter, wavelet_length) { - wf <- paste(wavelet_filter, wavelet_length, sep = "") + wf <- paste(wavelet_filter, wavelet_length, sep = "") - # exception to the wavelet function - if (wf == "d2") { - wf <- "haar" - } + # exception to the wavelet function + if (wf == "d2") { + wf <- "haar" + } - return(wf) + return(wf) } # Exclude blanks from a dataframe exclude_group <- function(data, group) { - row_idx_to_exclude <- which(group %in% 0) - if (length(row_idx_to_exclude) > 0) { - data_without_blanks <- data[-c(row_idx_to_exclude), ] - cat("Blank samples have been excluded from the dataframe.\n") - return(data_without_blanks) - } else { - return(data) - } + row_idx_to_exclude <- which(group %in% 0) + if (length(row_idx_to_exclude) > 0) { + data_without_blanks <- data[-c(row_idx_to_exclude), ] + cat("Blank samples have been excluded from the dataframe.\n") + return(data_without_blanks) + } else { + return(data) + } } store_data <- function(data, output, ext) { - if (ext == "parquet") { - arrow::write_parquet(data, output) - } else { - write.table(data, - file = output, sep = "\t", - row.names = FALSE, quote = FALSE - ) - } - cat("Normalization has been completed.\n") + if (ext == "parquet") { + arrow::write_parquet(data, output) + } else { + write.table(data, + file = output, sep = "\t", + row.names = FALSE, quote = FALSE + ) + } + cat("Normalization has been completed.\n") }