annotate waveica_wrapper.R @ 10:821062fc5782 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
author recetox
date Fri, 04 Jul 2025 09:43:22 +0000
parents 6fc9f6dbcef5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Read data from a file in the specified format (csv, tsv/tabular, or parquet)
#
# Arguments:
#   file       Path of the file to read.
#   ext        Format selector; one of "csv", "tsv", "tabular" or "parquet".
#              Any other value raises an error.
#   transpose  If TRUE, the table is passed through tranpose_data() together
#              with new column names built from "sampleName" followed by the
#              values of the first column.
#
# Returns a list with two elements:
#   data                    the table that was read (transposed on request)
#   original_first_colname  name of the first column as read from the file,
#                           captured before any transposition
read_data <- function(file, ext, transpose = FALSE) {
  # Run `reader` and, on failure, re-raise the error with the attempted
  # format and the file path prepended so the user sees which input failed.
  read_or_stop <- function(reader, failure_prefix) {
    tryCatch(
      reader(),
      error = function(e) {
        stop(
          paste0(failure_prefix, file, "\nError: ", conditionMessage(e))
        )
      }
    )
  }

  if (ext == "csv") {
    data <- read_or_stop(
      function() read.csv(file, header = TRUE),
      paste0(
        "Failed to read as CSV. The file may not ",
        "be a valid text file or may be corrupted: "
      )
    )
  } else if (ext == "tsv" || ext == "tabular") {
    data <- read_or_stop(
      function() read.csv(file, header = TRUE, sep = "\t"),
      paste0(
        # The trailing space keeps the two sentences from running together.
        "Failed to read as TSV/tabular. ",
        "The file may not be a valid text file or may be corrupted: "
      )
    )
  } else if (ext == "parquet") {
    # Wrapped like the text formats so a broken parquet file is reported
    # with the same kind of message.
    data <- read_or_stop(
      function() arrow::read_parquet(file),
      paste0(
        "Failed to read as Parquet. The file may not ",
        "be a valid parquet file or may be corrupted: "
      )
    )
  } else {
    stop(paste("Unsupported file extension or format for reading:", ext))
  }

  # Remember the header of the first column before it is possibly replaced
  # by the transposition below.
  original_first_colname <- colnames(data)[1]
  if (transpose) {
    col_names <- c("sampleName", data[[1]])
    data <- tranpose_data(data, col_names)
  }
  list(data = data, original_first_colname = original_first_colname)
}
b77023c41c76 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit d82e7dad96bebe9424ac7bf490e2786d82c3681a
recetox
parents: 0
diff changeset
47
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Main function for batchwise WaveICA normalization
#
# Reads the feature and metadata tables, merges them on "sampleName",
# verifies the required columns, runs recetox.waveica::waveica() and returns
# the processed table.
#
# Arguments:
#   data_matrix_file      Path of the feature table.
#   sample_metadata_file  Path of the sample metadata table.
#   ft_ext, mt_ext        Format selectors passed to read_data() for the
#                         feature and metadata files respectively.
#   wavelet_filter        Passed, with wavelet_length, to get_wf() to build
#                         the `wf` argument of recetox.waveica::waveica().
#   wavelet_length        Also used as the requested wavelet level, which must
#                         not exceed floor(log2(number of samples)).
#   k, t, t2, alpha       Forwarded unchanged to recetox.waveica::waveica()
#                         as K, t, t2 and alpha.
#   exclude_blanks        If TRUE, the data is passed through exclude_group()
#                         after normalization.
#   transpose             Forwarded to read_data() for the feature table and
#                         to final_data_processing().
#
# Returns the value produced by final_data_processing().
waveica <- function(data_matrix_file,
                    sample_metadata_file,
                    ft_ext,
                    mt_ext,
                    wavelet_filter,
                    wavelet_length,
                    k,
                    t,
                    t2,
                    alpha,
                    exclude_blanks,
                    transpose = FALSE) {
  read_features_response <- read_data(data_matrix_file, ft_ext, transpose)
  features <- read_features_response$data
  original_first_colname <- read_features_response$original_first_colname

  # The metadata table is never transposed.
  metadata <- read_data(sample_metadata_file, mt_ext)$data

  required_columns <- c(
    "sampleName", "class", "sampleType",
    "injectionOrder", "batch"
  )

  # Ensure both tables have a sampleName column. This runs before the column
  # selection below so that a metadata table lacking sampleName yields this
  # message rather than a column-selection error.
  if (!"sampleName" %in% colnames(features) || !"sampleName" %in% colnames(metadata)) { # nolint
    stop("Both feature and metadata tables must contain a 'sampleName' column.")
  }

  # all_of() marks required_columns as an external character vector; passing
  # it bare is deprecated by tidyselect.
  metadata <- dplyr::select(metadata, dplyr::all_of(required_columns))

  data <- merge(metadata, features, by = "sampleName")
  data <- verify_input_dataframe(data, required_columns)
  data <- sort_by_injection_order(data)

  # Separate features, batch, and group columns
  feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
  # drop = FALSE keeps a table even when there is a single feature column,
  # so nrow() below stays defined.
  features <- data[, feature_columns, drop = FALSE]
  group <- enumerate_groups(as.character(data$sampleType))
  batch <- data$batch

  # Check that wavelet level is not too high for the number of samples
  n_samples <- nrow(features)
  if (n_samples < 1) {
    stop(
      "No samples available for normalization after merging ",
      "the feature and metadata tables."
    )
  }
  max_level <- floor(log2(n_samples))
  requested_level <- as.numeric(wavelet_length)
  if (length(requested_level) != 1 || is.na(requested_level)) {
    stop("Wavelet length/level must be a single numeric value.")
  }
  if (requested_level > max_level) {
    stop(sprintf(
      paste0(
        "Wavelet length/level (%d) is too high for ",
        "the number of samples (%d). Maximum allowed is %d."
      ),
      requested_level, n_samples, max_level
    ))
  }

  # Run WaveICA normalization
  features <- recetox.waveica::waveica(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    batch = batch,
    group = group,
    K = k,
    t = t,
    t2 = t2,
    alpha = alpha
  )
  non_feature_columns <- setdiff(colnames(data), feature_columns)

  # Update the data frame with normalized features
  data[, feature_columns] <- features

  # Optionally remove blank samples
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }

  final_data_processing(
    data, non_feature_columns,
    transpose, original_first_colname
  )
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
135
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Main function for single-batch WaveICA normalization.
#
# Reads the feature table and the sample metadata table, merges them on
# `sampleName`, validates and sorts the merged table, runs
# recetox.waveica::waveica_nonbatchwise() on the feature columns and passes
# the result through final_data_processing().
#
# Arguments:
#   data_matrix_file      feature table file, read with read_data()
#   sample_metadata_file  sample metadata file, read with read_data()
#   ft_ext, mt_ext        format identifiers handed to read_data() for the
#                         feature and metadata file respectively
#   wavelet_filter,
#   wavelet_length        forwarded to get_wf() to build the `wf` argument
#   k                     forwarded as `K` to waveica_nonbatchwise()
#   alpha, cutoff         forwarded unchanged to waveica_nonbatchwise()
#   exclude_blanks        if TRUE, exclude_group() is applied to the result
#   transpose             forwarded to read_data() for the feature table and
#                         to final_data_processing()
#
# Returns whatever final_data_processing() returns.
# Stops if either table lacks a `sampleName` column; further validation
# errors come from verify_input_dataframe().
waveica_singlebatch <- function(data_matrix_file,
                                sample_metadata_file,
                                ft_ext,
                                mt_ext,
                                wavelet_filter,
                                wavelet_length,
                                k,
                                alpha,
                                cutoff,
                                exclude_blanks,
                                transpose = FALSE) {
  read_features_response <- read_data(data_matrix_file, ft_ext, transpose)
  features <- read_features_response$data
  original_first_colname <- read_features_response$original_first_colname

  read_data_response <- read_data(sample_metadata_file, mt_ext)
  metadata <- read_data_response$data

  # Ensure both tables have a sampleName column (it is the merge key)
  if (!"sampleName" %in% colnames(features) ||
    !"sampleName" %in% colnames(metadata)) { # nolint
    stop("Both feature and metadata tables must contain a 'sampleName' column.")
  }
  data <- merge(metadata, features, by = "sampleName")

  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
  optional_columns <- c("batch")
  data <- verify_input_dataframe(data, required_columns)

  data <- sort_by_injection_order(data)

  # Everything that is neither required nor optional metadata is a feature
  feature_columns <- colnames(data)[
    !colnames(data) %in% c(required_columns, optional_columns)
  ]
  # drop = FALSE keeps a data frame even when only one feature column is left;
  # without it the selection would collapse to a plain vector.
  features <- data[, feature_columns, drop = FALSE]
  injection_order <- data$injectionOrder

  # Run WaveICA normalization (single batch)
  features <- recetox.waveica::waveica_nonbatchwise(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    injection_order = injection_order,
    K = k,
    alpha = alpha,
    cutoff = cutoff
  )
  non_feature_columns <- setdiff(colnames(data), feature_columns)

  # Update the data frame with normalized features
  data[, feature_columns] <- features
  group <- enumerate_groups(as.character(data$sampleType))
  # Optionally remove blank samples
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }
  data <- final_data_processing(
    data, non_feature_columns,
    transpose, original_first_colname
  )
  data
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
200
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Orders the rows of `data` ascending by `batch` and then `injectionOrder`
# when a `batch` column is present, and by `injectionOrder` alone otherwise.
# Returns the reordered data frame.
sort_by_injection_order <- function(data) {
  has_batch <- "batch" %in% colnames(data)
  row_order <- if (has_batch) {
    order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE)
  } else {
    order(data[, "injectionOrder"], decreasing = FALSE)
  }
  data[row_order, ]
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
216
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Verifies that required columns exist and that there are no missing values.
#
# Arguments:
#   data              data frame to validate
#   required_columns  character vector of column names that must be present
#
# Returns the data frame produced by verify_column_types().
# Stops with an error if `data` contains any NA or if a required column is
# absent; the NA check runs first.
verify_input_dataframe <- function(data, required_columns) {
  if (anyNA(data)) {
    # Message parts are separate single-line literals joined by stop(); a
    # literal spanning several source lines would embed the raw line break
    # and the source indentation in the message.
    stop(
      "Error: dataframe cannot contain NULL values!",
      "\nMake sure that your dataframe does not contain empty cells"
    )
  }

  missing_columns <- setdiff(required_columns, colnames(data))
  if (length(missing_columns) > 0) {
    stop(
      "Error: missing metadata!",
      "\nMake sure that the following columns are present in your dataframe: ",
      paste(required_columns, collapse = ", "),
      "\nMissing: ", paste(missing_columns, collapse = ", ")
    )
  }

  verify_column_types(data, required_columns)
}
6480c6d5fa36 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
recetox
parents: 1
diff changeset
232
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Checks column types for required columns and removes problematic
# (non-numeric) feature columns.
#
# Arguments:
#   data              data frame to check
#   required_columns  character vector naming the metadata columns whose
#                     class is validated; every other column is treated as a
#                     feature column
#
# Returns `data` with all feature columns converted to numeric and with every
# feature column that contains NA after conversion dropped (their names are
# reported via message()).
# Stops if a required column present in `data` has none of its allowed classes.
verify_column_types <- function(data, required_columns) {
  column_types <- list(
    "sampleName" = c("character", "factor"),
    "class" = c("character", "factor", "integer"),
    "sampleType" = c("character", "factor"),
    "injectionOrder" = "integer",
    "batch" = "integer"
  )
  column_types <- column_types[required_columns]

  # Check required columns' types
  for (col_name in names(column_types)) {
    if (!col_name %in% names(data)) next
    expected_types <- column_types[[col_name]]
    # class() may return several entries (e.g. c("ordered", "factor")), so
    # the test must reduce to one logical; a longer `if` condition is an
    # error in R >= 4.2.
    actual_types <- class(data[[col_name]])
    if (!any(actual_types %in% expected_types)) {
      stop(
        "Column ", col_name, " is of type ",
        paste(actual_types, collapse = "/"),
        " but expected type is ",
        paste(expected_types, collapse = " or "), "\n"
      )
    }
  }

  # Identify feature columns (not required columns)
  feature_cols <- setdiff(names(data), required_columns)
  # Convert all feature columns to numeric; coercion warnings are suppressed
  # because failed conversions are detected below through the resulting NAs.
  data[feature_cols] <- suppressWarnings(
    lapply(
      data[feature_cols],
      function(x) as.numeric(as.character(x))
    )
  )
  # Find columns that are problematic (contain any NA after conversion)
  has_na <- vapply(data[feature_cols], anyNA, logical(1))
  removed_columns <- names(has_na)[has_na]
  if (length(removed_columns) > 0) {
    message(
      "Removed problematic columns (non-numeric): ",
      paste(removed_columns, collapse = ", ")
    )
  }

  # Keep only good columns
  keep_cols <- !(names(data) %in% removed_columns)
  data[, keep_cols, drop = FALSE]
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
286
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Replaces group labels by numeric codes, matched case-insensitively as
# substrings: blank -> 0, sample -> 1, qc -> 2, standard -> 3.
# The labels are applied in that order, so an entry already recoded by an
# earlier label is not matched again by a later one. Entries matching none
# of the labels are left untouched.
enumerate_groups <- function(group) {
  codes <- c(blank = 0, sample = 1, qc = 2, standard = 3)
  for (label in names(codes)) {
    hits <- grepl(label, tolower(group))
    group[hits] <- codes[[label]]
  }
  group
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
295
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Builds the wavelet filter name passed on to the R wavelets function by
# concatenating the filter family with the filter length (e.g. "la" and 8
# give "la8").
#
# wavelet_filter: filter family prefix.
# wavelet_length: filter length appended to the prefix.
# Returns the filter name as a single string.
get_wf <- function(wavelet_filter, wavelet_length) {
  filter_name <- paste0(wavelet_filter, wavelet_length)
  # Exception for Daubechies-2, which is requested under the name "haar"
  if (filter_name == "d2") {
    return("haar")
  }
  filter_name
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
305
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Removes blank samples (group==0) from the data frame
#
# data:  data frame with one row per entry of `group`.
# group: vector of enumerated group codes; entries equal to 0 mark blanks.
# Returns `data` without the rows whose group code is 0. When there are no
# blanks, `data` is returned untouched. A message is printed to stdout when
# rows are removed.
exclude_group <- function(data, group) {
  blank_rows <- which(group %in% 0)
  if (length(blank_rows) == 0) {
    return(data)
  }
  # drop = FALSE keeps the result a data frame even when `data` has a
  # single column; without it the subset collapses to a bare vector.
  data_without_blanks <- data[-blank_rows, , drop = FALSE]
  cat("Blank samples have been excluded from the dataframe.\n")
  data_without_blanks
}
328710890963 "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff changeset
317
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Writes the data to `feature_output` in the format named by `ext`.
#
# data:           data frame to store.
# feature_output: path of the output file.
# ext:            "csv", "tsv"/"tabular" or "parquet"; anything else stops
#                 with an "Unsupported file extension" error.
# Text formats are written without row names and without quoting. A
# completion message is printed to stdout after a successful write.
store_data <- function(data, feature_output, ext) {
  if (ext == "csv") {
    write.csv(data, file = feature_output, row.names = FALSE, quote = FALSE)
  } else if (ext == "tsv" || ext == "tabular") {
    write.table(
      data,
      file = feature_output,
      sep = "\t",
      row.names = FALSE,
      quote = FALSE
    )
  } else if (ext == "parquet") {
    arrow::write_parquet(data, feature_output)
  } else {
    stop(paste("Unsupported file extension:", ext))
  }
  cat("Normalization has been completed.\n")
}
7
1a2aeb8137bf planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
recetox
parents: 6
diff changeset
335
1a2aeb8137bf planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
recetox
parents: 6
diff changeset
# Transposes a data frame whose first column is an identifier column.
#
# The first column is dropped, the remaining columns are transposed, and the
# former column names are placed in a new leading column.
#
# data:         data frame; every column except the first is transposed.
# column_names: names assigned to the columns of the result.
# Returns the transposed data frame.
tranpose_data <- function(data, column_names) {
  flipped <- t(data[-1])
  setNames(data.frame(rownames(flipped), flipped), column_names)
}
10
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
344
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
345
821062fc5782 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents: 9
diff changeset
# Prepares the processed data for output.
#
# data:                   data frame whose first column is kept as the
#                         identifier column.
# non_feature_columns:    names of columns to drop (the first column is kept
#                         even if it is listed here).
# transpose:              if TRUE, the result is transposed so that the
#                         values of the first column become column names.
# original_first_colname: name given to the first column of the result.
# Returns the filtered (and optionally transposed) data frame.
final_data_processing <- function(
    data, non_feature_columns,
    transpose, original_first_colname) {
  # Mark the non-feature columns for removal, but never the first column
  drop_mask <- colnames(data) %in% non_feature_columns
  drop_mask[1] <- FALSE
  data <- data[, !drop_mask, drop = FALSE]

  if (transpose) {
    # Values of the first column become the new column names
    new_colnames <- as.character(data[[1]])
    # Transpose everything but the first column and go back to a data frame
    flipped <- as.data.frame(
      t(data[, -1, drop = FALSE]),
      stringsAsFactors = FALSE
    )
    # The former column names (now row names) become the first column
    data <- cbind(first_col = rownames(flipped), flipped)
    colnames(data) <- c(colnames(data)[1], new_colnames)
    rownames(data) <- NULL
  }

  colnames(data)[1] <- original_first_colname
  data
}