Mercurial > repos > recetox > waveica
annotate waveica_wrapper.R @ 10:821062fc5782 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
author | recetox |
---|---|
date | Fri, 04 Jul 2025 09:43:22 +0000 |
parents | 6fc9f6dbcef5 |
children |
rev | line source |
---|---|
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Read a table from a file in the specified format (csv, tsv/tabular, parquet).
#
# Arguments:
#   file       Path of the file to read.
#   ext        Format name: "csv", "tsv", "tabular" or "parquet".
#   transpose  If TRUE, the table is handed to tranpose_data() together with
#              the new column names: "sampleName" followed by the values of
#              the first column of the table as read.
#
# Returns a list with two elements:
#   data                    the table (after tranpose_data() when requested)
#   original_first_colname  name of the first column as read from the file,
#                           captured before any transposition
#
# Stops with an error when `ext` is not one of the supported formats or when
# the underlying reader fails.
read_data <- function(file, ext, transpose = FALSE) {
  # Re-raise a reader failure with the format, the offending path and the
  # reader's own message, so every format reports errors the same way.
  fail_read <- function(format_label, file_kind, e) {
    stop(
      paste0(
        "Failed to read as ", format_label, ". The file may not ",
        "be a valid ", file_kind, " file or may be corrupted: ",
        file, "\nError: ", conditionMessage(e)
      ),
      call. = FALSE
    )
  }

  if (ext == "csv") {
    data <- tryCatch(
      read.csv(file, header = TRUE),
      error = function(e) fail_read("CSV", "text", e)
    )
  } else if (ext == "tsv" || ext == "tabular") {
    data <- tryCatch(
      read.csv(file, header = TRUE, sep = "\t"),
      error = function(e) fail_read("TSV/tabular", "text", e)
    )
  } else if (ext == "parquet") {
    # Wrapped like the text readers so a broken parquet file also names
    # the file that could not be read.
    data <- tryCatch(
      arrow::read_parquet(file),
      error = function(e) fail_read("Parquet", "Parquet", e)
    )
  } else {
    stop(paste("Unsupported file extension or format for reading:", ext))
  }

  # Remember the first column name now: it is returned to the caller and
  # the transposition below replaces the column names.
  original_first_colname <- colnames(data)[1]
  if (transpose) {
    col_names <- c("sampleName", data[[1]])
    data <- tranpose_data(data, col_names)
  }

  list(data = data, original_first_colname = original_first_colname)
}
b77023c41c76
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit d82e7dad96bebe9424ac7bf490e2786d82c3681a
recetox
parents:
0
diff
changeset
|
47 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Main function for batchwise WaveICA normalization.
#
# Reads the feature table and the sample metadata, merges them on
# "sampleName", runs recetox.waveica::waveica() on the feature columns and
# returns the result of final_data_processing().
#
# Arguments:
#   data_matrix_file      Path of the feature table.
#   sample_metadata_file  Path of the sample metadata table.
#   ft_ext, mt_ext        Formats of the two files, as accepted by read_data().
#   wavelet_filter,
#   wavelet_length        Passed to get_wf() to build the `wf` argument;
#                         wavelet_length is also checked against the number
#                         of samples (see below).
#   k, t, t2, alpha       Forwarded to recetox.waveica::waveica() as
#                         K, t, t2 and alpha.
#   exclude_blanks        If TRUE, exclude_group() is applied to the
#                         normalized data before final processing.
#   transpose             Forwarded to read_data() for the feature table and
#                         to final_data_processing().
#
# Stops with an error when a required column is missing, when the two tables
# share no samples, or when wavelet_length exceeds floor(log2(n_samples)).
waveica <- function(data_matrix_file,
                    sample_metadata_file,
                    ft_ext,
                    mt_ext,
                    wavelet_filter,
                    wavelet_length,
                    k,
                    t,
                    t2,
                    alpha,
                    exclude_blanks,
                    transpose = FALSE) {
  read_features_response <- read_data(data_matrix_file, ft_ext, transpose)
  features <- read_features_response$data
  original_first_colname <- read_features_response$original_first_colname

  metadata <- read_data(sample_metadata_file, mt_ext)$data

  required_columns <- c(
    "sampleName", "class", "sampleType",
    "injectionOrder", "batch"
  )

  # Validate before selecting: selecting a missing column would otherwise
  # fail first with a less helpful message.
  has_sample_name <- "sampleName" %in% colnames(features) &&
    "sampleName" %in% colnames(metadata)
  if (!has_sample_name) {
    stop("Both feature and metadata tables must contain a 'sampleName' column.")
  }
  missing_columns <- setdiff(required_columns, colnames(metadata))
  if (length(missing_columns) > 0) {
    stop(paste0(
      "The metadata table is missing required column(s): ",
      paste(missing_columns, collapse = ", ")
    ))
  }

  # all_of() marks the character vector as column names explicitly; a bare
  # external vector inside select() is deprecated by tidyselect.
  metadata <- dplyr::select(metadata, dplyr::all_of(required_columns))

  data <- merge(metadata, features, by = "sampleName")
  if (nrow(data) == 0) {
    stop(paste0(
      "No samples in common between the feature and metadata tables ",
      "(matched on 'sampleName')."
    ))
  }

  data <- verify_input_dataframe(data, required_columns)
  data <- sort_by_injection_order(data)

  # Separate features, batch, and group columns.
  feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
  # drop = FALSE keeps a table even when there is a single feature column,
  # so nrow() below stays defined.
  features <- data[, feature_columns, drop = FALSE]
  group <- enumerate_groups(as.character(data$sampleType))
  batch <- data$batch

  # Check that wavelet level is not too high for the number of samples.
  max_level <- floor(log2(nrow(features)))
  requested_level <- as.numeric(wavelet_length)
  if (requested_level > max_level) {
    stop(sprintf(
      paste0(
        "Wavelet length/level (%d) is too high for ",
        "the number of samples (%d). Maximum allowed is %d."
      ),
      requested_level, nrow(features), max_level
    ))
  }

  # Run WaveICA normalization.
  features <- recetox.waveica::waveica(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    batch = batch,
    group = group,
    K = k,
    t = t,
    t2 = t2,
    alpha = alpha
  )
  non_feature_columns <- setdiff(colnames(data), feature_columns)

  # Update the data frame with normalized features.
  data[, feature_columns] <- features

  # Optionally remove blank samples.
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }

  final_data_processing(
    data, non_feature_columns,
    transpose, original_first_colname
  )
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
135 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Main function for single-batch WaveICA normalization.
#
# Reads the feature table and the sample metadata table, merges them on
# `sampleName`, validates and sorts the merged table, runs
# recetox.waveica::waveica_nonbatchwise() on the feature columns and passes
# the result through final_data_processing().
#
# Arguments:
#   data_matrix_file      feature table file, read with read_data()
#   sample_metadata_file  sample metadata file, read with read_data()
#   ft_ext, mt_ext        format of the feature / metadata file, as
#                         understood by read_data()
#   wavelet_filter,
#   wavelet_length        forwarded to get_wf() to build the `wf` argument
#   k, alpha, cutoff      forwarded to waveica_nonbatchwise() as
#                         K, alpha and cutoff
#   exclude_blanks        if TRUE, the normalized table is filtered with
#                         exclude_group() before final processing
#   transpose             forwarded to read_data() for the feature table
#                         and to final_data_processing()
#
# Returns the value produced by final_data_processing().
# Stops with an error when either table lacks a `sampleName` column.
waveica_singlebatch <- function(data_matrix_file,
                                sample_metadata_file,
                                ft_ext,
                                mt_ext,
                                wavelet_filter,
                                wavelet_length,
                                k,
                                alpha,
                                cutoff,
                                exclude_blanks,
                                transpose = FALSE) {
  read_features_response <- read_data(data_matrix_file, ft_ext, transpose)
  features <- read_features_response$data
  original_first_colname <- read_features_response$original_first_colname

  read_data_response <- read_data(sample_metadata_file, mt_ext)
  metadata <- read_data_response$data

  # Ensure both tables have a sampleName column; it is the merge key.
  if (!"sampleName" %in% colnames(features) ||
    !"sampleName" %in% colnames(metadata)) { # nolint
    stop("Both feature and metadata tables must contain a 'sampleName' column.")
  }
  data <- merge(metadata, features, by = "sampleName")

  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
  optional_columns <- c("batch")
  data <- verify_input_dataframe(data, required_columns)

  data <- sort_by_injection_order(data)

  # Everything that is neither required nor optional metadata is a feature.
  feature_columns <- colnames(data)[
    !colnames(data) %in% c(required_columns, optional_columns)
  ]
  # drop = FALSE keeps a data frame even when a single feature column is
  # left; without it the selection would collapse to a bare vector.
  features <- data[, feature_columns, drop = FALSE]
  injection_order <- data$injectionOrder

  # Run WaveICA normalization (single batch)
  features <- recetox.waveica::waveica_nonbatchwise(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    injection_order = injection_order,
    K = k,
    alpha = alpha,
    cutoff = cutoff
  )
  non_feature_columns <- setdiff(colnames(data), feature_columns)

  # Update the data frame with normalized features
  data[, feature_columns] <- features
  group <- enumerate_groups(as.character(data$sampleType))
  # Optionally remove blank samples
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }
  data <- final_data_processing(
    data, non_feature_columns,
    transpose, original_first_colname
  )
  data
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
200 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Orders the rows of `data` by ascending injection order. When a `batch`
# column is present it is the primary sort key and `injectionOrder` breaks
# ties within each batch. Returns the reordered data frame.
sort_by_injection_order <- function(data) {
  sort_keys <- list(data[, "injectionOrder"])
  if ("batch" %in% colnames(data)) {
    # batch goes first so that it takes precedence over injection order
    sort_keys <- c(list(data[, "batch"]), sort_keys)
  }
  row_order <- do.call(order, c(sort_keys, list(decreasing = FALSE)))
  data[row_order, ]
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
216 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Validates the merged input table before normalization.
#
# Stops when `data` contains any missing value, or when one of
# `required_columns` is absent from its column names. Otherwise the table is
# handed to verify_column_types() and that function's result is returned.
verify_input_dataframe <- function(data, required_columns) {
  # Missing values are rejected first, before the column check.
  if (anyNA(data)) {
    stop("Error: dataframe cannot contain NULL values!
\nMake sure that your dataframe does not contain empty cells")
  }

  has_all_required <- all(required_columns %in% colnames(data))
  if (!has_all_required) {
    stop(
      "Error: missing metadata!
\nMake sure that the following columns are present in your dataframe: ",
      paste(required_columns, collapse = ", ")
    )
  }

  checked <- verify_column_types(data, required_columns)
  checked
}
6480c6d5fa36
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
recetox
parents:
1
diff
changeset
|
232 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Checks column types for required columns and cleans up feature columns.
#
# For every name in `required_columns` that is present in `data`, the
# column's class must match one of the allowed classes listed below;
# otherwise the function stops with an error. Every other column is treated
# as a feature column: it is converted to numeric, and any column that
# contains an NA after conversion is dropped (a message lists the dropped
# column names).
#
# Returns `data` with converted feature columns and without the dropped ones.
verify_column_types <- function(data, required_columns) {
  allowed_types <- list(
    "sampleName" = c("character", "factor"),
    "class" = c("character", "factor", "integer"),
    "sampleType" = c("character", "factor"),
    "injectionOrder" = "integer",
    "batch" = "integer"
  )
  allowed_types <- allowed_types[required_columns]

  # Check the type of each required column that is present
  for (col_name in names(allowed_types)) {
    if (!col_name %in% names(data)) next
    expected_types <- allowed_types[[col_name]]
    actual_type <- class(data[[col_name]])
    # class() may return several classes (an ordered factor is
    # c("ordered", "factor")); the column is accepted when any of them is
    # allowed. any() also keeps the `if` condition at length one.
    if (!any(actual_type %in% expected_types)) {
      stop(
        "Column ", col_name, " is of type ",
        paste(actual_type, collapse = "/"),
        " but expected type is ",
        paste(expected_types, collapse = " or "), "\n"
      )
    }
  }

  # Identify feature columns (not required columns)
  feature_cols <- setdiff(names(data), required_columns)
  # Convert all feature columns to numeric; coercion warnings are silenced
  # because non-convertible values are detected below through the NAs they
  # leave behind.
  data[feature_cols] <- suppressWarnings(
    lapply(
      data[feature_cols],
      function(x) as.numeric(as.character(x))
    )
  )
  # Find columns that are problematic (contain any NA after conversion)
  has_na <- vapply(data[feature_cols], anyNA, logical(1))
  removed_columns <- names(has_na)[has_na]
  if (length(removed_columns) > 0) {
    message(
      "Removed problematic columns (non-numeric): ",
      paste(removed_columns, collapse = ", ")
    )
  }

  # Keep only good columns
  keep_cols <- !(names(data) %in% removed_columns)
  data <- data[, keep_cols, drop = FALSE]
  data
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
286 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Enumerates group labels: blank=0, sample=1, qc=2, standard=3.
#
# group: vector of group labels (character or factor). Matching is
#        case-insensitive and by substring, so e.g. "Blank_1" counts as a blank.
#
# Returns a vector of the same length in which every label containing one of
# the keywords is replaced by its code. For character input the codes are
# stored as strings ("0", "1", ...) because the replacement happens inside a
# character vector. Labels that match no keyword are returned unchanged.
enumerate_groups <- function(group) {
  # Assigning a number into a factor is an invalid-level assignment that
  # yields NA (with a warning), so work on the plain labels instead.
  if (is.factor(group)) {
    group <- as.character(group)
  }

  codes <- c(blank = 0, sample = 1, qc = 2, standard = 3)
  for (keyword in names(codes)) {
    # Re-evaluate the labels on every pass: entries that were already replaced
    # by a code can no longer match, so the first matching keyword wins.
    hits <- grepl(keyword, tolower(group))
    group[hits] <- codes[[keyword]]
  }
  group
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
295 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Builds the wavelet filter identifier by concatenating the filter name and
# its length (e.g. "d" and 4 give "d4").
#
# wavelet_filter: filter family prefix.
# wavelet_length: filter length.
#
# Returns the concatenated identifier, except that "d2" (Daubechies-2) is
# reported as "haar".
get_wf <- function(wavelet_filter, wavelet_length) {
  filter_id <- paste0(wavelet_filter, wavelet_length)
  if (filter_id == "d2") "haar" else filter_id
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
305 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Removes blank samples (group == 0) from the data frame.
#
# data:  data frame holding one row per entry of `group`
#        (assumed to be row-aligned with `group` - confirm against the caller).
# group: vector of enumerated group codes; entries equal to 0 mark blanks.
#
# Returns `data` without the rows whose group code is 0. When no entry of
# `group` is 0, `data` is returned untouched and nothing is printed.
exclude_group <- function(data, group) {
  blank_rows <- which(group %in% 0)
  if (length(blank_rows) == 0) {
    return(data)
  }

  # drop = FALSE keeps a single-column data frame from collapsing into a
  # bare vector once the blank rows are removed.
  data_without_blanks <- data[-blank_rows, , drop = FALSE]
  cat("Blank samples have been excluded from the dataframe.\n")
  data_without_blanks
}
328710890963
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
recetox
parents:
diff
changeset
|
317 |
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Writes `data` to `feature_output` in the format selected by `ext`.
#
# data:           data frame to write.
# feature_output: path of the output file.
# ext:            "parquet", "csv", "tsv" or "tabular"; any other value
#                 stops with an "Unsupported file extension" error.
#
# Text formats are written without row names and without quoting. After a
# successful write a completion message is printed to standard output.
store_data <- function(data, feature_output, ext) {
  tab_separated <- c("tsv", "tabular")

  if (ext == "parquet") {
    arrow::write_parquet(data, feature_output)
  } else if (ext == "csv") {
    write.csv(data, file = feature_output, quote = FALSE, row.names = FALSE)
  } else if (ext %in% tab_separated) {
    write.table(
      data,
      file = feature_output,
      sep = "\t",
      quote = FALSE,
      row.names = FALSE
    )
  } else {
    stop(paste("Unsupported file extension:", ext))
  }

  cat("Normalization has been completed.\n")
}
7
1a2aeb8137bf
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
recetox
parents:
6
diff
changeset
|
335 |
1a2aeb8137bf
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
recetox
parents:
6
diff
changeset
|
# Transposes a data frame, ignoring its first column.
#
# data:         data frame whose first column is dropped before transposing.
# column_names: names assigned to the columns of the result; the first one
#               labels the column holding the former column names of `data`.
#
# Returns a data frame whose first column contains the former column names
# (from the second column onward) followed by the transposed values.
tranpose_data <- function(data, column_names) {
  flipped <- t(data[-1])
  result <- data.frame(rownames(flipped), flipped)
  colnames(result) <- column_names

  result
}
10
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
344 |
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
345 |
821062fc5782
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 2b8b1dcb2947c6503fd4f82904df708e4f88ea1d
recetox
parents:
9
diff
changeset
|
# Prepares the processed table for output.
#
# data:                   data frame to finalise.
# non_feature_columns:    names of columns to drop; the first column of
#                         `data` is always kept, even if it is listed here.
# transpose:              if TRUE, the table is flipped so that the values of
#                         the first column become the column names and the
#                         former column names become the first column.
# original_first_colname: name given to the first column of the result.
#
# Returns the finalised data frame.
final_data_processing <- function(
    data, non_feature_columns,
    transpose, original_first_colname) {
  # Mark the columns to discard, but never the first one.
  is_dropped <- colnames(data) %in% non_feature_columns
  is_dropped[1] <- FALSE
  data <- data[, !is_dropped, drop = FALSE]

  if (transpose) {
    # Values of the first column name the columns of the flipped table.
    header <- as.character(data[[1]])
    flipped <- as.data.frame(
      t(data[, -1, drop = FALSE]),
      stringsAsFactors = FALSE
    )
    # Former column names travel as row names through t(); move them into a
    # leading column and reset the row names afterwards.
    data <- cbind(first_col = rownames(flipped), flipped)
    colnames(data) <- c(colnames(data)[1], header)
    rownames(data) <- NULL
  }

  colnames(data)[1] <- original_first_colname
  data
}