Mercurial > repos > recetox > waveica
comparison waveica_wrapper.R @ 2:6480c6d5fa36 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
author | recetox |
---|---|
date | Fri, 23 Jun 2023 07:27:29 +0000 |
parents | b77023c41c76 |
children | dbbedb14b44c |
comparison
equal
deleted
inserted
replaced
1:b77023c41c76 | 2:6480c6d5fa36 |
---|---|
1 read_file <- function(file, metadata, ft_ext, mt_ext) { | 1 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { |
2 data <- read_data(file, ft_ext) | 2 data <- read_data(file, ft_ext) |
3 | |
4 if (transpose) { | |
5 col_names <- c("sampleName", data[[1]]) | |
6 t_data <- data[-1] | |
7 t_data <- t(t_data) | |
8 data <- data.frame(rownames(t_data), t_data) | |
9 colnames(data) <- col_names | |
10 } | |
3 | 11 |
4 if (!is.na(metadata)) { | 12 if (!is.na(metadata)) { |
5 mt_data <- read_data(metadata, mt_ext) | 13 mt_data <- read_data(metadata, mt_ext) |
6 data <- merge(mt_data, data, by = "sampleName") | 14 data <- merge(mt_data, data, by = "sampleName") |
7 } | 15 } |
22 } | 30 } |
23 | 31 |
24 waveica <- function(file, | 32 waveica <- function(file, |
25 metadata = NA, | 33 metadata = NA, |
26 ext, | 34 ext, |
35 transpose = FALSE, | |
27 wavelet_filter, | 36 wavelet_filter, |
28 wavelet_length, | 37 wavelet_length, |
29 k, | 38 k, |
30 t, | 39 t, |
31 t2, | 40 t2, |
32 alpha, | 41 alpha, |
33 exclude_blanks) { | 42 exclude_blanks) { |
34 | |
35 # get input from the Galaxy, preprocess data | 43 # get input from the Galaxy, preprocess data |
36 ext <- strsplit(x = ext, split = "\\,")[[1]] | 44 ext <- strsplit(x = ext, split = "\\,")[[1]] |
37 | 45 |
38 ft_ext <- ext[1] | 46 ft_ext <- ext[1] |
39 mt_ext <- ext[2] | 47 mt_ext <- ext[2] |
40 | 48 |
41 data <- read_file(file, metadata, ft_ext, mt_ext) | 49 data <- read_file(file, metadata, ft_ext, mt_ext, transpose) |
42 | 50 |
43 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") | 51 required_columns <- c( |
44 verify_input_dataframe(data, required_columns) | 52 "sampleName", "class", "sampleType", |
53 "injectionOrder", "batch" | |
54 ) | |
55 data <- verify_input_dataframe(data, required_columns) | |
45 | 56 |
46 data <- sort_by_injection_order(data) | 57 data <- sort_by_injection_order(data) |
47 | 58 |
48 # separate data into features, batch and group | 59 # separate data into features, batch and group |
49 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] | 60 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] |
74 } | 85 } |
75 | 86 |
76 waveica_singlebatch <- function(file, | 87 waveica_singlebatch <- function(file, |
77 metadata = NA, | 88 metadata = NA, |
78 ext, | 89 ext, |
90 transpose = FALSE, | |
79 wavelet_filter, | 91 wavelet_filter, |
80 wavelet_length, | 92 wavelet_length, |
81 k, | 93 k, |
82 alpha, | 94 alpha, |
83 cutoff, | 95 cutoff, |
84 exclude_blanks) { | 96 exclude_blanks) { |
85 | |
86 # get input from the Galaxy, preprocess data | 97 # get input from the Galaxy, preprocess data |
87 ext <- strsplit(x = ext, split = "\\,")[[1]] | 98 ext <- strsplit(x = ext, split = "\\,")[[1]] |
88 | 99 |
89 ft_ext <- ext[1] | 100 ft_ext <- ext[1] |
90 mt_ext <- ext[2] | 101 mt_ext <- ext[2] |
91 | 102 |
92 data <- read_file(file, metadata, ft_ext, mt_ext) | 103 data <- read_file(file, metadata, ft_ext, mt_ext, transpose) |
93 | 104 |
94 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") | 105 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") |
95 optional_columns <- c("batch") | 106 optional_columns <- c("batch") |
96 verify_input_dataframe(data, required_columns) | 107 |
108 data <- verify_input_dataframe(data, required_columns) | |
97 | 109 |
98 data <- sort_by_injection_order(data) | 110 data <- sort_by_injection_order(data) |
99 | 111 |
100 feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] | 112 feature_columns <- colnames(data)[!colnames(data) %in% |
113 c(required_columns, optional_columns)] | |
101 features <- data[, feature_columns] | 114 features <- data[, feature_columns] |
102 injection_order <- data$injectionOrder | 115 injection_order <- data$injectionOrder |
103 | 116 |
104 # run WaveICA | 117 # run WaveICA |
105 features <- recetox.waveica::waveica_nonbatchwise( | 118 features <- recetox.waveica::waveica_nonbatchwise( |
110 alpha = alpha, | 123 alpha = alpha, |
111 cutoff = cutoff | 124 cutoff = cutoff |
112 ) | 125 ) |
113 | 126 |
114 data[, feature_columns] <- features | 127 data[, feature_columns] <- features |
115 | 128 group <- enumerate_groups(as.character(data$sampleType)) |
116 # remove blanks from dataset | 129 # remove blanks from dataset |
117 if (exclude_blanks) { | 130 if (exclude_blanks) { |
118 data <- exclude_group(data, group) | 131 data <- exclude_group(data, group) |
119 } | 132 } |
120 | 133 |
140 verify_input_dataframe <- function(data, required_columns) { | 153 verify_input_dataframe <- function(data, required_columns) { |
141 if (anyNA(data)) { | 154 if (anyNA(data)) { |
142 stop("Error: dataframe cannot contain NULL values! | 155 stop("Error: dataframe cannot contain NULL values! |
143 Make sure that your dataframe does not contain empty cells") | 156 Make sure that your dataframe does not contain empty cells") |
144 } else if (!all(required_columns %in% colnames(data))) { | 157 } else if (!all(required_columns %in% colnames(data))) { |
145 stop("Error: missing metadata! | 158 stop( |
146 Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", ")) | 159 "Error: missing metadata! |
147 } | 160 Make sure that the following columns are present in your dataframe: ", |
161 paste(required_columns, collapse = ", ") | |
162 ) | |
163 } | |
164 | |
165 data <- verify_column_types(data, required_columns) | |
166 | |
167 return(data) | |
168 } | |
169 | |
170 verify_column_types <- function(data, required_columns) { | |
171 # Specify the column names and their expected types | |
172 column_types <- list( | |
173 "sampleName" = c("character", "factor"), | |
174 "class" = c("character", "factor"), | |
175 "sampleType" = c("character", "factor"), | |
176 "injectionOrder" = "integer", | |
177 "batch" = "integer" | |
178 ) | |
179 | |
180 column_types <- column_types[required_columns] | |
181 | |
182 for (col_name in names(data)) { | |
183 actual_type <- class(data[[col_name]]) | |
184 if (col_name %in% names(column_types)) { | |
185 expected_types <- column_types[[col_name]] | |
186 | |
187 if (!actual_type %in% expected_types) { | |
188 stop( | |
189 "Column ", col_name, " is of type ", actual_type, | |
190 " but expected type is ", | |
191 paste(expected_types, collapse = " or "), "\n" | |
192 ) | |
193 } | |
194 } else { | |
195 if (actual_type != "numeric") { | |
196 data[[col_name]] <- as.numeric(as.character(data[[col_name]])) | |
197 } | |
198 } | |
199 } | |
200 return(data) | |
148 } | 201 } |
149 | 202 |
150 | 203 |
151 # Match group labels with [blank/sample/qc] and enumerate them | 204 # Match group labels with [blank/sample/qc] and enumerate them |
152 enumerate_groups <- function(group) { | 205 enumerate_groups <- function(group) { |
185 | 238 |
186 store_data <- function(data, output, ext) { | 239 store_data <- function(data, output, ext) { |
187 if (ext == "csv") { | 240 if (ext == "csv") { |
188 write.csv(data, file = output, row.names = FALSE, quote = FALSE) | 241 write.csv(data, file = output, row.names = FALSE, quote = FALSE) |
189 } else if (ext == "tsv") { | 242 } else if (ext == "tsv") { |
190 write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) | 243 write.table(data, |
244 file = output, sep = "\t", | |
245 row.names = FALSE, quote = FALSE | |
246 ) | |
191 } else { | 247 } else { |
192 arrow::write_parquet(data, sink = output) | 248 arrow::write_parquet(data, sink = output) |
193 } | 249 } |
194 cat("Normalization has been completed.\n") | 250 cat("Normalization has been completed.\n") |
195 } | 251 } |