Mercurial > repos > recetox > waveica
comparison waveica_wrapper.R @ 7:1a2aeb8137bf draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
author | recetox |
---|---|
date | Thu, 06 Jun 2024 12:25:05 +0000 |
parents | 071a424241ec |
children | bf32ae95a06f |
comparison
equal
deleted
inserted
replaced
6:071a424241ec | 7:1a2aeb8137bf |
---|---|
1 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { | 1 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) { |
2 data <- read_data(file, ft_ext) | 2 data <- read_data(file, ft_ext) |
3 | 3 |
4 if (transpose) { | 4 if (transpose) { |
5 col_names <- c("sampleName", data[[1]]) | 5 col_names <- c("sampleName", data[[1]]) |
6 t_data <- data[-1] | 6 data <- tranpose_data(data, col_names) |
7 t_data <- t(t_data) | |
8 data <- data.frame(rownames(t_data), t_data) | |
9 colnames(data) <- col_names | |
10 } | 7 } |
11 | 8 |
12 if (!is.na(metadata)) { | 9 if (!is.na(metadata)) { |
13 mt_data <- read_data(metadata, mt_ext) | 10 mt_data <- read_data(metadata, mt_ext) |
14 data <- merge(mt_data, data, by = "sampleName") | 11 data <- merge(mt_data, data, by = "sampleName") |
131 } | 128 } |
132 | 129 |
133 return(data) | 130 return(data) |
134 } | 131 } |
135 | 132 |
136 | |
137 sort_by_injection_order <- function(data) { | 133 sort_by_injection_order <- function(data) { |
138 if ("batch" %in% colnames(data)) { | 134 if ("batch" %in% colnames(data)) { |
139 data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ] | 135 data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ] |
140 } else { | 136 } else { |
141 data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ] | 137 data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ] |
142 } | 138 } |
143 return(data) | 139 return(data) |
144 } | 140 } |
145 | |
146 | 141 |
147 verify_input_dataframe <- function(data, required_columns) { | 142 verify_input_dataframe <- function(data, required_columns) { |
148 if (anyNA(data)) { | 143 if (anyNA(data)) { |
149 stop("Error: dataframe cannot contain NULL values! | 144 stop("Error: dataframe cannot contain NULL values! |
150 Make sure that your dataframe does not contain empty cells") | 145 Make sure that your dataframe does not contain empty cells") |
192 } | 187 } |
193 } | 188 } |
194 return(data) | 189 return(data) |
195 } | 190 } |
196 | 191 |
197 | |
198 # Match group labels with [blank/sample/qc] and enumerate them | 192 # Match group labels with [blank/sample/qc] and enumerate them |
199 enumerate_groups <- function(group) { | 193 enumerate_groups <- function(group) { |
200 group[grepl("blank", tolower(group))] <- 0 | 194 group[grepl("blank", tolower(group))] <- 0 |
201 group[grepl("sample", tolower(group))] <- 1 | 195 group[grepl("sample", tolower(group))] <- 1 |
202 group[grepl("qc", tolower(group))] <- 2 | 196 group[grepl("qc", tolower(group))] <- 2 |
203 | 197 |
204 return(group) | 198 return(group) |
205 } | 199 } |
206 | 200 |
207 | |
208 # Create appropriate input for R wavelets function | 201 # Create appropriate input for R wavelets function |
209 get_wf <- function(wavelet_filter, wavelet_length) { | 202 get_wf <- function(wavelet_filter, wavelet_length) { |
210 wf <- paste(wavelet_filter, wavelet_length, sep = "") | 203 wf <- paste(wavelet_filter, wavelet_length, sep = "") |
211 | 204 |
212 # exception to the wavelet function | 205 # exception to the wavelet function |
214 wf <- "haar" | 207 wf <- "haar" |
215 } | 208 } |
216 | 209 |
217 return(wf) | 210 return(wf) |
218 } | 211 } |
219 | |
220 | 212 |
221 # Exclude blanks from a dataframe | 213 # Exclude blanks from a dataframe |
222 exclude_group <- function(data, group) { | 214 exclude_group <- function(data, group) { |
223 row_idx_to_exclude <- which(group %in% 0) | 215 row_idx_to_exclude <- which(group %in% 0) |
224 if (length(row_idx_to_exclude) > 0) { | 216 if (length(row_idx_to_exclude) > 0) { |
228 } else { | 220 } else { |
229 return(data) | 221 return(data) |
230 } | 222 } |
231 } | 223 } |
232 | 224 |
233 store_data <- function(data, output, ext) { | 225 store_data <- function(data, feature_output, metadata_output, ext, split_output = FALSE) { |
234 if (ext == "parquet") { | 226 if (ext == "parquet") { |
235 arrow::write_parquet(data, output) | 227 if (split_output == TRUE) { |
236 } else { | 228 split_df <- split_output(data) |
237 write.table(data, | 229 arrow::write_parquet(split_df$metadata, metadata_output) |
238 file = output, sep = "\t", | 230 arrow::write_parquet(split_df$feature_table, feature_output) |
239 row.names = FALSE, quote = FALSE | 231 } else { |
240 ) | 232 arrow::write_parquet(data, feature_output) |
233 } | |
234 } else { | |
235 if (split_output == TRUE) { | |
236 split_df <- split_output(data) | |
237 write.table(split_df$metadata, | |
238 file = metadata_output, sep = "\t", | |
239 row.names = FALSE, quote = FALSE | |
240 ) | |
241 write.table(split_df$feature_table, | |
242 file = feature_output, sep = "\t", | |
243 row.names = FALSE, quote = FALSE | |
244 ) | |
245 } else { | |
246 write.table(data, | |
247 file = feature_output, sep = "\t", | |
248 row.names = FALSE, quote = FALSE | |
249 ) | |
250 } | |
241 } | 251 } |
242 cat("Normalization has been completed.\n") | 252 cat("Normalization has been completed.\n") |
243 } | 253 } |
254 | |
255 split_output <- function(df) { | |
256 required_columns_set1 <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") | |
257 required_columns_set2 <- c("sampleName", "class", "sampleType", "injectionOrder") | |
258 | |
259 if (all(required_columns_set1 %in% colnames(df))) { | |
260 metadata_df <- df[, required_columns_set1, drop = FALSE] | |
261 df <- df[, -c(2:5)] | |
262 } else if (all(required_columns_set2 %in% colnames(df))) { | |
263 metadata_df <- df[, required_columns_set2, drop = FALSE] | |
264 df <- df[, -c(2:4)] | |
265 } else { | |
266 stop("Neither set of required columns is present in the dataframe.") | |
267 } | |
268 | |
269 # Transpose the feature table | |
270 col_names <- c("id", as.vector(df[[1]])) | |
271 feature_table <- tranpose_data(df, col_names) | |
272 | |
273 return(list(metadata = metadata_df, feature_table = feature_table)) | |
274 } | |
275 | |
276 tranpose_data <- function(data, column_names) { | |
277 t_data <- data[-1] | |
278 t_data <- t(t_data) | |
279 tranposed_data <- data.frame(rownames(t_data), t_data) | |
280 colnames(tranposed_data) <- column_names | |
281 | |
282 return(tranposed_data) | |
283 } |