comparison waveica_wrapper.R @ 7:1a2aeb8137bf draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 013d7c85fa9d77b8a27d194b350cd6b2d127a80f
author recetox
date Thu, 06 Jun 2024 12:25:05 +0000
parents 071a424241ec
children bf32ae95a06f
comparison 6:071a424241ec vs 7:1a2aeb8137bf
@@ -1,14 +1,11 @@
 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) {
     data <- read_data(file, ft_ext)
 
     if (transpose) {
         col_names <- c("sampleName", data[[1]])
-        t_data <- data[-1]
-        t_data <- t(t_data)
-        data <- data.frame(rownames(t_data), t_data)
-        colnames(data) <- col_names
+        data <- tranpose_data(data, col_names)
     }
 
     if (!is.na(metadata)) {
         mt_data <- read_data(metadata, mt_ext)
         data <- merge(mt_data, data, by = "sampleName")
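
The inlined transpose block in read_file() is replaced by a call to the new tranpose_data() helper defined at the end of this revision. As a minimal sketch of what that branch produces, with a made-up two-feature table that is not part of the commit:

ft <- data.frame(
    id = c("M1", "M2"),
    sampleA = c(1.1, 2.2),
    sampleB = c(3.3, 4.4)
)

# Same call the transpose branch now makes: the first column supplies the
# new header and the sample columns become rows.
col_names <- c("sampleName", ft[[1]])
tranpose_data(ft, col_names)
#         sampleName  M1  M2
# sampleA    sampleA 1.1 2.2
# sampleB    sampleB 3.3 4.4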
@@ -131,20 +128,18 @@
     }
 
     return(data)
 }
 
-
 sort_by_injection_order <- function(data) {
     if ("batch" %in% colnames(data)) {
         data <- data[order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE), ]
     } else {
         data <- data[order(data[, "injectionOrder"], decreasing = FALSE), ]
     }
     return(data)
 }
-
 
 verify_input_dataframe <- function(data, required_columns) {
     if (anyNA(data)) {
         stop("Error: dataframe cannot contain NULL values!
         Make sure that your dataframe does not contain empty cells")
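
Apart from the dropped blank lines, sort_by_injection_order() is unchanged: it orders samples by batch first (when a batch column is present) and then by injectionOrder. An illustrative call on invented metadata:

meta <- data.frame(
    sampleName = c("s3", "s1", "s2"),
    batch = c(2, 1, 1),
    injectionOrder = c(3, 1, 2)
)

sort_by_injection_order(meta)
#   sampleName batch injectionOrder
# 2         s1     1              1
# 3         s2     1              2
# 1         s3     2              3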
@@ -192,21 +187,19 @@
         }
     }
     return(data)
 }
 
-
 # Match group labels with [blank/sample/qc] and enumerate them
 enumerate_groups <- function(group) {
     group[grepl("blank", tolower(group))] <- 0
     group[grepl("sample", tolower(group))] <- 1
     group[grepl("qc", tolower(group))] <- 2
 
     return(group)
 }
 
-
 # Create appropriate input for R wavelets function
 get_wf <- function(wavelet_filter, wavelet_length) {
     wf <- paste(wavelet_filter, wavelet_length, sep = "")
 
     # exception to the wavelet function
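
enumerate_groups() matches the group labels case-insensitively by substring; because the input is a character vector, the codes come back as the strings "0", "1" and "2". An illustrative call with invented labels:

enumerate_groups(c("Blank_01", "Sample_A", "QC_pool"))
# [1] "0" "1" "2"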
@@ -214,11 +207,10 @@
         wf <- "haar"
     }
 
     return(wf)
 }
-
 
 # Exclude blanks from a dataframe
 exclude_group <- function(data, group) {
     row_idx_to_exclude <- which(group %in% 0)
     if (length(row_idx_to_exclude) > 0) {
@@ -228,16 +220,64 @@
     } else {
         return(data)
     }
 }
 
-store_data <- function(data, output, ext) {
+store_data <- function(data, feature_output, metadata_output, ext, split_output = FALSE) {
     if (ext == "parquet") {
-        arrow::write_parquet(data, output)
-    } else {
-        write.table(data,
-            file = output, sep = "\t",
-            row.names = FALSE, quote = FALSE
-        )
+        if (split_output == TRUE) {
+            split_df <- split_output(data)
+            arrow::write_parquet(split_df$metadata, metadata_output)
+            arrow::write_parquet(split_df$feature_table, feature_output)
+        } else {
+            arrow::write_parquet(data, feature_output)
+        }
+    } else {
+        if (split_output == TRUE) {
+            split_df <- split_output(data)
+            write.table(split_df$metadata,
+                file = metadata_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+            write.table(split_df$feature_table,
+                file = feature_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+        } else {
+            write.table(data,
+                file = feature_output, sep = "\t",
+                row.names = FALSE, quote = FALSE
+            )
+        }
     }
     cat("Normalization has been completed.\n")
 }
+
+split_output <- function(df) {
+    required_columns_set1 <- c("sampleName", "class", "sampleType", "injectionOrder", "batch")
+    required_columns_set2 <- c("sampleName", "class", "sampleType", "injectionOrder")
+
+    if (all(required_columns_set1 %in% colnames(df))) {
+        metadata_df <- df[, required_columns_set1, drop = FALSE]
+        df <- df[, -c(2:5)]
+    } else if (all(required_columns_set2 %in% colnames(df))) {
+        metadata_df <- df[, required_columns_set2, drop = FALSE]
+        df <- df[, -c(2:4)]
+    } else {
+        stop("Neither set of required columns is present in the dataframe.")
+    }
+
+    # Transpose the feature table
+    col_names <- c("id", as.vector(df[[1]]))
+    feature_table <- tranpose_data(df, col_names)
+
+    return(list(metadata = metadata_df, feature_table = feature_table))
+}
+
+tranpose_data <- function(data, column_names) {
+    t_data <- data[-1]
+    t_data <- t(t_data)
+    tranposed_data <- data.frame(rownames(t_data), t_data)
+    colnames(tranposed_data) <- column_names
+
+    return(tranposed_data)
+}
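
The substantive change in this revision is that store_data() can now split the merged table back into a sample-metadata table and a transposed feature table before writing, in both the parquet and the tab-separated branches. A minimal sketch of the new interface, using toy data and illustrative file names that are not part of the commit:

# Merged table as produced by read_file(): one row per sample,
# metadata columns first, then one column per feature.
data <- data.frame(
    sampleName = c("sampleA", "sampleB"),
    class = c("case", "control"),
    sampleType = c("sample", "sample"),
    injectionOrder = c(1, 2),
    batch = c(1, 1),
    M1 = c(1.1, 3.3),
    M2 = c(2.2, 4.4)
)

# split_output() separates the metadata columns and transposes the rest
# back to a features-in-rows table with an "id" column.
parts <- split_output(data)
parts$metadata       # sampleName, class, sampleType, injectionOrder, batch
parts$feature_table  # id, sampleA, sampleB

# With split_output = TRUE, two files are written instead of one.
store_data(data, "features.tsv", "metadata.tsv", "tsv", split_output = TRUE)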