comparison waveica_wrapper.R @ 2:6480c6d5fa36 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 77bea6c84c5a932fa203723d4d5fea6147f06505
author recetox
date Fri, 23 Jun 2023 07:27:29 +0000
parents b77023c41c76
children dbbedb14b44c
comparison
equal deleted inserted replaced
1:b77023c41c76 2:6480c6d5fa36
1 read_file <- function(file, metadata, ft_ext, mt_ext) { 1 read_file <- function(file, metadata, ft_ext, mt_ext, transpose) {
2 data <- read_data(file, ft_ext) 2 data <- read_data(file, ft_ext)
3
4 if (transpose) {
5 col_names <- c("sampleName", data[[1]])
6 t_data <- data[-1]
7 t_data <- t(t_data)
8 data <- data.frame(rownames(t_data), t_data)
9 colnames(data) <- col_names
10 }
3 11
4 if (!is.na(metadata)) { 12 if (!is.na(metadata)) {
5 mt_data <- read_data(metadata, mt_ext) 13 mt_data <- read_data(metadata, mt_ext)
6 data <- merge(mt_data, data, by = "sampleName") 14 data <- merge(mt_data, data, by = "sampleName")
7 } 15 }
22 } 30 }
23 31
24 waveica <- function(file, 32 waveica <- function(file,
25 metadata = NA, 33 metadata = NA,
26 ext, 34 ext,
35 transpose = FALSE,
27 wavelet_filter, 36 wavelet_filter,
28 wavelet_length, 37 wavelet_length,
29 k, 38 k,
30 t, 39 t,
31 t2, 40 t2,
32 alpha, 41 alpha,
33 exclude_blanks) { 42 exclude_blanks) {
34
35 # get input from the Galaxy, preprocess data 43 # get input from the Galaxy, preprocess data
36 ext <- strsplit(x = ext, split = "\\,")[[1]] 44 ext <- strsplit(x = ext, split = "\\,")[[1]]
37 45
38 ft_ext <- ext[1] 46 ft_ext <- ext[1]
39 mt_ext <- ext[2] 47 mt_ext <- ext[2]
40 48
41 data <- read_file(file, metadata, ft_ext, mt_ext) 49 data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
42 50
43 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder", "batch") 51 required_columns <- c(
44 verify_input_dataframe(data, required_columns) 52 "sampleName", "class", "sampleType",
53 "injectionOrder", "batch"
54 )
55 data <- verify_input_dataframe(data, required_columns)
45 56
46 data <- sort_by_injection_order(data) 57 data <- sort_by_injection_order(data)
47 58
48 # separate data into features, batch and group 59 # separate data into features, batch and group
49 feature_columns <- colnames(data)[!colnames(data) %in% required_columns] 60 feature_columns <- colnames(data)[!colnames(data) %in% required_columns]
74 } 85 }
75 86
76 waveica_singlebatch <- function(file, 87 waveica_singlebatch <- function(file,
77 metadata = NA, 88 metadata = NA,
78 ext, 89 ext,
90 transpose = FALSE,
79 wavelet_filter, 91 wavelet_filter,
80 wavelet_length, 92 wavelet_length,
81 k, 93 k,
82 alpha, 94 alpha,
83 cutoff, 95 cutoff,
84 exclude_blanks) { 96 exclude_blanks) {
85
86 # get input from the Galaxy, preprocess data 97 # get input from the Galaxy, preprocess data
87 ext <- strsplit(x = ext, split = "\\,")[[1]] 98 ext <- strsplit(x = ext, split = "\\,")[[1]]
88 99
89 ft_ext <- ext[1] 100 ft_ext <- ext[1]
90 mt_ext <- ext[2] 101 mt_ext <- ext[2]
91 102
92 data <- read_file(file, metadata, ft_ext, mt_ext) 103 data <- read_file(file, metadata, ft_ext, mt_ext, transpose)
93 104
94 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder") 105 required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
95 optional_columns <- c("batch") 106 optional_columns <- c("batch")
96 verify_input_dataframe(data, required_columns) 107
108 data <- verify_input_dataframe(data, required_columns)
97 109
98 data <- sort_by_injection_order(data) 110 data <- sort_by_injection_order(data)
99 111
100 feature_columns <- colnames(data)[!colnames(data) %in% c(required_columns, optional_columns)] 112 feature_columns <- colnames(data)[!colnames(data) %in%
113 c(required_columns, optional_columns)]
101 features <- data[, feature_columns] 114 features <- data[, feature_columns]
102 injection_order <- data$injectionOrder 115 injection_order <- data$injectionOrder
103 116
104 # run WaveICA 117 # run WaveICA
105 features <- recetox.waveica::waveica_nonbatchwise( 118 features <- recetox.waveica::waveica_nonbatchwise(
110 alpha = alpha, 123 alpha = alpha,
111 cutoff = cutoff 124 cutoff = cutoff
112 ) 125 )
113 126
114 data[, feature_columns] <- features 127 data[, feature_columns] <- features
115 128 group <- enumerate_groups(as.character(data$sampleType))
116 # remove blanks from dataset 129 # remove blanks from dataset
117 if (exclude_blanks) { 130 if (exclude_blanks) {
118 data <- exclude_group(data, group) 131 data <- exclude_group(data, group)
119 } 132 }
120 133
140 verify_input_dataframe <- function(data, required_columns) { 153 verify_input_dataframe <- function(data, required_columns) {
141 if (anyNA(data)) { 154 if (anyNA(data)) {
142 stop("Error: dataframe cannot contain NULL values! 155 stop("Error: dataframe cannot contain NULL values!
143 Make sure that your dataframe does not contain empty cells") 156 Make sure that your dataframe does not contain empty cells")
144 } else if (!all(required_columns %in% colnames(data))) { 157 } else if (!all(required_columns %in% colnames(data))) {
145 stop("Error: missing metadata! 158 stop(
146 Make sure that the following columns are present in your dataframe: ", paste(required_columns, collapse = ", ")) 159 "Error: missing metadata!
147 } 160 Make sure that the following columns are present in your dataframe: ",
161 paste(required_columns, collapse = ", ")
162 )
163 }
164
165 data <- verify_column_types(data, required_columns)
166
167 return(data)
168 }
169
170 verify_column_types <- function(data, required_columns) {
171 # Specify the column names and their expected types
172 column_types <- list(
173 "sampleName" = c("character", "factor"),
174 "class" = c("character", "factor"),
175 "sampleType" = c("character", "factor"),
176 "injectionOrder" = "integer",
177 "batch" = "integer"
178 )
179
180 column_types <- column_types[required_columns]
181
182 for (col_name in names(data)) {
183 actual_type <- class(data[[col_name]])
184 if (col_name %in% names(column_types)) {
185 expected_types <- column_types[[col_name]]
186
187 if (!actual_type %in% expected_types) {
188 stop(
189 "Column ", col_name, " is of type ", actual_type,
190 " but expected type is ",
191 paste(expected_types, collapse = " or "), "\n"
192 )
193 }
194 } else {
195 if (actual_type != "numeric") {
196 data[[col_name]] <- as.numeric(as.character(data[[col_name]]))
197 }
198 }
199 }
200 return(data)
148 } 201 }
149 202
150 203
151 # Match group labels with [blank/sample/qc] and enumerate them 204 # Match group labels with [blank/sample/qc] and enumerate them
152 enumerate_groups <- function(group) { 205 enumerate_groups <- function(group) {
185 238
186 store_data <- function(data, output, ext) { 239 store_data <- function(data, output, ext) {
187 if (ext == "csv") { 240 if (ext == "csv") {
188 write.csv(data, file = output, row.names = FALSE, quote = FALSE) 241 write.csv(data, file = output, row.names = FALSE, quote = FALSE)
189 } else if (ext == "tsv") { 242 } else if (ext == "tsv") {
190 write.table(data, file = output, sep = "\t", row.names = FALSE, quote = FALSE) 243 write.table(data,
244 file = output, sep = "\t",
245 row.names = FALSE, quote = FALSE
246 )
191 } else { 247 } else {
192 arrow::write_parquet(data, sink = output) 248 arrow::write_parquet(data, sink = output)
193 } 249 }
194 cat("Normalization has been completed.\n") 250 cat("Normalization has been completed.\n")
195 } 251 }