comparison waveica_wrapper.R @ 0:328710890963 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/waveica commit 98e541a75678eee749261630610b946c258fd5f3"
author recetox
date Wed, 23 Mar 2022 11:35:30 +0000
parents
children b77023c41c76
comparison
equal deleted inserted replaced
-1:000000000000 0:328710890963
#' Batch-wise WaveICA normalization of a sample table.
#'
#' Reads a CSV file, validates and sorts it, passes every non-metadata
#' column to `recetox.waveica::waveica()` and writes the result back into
#' the table.
#'
#' @param data Path of the CSV file to read (with a header row).
#' @param wavelet_filter,wavelet_length Combined by `get_wf()` into the
#'   `wf` argument of `recetox.waveica::waveica()`.
#' @param k Forwarded as `K`.
#' @param t,t2,alpha Forwarded unchanged under the same names.
#' @param exclude_blanks If `TRUE`, rows whose group code is 0 are removed
#'   from the result via `exclude_group()`.
#' @return The input table with its feature columns replaced by the
#'   normalized values.
waveica <- function(data,
                    wavelet_filter,
                    wavelet_length,
                    k,
                    t,
                    t2,
                    alpha,
                    exclude_blanks) {
  metadata_columns <- c(
    "sampleName", "class", "sampleType", "injectionOrder", "batch"
  )

  # load the table handed over by Galaxy and validate it before any use
  data <- read.csv(data, header = TRUE)
  verify_input_dataframe(data, metadata_columns)
  data <- sort_by_injection_order(data)

  # every column that is not metadata is treated as a feature
  is_feature <- !(colnames(data) %in% metadata_columns)
  feature_columns <- colnames(data)[is_feature]
  group <- enumerate_groups(as.character(data$sampleType))

  # run WaveICA and store the normalized features in place
  data[, feature_columns] <- recetox.waveica::waveica(
    data = data[, feature_columns],
    wf = get_wf(wavelet_filter, wavelet_length),
    batch = data$batch,
    group = group,
    K = k,
    t = t,
    t2 = t2,
    alpha = alpha
  )

  # optionally drop the blank samples from the result
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }

  data
}
45
46
#' Single-batch WaveICA normalization of a sample table.
#'
#' Reads a CSV file, validates and sorts it, passes every non-metadata
#' column to `recetox.waveica::waveica_nonbatchwise()` and writes the
#' result back into the table.
#'
#' @param data Path of the CSV file to read (with a header row).
#' @param wavelet_filter,wavelet_length Combined by `get_wf()` into the
#'   `wf` argument of `recetox.waveica::waveica_nonbatchwise()`.
#' @param k Forwarded as `K`.
#' @param alpha,cutoff Forwarded unchanged under the same names.
#' @param exclude_blanks If `TRUE`, rows whose group code is 0 are removed
#'   from the result via `exclude_group()`.
#' @return The input table with its feature columns replaced by the
#'   normalized values.
waveica_singlebatch <- function(data,
                                wavelet_filter,
                                wavelet_length,
                                k,
                                alpha,
                                cutoff,
                                exclude_blanks) {

  # get input from the Galaxy, preprocess data
  data <- read.csv(data, header = TRUE)

  required_columns <- c("sampleName", "class", "sampleType", "injectionOrder")
  # "batch" is not required here, but must not be treated as a feature
  optional_columns <- c("batch")
  verify_input_dataframe(data, required_columns)

  data <- sort_by_injection_order(data)

  # separate data into features, injection order and group
  metadata_columns <- c(required_columns, optional_columns)
  feature_columns <- colnames(data)[!colnames(data) %in% metadata_columns]
  features <- data[, feature_columns]
  injection_order <- data$injectionOrder
  # the group codes are needed below to remove blanks; previously `group`
  # was referenced without ever being defined in this function
  group <- enumerate_groups(as.character(data$sampleType))

  # run WaveICA
  features <- recetox.waveica::waveica_nonbatchwise(
    data = features,
    wf = get_wf(wavelet_filter, wavelet_length),
    injection_order = injection_order,
    K = k,
    alpha = alpha,
    cutoff = cutoff
  )

  data[, feature_columns] <- features

  # remove blanks from dataset
  if (exclude_blanks) {
    data <- exclude_group(data, group)
  }

  data
}
87
88
#' Sort the rows of a sample table by injection order.
#'
#' When the table has a "batch" column, rows are ordered by batch first and
#' by injection order within each batch; otherwise by injection order only.
#'
#' @param data Table with an "injectionOrder" column and optionally a
#'   "batch" column.
#' @return `data` with its rows rearranged in ascending order.
sort_by_injection_order <- function(data) {
  has_batch <- "batch" %in% colnames(data)

  # compute the row permutation once, then apply it in a single place
  row_order <- if (has_batch) {
    order(data[, "batch"], data[, "injectionOrder"], decreasing = FALSE)
  } else {
    order(data[, "injectionOrder"], decreasing = FALSE)
  }

  data[row_order, ]
}
102
103
#' Validate a sample table before normalization.
#'
#' Stops with an error if the table contains any missing value (`NA`) or
#' if any of the required metadata columns is absent. The missing-column
#' error names the absent columns in addition to the full required list.
#'
#' @param data Table to check.
#' @param required_columns Character vector of column names that must be
#'   present in `data`.
#' @return `NULL`, invisibly, when the table passes both checks.
verify_input_dataframe <- function(data, required_columns) {
  if (anyNA(data)) {
    # empty cells in the input file show up as NA after reading
    stop(
      "Error: dataframe cannot contain NULL values!\n",
      "Make sure that your dataframe does not contain empty cells"
    )
  }

  missing_columns <- setdiff(required_columns, colnames(data))
  if (length(missing_columns) > 0) {
    stop(
      "Error: missing metadata!\n",
      "Make sure that the following columns are present in your dataframe: ",
      paste(required_columns, collapse = ", "),
      "\nMissing: ",
      paste(missing_columns, collapse = ", ")
    )
  }

  invisible(NULL)
}
113
114
#' Replace group labels by numeric codes.
#'
#' Labels are matched case-insensitively by substring, in this order:
#' "blank" becomes 0, "sample" becomes 1, "qc" becomes 2. Replacement is
#' sequential, so a label already replaced by an earlier pattern is not
#' matched again by a later one. Labels matching none of the patterns are
#' left untouched.
#'
#' @param group Vector of group labels.
#' @return `group` with the matched labels replaced by their codes.
enumerate_groups <- function(group) {
  # pattern -> code, applied in exactly this order
  codes <- c(blank = 0, sample = 1, qc = 2)

  for (label in names(codes)) {
    matches <- grepl(label, tolower(group))
    group[matches] <- codes[[label]]
  }

  group
}
123
124
#' Build the wavelet filter name from its family and length.
#'
#' The two parts are concatenated without a separator. The combination
#' "d2" is the one exception and is reported as "haar".
#'
#' @param wavelet_filter Filter family prefix, e.g. "d".
#' @param wavelet_length Filter length appended to the prefix.
#' @return The wavelet filter name as a string.
get_wf <- function(wavelet_filter, wavelet_length) {
  filter_name <- paste0(wavelet_filter, wavelet_length)

  # exception to the wavelet function
  if (filter_name == "d2") {
    return("haar")
  }

  filter_name
}
136
137
#' Remove blank samples from a table.
#'
#' Rows whose entry in `group` equals 0 are dropped. When at least one row
#' is dropped, a notice is printed; otherwise the table is returned as is.
#'
#' @param data Table to filter.
#' @param group Vector of group codes, one per row of `data`.
#' @return `data` without the rows whose group code is 0.
exclude_group <- function(data, group) {
  blank_rows <- which(group %in% 0)

  # nothing to remove: hand the table back untouched and stay silent
  if (length(blank_rows) == 0) {
    return(data)
  }

  remaining <- data[-blank_rows, ]
  cat("Blank samples have been excluded from the dataframe.\n")
  remaining
}
149
150
#' Write the normalized table to a tab-separated file.
#'
#' The table is written with a header row, without row names and without
#' quoting; a completion notice is printed afterwards.
#'
#' @param data Table to write.
#' @param output Path of the file to create.
store_data <- function(data, output) {
  write.table(
    data,
    file = output,
    quote = FALSE,
    sep = "\t",
    row.names = FALSE
  )
  cat("Normalization has been completed.\n")
}