Mercurial > repos > devteam > dwt_cor_avb_all
comparison execute_dwt_cor_aVb_all.R @ 2:e01e8a9a82f4 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dwt_cor_avb_all commit f929353ffb0623f2218d7dec459c7da62f3b0d24"
author | devteam |
---|---|
date | Mon, 06 Jul 2020 20:31:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:8564f6927b87 | 2:e01e8a9a82f4 |
---|---|
1 ################################################################################# | |
2 ## code to do all correlation tests of form: motif(a) vs. motif(b) | |
3 ## add code to create null bands by permuting the original data series | |
4 ## generate plots and table matrix of correlation coefficients including p-values | |
5 ################################################################################# | |
6 library("wavethresh"); | |
7 library("waveslim"); | |
8 | |
9 options(echo = FALSE) | |
10 | |
11 ## normalize data | |
12 norm <- function(data) { | |
13 v <- (data - mean(data)) / sd(data); | |
14 if (sum(is.na(v)) >= 1) { | |
15 v <- data; | |
16 } | |
17 return(v); | |
18 } | |
19 | |
20 dwt_cor <- function(data_short, names_short, data_long, names_long, test, pdf, table, filter = 4, bc = "symmetric", method = "kendall", wf = "haar", boundary = "reflection") { | |
21 print(test); | |
22 print(pdf); | |
23 print(table); | |
24 | |
25 pdf(file = pdf); | |
26 final_pvalue <- NULL; | |
27 title <- NULL; | |
28 | |
29 short_levels <- wavethresh::wd(data_short[, 1], filter.number = filter, bc = bc)$nlevels; | |
30 title <- c("motif1", "motif2"); | |
31 for (i in 1:short_levels) { | |
32 title <- c(title, paste(i, "cor", sep = "_"), paste(i, "pval", sep = "_")); | |
33 } | |
34 print(title); | |
35 | |
36 ## normalize the raw data | |
37 data_short <- apply(data_short, 2, norm); | |
38 data_long <- apply(data_long, 2, norm); | |
39 | |
40 ## loop to compare a vs b | |
41 for (i in seq_len(length(names_short))) { | |
42 for (j in seq_len(i - 1)) { | |
43 ## Kendall Tau | |
44 ## DWT wavelet correlation function | |
45 ## include significance to compare | |
46 wave1_dwt <- NULL; | |
47 wave2_dwt <- NULL; | |
48 tau_dwt <- NULL; | |
49 out <- NULL; | |
50 | |
51 print(names_short[i]); | |
52 print(names_long[j]); | |
53 | |
54 ## need exit if not comparing motif(a) vs motif(a) | |
55 if (names_short[i] == names_long[j]) { | |
56 stop(paste("motif", names_short[i], "is the same as", names_long[j], sep = " ")); | |
57 } | |
58 else { | |
59 wave1_dwt <- waveslim::dwt(data_short[, i], wf = wf, short_levels, boundary = boundary); | |
60 wave2_dwt <- waveslim::dwt(data_long[, j], wf = wf, short_levels, boundary = boundary); | |
61 tau_dwt <- vector(length = short_levels) | |
62 | |
63 ## perform cor test on wavelet coefficients per scale | |
64 for (level in 1:short_levels) { | |
65 w1_level <- NULL; | |
66 w2_level <- NULL; | |
67 w1_level <- (wave1_dwt[[level]]); | |
68 w2_level <- (wave2_dwt[[level]]); | |
69 tau_dwt[level] <- cor.test(w1_level, w2_level, method = method)$estimate; | |
70 } | |
71 | |
72 ## CI bands by permutation of time series | |
73 feature1 <- NULL; | |
74 feature2 <- NULL; | |
75 feature1 <- data_short[, i]; | |
76 feature2 <- data_long[, j]; | |
77 null <- NULL; | |
78 results <- NULL; | |
79 med <- NULL; | |
80 cor_25 <- NULL; | |
81 cor_975 <- NULL; | |
82 | |
83 for (k in 1:1000) { | |
84 nk_1 <- NULL; | |
85 nk_2 <- NULL; | |
86 null_levels <- NULL; | |
87 cor <- NULL; | |
88 null_wave1 <- NULL; | |
89 null_wave2 <- NULL; | |
90 | |
91 nk_1 <- sample(feature1, length(feature1), replace = FALSE); | |
92 nk_2 <- sample(feature2, length(feature2), replace = FALSE); | |
93 null_levels <- wavethresh::wd(nk_1, filter.number = filter, bc = bc)$nlevels; | |
94 cor <- vector(length = null_levels); | |
95 null_wave1 <- waveslim::dwt(nk_1, wf = wf, short_levels, boundary = boundary); | |
96 null_wave2 <- waveslim::dwt(nk_2, wf = wf, short_levels, boundary = boundary); | |
97 | |
98 for (level in 1:null_levels) { | |
99 null_level1 <- NULL; | |
100 null_level2 <- NULL; | |
101 null_level1 <- (null_wave1[[level]]); | |
102 null_level2 <- (null_wave2[[level]]); | |
103 cor[level] <- cor.test(null_level1, null_level2, method = method)$estimate; | |
104 } | |
105 null <- rbind(null, cor); | |
106 } | |
107 | |
108 null <- apply(null, 2, sort, na.last = TRUE); | |
109 cor_25 <- null[25, ]; | |
110 cor_975 <- null[975, ]; | |
111 med <- (apply(null, 2, median, na.rm = TRUE)); | |
112 | |
113 ## plot | |
114 results <- cbind(tau_dwt, cor_25, cor_975); | |
115 matplot(results, type = "b", pch = "*", lty = 1, col = c(1, 2, 2), ylim = c(-1, 1), xlab = "Wavelet Scale", ylab = "Wavelet Correlation Kendall's Tau", main = (paste(test, names_short[i], "vs.", names_long[j], sep = " ")), cex.main = 0.75); | |
116 abline(h = 0); | |
117 | |
118 ## get pvalues by comparison to null distribution | |
119 ### modify pval calculation for error type II of T test #### | |
120 out <- c(names_short[i], names_long[j]); | |
121 for (m in seq_len(length(tau_dwt))) { | |
122 print(m); | |
123 print(tau_dwt[m]); | |
124 out <- c(out, format(tau_dwt[m], digits = 3)); | |
125 pv <- NULL; | |
126 if (is.na(tau_dwt[m])) { | |
127 pv <- "NA"; | |
128 } | |
129 else{ | |
130 if (tau_dwt[m] >= med[m]) { | |
131 ## R tail test | |
132 pv <- (length(which(null[, m] >= tau_dwt[m]))) / (length(na.exclude(null[, m]))); | |
133 } | |
134 else { | |
135 if (tau_dwt[m] < med[m]) { | |
136 ## L tail test | |
137 pv <- (length(which(null[, m] <= tau_dwt[m]))) / (length(na.exclude(null[, m]))); | |
138 } | |
139 } | |
140 } | |
141 out <- c(out, pv); | |
142 print(pv); | |
143 } | |
144 final_pvalue <- rbind(final_pvalue, out); | |
145 print(out); | |
146 } | |
147 } | |
148 } | |
149 colnames(final_pvalue) <- title; | |
150 write.table(final_pvalue, file = table, sep = "\t", quote = FALSE, row.names = FALSE) | |
151 dev.off(); | |
152 } | |
153 | |
154 ## execute | |
155 ## read in data | |
156 args <- commandArgs(trailingOnly = TRUE) | |
157 | |
158 input_data1 <- NULL; | |
159 input_data2 <- NULL; | |
160 input_data_short1 <- NULL; | |
161 input_data_short2 <- NULL; | |
162 input_data_names_short1 <- NULL; | |
163 input_data_names_short2 <- NULL; | |
164 | |
165 input_data1 <- read.delim(args[1]); | |
166 input_data_short1 <- input_data1[, +c(seq_len(ncol(input_data1)))]; | |
167 input_data_names_short1 <- colnames(input_data_short1); | |
168 | |
169 input_data2 <- read.delim(args[2]); | |
170 input_data_short2 <- input_data2[, +c(seq_len(ncol(input_data2)))]; | |
171 input_data_names_short2 <- colnames(input_data_short2); | |
172 | |
173 # cor test for motif(a) in input_data1 vs motif(b) in input_data2 | |
174 dwt_cor(input_data_short1, input_data_names_short1, input_data_short2, input_data_names_short2, test = "cor_aVb_all", pdf = args[3], table = args[4]); | |
175 print("done with the correlation test"); |