# HG changeset patch # User devteam # Date 1594082067 14400 # Node ID 6c29c7e347e80adc9dccaf366984f5eb9f251c5e # Parent 7a15159140d1f8c9b9d7e1fe6080bfaf5758175e "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/dwt_var_perfeature commit f929353ffb0623f2218d7dec459c7da62f3b0d24" diff -r 7a15159140d1 -r 6c29c7e347e8 execute_dwt_var_perFeature.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/execute_dwt_var_perFeature.R Mon Jul 06 20:34:27 2020 -0400 @@ -0,0 +1,147 @@ +##################################################################### +## plot multiscale wavelet variance +## create null bands by permuting the original data series +## generate plots and table of wavelet variance including p-values +####################################################################### +options(echo = FALSE) +library("wavethresh"); +library("waveslim"); +library("bitops"); + +## to determine if data is properly formatted 2^N observations +is_power2 <- function(x) { + x && !(bitops::bitAnd(x, x - 1)); +} + +## dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function +dwt_var_permut_get_max <- function(data, names, alpha, filter = 1, family = "DaubExPhase", bc = "symmetric", method = "kendall", wf = "haar", boundary = "reflection") { + title <- NULL; + final_pvalue <- NULL; + j <- NULL; + scale <- NULL; + out <- NULL; + + print(class(data)); + print(names); + print(alpha); + + par(mar = c(5, 4, 4, 3), oma = c(4, 4, 3, 2), xaxt = "s", cex = 1, las = 1); + + title <- c("Wavelet", "Variance", "Pvalue", "Test"); + print(title); + + for (i in seq_len(length(names))) { + temp <- NULL; + results <- NULL; + wave1_dwt <- NULL; + + ## if data fails formatting check, do something + print(is.numeric(as.matrix(data)[, i])); + if (!is.numeric(as.matrix(data)[, i])) { + stop("data must be a numeric vector"); + } + print(length(as.matrix(data)[, i])); + print(is_power2(length(as.matrix(data)[, i]))); + if (!is_power2(length(as.matrix(data)[, i]))) { + stop("data length must be a power of two"); + } + j <- wavethresh::wd(as.matrix(data)[, i], filter.number = filter, family = family, bc = bc)$nlevels; + print(j); + temp <- vector(length = j); + wave1_dwt <- waveslim::dwt(as.matrix(data)[, i], wf = wf, j, boundary = boundary); + + temp <- waveslim::wave.variance(wave1_dwt)[- (j + 1), 1]; + print(temp); + + ##permutations code : + feature1 <- NULL; + null <- NULL; + var_lower <- NULL; + limit_lower <- NULL; + var_upper <- NULL; + limit_upper <- NULL; + med <- NULL; + + limit_lower <- alpha / 2 * 1000; + print(limit_lower); + limit_upper <- (1 - alpha / 2) * 1000; + print(limit_upper); + + feature1 <- as.matrix(data)[, i]; + for (k in 1:1000) { + nk_1 <- NULL; + null_levels <- NULL; + var <- NULL; + null_wave1 <- NULL; + + nk_1 <- sample(feature1, length(feature1), replace = FALSE); + null_levels <- wavethresh::wd(nk_1, filter.number = filter, family = family, bc = bc)$nlevels; + var <- vector(length = length(null_levels)); + null_wave1 <- waveslim::dwt(nk_1, wf = wf, j, boundary = boundary); + var <- waveslim::wave.variance(null_wave1)[- (null_levels + 1), 1]; + null <- rbind(null, var); + } + null <- apply(null, 2, sort, na.last = TRUE); + var_lower <- null[limit_lower, ]; + var_upper <- null[limit_upper, ]; + med <- (apply(null, 2, median, na.rm = TRUE)); + + ## plot + results <- cbind(temp, var_lower, var_upper); + print(results); + matplot(results, type = "b", pch = "*", lty = 1, col = c(1, 2, 2), xaxt = "n", xlab = "Wavelet Scale", ylab = "Wavelet variance"); + mtext(names[i], side = 3, line = 0.5, cex = 1); + axis(1, at = 1:j, labels = c(2 ^ (0:(j - 1))), las = 3, cex.axis = 1); + + ## get pvalues by comparison to null distribution + for (m in seq_len(length(temp))) { + print(paste("scale", m, sep = " ")); + print(paste("var", temp[m], sep = " ")); + print(paste("med", med[m], sep = " ")); + pv <- NULL; + tail <- NULL; + scale <- NULL; + scale <- 2 ^ (m - 1); + if (temp[m] >= med[m]) { + ## R tail test + print("R"); + tail <- "R"; + pv <- (length(which(null[, m] >= temp[m]))) / (length(na.exclude(null[, m]))); + } else { + if (temp[m] < med[m]) { + ## L tail test + print("L"); + tail <- "L"; + pv <- (length(which(null[, m] <= temp[m]))) / (length(na.exclude(null[, m]))); + } + } + print(pv); + out <- rbind(out, c(paste("Scale", scale, sep = "_"), format(temp[m], digits = 3), pv, tail)); + } + final_pvalue <- rbind(final_pvalue, out); + } + colnames(final_pvalue) <- title; + return(final_pvalue); +} + +## execute +## read in data +args <- commandArgs(trailingOnly = TRUE) + +data_test <- NULL; +final <- NULL; +sub <- NULL; +sub_names <- NULL; +data_test <- read.delim(args[1], header = FALSE); +pdf(file = args[5], width = 11, height = 8) +for (f in strsplit(args[2], ",")) { + f <- as.integer(f) + if (f > ncol(data_test)) + stop(paste("column", f, "doesn't exist")); + sub <- data_test[, f]; + sub_names <- colnames(data_test)[f]; + final <- rbind(final, dwt_var_permut_get_max(sub, sub_names, as.double(args[3]))); +} + +dev.off(); +write.table(final, file = args[4], sep = "\t", quote = FALSE, row.names = FALSE); diff -r 7a15159140d1 -r 6c29c7e347e8 execute_dwt_var_perFeature.pl --- a/execute_dwt_var_perFeature.pl Tue Aug 14 10:32:11 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,199 +0,0 @@ -#!/usr/bin/perl -w -# Author: Erika Kvikstad - -use warnings; -use IO::Handle; -use POSIX qw(floor ceil); - -$usage = "execute_dwt_var_perFeature.pl [TABULAR.in] [FEATURE] [ALPHA] [TABULAR.out] [PDF.out] \n"; -die $usage unless @ARGV == 5; - -#get the input arguments -my $inputFile = $ARGV[0]; -my @features = split(/,/,$ARGV[1]); -my $features_count = scalar(@features); -my $alpha = $ARGV[2]; -my $outFile1 = $ARGV[3]; -my $outFile2 = $ARGV[4]; - -open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n"); -open (OUTPUT2, ">", $outFile1) || die("Could not open file $outFile1 \n"); -open (OUTPUT3, ">", $outFile2) || die("Could not open file $outFile2 \n"); -#open (ERROR, ">", "error.txt") or die ("Could not open file error.txt \n"); - -# choosing meaningful names for the output files -$pvalue = $outFile1; -$pdf = $outFile2; - -# write R script -$r_script = "get_dwt_varPermut.r"; - -open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n"; - -print Rcmd " - ###################################################################### - # plot multiscale wavelet variance - # create null bands by permuting the original data series - # generate plots and table of wavelet variance including p-values - ###################################################################### - options(echo = FALSE) - #library(\"Rwave\"); - #library(\"wavethresh\"); - #library(\"waveslim\"); - # turn off diagnostics for de-bugging only, turn back on for functional tests on test - suppressMessages(require(\"Rwave\",quietly=TRUE,warn.conflicts = FALSE)); - suppressMessages(require(\"wavethresh\",quietly=TRUE,warn.conflicts = FALSE)); - suppressMessages(require(\"waveslim\",quietly=TRUE,warn.conflicts = FALSE)); - suppressMessages(require(\"bitops\",quietly=TRUE,warn.conflicts = FALSE)); - - # to determine if data is properly formatted 2^N observations - is.power2<- function(x){x && !(bitAnd(x,x - 1));} - - # dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function - dwt_var_permut_getMax <- function(data, names, alpha, filter = 1,family=\"DaubExPhase\", bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") { - max_var = NULL; - matrix = NULL; - title = NULL; - final_pvalue = NULL; - J = NULL; - scale = NULL; - out = NULL; - - print(class(data)); - print(names); - print(alpha); - - par(mar=c(5,4,4,3),oma = c(4, 4, 3, 2), xaxt = \"s\", cex = 1, las = 1); - - title<-c(\"Wavelet\",\"Variance\",\"Pvalue\",\"Test\"); - print(title); - - for(i in 1:length(names)){ - temp = NULL; - results = NULL; - wave1.dwt = NULL; - - # if data fails formatting check, do something - - print(is.numeric(as.matrix(data)[, i])); - if(!is.numeric(as.matrix(data)[, i])) - stop(\"data must be a numeric vector\"); - - print(length(as.matrix(data)[, i])); - print(is.power2(length(as.matrix(data)[, i]))); - if(!is.power2(length(as.matrix(data)[, i]))) - stop(\"data length must be a power of two\"); - - - J <- wd(as.matrix(data)[, i], filter.number = filter, family=family, bc = bc)\$nlevels; - print(J); - temp <- vector(length = J); - wave1.dwt <- dwt(as.matrix(data)[, i], wf = wf, J, boundary = boundary); - #print(wave1.dwt); - - temp <- wave.variance(wave1.dwt)[-(J+1), 1]; - print(temp); - - #permutations code : - feature1 = NULL; - null = NULL; - var_lower=limit_lower=NULL; - var_upper=limit_upper=NULL; - med = NULL; - - limit_lower = alpha/2*1000; - print(limit_lower); - limit_upper = (1-alpha/2)*1000; - print(limit_upper); - - feature1 = as.matrix(data)[,i]; - for (k in 1:1000) { - nk_1 = NULL; - null.levels = NULL; - var = NULL; - null_wave1 = NULL; - - nk_1 = sample(feature1, length(feature1), replace = FALSE); - null.levels <- wd(nk_1, filter.number = filter,family=family ,bc = bc)\$nlevels; - var <- vector(length = length(null.levels)); - null_wave1 <- dwt(nk_1, wf = wf, J, boundary = boundary); - var<- wave.variance(null_wave1)[-(null.levels+1), 1]; - null= rbind(null, var); - } - null <- apply(null, 2, sort, na.last = TRUE); - var_lower <- null[limit_lower, ]; - var_upper <- null[limit_upper, ]; - med <- (apply(null, 2, median, na.rm = TRUE)); - - # plot - results <- cbind(temp, var_lower, var_upper); - print(results); - matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2),xaxt='n',xlab=\"Wavelet Scale\",ylab=\"Wavelet variance\" ); - mtext(names[i], side = 3, line = 0.5, cex = 1); - axis(1, at = 1:J , labels=c(2^(0:(J-1))), las = 3, cex.axis = 1); - - # get pvalues by comparison to null distribution - #out <- (names[i]); - for (m in 1:length(temp)){ - print(paste(\"scale\", m, sep = \" \")); - print(paste(\"var\", temp[m], sep = \" \")); - print(paste(\"med\", med[m], sep = \" \")); - pv = tail =scale = NULL; - scale=2^(m-1); - #out <- c(out, format(temp[m], digits = 3)); - if (temp[m] >= med[m]){ - # R tail test - print(\"R\"); - tail <- \"R\"; - pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m]))); - - } else { - if (temp[m] < med[m]){ - # L tail test - print(\"L\"); - tail <- \"L\"; - pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m]))); - } - } - print(pv); - out<-rbind(out,c(paste(\"Scale\", scale, sep=\"_\"),format(temp[m], digits = 3),pv,tail)); - } - final_pvalue <-rbind(final_pvalue, out); - } - colnames(final_pvalue) <- title; - return(final_pvalue); -}\n"; - -print Rcmd " -# execute -# read in data -data_test = final = NULL; -sub = sub_names = NULL; -data_test <- read.delim(\"$inputFile\",header=FALSE); -pdf(file = \"$pdf\", width = 11, height = 8)\n"; - -for ($x=0;$x<$features_count;$x++){ - $feature=$features[$x]; -print Rcmd " - if ($feature > ncol(data_test)) - stop(\"column $feature doesn't exist\"); - sub<-data_test[,$feature]; - #sub_names <- colnames(data_test); - sub_names<-colnames(data_test)[$feature]; - final <- rbind(final,dwt_var_permut_getMax(sub, sub_names,$alpha));\n"; -} - -print Rcmd " - - dev.off(); - write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE); - -#eof\n"; - -close Rcmd; -system("R --no-restore --no-save --no-readline < $r_script > $r_script.out"); - -#close the input and output and error files -close(OUTPUT3); -close(OUTPUT2); -close(INPUT); diff -r 7a15159140d1 -r 6c29c7e347e8 execute_dwt_var_perFeature.xml --- a/execute_dwt_var_perFeature.xml Tue Aug 14 10:32:11 2018 -0400 +++ b/execute_dwt_var_perFeature.xml Mon Jul 06 20:34:27 2020 -0400 @@ -1,18 +1,15 @@ - + using Discrete Wavelet Transfoms - perl - r-base r-bitops r-waveslim r-wavethresh - r-rwave - - perl '$__tool_directory__/execute_dwt_var_perFeature.pl' - '$inputFile' - $feature - $alpha + + Rscript --vanilla '$__tool_directory__/execute_dwt_var_perFeature.R' + '$inputFile' + '$feature' + '$alpha' '$outputFile1' '$outputFile2' @@ -23,8 +20,8 @@ - - + +