Mercurial > repos > devteam > dwt_var_perfeature

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/execute_dwt_var_perFeature.R	Mon Jul 06 20:34:27 2020 -0400
@@ -0,0 +1,147 @@
+#####################################################################
+## plot multiscale wavelet variance
+## create null bands by permuting the original data series
+## generate plots and table of wavelet variance including p-values
+#######################################################################
+options(echo = FALSE)
+library("wavethresh");
+library("waveslim");
+library("bitops");
+
+## to determine if data is properly formatted 2^N observations
+is_power2 <- function(x) {
+    x && !(bitops::bitAnd(x, x - 1));
+}
+
+## dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function
+dwt_var_permut_get_max <- function(data, names, alpha, filter = 1, family = "DaubExPhase", bc = "symmetric", method = "kendall", wf = "haar", boundary = "reflection") {
+    title <- NULL;
+    final_pvalue <- NULL;
+    j <- NULL;
+    scale <- NULL;
+    out <- NULL;
+
+    print(class(data));
+    print(names);
+    print(alpha);
+
+    par(mar = c(5, 4, 4, 3), oma = c(4, 4, 3, 2), xaxt = "s", cex = 1, las = 1);
+
+    title <- c("Wavelet", "Variance", "Pvalue", "Test");
+    print(title);
+
+    for (i in seq_len(length(names))) {
+        temp <- NULL;
+        results <- NULL;
+        wave1_dwt <- NULL;
+
+        ## if data fails formatting check, do something
+        print(is.numeric(as.matrix(data)[, i]));
+        if (!is.numeric(as.matrix(data)[, i])) {
+            stop("data must be a numeric vector");
+        }
+        print(length(as.matrix(data)[, i]));
+        print(is_power2(length(as.matrix(data)[, i])));
+        if (!is_power2(length(as.matrix(data)[, i]))) {
+            stop("data length must be a power of two");
+        }
+        j <- wavethresh::wd(as.matrix(data)[, i], filter.number = filter, family = family, bc = bc)$nlevels;
+        print(j);
+        temp <- vector(length = j);
+        wave1_dwt <- waveslim::dwt(as.matrix(data)[, i], wf = wf, j, boundary = boundary);
+
+        temp <- waveslim::wave.variance(wave1_dwt)[- (j + 1), 1];
+        print(temp);
+
+        ##permutations code :
+        feature1 <- NULL;
+        null <- NULL;
+        var_lower <- NULL;
+        limit_lower <- NULL;
+        var_upper <- NULL;
+        limit_upper <- NULL;
+        med <- NULL;
+
+        limit_lower <- alpha / 2 * 1000;
+        print(limit_lower);
+        limit_upper <- (1 - alpha / 2) * 1000;
+        print(limit_upper);
+
+        feature1 <- as.matrix(data)[, i];
+        for (k in 1:1000) {
+            nk_1 <- NULL;
+            null_levels <- NULL;
+            var <- NULL;
+            null_wave1 <- NULL;
+
+            nk_1 <- sample(feature1, length(feature1), replace = FALSE);
+            null_levels <- wavethresh::wd(nk_1, filter.number = filter, family = family, bc = bc)$nlevels;
+            var <- vector(length = length(null_levels));
+            null_wave1 <- waveslim::dwt(nk_1, wf = wf, j, boundary = boundary);
+            var <- waveslim::wave.variance(null_wave1)[- (null_levels + 1), 1];
+            null <- rbind(null, var);
+        }
+        null <- apply(null, 2, sort, na.last = TRUE);
+        var_lower <- null[limit_lower, ];
+        var_upper <- null[limit_upper, ];
+        med <- (apply(null, 2, median, na.rm = TRUE));
+
+        ## plot
+        results <- cbind(temp, var_lower, var_upper);
+        print(results);
+        matplot(results, type = "b", pch = "*", lty = 1, col = c(1, 2, 2), xaxt = "n", xlab = "Wavelet Scale", ylab = "Wavelet variance");
+        mtext(names[i], side = 3, line = 0.5, cex = 1);
+        axis(1, at = 1:j, labels = c(2 ^ (0:(j - 1))), las = 3, cex.axis = 1);
+
+        ## get pvalues by comparison to null distribution
+        for (m in seq_len(length(temp))) {
+            print(paste("scale", m, sep = " "));
+            print(paste("var", temp[m], sep = " "));
+            print(paste("med", med[m], sep = " "));
+            pv <- NULL;
+            tail <- NULL;
+            scale <- NULL;
+            scale <- 2 ^ (m - 1);
+            if (temp[m] >= med[m]) {
+                ## R tail test
+                print("R");
+                tail <- "R";
+                pv <- (length(which(null[, m] >= temp[m]))) / (length(na.exclude(null[, m])));
+            } else {
+                if (temp[m] < med[m]) {
+                    ## L tail test
+                    print("L");
+                    tail <- "L";
+                    pv <- (length(which(null[, m] <= temp[m]))) / (length(na.exclude(null[, m])));
+                }
+            }
+            print(pv);
+            out <- rbind(out, c(paste("Scale", scale, sep = "_"), format(temp[m], digits = 3), pv, tail));
+        }
+        final_pvalue <- rbind(final_pvalue, out);
+    }
+    colnames(final_pvalue) <- title;
+    return(final_pvalue);
+}
+
+## execute
+## read in data
+args <- commandArgs(trailingOnly = TRUE)
+
+data_test <- NULL;
+final <- NULL;
+sub <- NULL;
+sub_names <- NULL;
+data_test <- read.delim(args[1], header = FALSE);
+pdf(file = args[5], width = 11, height = 8)
+for (f in strsplit(args[2], ",")) {
+    f <- as.integer(f)
+    if (f > ncol(data_test))
+        stop(paste("column", f, "doesn't exist"));
+    sub <- data_test[, f];
+    sub_names <- colnames(data_test)[f];
+    final <- rbind(final, dwt_var_permut_get_max(sub, sub_names, as.double(args[3])));
+}
+
+dev.off();
+write.table(final, file = args[4], sep = "\t", quote = FALSE, row.names = FALSE);
--- a/execute_dwt_var_perFeature.pl	Tue Aug 14 10:32:11 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,199 +0,0 @@
-#!/usr/bin/perl -w
-# Author: Erika Kvikstad
-
-use warnings;
-use IO::Handle;
-use POSIX qw(floor ceil);
-
-$usage = "execute_dwt_var_perFeature.pl [TABULAR.in] [FEATURE] [ALPHA] [TABULAR.out] [PDF.out] \n";
-die $usage unless @ARGV == 5;
-
-#get the input arguments
-my $inputFile = $ARGV[0];
-my @features = split(/,/,$ARGV[1]);
-my $features_count = scalar(@features);
-my $alpha = $ARGV[2];
-my $outFile1 = $ARGV[3];
-my $outFile2 = $ARGV[4];
-
-open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
-open (OUTPUT2, ">", $outFile1) || die("Could not open file $outFile1 \n");
-open (OUTPUT3, ">", $outFile2) || die("Could not open file $outFile2 \n");
-#open (ERROR,  ">", "error.txt")  or die ("Could not open file error.txt \n");
-
-# choosing meaningful names for the output files
-$pvalue = $outFile1;
-$pdf = $outFile2;
-
-# write R script
-$r_script = "get_dwt_varPermut.r";
-
-open(Rcmd, ">", "$r_script") or die "Cannot open $r_script \n\n";
-
-print Rcmd "
-	######################################################################
-	# plot multiscale wavelet variance
-	# create null bands by permuting the original data series
-	# generate plots and table of wavelet variance including p-values
-	######################################################################
-	options(echo = FALSE)
-	#library(\"Rwave\");
-	#library(\"wavethresh\");
-	#library(\"waveslim\");
-	# turn off diagnostics for de-bugging only, turn back on for functional tests on test
-	suppressMessages(require(\"Rwave\",quietly=TRUE,warn.conflicts = FALSE));
-	suppressMessages(require(\"wavethresh\",quietly=TRUE,warn.conflicts = FALSE));
-	suppressMessages(require(\"waveslim\",quietly=TRUE,warn.conflicts = FALSE));
-	suppressMessages(require(\"bitops\",quietly=TRUE,warn.conflicts = FALSE));
-
-	# to determine if data is properly formatted 2^N observations
-	is.power2<- function(x){x && !(bitAnd(x,x - 1));}
-
-	# dwt : discrete wavelet transform using Haar wavelet filter, simplest wavelet function but later can modify to let user-define the wavelet filter function
-	dwt_var_permut_getMax <- function(data, names, alpha, filter = 1,family=\"DaubExPhase\", bc = \"symmetric\", method = \"kendall\", wf = \"haar\", boundary = \"reflection\") {
-		max_var = NULL;
-    		matrix = NULL;
-		title = NULL;
-    		final_pvalue = NULL;
-		J = NULL;
-		scale = NULL;
-		out = NULL;
-
-	print(class(data));
-    	print(names);
-	print(alpha);
-
-	par(mar=c(5,4,4,3),oma = c(4, 4, 3, 2), xaxt = \"s\", cex = 1, las = 1);
-
-	title<-c(\"Wavelet\",\"Variance\",\"Pvalue\",\"Test\");
-	print(title);
-
-    	for(i in 1:length(names)){
-		temp = NULL;
-		results = NULL;
-		wave1.dwt = NULL;
-
-		# if data fails formatting check, do something
-
-		print(is.numeric(as.matrix(data)[, i]));
-		if(!is.numeric(as.matrix(data)[, i]))
-			stop(\"data must be a numeric vector\");
-
-		print(length(as.matrix(data)[, i]));
-		print(is.power2(length(as.matrix(data)[, i])));
-		if(!is.power2(length(as.matrix(data)[, i])))
-			stop(\"data length must be a power of two\");
-
-
-    		J <- wd(as.matrix(data)[, i], filter.number = filter, family=family, bc = bc)\$nlevels;
-		print(J);
-            	temp <- vector(length = J);
-               	wave1.dwt <- dwt(as.matrix(data)[, i], wf = wf, J, boundary = boundary);
-		#print(wave1.dwt);
-
-                temp <- wave.variance(wave1.dwt)[-(J+1), 1];
-		print(temp);
-
-                #permutations code :
-                feature1 = NULL;
-		null = NULL;
-		var_lower=limit_lower=NULL;
-		var_upper=limit_upper=NULL;
-		med = NULL;
-
-		limit_lower = alpha/2*1000;
-		print(limit_lower);
-		limit_upper = (1-alpha/2)*1000;
-		print(limit_upper);
-
-		feature1 = as.matrix(data)[,i];
-                for (k in 1:1000) {
-			nk_1 = NULL;
-			null.levels = NULL;
-			var = NULL;
-			null_wave1 = NULL;
-
-                       	nk_1 = sample(feature1, length(feature1), replace = FALSE);
-                       	null.levels <- wd(nk_1, filter.number = filter,family=family ,bc = bc)\$nlevels;
-                       	var <- vector(length = length(null.levels));
-                       	null_wave1 <- dwt(nk_1, wf = wf, J, boundary = boundary);
-                       	var<- wave.variance(null_wave1)[-(null.levels+1), 1];
-                       	null= rbind(null, var);
-               }
-               null <- apply(null, 2, sort, na.last = TRUE);
-               var_lower <- null[limit_lower, ];
-               var_upper <- null[limit_upper, ];
-               med <- (apply(null, 2, median, na.rm = TRUE));
-
-               # plot
-               results <- cbind(temp, var_lower, var_upper);
-		print(results);
-                matplot(results, type = \"b\", pch = \"*\", lty = 1, col = c(1, 2, 2),xaxt='n',xlab=\"Wavelet Scale\",ylab=\"Wavelet variance\" );
-		mtext(names[i], side = 3, line = 0.5, cex = 1);
-		axis(1, at = 1:J , labels=c(2^(0:(J-1))), las = 3, cex.axis = 1);
-
-                # get pvalues by comparison to null distribution
-		#out <- (names[i]);
-                for (m in 1:length(temp)){
-                    	print(paste(\"scale\", m, sep = \" \"));
-                       	print(paste(\"var\", temp[m], sep = \" \"));
-                       	print(paste(\"med\", med[m], sep = \" \"));
-                       	pv = tail =scale = NULL;
-			scale=2^(m-1);
-			#out <- c(out, format(temp[m], digits = 3));
-                       	if (temp[m] >= med[m]){
-                       		# R tail test
-                           	print(\"R\");
-	                       	tail <- \"R\";
-                            	pv <- (length(which(null[, m] >= temp[m])))/(length(na.exclude(null[, m])));
-
-                       	} else {
-                       		if (temp[m] < med[m]){
-                               		# L tail test
-                               		print(\"L\");
-	                            	tail <- \"L\";
-                                	pv <- (length(which(null[, m] <= temp[m])))/(length(na.exclude(null[, m])));
-                        	}
-			}
-			print(pv);
-			out<-rbind(out,c(paste(\"Scale\", scale, sep=\"_\"),format(temp[m], digits = 3),pv,tail));
-                }
-		final_pvalue <-rbind(final_pvalue, out);
-  	}
-	colnames(final_pvalue) <- title;
-    	return(final_pvalue);
-}\n";
-
-print Rcmd "
-# execute
-# read in data
-data_test = final = NULL;
-sub = sub_names = NULL;
-data_test <- read.delim(\"$inputFile\",header=FALSE);
-pdf(file = \"$pdf\", width = 11, height = 8)\n";
-
-for ($x=0;$x<$features_count;$x++){
-	$feature=$features[$x];
-print Rcmd "
-	if ($feature > ncol(data_test))
-		stop(\"column $feature doesn't exist\");
-	sub<-data_test[,$feature];
-	#sub_names <- colnames(data_test);
-	sub_names<-colnames(data_test)[$feature];
-	final <- rbind(final,dwt_var_permut_getMax(sub, sub_names,$alpha));\n";
-}
-
-print Rcmd "
-
-	dev.off();
-	write.table(final, file = \"$pvalue\", sep = \"\\t\", quote = FALSE, row.names = FALSE);
-
-#eof\n";
-
-close Rcmd;
-system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
-
-#close the input and output and error files
-close(OUTPUT3);
-close(OUTPUT2);
-close(INPUT);
--- a/execute_dwt_var_perFeature.xml	Tue Aug 14 10:32:11 2018 -0400
+++ b/execute_dwt_var_perFeature.xml	Mon Jul 06 20:34:27 2020 -0400
@@ -1,18 +1,15 @@
-<tool id="dwt_var1" name="Wavelet variance" version="1.0.1">
+<tool id="dwt_var1" name="Wavelet variance" version="1.0.2">
     <description>using Discrete Wavelet Transfoms</description>
     <requirements>
-        <requirement type="package" version="5.26.2.1">perl</requirement>
-        <requirement type="package" version="3.4.1">r-base</requirement>
         <requirement type="package" version="1.0_6">r-bitops</requirement>
         <requirement type="package" version="1.7.5">r-waveslim</requirement>
         <requirement type="package" version="4.6.8">r-wavethresh</requirement>
-        <requirement type="package" version="2.4_5">r-rwave</requirement>
     </requirements>
-    <command>
-      perl '$__tool_directory__/execute_dwt_var_perFeature.pl'
-      '$inputFile'
-        $feature
-        $alpha
+    <command detect_errors="exit_code">
+      Rscript --vanilla '$__tool_directory__/execute_dwt_var_perFeature.R'
+        '$inputFile'
+        '$feature'
+        '$alpha'
         '$outputFile1'
         '$outputFile2'
     </command>
@@ -23,8 +20,8 @@
         <param name="alpha" type="float" value="0.05" label="alpha (significance level)" />
     </inputs>
     <outputs>
-        <data format="tabular" name="outputFile1"/>
-        <data format="pdf" name="outputFile2"/>
+        <data format="tabular" name="outputFile1" label="${tool.name} on ${on_string}: statistics"/>
+        <data format="pdf" name="outputFile2" label="${tool.name} on ${on_string}: pdf"/>
     </outputs>
     <tests>
         <test>