Mercurial > repos > md-anderson-bioinformatics > heat_map_creation
changeset 39:436f03b71cf6 draft
Uploaded
author | insilico-bob |
---|---|
date | Thu, 20 Jun 2019 11:31:24 -0400 |
parents | 605ec840a06b |
children | 8f8ab332a050 |
files | CHM.R CHM_Advanced.R heatmap.sh heatmap_advanced.sh mda_advanced_heatmap_gen.xml mda_heatmap_gen.py mda_heatmap_gen.xml |
diffstat | 7 files changed, 1003 insertions(+), 686 deletions(-) [+] |
line wrap: on
line diff
--- a/CHM.R Thu Jun 20 11:30:12 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -### This method generates a row and column ordering given an input matrix and ordering methods. -### -### matrixData - numeric matrix -### rowOrderMethod - Hierarchical, Original, Random -### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation. -### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage, -### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'. -### colOrderMethod -### colDistanceMeasure -### colAgglomerationMethod -### rowOrderFile - output file of order of rows -### rowDendroFile - output file of row dendrogram -### colOrderFile - output file of order of cols -### colDendroFile - output file of col dendrogram -### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. -### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. 
- -performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut) -{ - dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) - rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) - if (rowOrderMethod == "Hierarchical") { - writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) - } - - colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod) - if (colOrderMethod == "Hierarchical") { - writeHCDataTSVs(colOrder, colDendroFile, colOrderFile) - writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep="")) - } -} - -#creates output files for hclust ordering -writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName) -{ - data<-cbind(uDend$merge, uDend$height, deparse.level=0) - colnames(data)<-c("A", "B", "Height") - write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) - - data=matrix(,length(uDend$labels),2); - for (i in 1:length(uDend$labels)) { - print(uDend$labels[i]) - data[i,1] = uDend$labels[i]; - data[i,2] = which(uDend$order==i); - } - colnames(data)<-c("Id", "Order") - write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) -} - -#creates a classification file based on user specified cut of dendrogram -writeHCCut<-function(uDend, cutNum, outputCutFileName) -{ - if (cutNum < 2) { - return() - } - print (paste("Writing cut file ", outputCutFileName)) - cut <- cutree(uDend, cutNum); - id <- names(cut); - data=matrix(,length(cut),2); - for (i in 1:length(cut)) { - data[i,1] = id[i]; - data[i,2] = sprintf("Cluster %d", cut[i]); - } - - write.table(data, file = outputCutFileName, append = 
FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE); -} - - -createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod) -{ - ordering <- NULL - - if (orderMethod == "Hierarchical") - { - - # Compute dendrogram for "Distance Metric" - distVals <- NULL - if(direction=="row") { - if (distanceMeasure == "correlation") { - geneGeneCor <- cor(t(matrixData), use="pairwise") - distVals <- as.dist((1-geneGeneCor)/2) - } else { - distVals <- dist(matrixData, method=distanceMeasure) - } - } else { #column - if (distanceMeasure == "correlation") { - geneGeneCor <- cor(matrixData, use="pairwise") - distVals <- as.dist((1-geneGeneCor)/2) - } else { - distVals <- dist(t(matrixData), method=distanceMeasure) - } - } - -# if (agglomerationMethod == "ward") { -# ordering <- hclust(distVals * distVals, method="ward.D2") -# } else { - ordering <- hclust(distVals, method=agglomerationMethod) -# } - } - else if (orderMethod == "Random") - { - if(direction=="row") { - headerList <- rownames(matrixData) - ordering <- sample(headerList, length(headerList)) - } else { - headerList <- colnames(matrixData) - ordering <- sample(headerList, length(headerList)) - } - } - else if (orderMethod == "Original") - { - if(direction=="row") { - ordering <- rownames(matrixData) - } else { - ordering <- colnames(matrixData) - } - } else { - stop("createOrdering -- failed to find ordering method") - } - return(ordering) -} -### Initialize command line arguments and call performDataOrdering - -options(warn=-1) - -args = commandArgs(TRUE) - -performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13]) - -#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], 
rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CHM_Advanced.R Thu Jun 20 11:31:24 2019 -0400 @@ -0,0 +1,131 @@ +### This method generates a row and column ordering given an input matrix and ordering methods. +### +### matrixData - numeric matrix +### rowOrderMethod - Hierarchical, Original, Random +### rowDistanceMeasure - For clustering, distance measure. May be: euclidean, binary, manhattan, maximum, canberra, minkowski, or correlation. +### rowAgglomerationMethod - For clustering, agglomeration method. May be: 'average' for Average Linkage, 'complete' for Complete Linkage, +### 'single' for Single Linkage, 'ward', 'mcquitty', 'median', or 'centroid'. +### colOrderMethod +### colDistanceMeasure +### colAgglomerationMethod +### rowOrderFile - output file of order of rows +### rowDendroFile - output file of row dendrogram +### colOrderFile - output file of order of cols +### colDendroFile - output file of col dendrogram +### rowCut - For rows the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. +### colCut - For columns the number of classifications to automatically generate based on dendrogram into a classification file. 0 for turned off. 
+ +performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut) +{ + dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) + rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) + if (rowOrderMethod == "Hierarchical") { + writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) + if (rowCut != 0) { + writeHCCut(rowOrder, rowCut, paste(rowOrderFile,".cut", sep="")) + } + } + + colOrder <- createOrdering(dataMatrix, colOrderMethod, "col", colDistanceMeasure, colAgglomerationMethod) + if (colOrderMethod == "Hierarchical") { + writeHCDataTSVs(colOrder, colDendroFile, colOrderFile) + if (colCut != 0) { + writeHCCut(colOrder, colCut, paste(colOrderFile,".cut", sep="")) + } + } +} + +#creates output files for hclust ordering +writeHCDataTSVs<-function(uDend, outputHCDataFileName, outputHCOrderFileName) +{ + data<-cbind(uDend$merge, uDend$height, deparse.level=0) + colnames(data)<-c("A", "B", "Height") + write.table(data, file = outputHCDataFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) + + data=matrix(,length(uDend$labels),2); + for (i in 1:length(uDend$labels)) { + data[i,1] = uDend$labels[i]; + data[i,2] = which(uDend$order==i); + } + colnames(data)<-c("Id", "Order") + write.table(data, file = outputHCOrderFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE) +} + +#creates a classification file based on user specified cut of dendrogram +writeHCCut<-function(uDend, cutNum, outputCutFileName) +{ + print (paste("Writing cut file ", outputCutFileName)) + cut <- cutree(uDend, cutNum); + id <- names(cut); + data=matrix(,length(cut),2); + for (i in 1:length(cut)) { + data[i,1] = id[i]; + data[i,2] = sprintf("Cluster %d", cut[i]); + } 
+ + write.table(data, file = outputCutFileName, append = FALSE, quote = FALSE, sep = "\t", row.names=FALSE, col.names = FALSE); +} + + +createOrdering<-function(matrixData, orderMethod, direction, distanceMeasure, agglomerationMethod) +{ + ordering <- NULL + + if (orderMethod == "Hierarchical") + { + + # Compute dendrogram for "Distance Metric" + distVals <- NULL + if(direction=="row") { + if (distanceMeasure == "correlation") { + geneGeneCor <- cor(t(matrixData), use="pairwise") + distVals <- as.dist((1-geneGeneCor)/2) + } else { + distVals <- dist(matrixData, method=distanceMeasure) + } + } else { #column + if (distanceMeasure == "correlation") { + geneGeneCor <- cor(matrixData, use="pairwise") + distVals <- as.dist((1-geneGeneCor)/2) + } else { + distVals <- dist(t(matrixData), method=distanceMeasure) + } + } + +# if (agglomerationMethod == "ward") { +# ordering <- hclust(distVals * distVals, method="ward.D2") +# } else { + ordering <- hclust(distVals, method=agglomerationMethod) +# } + } + else if (orderMethod == "Random") + { + if(direction=="row") { + headerList <- rownames(matrixData) + ordering <- sample(headerList, length(headerList)) + } else { + headerList <- colnames(matrixData) + ordering <- sample(headerList, length(headerList)) + } + } + else if (orderMethod == "Original") + { + if(direction=="row") { + ordering <- rownames(matrixData) + } else { + ordering <- colnames(matrixData) + } + } else { + stop("createOrdering -- failed to find ordering method") + } + return(ordering) +} +### Initialize command line arguments and call performDataOrdering + +options(warn=-1) + +args = commandArgs(TRUE) + +performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11], rowCut=args[12], colCut=args[13]) + 
+#suppressWarnings(performDataOrdering(dataFile=args[1], rowOrderMethod=args[2], rowDistanceMeasure=args[3], rowAgglomerationMethod=args[4], colOrderMethod=args[5], colDistanceMeasure=args[6], colAgglomerationMethod=args[7],rowOrderFile=args[8], colOrderFile=args[9], rowDendroFile=args[10], colDendroFile=args[11]))
--- a/heatmap.sh Thu Jun 20 11:30:12 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,147 +0,0 @@ -#echo "1: " $1" 2: " $2" 3: " $3" 4: "$4" 5: "$5 " 6: "$6 "7: "$7" 8: "$8 " 9: "$9" 10: "${10}" 11: "${11} "12: "${12} -#echo " 13: "${13}" 14: "${14}" 15: "${15}" 16: "${16} "17: "${17}" 18: "${18}" 19: "${19}" 20: "${20}" 21: "${21} " 22: "${22}" 23:" ${23} - -#Count total number of parameters and classification parameters -parmSize=0 -classSize=0 -matrixSize=0 -for i in "$@"; do - currParm=$(cut -d'|' -f1 <<< $i) - parmSize=$((parmSize+1)) - if [ $currParm = "classification" ] - then - classSize=$((classSize+1)) - fi -done - -#Get tool data and tool install directories -tooldir=$1 -tooldata=$2 -#create temp directory for row and col order and dendro files. -tdir=$tooldata/$(date +%y%m%d%M%S) -mkdir $tdir -#echo "tdir: "$tdir - -#Extract parameters for row and column order and dendro files -rowOrderFile=$tdir/ROfile.txt -rowDendroFile=$tdir/RDfile.txt -colOrderFile=$tdir/COfile.txt -colDendroFile=$tdir/CDfile.txt -rowOrderJson='"order_file": "'$rowOrderFile'",' -rowDendroJson='"dendro_file": "'$rowDendroFile'",' -colOrderJson='"order_file": "'$colOrderFile'",' -colDendroJson='"dendro_file": "'$colDendroFile'",' - -#BEGIN: Construct JSON for all non-repeating parameters -parmJson='{' -rowConfigJson='"row_configuration": {' -colConfigJson='"col_configuration": {' - -ctr=0 -for i in "$@"; do - if [ $ctr -gt 1 ] - then - currParm=$(cut -d'|' -f1 <<< $i) - if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] - then - #Parse pipe-delimited parameter parameter - parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",' - fi - if [ $currParm = "row_configuration" ] - then - rowOrder=$(cut -d'|' -f3 <<< $i) - rowDistance=$(cut -d'|' -f5 <<< $i) - rowAgglomeration=$(cut -d'|' -f7 <<< $i) - rowCuts=$(cut -d'|' -f9 <<< $i) - rowLabels=$(cut -d'|' 
-f11 <<< $i) - dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"]' - if [ $rowOrder = 'Hierarchical' ] - then - rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson - fi - rowConfigJson=$rowConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},' - fi - if [ $currParm = "col_configuration" ] - then - colOrder=$(cut -d'|' -f3 <<< $i) - colDistance=$(cut -d'|' -f5 <<< $i) - colAgglomeration=$(cut -d'|' -f7 <<< $i) - colCuts=$(cut -d'|' -f9 <<< $i) - colLabels=$(cut -d'|' -f11 <<< $i) - dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"]' - if [ $colOrder = 'Hierarchical' ] - then - colConfigJson=$colConfigJson$colOrderJson$colDendroJson - fi - colConfigJson=$colConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},' - fi - fi - ctr=$((ctr+1)) -done -#END: Construct JSON for all non-repeating parameters -#echo "rowCuts: "$rowCuts -#echo "colCuts: "$colCuts -#echo "ROW CONFIG JSON: "$rowConfigJson -#echo "COL CONFIG JSON: "$colConfigJson - -#BEGIN: Construct JSON for data layers -matrixJson='"matrix_files": [ ' -inputMatrix='' -for i in "$@"; do - currParm=$(cut -d'|' -f1 <<< $i) - if [ $currParm = "matrix_files" ] - then - #Parse pipe-delimited parameter parameter - matrixJson=$matrixJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'"}' - inputMatrix=$(cut -d'|' -f3 <<< $i) - fi -done -matrixJson=$matrixJson"]," -#END: Construct JSON for data layers - -#BEGIN: Construct JSON for classification files -classJson='"classification_files": [ ' -classIter=0 -for i in "$@"; do - currParm=$(cut -d'|' -f1 <<< $i) - if [ $currParm = "classification" ] - then - 
classIter=$((classIter+1)) - #Parse pipe-delimited 3-part classification bar parameter - classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'"' - classCat=$(cut -d'|' -f7 <<< $i) - classColorType=$(cut -d'_' -f2 <<< $classCat) - classJson=$classJson',' - classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}' - if [ $classIter -lt $classSize ] - then - classJson=$classJson',' - fi - fi -done -classJson=$classJson']' -#END: Construct JSON for classification files - -parmJson=$parmJson$matrixJson$rowConfigJson$colConfigJson$classJson -parmJson=$parmJson'}' -#echo "HEATMAP PARAMETERS JSON: "$parmJson - -#run R to cluster matrix -output="$(R --slave --vanilla --file=$tooldir/CHM.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)" -rc=$?; -if [ $rc != 0 ] -then - echo $output; - if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ] - then - echo ""; - echo "Note: This error can occur when there is no variation in a row or column. Try a different distance measure or remove rows/columns without variation."; - echo "This error may also be caused when a covariate file has inadvertently been selected as an Input Matrix. Check your Input Matrix entry."; - fi - exit $rc; -fi - -#call java program to generate NGCHM viewer files. -java -jar $tooldir/GalaxyMapGen.jar "$parmJson" -#clean up tempdir -rm -rf $tdir
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/heatmap_advanced.sh Thu Jun 20 11:31:24 2019 -0400 @@ -0,0 +1,369 @@ +#echo "1: " $1 " 2: " $2 " 3: " $3 " 4: " $4 " 5: " $5 " 6: " $6 " 7: " $7 " 8: " $8 " 9: " $9 " 10: " ${10} +#echo "11: " ${11} " 12: " ${12} 13: " ${13} 14: " ${14} " 15: " ${15} " 16: " ${16} " 17: " ${17} " 18: " ${18} " 19: " ${19} " 20: " ${20} +#echo "21: "${21}" 22: "${22}" 23: "${23}" 24: "${24}" 25: "${25}" 26: "${26}" 27: "${27}" 28: "${28}" 29: "${29}" 30: "${30} + +#Count total number of parameters, dataLayer parameters, and classification parameters +parmSize=0 +classSize=0 +dataLayerSize=0 +attribSize=0 +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + parmSize=$((parmSize+1)) + if [ $currParm = "classification" ] + then + classSize=$((classSize+1)) + fi + if [ $currParm = "matrix_files" ] + then + dataLayerSize=$((dataLayerSize+1)) + fi + if [ $currParm = "attribute" ] + then + attribSize=$((attribSize+1)) + fi +done + +if [ $dataLayerSize -lt 1 ] +then + noDataLayer="ERROR: No Heat Map Matrices provided. Please add at least one Heat Map Matrix to your request and try again." + echo $noDataLayer + exit $noDataLayer +fi + +#Get tool data and tool install directories +tooldir=$1 +tooldata=$2 +#create temp directory for row and col order and dendro files. 
+tdir=$tooldata/$(date +%y%m%d%M%S) +mkdir $tdir +#echo "tdir: "$tdir + +#Extract parameters for row and column order and dendro files +rowOrderFile=$tdir/ROfile.txt +rowDendroFile=$tdir/RDfile.txt +colOrderFile=$tdir/COfile.txt +colDendroFile=$tdir/CDfile.txt +rowOrderJson='"order_file": "'$rowOrderFile'",' +rowDendroJson='"dendro_file": "'$rowDendroFile'",' +colOrderJson='"order_file": "'$colOrderFile'",' +colDendroJson='"dendro_file": "'$colDendroFile'",' + +#BEGIN: Construct JSON for all non-repeating parameters +parmJson='{' +rowConfigJson='"row_configuration": {' +colConfigJson='"col_configuration": {' + +ctr=0 +for i in "$@"; do + if [ $ctr -gt 1 ] + then + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] && [ $currParm != "attribute" ] + then + #Parse pipe-delimited parameter parameter + parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",' + fi + if [ $currParm = "row_configuration" ] + then + rowOrder=$(cut -d'|' -f3 <<< $i) + rowDistance=$(cut -d'|' -f5 <<< $i) + rowAgglomeration=$(cut -d'|' -f7 <<< $i) + rowCuts=$(cut -d'|' -f9 <<< $i) + rowLabels=$(cut -d'|' -f11 <<< $i) + rowDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"],' + rowCutType=$(cut -d'|' -f16 <<< $i) + rowTopItemsJson='' + rowTopItems=$(cut -d'|' -f13 <<< $i) + if [ $rowTopItems != "None" ] && [ $rowTopItems != "" ] + then + rowTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": [' + rowTopItems=${rowTopItems//,/'","'} + rowTopItemsJson=$rowTopItemsJson'"'$rowTopItems'"],' + fi + rowCutsJson='' + if [ $rowCutType != "none" ] + then + cutValues=$(cut -d'|' -f15 <<< $i) + if [ $cutValues != "None" ] && [ $cutValues != "0" ] + then + if [ $rowCutType = "treecuts" ] + then + rowCutsJson=$rowCutsJson'"tree_cuts": "'$cutValues'",' + rowCutsJson=$rowCutsJson'"cut_width": "5",' + fi + if [ $rowCutType = "positional" ] + then + 
rowCutErrorVal=0 + [[ $cutValues != ?(-)+([0-9,]) ]] && rowCutErrorVal=$((rowCutErrorVal+1)) + if [ $rowCutErrorVal -gt 0 ] + then + echo "GALAXY PARAMETER WARNING: Non-numeric values found for Row Fixed Gap Locations. Ignoring parameter value: "$cutValues + else + rowCutsJson=$rowCutsJson'"cut_locations": ['$cutValues'],' + rowCutsJson=$rowCutsJson'"cut_width": "5",' + fi + fi + fi + fi + rowConfigJson=$rowConfigJson$rowDataTypeJson$rowCutsJson$rowTopItemsJson + if [ $rowOrder = 'Hierarchical' ] + then + rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson + fi + rowConfigJson=$rowConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},' + fi + if [ $currParm = "col_configuration" ] + then + colOrder=$(cut -d'|' -f3 <<< $i) + colDistance=$(cut -d'|' -f5 <<< $i) + colAgglomeration=$(cut -d'|' -f7 <<< $i) + colCuts=$(cut -d'|' -f9 <<< $i) + colLabels=$(cut -d'|' -f11 <<< $i) + colDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"],' + colCutType=$(cut -d'|' -f16 <<< $i) + colTopItemsJson='' + colTopItems=$(cut -d'|' -f13 <<< $i) + if [ $colTopItems != "None" ] && [ $colTopItems != "" ] + then + colTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": [' + colTopItems=${colTopItems//,/'","'} + colTopItemsJson=$colTopItemsJson'"'$colTopItems'"],' + fi + colCutsJson='' + if [ $colCutType != "none" ] + then + cutValues=$(cut -d'|' -f15 <<< $i) + if [ $cutValues != "None" ] && [ $cutValues != "0" ] + then + if [ $colCutType = "treecuts" ] + then + colCutsJson=$colCutsJson'"tree_cuts": "'$cutValues'",' + colCutsJson=$colCutsJson'"cut_width": "5",' + fi + if [ $colCutType = "positional" ] + then + colCutErrorVal=0 + [[ $cutValues != ?(-)+([0-9,]) ]] && colCutErrorVal=$((colCutErrorVal+1)) + if [ $colCutErrorVal -gt 0 ] + then + echo 
"GALAXY PARAMETER WARNING: Non-numeric values found for Column Fixed Gap Locations. Ignoring parameter value: "$cutValues + else + colCutsJson=$colCutsJson'"cut_locations": ['$cutValues'],' + colCutsJson=$colCutsJson'"cut_width": "5",' + fi + fi + fi + fi + colConfigJson=$colConfigJson$colDataTypeJson$colCutsJson$colTopItemsJson + if [ $colOrder = 'Hierarchical' ] + then + colConfigJson=$colConfigJson$colOrderJson$colDendroJson + fi + colConfigJson=$colConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},' + fi + fi + ctr=$((ctr+1)) +done + +#END: Construct JSON for all non-repeating parameters +#echo "rowOrder: "$rowOrder +#echo "rowDistance: "$rowDistance +#echo "rowAgglomeration: "$rowAgglomeration +#echo "rowCuts: "$rowCuts +#echo "rowLabels: "$rowLabels +#echo "ROW CONFIG JSON: "$rowConfigJson +#echo "colOrder: "$colOrder +#echo "colDistance: "$colDistance +#echo "colAgglomeration: "$colAgglomeration +#echo "colCuts: "$colCuts +#echo "colLabels: "$colLabels +#echo "COL CONFIG JSON: "$colConfigJson + +#BEGIN: Construct JSON for data layers +matrixJson='"matrix_files": [ ' +inputMatrix='' +dataLayerIter=0 +dataLayerNames='' +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm = "matrix_files" ] + then + if [ $dataLayerIter -lt 1 ] + then + inputMatrix=$(cut -d'|' -f3 <<< $i) + fi + currMatrixName=$(cut -d'|' -f5 <<< $i) + dataLayerIter=$((dataLayerIter+1)) + if [[ $dataLayerNames =~ $currMatrixName ]] + then + currMatrixName=$currMatrixName$dataLayerIter + fi + dataLayerNames=$dataLayerNames$currMatrixName + colorPref=$(cut -d'|' -f16 <<< $i) + colorMapJson='' + if [ $colorPref = "defined" ] + then + #validations to place leading zero on first breakpoint (if necessary) + b1=$(cut -d'|' -f20 <<< $i) + 
b1first=$(cut -d'.' -f1 <<< $b1) + if [ $b1first = "-" ] + then + b1="-0."$(cut -d'.' -f2 <<< $b1) + fi + if [ "$b1first" = "" ] + then + b1="0"$b1 + fi + #validations to place leading zero on second breakpoint (if necessary) + b2=$(cut -d'|' -f21 <<< $i) + b2first=$(cut -d'.' -f1 <<< $b2) + if [ $b2first = "-" ] + then + b2="-0."$(cut -d'.' -f2 <<< $b2) + fi + if [ "$b2first" = "" ] + then + b2="0"$b2 + fi + #validations to place leading zero on third breakpoint (if necessary) + b3=$(cut -d'|' -f22 <<< $i) + b3first=$(cut -d'.' -f1 <<< $b3) + if [ $b3first = "-" ] + then + b3="-0."$(cut -d'.' -f2 <<< $b3) + fi + if [ "$b3first" = "" ] + then + b3="0"$b3 + fi + #validation to ensure that all entered breakpoints are numeric values + regExp='^[+-]?([0-9]+\.?|[0-9]*\.[0-9]+)$' + if [[ $b1 =~ $regExp ]] && [[ $b2 =~ $regExp ]] && [[ $b3 =~ $regExp ]] + then + colorMapJson=$colorMapJson'"color_map": {"colors": ["'$(cut -d'|' -f17 <<< $i)'","'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'"],' + colorMapJson=$colorMapJson'"thresholds": ['$b1','$b2','$b3'],' + colorMapJson=$colorMapJson'"missing":"'$(cut -d'|' -f23 <<< $i)'"},' + else + echo "GALAXY PARAMETER WARNING: Not all user-defined breakpoints are numbers. Defined breakpoints and colors will be ignored." 
+ fi + fi + #Parse pipe-delimited parameter parameter + matrixJson=$matrixJson' {'$colorMapJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$currMatrixName'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f10 <<< $i)'":"'$(cut -d'|' -f11 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"}' + if [ $dataLayerIter -lt $dataLayerSize ] + then + matrixJson=$matrixJson',' + fi + fi +done +matrixJson=$matrixJson"]," +#END: Construct JSON for data layers +#echo "DATA LAYER JSON: "$matrixJson +#echo "INPUT MATRIX: "$inputMatrix + +#BEGIN: Construct JSON for attributes +attribJson='"chm_attributes": [ ' +attribIter=0 +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm = "attribute" ] + then + attribIter=$((attribIter+1)) + attribParam=$(cut -d'|' -f2 <<< $i) + #Parse pipe-delimited 2-part data layer parameter + attribJson=$attribJson' {"'$(cut -d':' -f1 <<< $attribParam)'":"'$(cut -d':' -f2 <<< $attribParam)'"}' + if [ $attribIter -lt $attribSize ] + then + attribJson=$attribJson',' + fi + fi +done +attribJson=$attribJson'],' +#END: Construct JSON for attributes +#echo "ATTRIB JSON: "$attribJson + +#BEGIN: Construct JSON for classification files +classJson='"classification_files": [ ' +colCutClass='' +rowCutClass='' +if [ $rowCuts -gt 1 ] +then + rowCutClass='{"name": "Class", "path": "'$tdir'/ROfile.txt.cut","position": "row", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}' +fi + +if [ $colCuts -gt 1 ] +then + if [ $rowCuts -gt 1 ] + then + rowCutClass=$rowCutClass',' + fi + colCutClass='{"name": "Class", "path": "'$tdir'/COfile.txt.cut","position": "column", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}' + if [ $classSize -gt 0 ] + then + colCutClass=$colCutClass',' + fi +else + if [ $rowCuts -gt 1 ] && [ $classSize -gt 0 ] + then + 
rowCutClass=$rowCutClass',' + fi +fi + +classJson=$classJson$rowCutClass$colCutClass +classIter=0 +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm = "classification" ] + then + classIter=$((classIter+1)) + className=$(cut -d'|' -f3 <<< $i) + #Parse pipe-delimited 3-part classification bar parameter + classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"' + classCat=$(cut -d'|' -f7 <<< $i) + classColorType=$(cut -d'_' -f2 <<< $classCat) + classJson=$classJson',' + classHeight=$(cut -d'|' -f11 <<< $i) + heightErrorVal=0 + [[ $classHeight != ?(-)+([0-9]) ]] && heightErrorVal=$((heightErrorVal+1)) + if [ $heightErrorVal -gt 0 ] + then + echo 'GALAXY PARAMETER WARNING: Non-numeric values found for covariate bar ('$className') height. 
Height value ignored and default of 15 used: '$classHeight + else + classJson=$classJson'"height": "'$classHeight'",' + fi + classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}' + if [ $classIter -lt $classSize ] + then + classJson=$classJson',' + fi + fi +done +classJson=$classJson']' +#END: Construct JSON for classification files +#echo "CLASSIFICATION JSON: "$classJson + +#Complete construction of Parameter JSON file by adding all JSON sections created above +parmJson=$parmJson$rowConfigJson$colConfigJson$attribJson$matrixJson$classJson +parmJson=$parmJson'}' +#echo "COMPLETED PARAMETER JSON: "$parmJson + +#run R to cluster matrix +output="$(R --slave --vanilla --file=$tooldir/CHM_Advanced.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)" +# Check for errors from R step, log them if found, and exit script +rc=$?; +if [ $rc != 0 ] +then + echo $output; + if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ] + then + echo ""; + echo "NOTE 1: This error can occur when a covariate file has inadvertently been selected as an Input Matrix. Check your Input Matrix entry."; + echo "NOTE 2: This error can occur when there is no variation in a data rows or columns in the input matrix. Try a different distance measure or remove rows/columns without variation."; + fi + exit $rc; +fi + +#Call java program to generate NGCHM viewer files. +java -jar $tooldir/GalaxyMapGen.jar "$parmJson" +#clean up tempdir +rm -rf $tdir
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Galaxy tool wrapper: Advanced NG-CHM Generator (mda_advanced_heatmap_gen.xml).

  Collects one or more data matrices, row/column ordering options, covariate
  bars and map attributes, and passes them to heatmap_advanced.sh as
  pipe-delimited "key|value|..." arguments.

  Review fixes relative to the committed text:
    * The sanitizers for hmname and hmdesc were siblings of self-closed params
      and therefore never applied; they are now nested inside their params.
    * Because those sanitizers allow shell metacharacters such as $ and
      backtick, the chm_name and chm_description arguments are single-quoted
      (the sanitizer strips single quotes, so the value cannot break out).
    * The quote character in sanitizers is written as the &quot; entity.
    * The command template reads the dendrogram show/height values for every
      ordering method, so the Original and Random cases now define them as
      hidden params (they were only defined for Hierarchical).
    * The test section addresses real parameter names and option values.
    * Column gap help text refers to columns; data formats use the lowercase
      "tabular" extension.
-->
<tool id="mda_advanced_heatmap_gen" name="Advanced NG-CHM Generator" version="2.3">
  <requirements>
    <requirement type="package" version="3.4.1">r-base</requirement>
    <requirement type="package" version="8.0.144">openjdk</requirement>
  </requirements>
  <description>Create Clustered Heat Maps with Advanced Options</description>
  <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap_advanced.sh "$__tool_directory__" "$__tool_data_path__/" 'chm_name|Heat_Map_$hmname' 'chm_description|$hmdesc' "summary_width|$summaryDisplayWidth"
    "row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|${d_rows.rowDendroCut}|data_type|$rowDataType|top_items|$rowTopItems|tree_cuts|${d_rows.rcutrows.rowDendroTreeCut}|${d_rows.rcutrows.raddcuts}|dendro_show|${d_rows.rowDendroShow}|dendro_height|${d_rows.rowDendroHeight}"
    "col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|${d_cols.colDendroCut}|data_type|$colDataType|top_items|$colTopItems|tree_cuts|${d_cols.ccutrows.colDendroTreeCut}|${d_cols.ccutrows.caddcuts}|dendro_show|${d_cols.columnDendroShow}|dendro_height|${d_cols.columnDendroHeight}"
    #for $attr in $hm_attribute
      'attribute|${attr.attrbute_key}':'${attr.attrbute_value}'
    #end for
    #for $mx in $matrices
      'matrix_files|path|$mx.dataLayer|name|${mx.dataLayerName}|summary_method|${mx.summarymethod}|selection_color|${mx.dataLayerSelectionColor}|cuts_color|${mx.dataLayerCutsColor}|grid_color|${mx.dataLayerGridColor}|grid_show|${mx.dataLayerGridShow}|${mx.colorsBreaks.setColorsBreaks}|${mx.colorsBreaks.matrixColor1}|${mx.colorsBreaks.matrixColor2}|${mx.colorsBreaks.matrixColor3}|${mx.colorsBreaks.matrixBreak1}|${mx.colorsBreaks.matrixBreak2}|${mx.colorsBreaks.matrixBreak3}|${mx.colorsBreaks.missingColor}'
    #end for
    #for $op in $operations
      'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cattype.cat}|bar_type|${op.cattype.scatbar.bartype}|height|${op.classHeight}|fg_color|${op.cattype.scatbar.fg_color}|bg_color|${op.cattype.scatbar.bg_color}'
    #end for
    'output_location|$output'
  </command>
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>
  <inputs>
    <!-- One entry per data layer of the heat map. -->
    <repeat name="matrices" title="Heat Map Matrices">
      <param name="dataLayer" type="data" format="tabular" label="Input Data Matrix" help="Tab delimited text file with row labels, column labels, and data."/>
      <param name="dataLayerName" size="40" type="text" value="Data_Layer_name" label="Data Layer Name" help="Name for data layer (no spaces).">
        <sanitizer>
          <valid>
            <add preset="string.printable"/>
            <remove value="&quot;"/>
            <remove value="'"/>
            <remove value=" "/>
          </valid>
        </sanitizer>
      </param>
      <param name="summarymethod" type="select" label="Data Summarization Method" help="For large matrices, the selected method is used to aggregate data values in the summary view.">
        <option value="average">Average</option>
        <option value="sample">Sample</option>
        <option value="mode">Mode</option>
      </param>
      <!-- Both cases define the same seven params so the command template can always read them. -->
      <conditional name="colorsBreaks">
        <param name="setColorsBreaks" type="select" label="Colors and Breakpoints" help="Select whether to set your own colors and breakpoints or use default values.">
          <option value="none">Use System Generated Colors and Breakpoints</option>
          <option value="defined">Define Your Own Colors and Breakpoints</option>
        </param>
        <when value="none">
          <param name="matrixColor1" type="text" size="0" hidden="true" value="0"/>
          <param name="matrixBreak1" type="text" size="0" hidden="true" value="0"/>
          <param name="matrixColor2" type="text" size="0" hidden="true" value="0"/>
          <param name="matrixBreak2" type="text" size="0" hidden="true" value="0"/>
          <param name="matrixColor3" type="text" size="0" hidden="true" value="0"/>
          <param name="matrixBreak3" type="text" size="0" hidden="true" value="0"/>
          <param name="missingColor" type="text" size="0" hidden="true" value="0"/>
        </when>
        <when value="defined">
          <param name="matrixColor1" type="color" label="Color for Breakpoint One" value="#ff0000"/>
          <param name="matrixBreak1" type="text" label="Value for Breakpoint One" value="-1"/>
          <param name="matrixColor2" type="color" label="Color for Breakpoint Two" value="#ffffff"/>
          <param name="matrixBreak2" type="text" label="Value for Breakpoint Two" value="0"/>
          <param name="matrixColor3" type="color" label="Color for Breakpoint Three" value="#0000ff"/>
          <param name="matrixBreak3" type="text" label="Value for Breakpoint Three" value="1"/>
          <param name="missingColor" type="color" label="Color for Missing Values" value="#000000"/>
        </when>
      </conditional>
      <param name="dataLayerSelectionColor" type="color" label="Color for selection box" value="#00ff38"/>
      <param name="dataLayerCutsColor" type="color" label="Color for Heat Map Gaps" value="#ffffff"/>
      <param name="dataLayerGridColor" type="color" label="Color for Heat Map Grid" value="#ffffff"/>
      <param name="dataLayerGridShow" type="select" label="Show Heat Map Grid">
        <option value="Y">Yes</option>
        <option value="N">No</option>
      </param>
    </repeat>
    <param name="hmname" size="40" type="text" value="Heat_Map_name" label="Heat Map Name" help="Short Name for heat map (no spaces).">
      <sanitizer>
        <valid>
          <add preset="string.printable"/>
          <remove value="&quot;"/>
          <remove value="'"/>
          <remove value=" "/>
        </valid>
      </sanitizer>
    </param>
    <param name="hmdesc" size="100" optional="true" type="text" value="Heat_Map_description" label="Heat Map Description" help="Longer description of the heat map contents.">
      <sanitizer>
        <valid>
          <add preset="string.printable"/>
          <remove value="&quot;"/>
          <remove value="'"/>
          <remove value=" "/>
        </valid>
      </sanitizer>
    </param>
    <param name="summaryDisplayWidth" type="select" label="Summary Display Width %" help="Sets the percentage of the viewer display of the summary panel.">
      <option value="50">50%</option>
      <option value="10">10%</option>
      <option value="20">20%</option>
      <option value="30">30%</option>
      <option value="40">40%</option>
      <option value="60">60%</option>
      <option value="70">70%</option>
      <option value="80">80%</option>
      <option value="90">90%</option>
    </param>
    <!-- Row ordering. Every case must define each d_rows value the command template reads. -->
    <conditional name="d_rows">
      <param name="rowOrderMethod" type="select" label="Row ordering method" help="Determine if rows should be clustered, randomized, or remain as is.">
        <option value="Hierarchical">Hierarchical Clustering</option>
        <option value="Original">Original Order</option>
        <option value="Random">Random</option>
      </param>
      <when value="Hierarchical">
        <param name="rowDistanceMeasure" type="select" label="Row Distance Metric" help="For clustering, select the method of determining distance between rows.">
          <option value="euclidean">Euclidean</option>
          <!-- <option value="binary">Binary</option> ** breaks dendrogram -->
          <option value="manhattan">Manhattan</option>
          <option value="maximum">Maximum</option>
          <!-- <option value="canberra">Canberra</option> ** breaks dendrogram -->
          <option value="minkowski">Minkowski</option>
          <!-- <option value="correlation">Correlation</option> ** breaks dendrogram -->
        </param>
        <param name="rowAgglomerationMethod" type="select" label="Row Clustering Method" help="For clustering, select algorithm for building clusters.">
          <option value="average">Average Linkage</option>
          <option value="complete">Complete Linkage</option>
          <option value="single">Single Linkage</option>
          <option value="ward" selected="true">Ward</option>
          <option value="mcquitty">Mcquitty</option>
          <!-- <option value="median">Median</option> ** breaks dendrogram
               <option value="centroid">Centroid</option> ** breaks dendrogram -->
        </param>
        <param name="rowDendroShow" type="select" label="Show Row Dendrogram" help="For setting the visibility of the row dendrogram.">
          <option value="ALL">Summary and Detail</option>
          <option value="SUMMARY">Summary Only</option>
          <option value="NONE">Hide</option>
        </param>
        <param name="rowDendroHeight" type="select" label="Row Dendrogram Display Height" help="For adjusting the displayed height of the dendrogram bar.">
          <option value="100">100%</option>
          <option value="50">50%</option>
          <option value="75">75%</option>
          <option value="125">125%</option>
          <option value="150">150%</option>
          <option value="200">200%</option>
        </param>
        <param name="rowDendroCut" type="select" label="Row Cluster Covariate Bar" help="To generate a row covariate bar based on clusters, select the number of clusters(classes) to use.">
          <option value="0" selected="true">None</option>
          <option value="2">2</option>
          <option value="3">3</option>
          <option value="4">4</option>
          <option value="5">5</option>
          <option value="6">6</option>
          <option value="7">7</option>
          <option value="8">8</option>
          <option value="9">9</option>
          <option value="10">10</option>
        </param>
        <conditional name="rcutrows">
          <param name="raddcuts" type="select" label="Add row gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="treecuts">Cluster-Based Gaps</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="rowDendroTreeCut" type="text" size="0" hidden="true" value="0"/>
          </when>
          <when value="treecuts">
            <param name="rowDendroTreeCut" type="select" label="Cluster-Based Gap" help="Gaps will be placed between clusters. Select the number of clusters.">
              <option value="0" selected="true">None</option>
              <option value="2">2</option>
              <option value="3">3</option>
              <option value="4">4</option>
              <option value="5">5</option>
              <option value="6">6</option>
              <option value="7">7</option>
              <option value="8">8</option>
              <option value="9">9</option>
              <option value="10">10</option>
            </param>
          </when>
          <when value="positional">
            <param name="rowDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of row numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
      <when value="Original">
        <param name="rowDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/>
        <param name="rowAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/>
        <param name="rowDendroCut" type="text" size="0" hidden="true" value="0"/>
        <!-- No dendrogram exists without clustering; placeholders keep the command template resolvable. -->
        <param name="rowDendroShow" type="text" size="0" hidden="true" value="NONE"/>
        <param name="rowDendroHeight" type="text" size="0" hidden="true" value="100"/>
        <conditional name="rcutrows">
          <param name="raddcuts" type="select" label="Add row gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="rowDendroTreeCut" type="text" size="100" value="None" hidden="true"/>
          </when>
          <when value="positional">
            <param name="rowDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of row numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
      <when value="Random">
        <param name="rowDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/>
        <param name="rowAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/>
        <param name="rowDendroCut" type="text" size="0" hidden="true" value="0"/>
        <!-- No dendrogram exists without clustering; placeholders keep the command template resolvable. -->
        <param name="rowDendroShow" type="text" size="0" hidden="true" value="NONE"/>
        <param name="rowDendroHeight" type="text" size="0" hidden="true" value="100"/>
        <conditional name="rcutrows">
          <param name="raddcuts" type="select" label="Add row gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="rowDendroTreeCut" type="text" size="100" value="None" hidden="true"/>
          </when>
          <when value="positional">
            <param name="rowDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of row numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
    </conditional>
    <param name="rowTopItems" size="100" type="text" value="None" label="Row Top Items" help="A few rows can be labeled in the full summary heat map. To do so provide a comma delimited list of row labels."/>
    <param name="rowDataType" type="select" label="Row Label Type" help="Enable label driven link-outs by specifying the type of row labels.">
      <option value="labels" selected="true">None</option>
      <option value="bio.probe.affymetrix">Affymetrix Probe Id</option>
      <option value="bio.feature.agilent">Agilent Id</option>
      <option value="bio.sample.cbioportal">cBioPortal sample Id</option>
      <option value="bio.transcript.ensemble">Ensemble transcript Id</option>
      <option value="bio.gene.entrez">Gene Entrez Id</option>
      <option value="bio.gene.hugo">Gene HUGO symbol</option>
      <option value="bio.go">Gene Ontology (GO) Id</option>
      <option value="bio.geo.acc">GEO Accession Id</option>
      <option value="bio.probe.illumina">Illumina Probe Id</option>
      <option value="bio.probe.infinium">Infinium Probe Id</option>
      <option value="bio.pathway.mdanderson">MD Anderson pathway Id</option>
      <option value="bio.mirna">miRNA Id</option>
      <option value="bio.mirna.mimat">miRNA MIMAT Id</option>
      <option value="bio.pubmed">Pubmed Id</option>
      <option value="bio.pubmed.search">Pubmed Search Term</option>
      <option value="scholar">Scholarly term</option>
      <option value="bio.gene.unigene">Unigene CId</option>
      <option value="bio.protein.uniprot">UniProt Id</option>
    </param>
    <!-- Column ordering. Mirrors d_rows; every case defines each d_cols value the command template reads. -->
    <conditional name="d_cols">
      <param name="columnOrderMethod" type="select" label="Column ordering method" help="Determine if columns should be clustered, randomized, or remain as is.">
        <option value="Hierarchical">Hierarchical Clustering</option>
        <option value="Original">Original Order</option>
        <option value="Random">Random</option>
      </param>
      <when value="Hierarchical">
        <param name="columnDistanceMeasure" type="select" label="Column Distance Metric" help="For clustering, select the method of determining distance between columns.">
          <option value="euclidean">Euclidean</option>
          <!-- <option value="binary">Binary</option> ** breaks dendrogram -->
          <option value="manhattan">Manhattan</option>
          <option value="maximum">Maximum</option>
          <!-- <option value="canberra">Canberra</option> ** breaks dendrogram -->
          <option value="minkowski">Minkowski</option>
          <!-- <option value="correlation">Correlation</option> ** breaks dendrogram -->
        </param>
        <param name="columnAgglomerationMethod" type="select" label="Column Clustering Method" help="For clustering, select algorithm for building clusters.">
          <option value="average">Average Linkage</option>
          <option value="complete">Complete Linkage</option>
          <option value="single">Single Linkage</option>
          <option value="ward" selected="true">Ward</option>
          <option value="mcquitty">Mcquitty</option>
          <!-- <option value="median">Median</option> ** breaks dendrogram
               <option value="centroid">Centroid</option> ** breaks dendrogram -->
        </param>
        <param name="columnDendroShow" type="select" label="Show Column Dendrogram" help="For setting the visibility of the column dendrogram.">
          <option value="ALL">Summary and Detail</option>
          <option value="SUMMARY">Summary Only</option>
          <option value="NONE">Hide</option>
        </param>
        <param name="columnDendroHeight" type="select" label="Column Dendrogram Display Height" help="For adjusting the displayed height of the dendrogram bar.">
          <option value="100">100%</option>
          <option value="50">50%</option>
          <option value="75">75%</option>
          <option value="125">125%</option>
          <option value="150">150%</option>
          <option value="200">200%</option>
        </param>
        <param name="colDendroCut" type="select" label="Column Cluster Covariate Bar" help="To generate a column covariate bar based on clusters, select the number of clusters(classes) to use.">
          <option value="0" selected="true">None</option>
          <option value="2">2</option>
          <option value="3">3</option>
          <option value="4">4</option>
          <option value="5">5</option>
          <option value="6">6</option>
          <option value="7">7</option>
          <option value="8">8</option>
          <option value="9">9</option>
          <option value="10">10</option>
        </param>
        <conditional name="ccutrows">
          <param name="caddcuts" type="select" label="Add column gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="treecuts">Cluster-Based Gaps</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="colDendroTreeCut" type="text" size="0" hidden="true" value="0"/>
          </when>
          <when value="treecuts">
            <param name="colDendroTreeCut" type="select" label="Cluster-Based Gap" help="Gaps will be placed between clusters. Select the number of clusters.">
              <option value="0" selected="true">None</option>
              <option value="2">2</option>
              <option value="3">3</option>
              <option value="4">4</option>
              <option value="5">5</option>
              <option value="6">6</option>
              <option value="7">7</option>
              <option value="8">8</option>
              <option value="9">9</option>
              <option value="10">10</option>
            </param>
          </when>
          <when value="positional">
            <param name="colDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of column numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
      <when value="Original">
        <param name="columnDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/>
        <param name="columnAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/>
        <param name="colDendroCut" type="text" size="0" hidden="true" value="0"/>
        <!-- No dendrogram exists without clustering; placeholders keep the command template resolvable. -->
        <param name="columnDendroShow" type="text" size="0" hidden="true" value="NONE"/>
        <param name="columnDendroHeight" type="text" size="0" hidden="true" value="100"/>
        <conditional name="ccutrows">
          <param name="caddcuts" type="select" label="Add column gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="colDendroTreeCut" type="text" size="100" value="None" hidden="true"/>
          </when>
          <when value="positional">
            <param name="colDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of column numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
      <when value="Random">
        <param name="columnDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/>
        <param name="columnAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/>
        <param name="colDendroCut" type="text" size="0" hidden="true" value="0"/>
        <!-- No dendrogram exists without clustering; placeholders keep the command template resolvable. -->
        <param name="columnDendroShow" type="text" size="0" hidden="true" value="NONE"/>
        <param name="columnDendroHeight" type="text" size="0" hidden="true" value="100"/>
        <conditional name="ccutrows">
          <param name="caddcuts" type="select" label="Add column gap(s)" help="To separate portions of the heat map with gaps, select a gap method.">
            <option value="none">None</option>
            <option value="positional">Fixed Gaps</option>
          </param>
          <when value="none">
            <param name="colDendroTreeCut" type="text" size="100" value="None" hidden="true"/>
          </when>
          <when value="positional">
            <param name="colDendroTreeCut" type="text" size="100" value="None" label="Fixed Gap Location(s)" help="Enter a comma delimited list of column numbers where gaps should be created."/>
          </when>
        </conditional>
      </when>
    </conditional>
    <param name="colTopItems" size="100" type="text" value="None" label="Column Top Items" help="A few columns can be labeled in the full summary heat map. To do so provide a comma delimited list of column labels."/>
    <param name="colDataType" type="select" label="Column Label Type" help="Enable label driven link-outs by specifying the type of column labels.">
      <option value="labels" selected="true">None</option>
      <option value="bio.probe.affymetrix">Affymetrix Probe Id</option>
      <option value="bio.feature.agilent">Agilent Id</option>
      <option value="bio.sample.cbioportal">cBioPortal sample Id</option>
      <option value="bio.transcript.ensemble">Ensemble transcript Id</option>
      <option value="bio.gene.entrez">Gene Entrez Id</option>
      <option value="bio.gene.hugo">Gene HUGO symbol</option>
      <option value="bio.go">Gene Ontology (GO) Id</option>
      <option value="bio.geo.acc">GEO Accession Id</option>
      <option value="bio.probe.illumina">Illumina Probe Id</option>
      <option value="bio.probe.infinium">Infinium Probe Id</option>
      <option value="bio.pathway.mdanderson">MD Anderson pathway Id</option>
      <option value="bio.mirna">miRNA Id</option>
      <option value="bio.mirna.mimat">miRNA MIMAT Id</option>
      <option value="bio.pubmed">Pubmed Id</option>
      <option value="bio.pubmed.search">Pubmed Search Term</option>
      <option value="scholar">Scholarly term</option>
      <option value="bio.gene.unigene">Unigene CId</option>
      <option value="bio.protein.uniprot">UniProt Id</option>
    </param>
    <!-- Optional covariate (classification) bars. Every cattype case defines scatbar.bartype, bg_color and fg_color. -->
    <repeat name="operations" title="Covariate Bars">
      <param name="class_name" size="25" type="text" value="" label="Covariate Name" help="Covariate heat map display label.">
        <sanitizer>
          <valid>
            <add preset="string.printable"/>
            <remove value="&quot;"/>
            <remove value="'"/>
            <remove value=" "/>
          </valid>
        </sanitizer>
      </param>
      <param name="repeatinput" type="data" format="tabular" label="Covariate File" help="Tab delimited text file with row or column label and covariate value on each line."/>
      <param name="classHeight" size="10" type="text" value="15" label="Covariate Display Height" help="Set the display height for column covariates and width for row covariates."/>
      <conditional name="cattype">
        <param name="cat" type="select" label="Covariate Type" help="Identify the covariate as belonging to rows or columns and containing categorical or continuous values.">
          <option value="row_discrete">Row Categorical</option>
          <option value="row_continuous">Row Continuous</option>
          <option value="column_discrete">Column Categorical</option>
          <option value="column_continuous">Column Continuous</option>
        </param>
        <when value="row_continuous">
          <conditional name="scatbar">
            <param name="bartype" type="select" label="Display Type">
              <option value="color_plot">Standard</option>
              <option value="bar_plot">Bar Plot</option>
              <option value="scatter_plot">Scatter Plot</option>
            </param>
            <when value="color_plot">
              <param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>
              <param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>
            </when>
            <when value="bar_plot">
              <param name="bg_color" type="color" label="Color for Bar Plot Background" value="#ffffff"/>
              <param name="fg_color" type="color" label="Color for Bar Plot Foreground" value="#000000"/>
            </when>
            <when value="scatter_plot">
              <param name="bg_color" type="color" label="Color for Scatter Plot Background" value="#ffffff"/>
              <param name="fg_color" type="color" label="Color for Scatter Plot Foreground" value="#000000"/>
            </when>
          </conditional>
        </when>
        <when value="column_continuous">
          <conditional name="scatbar">
            <param name="bartype" type="select" label="Display Type">
              <option value="color_plot">Standard</option>
              <option value="bar_plot">Bar Plot</option>
              <option value="scatter_plot">Scatter Plot</option>
            </param>
            <when value="color_plot">
              <param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>
              <param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>
            </when>
            <when value="bar_plot">
              <param name="bg_color" type="color" label="Color for Bar Plot Background" value="#ffffff"/>
              <param name="fg_color" type="color" label="Color for Bar Plot Foreground" value="#000000"/>
            </when>
            <when value="scatter_plot">
              <param name="bg_color" type="color" label="Color for Scatter Plot Background" value="#ffffff"/>
              <param name="fg_color" type="color" label="Color for Scatter Plot Foreground" value="#000000"/>
            </when>
          </conditional>
        </when>
        <when value="column_discrete">
          <conditional name="scatbar">
            <param name="bartype" type="select" hidden="true">
              <option value="color_plot">Standard</option>
            </param>
            <when value="color_plot">
              <param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>
              <param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>
            </when>
          </conditional>
        </when>
        <when value="row_discrete">
          <conditional name="scatbar">
            <param name="bartype" type="select" hidden="true">
              <option value="color_plot">Standard</option>
            </param>
            <when value="color_plot">
              <param name="bg_color" type="text" size="0" hidden="true" value="#ffffff"/>
              <param name="fg_color" type="text" size="0" hidden="true" value="#000000"/>
            </when>
          </conditional>
        </when>
      </conditional>
    </repeat>
    <!-- Free-form key/value attributes; both fields are restricted to letters and digits, anything else becomes "_". -->
    <repeat name="hm_attribute" title="Heat Map Attributes">
      <param name="attrbute_key" size="50" type="text" value="" label="Heat Map Attribute Key" help="For map level attributes. Enter the key (no spaces).">
        <sanitizer invalid_char="_">
          <valid initial="">
            <add preset="string.letters"/>
            <add preset="string.digits"/>
          </valid>
          <mapping initial="">
          </mapping>
        </sanitizer>
      </param>
      <param name="attrbute_value" size="50" type="text" label="Heat Map Attributes Value" help="For map level attributes. Enter the value (no spaces).">
        <sanitizer invalid_char="_">
          <valid initial="">
            <add preset="string.letters"/>
            <add preset="string.digits"/>
          </valid>
          <mapping initial="">
          </mapping>
        </sanitizer>
      </param>
    </repeat>
  </inputs>
  <outputs>
    <data name="output" label='Heat_Map_$hmname' format="ngchm"/>
  </outputs>
  <tests>
    <test>
      <repeat name="matrices">
        <param name="dataLayer" value="400x400.txt" />
        <param name="summarymethod" value="average" />
      </repeat>
      <param name="hmname" value="testRun" />
      <param name="hmdesc" value="validateTool" />
      <conditional name="d_rows">
        <param name="rowOrderMethod" value="Hierarchical" />
        <param name="rowDistanceMeasure" value="manhattan" />
        <param name="rowAgglomerationMethod" value="ward" />
      </conditional>
      <conditional name="d_cols">
        <param name="columnOrderMethod" value="Hierarchical" />
        <param name="columnDistanceMeasure" value="manhattan" />
        <param name="columnAgglomerationMethod" value="ward" />
      </conditional>
      <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" />
    </test>
    <!-- galaxy/test-data/ dir where the input and output file that should match tool output will be copied -->
  </tests>
</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# python shell program to validate ng-chm heat map input matrix file and covariate file formats before calling java shell -- bob brown
#
# NOTE(review): this text was recovered from a whitespace-collapsed diff, so the
# original block nesting is not visible. Where nesting was ambiguous the choice
# made is marked with NOTE(review) and should be confirmed against the real file.
# The code below runs unchanged on Python 2.7 and Python 3.

import subprocess #you must import subprocess so that python can talk to the command line
import sys
import os
import re
import traceback

# A label is accepted when it contains at least one ASCII letter (after lowercasing).
_HAS_LETTER = re.compile('[a-z]')
# A continuous value is accepted when it contains at least one sign, dot, digit or exponent marker.
# The hyphen is placed first so it is a literal and not a range operator.
_NUMBERISH = re.compile('[-+.0-9eE]')


def _say(*parts):
    """Write the parts to stdout separated by single spaces, followed by a newline."""
    sys.stdout.write(' '.join(str(p) for p in parts) + '\n')


def _open_text(path):
    """Open a text file for reading with universal newline handling on Python 2 and 3."""
    # The 'U' flag is required for universal newlines on Python 2 and is rejected by recent Python 3.
    return open(path, 'rU' if sys.version_info[0] < 3 else 'r')


def main():
    """Validate the input matrix (sys.argv[3]) and any covariate files named on the command line.

    Covariates are read from sys.argv as consecutive (label, file, type) triplets.
    Exits with status 3 when validation finds an error. Unexpected exceptions are
    reported on stderr and the function returns normally, as the original did.
    """
    try:
        _say('\nStarting Heat Map file validation ......')

        error = False
        endCovarParam = len(sys.argv) - 2   # IF any ending of loc for covar triplet info
        startCovarParam = 17                # beginning loc for covar triplet info
        inMatrix = sys.argv[3]

        # Walk backwards over the type field of each candidate triplet; the lowest
        # index that still looks like a covariate type marks the first triplet.
        for i in range(endCovarParam, 15, -3):
            arg = sys.argv[i]
            if len(arg) > 6 and (arg.startswith('row_') or arg.startswith('column_')):
                startCovarParam = i - 2

        errorInMatrix, inMatrixRowLabels, inMatrixColLabels = ValidateHMInputMatrix(inMatrix)   # verify input matrix

        # Guarded: indexing an empty label list used to raise and abort validation with exit status 0.
        if inMatrixRowLabels and inMatrixColLabels:
            _say("\nFirst & last Row labels ", inMatrixRowLabels[0], inMatrixRowLabels[-1],
                 " and Columns ", inMatrixColLabels[0], inMatrixColLabels[-1],
                 " number Rows= ", len(inMatrixRowLabels), " number Columns= ", len(inMatrixColLabels))

        if len(inMatrixRowLabels) < 5 or len(inMatrixColLabels) < 5:
            errorInMatrix = True
            _say('\n----ERROR Input matrix has too few columns and rows need to ignore validating covariate files for now')
        elif not errorInMatrix:
            _say("\n++++ SUCCESS the Input Matrix looks good\n\n")

            # NOTE(review): the covariate loop is assumed to run only when the matrix
            # is valid, matching the "need to ignore validating covariate files"
            # message above; confirm against the original indentation.
            i = startCovarParam
            while i < (len(sys.argv) - 2):   # todo verify this works with advances tool is one other 0->n param after this
                covarLabel = sys.argv[i].replace(' ', '')
                covarFN = sys.argv[i + 1].replace(' ', '')
                row_col_cat_contin = sys.argv[i + 2].replace(' ', '')
                i += 3

                _say("\nSTART Validating covariate file with label= ", covarLabel, " and type= ", row_col_cat_contin)

                # Accumulate: a later clean file must not hide an earlier failure.
                if ValidateHMCorvarFile(covarLabel, covarFN, row_col_cat_contin, inMatrixRowLabels, inMatrixColLabels):
                    error = True

        if error or errorInMatrix:
            _say("\n---ERROR issues found in input or covariate files\n ")
            sys.stderr.write("\nERROR issues found in input or covariate files see errors in Standard Output\n\n ")
            sys.exit(3)

        _say("\n FINISHED -- Validation of the Input Matrix and Covariate files (if any)\n\n")

    except Exception as err:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so the
        # status 3 exit above is not swallowed here.
        sys.stderr.write('ERROR: %s\n' % str(err))

    return

#+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-

def ValidateHMInputMatrix(inputMatrixPath):
    """Check the shape and labels of a tab-delimited heat map matrix file.

    Spaces are stripped from the path before opening. The first line is the
    column header; every later line is a row label followed by values.

    Returns (error, inMatrixRowLabels, inMatrixColLabels) where error is True
    when the file cannot be read, a column header holds no letter, or a row's
    field count differs from the header's by more than one leading label cell.
    """
    error = True
    # Initialised before open() so the return statement is valid when open() fails.
    inMatrixRowLabels = []
    inMatrixColLabels = []
    inMatrixFH = None

    try:
        inputMatrixPath = inputMatrixPath.replace(' ', '')
        inMatrixFH = _open_text(inputMatrixPath)
        error = False

        countRow = 0
        lenRow1 = 0
        lenAllRows = 0

        for rawRow in inMatrixFH:
            countRow += 1
            eachRow = rawRow.replace('\n', '').split('\t')

            if countRow == 1:
                _say('Input Matrix start 1 to 10= ', eachRow[:10], '\n')
                lenRow1 = len(eachRow)
                inMatrixColLabels = eachRow
                for j in range(1, lenRow1):
                    if _HAS_LETTER.search(eachRow[j].lower()) is None:
                        msg = "\n--+-+- ERROR Column Headers at position " + str(j + 1) + " value appears to be non-alphanumeric --" + str(eachRow[j]) + "--"
                        _say(msg)
                        sys.stderr.write(msg)
                        error = True

                if lenRow1 < 3:   # likely is covariate file not input matrix
                    _say("----WARNING Input number of columns= ", str(lenRow1), " is too few likely input matrix is really a covariate file")

            elif countRow == 2:
                lenAllRows = len(eachRow)
                # The header may or may not carry a cell above the row-label column.
                if (lenAllRows == lenRow1) or (lenAllRows == lenRow1 + 1):
                    _say("Validating Input matrix, number of Labeled Columns = ", str(lenAllRows))
                    inMatrixRowLabels.append(eachRow[0])
                    if (lenAllRows == lenRow1) and (inMatrixColLabels[0] == ''):
                        inMatrixColLabels.pop(0)   # remove blank first cell
                else:
                    msg = "\n--ERROR Input matrix number columns= " + str(lenRow1) + " in first row and the second row= " + str(lenAllRows) + " mismatch "
                    _say(msg)
                    sys.stderr.write(msg)
                    error = True
                    sys.err = 6   # kept from the original; nothing in this file reads it

            elif lenRow1 != len(eachRow) and lenRow1 + 1 != len(eachRow):
                msg = "\n--ERROR Input Row " + str(countRow) + " number of columns= " + str(len(eachRow)) + " is a length mismatch with row 2 length " + str(lenAllRows)
                _say(msg)
                sys.stderr.write(msg)
                error = True
                sys.err = 7   # kept from the original; nothing in this file reads it

            else:
                inMatrixRowLabels.append(eachRow[0])
                if _HAS_LETTER.search(eachRow[0].lower()) is None:
                    # Reports the row label itself; the original indexed with a stale header-loop counter.
                    _say("-+-+- WARNING Row Label at row " + str(countRow) + " value appears to be non-alphanumeric --" + str(eachRow[0]))
                    sys.stderr.write("\n--+-+- WARNING Row Label at row " + str(countRow) + " value appears to be non-alphanumeric " + str(eachRow[0]))

        # Drop an empty trailing header cell left by a trailing tab.
        if len(inMatrixColLabels) > 0:
            if (inMatrixColLabels[-1] == '') or (inMatrixColLabels[-1] == '\n'):
                inMatrixColLabels.pop()

    except Exception:
        sys.stderr.write(str(traceback.format_exc()))
        error = True
    finally:
        if inMatrixFH is not None:
            inMatrixFH.close()

    return error, inMatrixRowLabels, inMatrixColLabels

#+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-

def ValidateHMCorvarFile(covarLabel, covariateFilePath, row_col_cat_contin, inMatrixRowLabels, inMatrixColLabels):
    """Check a tab-delimited covariate file of (label, value) lines.

    An error is flagged when a line after the first has fewer than two fields,
    or when the file cannot be read. Labels without a letter and, for types
    ending in 'uous', values without any numeric-looking character only produce
    warnings. inMatrixRowLabels and inMatrixColLabels are accepted but not used:
    matching covariate labels or counts against the matrix is not implemented.

    Returns True when an error was found, False otherwise.
    """
    error = True
    covFH = None
    try:
        covFH = _open_text(covariateFilePath)
        error = False
        countRow = 0

        for rawRow in covFH:
            countRow += 1
            eachRow = rawRow.replace('\n', '').split('\t')
            if countRow == 1:
                # The original tested countRow == 0, which can never hold after the increment.
                _say("\nCovariance file info - label ", str(covarLabel), " row/col categorical or continous", row_col_cat_contin, " first row ", str(eachRow))

            if len(eachRow) < 2 and countRow > 1:
                _say("----ERROR Input Row " + str(countRow) + " does not have a label and/or value ")
                sys.stderr.write("----ERROR Input Row " + str(countRow) + " does not have a label and/or value")
                error = True
                sys.err = 8   # kept from the original; nothing in this file reads it
            elif len(eachRow) > 1:
                if _HAS_LETTER.search(eachRow[0].lower()) is None:
                    _say("\n-+-+- WARNING Covariate Label at row " + str(countRow) + " value appears to be non-alphanumeric --", eachRow[0], "--")
                    sys.stderr.write("\n--+-+- WARNING Row Headers at row " + str(countRow) + " value appears to be non-alphanumeric --" + str(eachRow[0]) + "--")

                # NOTE(review): nested under the two-field branch so eachRow[1] always
                # exists; the original nesting could not be recovered.
                if not error and row_col_cat_contin[-4:] == 'uous':   # verify continuous is number-ish
                    if _NUMBERISH.search(eachRow[1]) is None:
                        msg = "\n-+-+-WARNING Input Row " + str(countRow) + " covariance continuous value appears to be non-numeric --" + str(eachRow[1]) + "--"
                        _say(msg)
                        sys.stderr.write(msg)
    except Exception:
        sys.stderr.write(str(traceback.format_exc()))
        # A file that could not be fully read must not be reported as valid.
        error = True
    finally:
        if covFH is not None:
            covFH.close()

    return error


if __name__ == "__main__":
    main()
--- a/mda_heatmap_gen.xml Thu Jun 20 11:30:12 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,159 +0,0 @@ -<?xml version="1.0" encoding="UTF-8" ?> -<tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.3"> - <requirements> - <requirement type="package" version="3.4.1">r-base</requirement> - <requirement type="package" version="8.0.144">openjdk</requirement> - </requirements> - <description>Create Clustered Heat Maps</description> - <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc" - "matrix_files|path|$inputmatrix|name|datalayer|summary_method|$summarymethod" - "row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|0|data_type|labels" - "col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|0|data_type|labels" - #for $op in $operations - 'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cat}' - #end for - 'output_location|$output' - </command> - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> - <inputs> - <param name="inputmatrix" type="data" format="Tabular" label="Input Data Matrix" help="Tab delimited text file with row labels, column labels, and data." 
/> - <param name="hmname" size="40" type="text" value="Heat_Map_name" label="Heat Map Name" help="Short Name for heat map (no spaces)."/> - <sanitizer> - <valid> - <add preset="string.printable"/> - <remove value="""/> - <remove value="'"/> - <remove value=" "/> - </valid> - </sanitizer> - <param name="hmdesc" size="100" optional="true" type="text" value="Heat_Map_description" label="Heat Map Description" help="Longer description of the heat map contents."/> - <sanitizer> - <valid> - <add preset="string.printable"/> - <add value="string.letters"/> - <add value="string.digits"/> - <add value="-"/> - <add value="_"/> - <remove value="""/> - <remove value="'"/> - <remove value=" "/> - </valid> - </sanitizer> - <param name="summarymethod" type="select" label="Data Summarization Method" help="For large matrices, the selected method is used to aggregate data values in the summary view."> - <option value="average">Average</option> - <option value="sample">Sample</option> - <option value="mode">Mode</option> - </param> - <conditional name="d_rows"> - <param name="rowOrderMethod" type="select" label="Row ordering method" help="Determine if rows should be clustered, randomized, or remain as is."> - <option value="Hierarchical">Hierarchical Clustering</option> - <option value="Original">Original Order</option> - <option value="Random">Random</option> - </param> - <when value="Hierarchical"> - <param name="rowDistanceMeasure" type="select" label="Row Distance Metric" help="For clustering, select the method of determining distance between rows"> - <option value="euclidean">Euclidean</option> - <!-- <option value="binary">Binary</option> ** breaks dendrogram --> - <option value="manhattan">Manhattan</option> - <option value="maximum">Maximum</option> - <!-- <option value="canberra">Canberra</option> ** breaks dendrogram --> - <option value="minkowski">Minkowski</option> - <!-- <option value="correlation">Correlation</option> ** breaks dendrogram --> - </param> - <param 
name="rowAgglomerationMethod" type="select" label="Row Clustering Method" help="For clustering, select algorithm for building clusters."> - <option value="average">Average Linkage</option> - <option value="complete">Complete Linkage</option> - <option value="single">Single Linkage</option> - <option value="ward" selected="true">Ward</option> - <option value="mcquitty">Mcquitty</option> - <!-- <option value="median">Median</option> ** breaks dendrogram - <option value="centroid">Centroid</option> ** breaks dendrogram --> - </param> - </when> - <when value="Original"> - <param name="rowDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/> - <param name="rowAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/> - </when> - <when value="Random"> - <param name="rowDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/> - <param name="rowAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/> - </when> - </conditional> - <conditional name="d_cols"> - <param name="columnOrderMethod" type="select" label="Column ordering method" help="Determine if columns should be clustered, randomized, or remain as is."> - <option value="Hierarchical">Hierarchical Clustering</option> - <option value="Original">Original Order</option> - <option value="Random">Random</option> - </param> - <when value="Hierarchical"> - <param name="columnDistanceMeasure" type="select" label="Column Distance Metric" help="For clustering, select the method of determining distance between columns"> - <option value="euclidean">Euclidean</option> - <!-- <option value="binary">Binary</option> ** breaks dendrogram --> - <option value="manhattan">Manhattan</option> - <option value="maximum">Maximum</option> - <!-- <option value="canberra">Canberra</option> ** breaks dendrogram --> - <option value="minkowski">Minkowski</option> - <!-- <option value="correlation">Correlation</option> ** breaks dendrogram --> - </param> - <param name="columnAgglomerationMethod" 
type="select" label="Column Clustering Method" help="For clustering, select algorithm for building clusters."> - <option value="average">Average Linkage</option> - <option value="complete">Complete Linkage</option> - <option value="single">Single Linkage</option> - <option value="ward" selected="true">Ward</option> - <option value="mcquitty">Mcquitty</option> - <!-- <option value="median">Median</option> ** breaks dendrogram - <option value="centroid">Centroid</option> ** breaks dendrogram --> - </param> - </when> - <when value="Original"> - <param name="columnDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/> - <param name="columnAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/> - </when> - <when value="Random"> - <param name="columnDistanceMeasure" type="text" size="0" hidden="true" value="n/a"/> - <param name="columnAgglomerationMethod" type="text" size="0" hidden="true" value="n/a"/> - </when> - </conditional> - <repeat name="operations" title="Covariate Bars"> - <param name="class_name" size="25" type="text" value="" label="Covariate Name" help="Covariate heat map display label."> - <sanitizer> - <valid> - <add preset="string.printable"/> - <remove value="""/> - <remove value="'"/> - <remove value=" "/> - </valid> - </sanitizer> - </param> - <param name="repeatinput" type="data" format="Tabular" label="Covariate File" help="Tab delimited text file with row or column label and covariate value on each line."/> - <param name="cat" type="select" label="Axis Covariate Type" help="Identify the covariate as belonging to rows or columns and containing categorical or continuous values."> - <option value="row_discrete" >Row Categorical</option> - <option value="row_continuous" >Row Continuous</option> - <option value="column_discrete" >Column Categorical</option> - <option value="column_continuous" >Column Continuous</option> - </param> - </repeat> - </inputs> - <outputs> - <data name="output" label='Heat_Map_$hmname' format="ngchm"/> 
- </outputs> - <tests> - <test> - <param name="inputmatrix" value="400x400.txt" /> - <param name="hmname" value="testRun" /> - <param name="$hmdesc" value="validateTool" /> - <param name="summarymethod" value="Average" /> - <param name="rowOrderMethod" value="Hierarchical" /> - <param name="rowDistanceMeasure" value="Manhattan" /> - <param name="rowAgglomerationMethod" value="Ward" /> - <param name="columnOrderMethod" value="Hierarchical" /> - <param name="columnDistanceMeasure" value="Manhattan" /> - <param name="columnAgglomerationMethod" value="Ward" /> - <output name="output" file="Galaxy400x400-noCovariates.ngchm" lines_diff="10" /> - - </test> -<!-- galaxy/test-data/ dir where the input and output file that should match tool output will be copied --> - </tests> - </tool>