diff heatmap_advanced.sh @ 6:1f13d304ddbd draft

Uploaded
author insilico-bob
date Thu, 20 Jun 2019 11:36:53 -0400
parents 19382473a76b
children c5489978071a
line wrap: on
line diff
--- a/heatmap_advanced.sh	Thu Nov 08 14:59:57 2018 -0500
+++ b/heatmap_advanced.sh	Thu Jun 20 11:36:53 2019 -0400
@@ -1,39 +1,355 @@
-#echo "1: " $1 
-#echo "2: " $2 
-#echo "3: " $3 
-#echo "4: " $4 
-#echo "5: " $5 
-#echo "6: " $6 
-#echo "7: " $7 
-#echo "8: " $8 
-#echo "9: " $9 
-#echo "10: " ${10} 
-#echo "11: " ${11}  
-#echo "12: " ${12} 
-#echo "13: " ${13} 
-#echo "14: " ${14}  
-#echo "15: " ${15}  
-#echo "16: " ${16}  
-#echo "17: " ${17}  
-#echo "18: " ${18} 
-#echo "19: " ${19} 
-#echo "20: " ${20} 
-#echo "21: " ${21} 
-#echo "22: " ${22} 
-#echo "23: " ${23} 
-#echo "23: " ${24} 
-#echo $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} ${18} ${19} ${20} ${21} ${22} ${23}
+#echo "1: " $1  " 2: " $2 " 3: " $3 " 4: " $4  " 5: " $5 " 6: " $6 " 7: " $7 " 8: " $8 " 9: " $9 " 10: " ${10} 
+#echo "11: " ${11} " 12: " ${12} 13: " ${13} 14: " ${14} " 15: " ${15} " 16: " ${16} " 17: " ${17} " 18: " ${18} " 19: " ${19} " 20: " ${20} 
+#echo "21: "${21}" 22: "${22}" 23: "${23}" 24: "${24}" 25: "${25}" 26: "${26}" 27: "${27}" 28: "${28}" 29: "${29}" 30: "${30}
 
-#get tool data and tool install directories
-tooldir=$(cut -d';' -f1 <<< ${12})
-tooldata=$(cut -d';' -f2 <<< ${12})
+#Count total number of parameters, dataLayer parameters, and classification parameters
+parmSize=0
+classSize=0
+dataLayerSize=0
+attribSize=0
+for i in "$@"; do
+	currParm=$(cut -d'|' -f1 <<< $i)
+	parmSize=$((parmSize+1))
+	if [ $currParm = "classification" ]
+	then
+		classSize=$((classSize+1))
+  	fi
+	if [ $currParm = "matrix_files" ]
+	then
+		dataLayerSize=$((dataLayerSize+1))
+  	fi
+	if [ $currParm = "attribute" ]
+	then
+		attribSize=$((attribSize+1))
+  	fi
+done
 
+if [ $dataLayerSize -lt 1 ]
+then
+	noDataLayer="ERROR: No Heat Map Matrices provided.  Please add at least one Heat Map Matrix to your request and try again."
+   	echo $noDataLayer
+   	exit $noDataLayer
+fi
+
+#Get tool data and tool install directories
+tooldir=$1
+tooldata=$2
 #create temp directory for row and col order and dendro files.
 tdir=$tooldata/$(date +%y%m%d%M%S)
-echo $tdir
 mkdir $tdir
+#echo "tdir: "$tdir
+
+#Extract parameters for row and column order and dendro files
+rowOrderFile=$tdir/ROfile.txt
+rowDendroFile=$tdir/RDfile.txt
+colOrderFile=$tdir/COfile.txt
+colDendroFile=$tdir/CDfile.txt
+rowOrderJson='"order_file": "'$rowOrderFile'",'
+rowDendroJson='"dendro_file": "'$rowDendroFile'",'
+colOrderJson='"order_file": "'$colOrderFile'",'
+colDendroJson='"dendro_file": "'$colDendroFile'",'
+
+#BEGIN: Construct JSON for all non-repeating parameters
+parmJson='{'
+rowConfigJson='"row_configuration": {'
+colConfigJson='"col_configuration": {'
+
+ctr=0
+for i in "$@"; do
+	if [ $ctr -gt 1 ]
+	then
+		currParm=$(cut -d'|' -f1 <<< $i)
+		if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] && [ $currParm != "attribute" ]
+		then
+			#Parse pipe-delimited parameter parameter
+			parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",'
+	  	fi
+		if [ $currParm = "row_configuration" ]
+		then
+			rowOrder=$(cut -d'|' -f3 <<< $i)
+			rowDistance=$(cut -d'|' -f5 <<< $i)
+			rowAgglomeration=$(cut -d'|' -f7 <<< $i)
+			rowCuts=$(cut -d'|' -f9 <<< $i)
+			rowLabels=$(cut -d'|' -f11 <<< $i)
+			rowDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"],'
+			rowCutType=$(cut -d'|' -f16 <<< $i)
+			rowTopItemsJson=''
+			rowTopItems=$(cut -d'|' -f13 <<< $i)
+			if [ $rowTopItems != "None" ] && [ $rowTopItems != "" ]
+			then
+				rowTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": ['
+				rowTopItems=${rowTopItems//,/'","'}
+				rowTopItemsJson=$rowTopItemsJson'"'$rowTopItems'"],'
+			fi
+			rowCutsJson=''
+			if [ $rowCutType != "none" ]
+			then
+				cutValues=$(cut -d'|' -f15 <<< $i) 
+				if [ $cutValues != "None" ] && [ $cutValues != "0" ]
+				then
+					if [ $rowCutType = "treecuts" ]
+					then
+						rowCutsJson=$rowCutsJson'"tree_cuts": "'$cutValues'",' 
+						rowCutsJson=$rowCutsJson'"cut_width": "5",' 
+					fi
+					if [ $rowCutType = "positional" ]
+					then
+						rowCutErrorVal=0
+						[[ $cutValues != ?(-)+([0-9,]) ]] && rowCutErrorVal=$((rowCutErrorVal+1))
+						if [ $rowCutErrorVal -gt 0 ]
+						then
+	   						echo "GALAXY PARAMETER WARNING: Non-numeric values found for Row Fixed Gap Locations. Ignoring parameter value: "$cutValues
+						else
+							rowCutsJson=$rowCutsJson'"cut_locations": ['$cutValues'],' 
+							rowCutsJson=$rowCutsJson'"cut_width": "5",' 
+						fi
+					fi
+				fi
+			fi
+			rowConfigJson=$rowConfigJson$rowDataTypeJson$rowCutsJson$rowTopItemsJson
+			if [ $rowOrder = 'Hierarchical' ]
+			then
+				rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson
+			fi
+			rowConfigJson=$rowConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},'
+	  	fi
+		if [ $currParm = "col_configuration" ]
+		then
+			colOrder=$(cut -d'|' -f3 <<< $i)
+			colDistance=$(cut -d'|' -f5 <<< $i)
+			colAgglomeration=$(cut -d'|' -f7 <<< $i)
+			colCuts=$(cut -d'|' -f9 <<< $i)
+			colLabels=$(cut -d'|' -f11 <<< $i)
+			colDataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"],'
+			colCutType=$(cut -d'|' -f16 <<< $i)
+			colTopItemsJson=''
+			colTopItems=$(cut -d'|' -f13 <<< $i)
+			if [ $colTopItems != "None" ] && [ $colTopItems != "" ]
+			then
+				colTopItemsJson='"'$(cut -d'|' -f12 <<< $i)'": ['
+				colTopItems=${colTopItems//,/'","'}
+				colTopItemsJson=$colTopItemsJson'"'$colTopItems'"],'
+			fi
+			colCutsJson=''
+			if [ $colCutType != "none" ]
+			then
+				cutValues=$(cut -d'|' -f15 <<< $i) 
+				if [ $cutValues != "None" ] && [ $cutValues != "0" ]
+				then
+					if [ $colCutType = "treecuts" ]
+					then
+						colCutsJson=$colCutsJson'"tree_cuts": "'$cutValues'",' 
+						colCutsJson=$colCutsJson'"cut_width": "5",' 
+					fi
+					if [ $colCutType = "positional" ]
+					then
+						colCutErrorVal=0
+						[[ $cutValues != ?(-)+([0-9,]) ]] && colCutErrorVal=$((colCutErrorVal+1))
+						if [ $colCutErrorVal -gt 0 ]
+						then
+	   						echo "GALAXY PARAMETER WARNING: Non-numeric values found for Column Fixed Gap Locations. Ignoring parameter value: "$cutValues
+						else
+							colCutsJson=$colCutsJson'"cut_locations": ['$cutValues'],' 
+							colCutsJson=$colCutsJson'"cut_width": "5",' 
+						fi
+					fi
+				fi
+			fi
+			colConfigJson=$colConfigJson$colDataTypeJson$colCutsJson$colTopItemsJson
+			if [ $colOrder = 'Hierarchical' ]
+			then
+				colConfigJson=$colConfigJson$colOrderJson$colDendroJson
+			fi
+			colConfigJson=$colConfigJson' "'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f17 <<< $i)'":"'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'":"'$(cut -d'|' -f20 <<< $i)'"},'
+	  	fi
+	 fi
+	 ctr=$((ctr+1))
+done
+
+#END: Construct JSON for all non-repeating parameters
+#echo "rowOrder: "$rowOrder
+#echo "rowDistance: "$rowDistance
+#echo "rowAgglomeration: "$rowAgglomeration
+#echo "rowCuts: "$rowCuts
+#echo "rowLabels: "$rowLabels
+#echo "ROW CONFIG JSON: "$rowConfigJson
+#echo "colOrder: "$colOrder
+#echo "colDistance: "$colDistance
+#echo "colAgglomeration: "$colAgglomeration
+#echo "colCuts: "$colCuts
+#echo "colLabels: "$colLabels
+#echo "COL CONFIG JSON: "$colConfigJson
+
+#BEGIN: Construct JSON for data layers
+matrixJson='"matrix_files": [ '
+inputMatrix=''
+dataLayerIter=0
+dataLayerNames=''
+for i in "$@"; do
+	currParm=$(cut -d'|' -f1 <<< $i)
+	if [ $currParm = "matrix_files" ]
+	then
+		if [ $dataLayerIter -lt 1 ]		
+		then
+			inputMatrix=$(cut -d'|' -f3 <<< $i)
+		fi
+		currMatrixName=$(cut -d'|' -f5 <<< $i)
+		dataLayerIter=$((dataLayerIter+1))
+  		if [[ $dataLayerNames =~ $currMatrixName ]]
+		then
+   			currMatrixName=$currMatrixName$dataLayerIter
+		fi 
+  		dataLayerNames=$dataLayerNames$currMatrixName
+  		colorPref=$(cut -d'|' -f16 <<< $i)
+  		colorMapJson=''
+  		if [ $colorPref = "defined" ]
+		then
+			#validations to place leading zero on first breakpoint (if necessary)
+			b1=$(cut -d'|' -f20 <<< $i)
+			b1first=$(cut -d'.' -f1 <<< $b1)
+			if [ $b1first = "-" ]
+			then 
+				b1="-0."$(cut -d'.' -f2 <<< $b1)
+			fi
+			if [ "$b1first" = "" ]
+			then 
+				b1="0"$b1
+			fi
+			#validations to place leading zero on second breakpoint (if necessary)
+			b2=$(cut -d'|' -f21 <<< $i)
+			b2first=$(cut -d'.' -f1 <<< $b2)
+			if [ $b2first = "-" ]
+			then 
+				b2="-0."$(cut -d'.' -f2 <<< $b2)
+			fi
+			if [ "$b2first" = "" ]
+			then 
+				b2="0"$b2
+			fi
+			#validations to place leading zero on third breakpoint (if necessary)
+			b3=$(cut -d'|' -f22 <<< $i)
+			b3first=$(cut -d'.' -f1 <<< $b3)
+			if [ $b3first = "-" ]
+			then 
+				b3="-0."$(cut -d'.' -f2 <<< $b3)
+			fi
+			if [ "$b3first" = "" ]
+			then 
+				b3="0"$b3
+			fi
+			#validation to ensure that all entered breakpoints are numeric values
+			regExp='^[+-]?([0-9]+\.?|[0-9]*\.[0-9]+)$'
+		    if [[ $b1 =~ $regExp ]] && [[ $b2 =~ $regExp ]] && [[ $b3 =~ $regExp ]]
+		    then
+				colorMapJson=$colorMapJson'"color_map": {"colors": ["'$(cut -d'|' -f17 <<< $i)'","'$(cut -d'|' -f18 <<< $i)'","'$(cut -d'|' -f19 <<< $i)'"],'
+				colorMapJson=$colorMapJson'"thresholds": ['$b1','$b2','$b3'],'
+				colorMapJson=$colorMapJson'"missing":"'$(cut -d'|' -f23 <<< $i)'"},'
+		    else
+		        echo "GALAXY PARAMETER WARNING: Not all user-defined breakpoints are numbers. Defined breakpoints and colors will be ignored."
+		    fi
+		fi  		
+		#Parse pipe-delimited parameter parameter
+		matrixJson=$matrixJson' {'$colorMapJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$currMatrixName'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f10 <<< $i)'":"'$(cut -d'|' -f11 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"}'
+		if [ $dataLayerIter -lt $dataLayerSize ]		
+		then
+			matrixJson=$matrixJson','
+		fi
+  	fi
+done
+matrixJson=$matrixJson"],"
+#END: Construct JSON for data layers
+#echo "DATA LAYER JSON: "$matrixJson
+#echo "INPUT MATRIX: "$inputMatrix
+
+#BEGIN: Construct JSON for attributes
+attribJson='"chm_attributes": [ '
+attribIter=0
+for i in "$@"; do
+	currParm=$(cut -d'|' -f1 <<< $i)
+	if [ $currParm = "attribute" ]
+	then
+		attribIter=$((attribIter+1))
+		attribParam=$(cut -d'|' -f2 <<< $i)
+		#Parse pipe-delimited 2-part data layer parameter
+		attribJson=$attribJson' {"'$(cut -d':' -f1 <<< $attribParam)'":"'$(cut -d':' -f2 <<< $attribParam)'"}'
+		if [ $attribIter -lt $attribSize ]		
+		then
+			attribJson=$attribJson','
+		fi
+  	fi
+done
+attribJson=$attribJson'],'
+#END: Construct JSON for attributes
+#echo "ATTRIB JSON: "$attribJson
+
+#BEGIN: Construct JSON for classification files
+classJson='"classification_files": [ '
+colCutClass=''
+rowCutClass=''
+if [ $rowCuts -gt 1 ]
+then
+	rowCutClass='{"name": "Class", "path": "'$tdir'/ROfile.txt.cut","position": "row", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
+fi
+
+if [ $colCuts -gt 1 ]
+then
+	if [ $rowCuts -gt 1 ] 
+	then
+		rowCutClass=$rowCutClass','
+	fi
+	colCutClass='{"name": "Class", "path": "'$tdir'/COfile.txt.cut","position": "column", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
+	if [ $classSize -gt 0 ] 
+	then
+		colCutClass=$colCutClass','
+	fi
+else
+	if [ $rowCuts -gt 1 ] && [ $classSize -gt 0 ] 
+	then
+		rowCutClass=$rowCutClass','
+	fi
+fi
+
+classJson=$classJson$rowCutClass$colCutClass
+classIter=0
+for i in "$@"; do
+	currParm=$(cut -d'|' -f1 <<< $i)
+	if [ $currParm = "classification" ]
+	then
+		classIter=$((classIter+1))
+		className=$(cut -d'|' -f3 <<< $i)
+		#Parse pipe-delimited 3-part classification bar parameter
+		classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"'
+		classCat=$(cut -d'|' -f7 <<< $i)
+		classColorType=$(cut -d'_' -f2 <<< $classCat)
+		classJson=$classJson','
+		classHeight=$(cut -d'|' -f11 <<< $i)
+		heightErrorVal=0
+		[[ $classHeight != ?(-)+([0-9]) ]] && heightErrorVal=$((heightErrorVal+1))
+		if [ $heightErrorVal -gt 0 ]
+		then
+			echo 'GALAXY PARAMETER WARNING: Non-numeric values found for covariate bar ('$className') height.  Height value ignored and default of 15 used: '$classHeight
+		else
+			classJson=$classJson'"height": "'$classHeight'",' 
+		fi
+		classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}'
+		if [ $classIter -lt $classSize ]		
+		then
+			classJson=$classJson','
+		fi
+  	fi
+done
+classJson=$classJson']'
+#END: Construct JSON for classification files
+#echo "CLASSIFICATION JSON: "$classJson
+
+#Complete construction of Parameter JSON file by adding all JSON sections created above
+parmJson=$parmJson$rowConfigJson$colConfigJson$attribJson$matrixJson$classJson
+parmJson=$parmJson'}'
+#echo "COMPLETED PARAMETER JSON: "$parmJson
+
 #run R to cluster matrix
-output="$(R --slave --vanilla --file=$tooldir/CHM_Advanced.R --args $4 $5 $6 $7 $8 $9 ${10} $tdir/ROfile.txt $tdir/COfile.txt $tdir/RDfile.txt $tdir/CDfile.txt ${13} ${14} ${15} ${16} 2>&1)"
+output="$(R --slave --vanilla --file=$tooldir/CHM_Advanced.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)"
+# Check for errors from R step, log them if found, and exit script
 rc=$?;
 if [ $rc != 0 ]
 then
@@ -41,40 +357,13 @@
   if [ `echo "$output" | grep -c "Inf in foreign function call"` -gt 0 ]
   then
     echo "";
-    echo "Note: This error can occur when there is no variation in a row or column.  Try a different distance measure or remove rows/columns without variation.";
-    echo "This error may also be caused when a covariate file has inadvertently been selected as an Input Matrix.  Check your Input Matrix entry.";
+    echo "NOTE 1: This error can occur when a covariate file has inadvertently been selected as an Input Matrix.  Check your Input Matrix entry.";
+    echo "NOTE 2: This error can occur when there is no variation in a data rows or columns in the input matrix.  Try a different distance measure or remove rows/columns without variation.";
   fi
   exit $rc;
 fi
  
-#there are a variable number of triplicate parameters for classification bars
-count=0
-classifications=''
-
-#if row cut was done, add that autogenerated classification
-if [ ${13} -gt 1 ]
-then
-  classifications="Class $tdir/ROfile.txt.cut row_categorical color_plot"
-fi
-
-#if col cut was done, add that autogenerated classification
-if [ ${14} -gt 1 ]
-then
-  classifications="$classifications Class $tdir/COfile.txt.cut col_categorical  color_plot"
-fi
-
-#now add the user provided classification files 
-for i in "$@"; do
-  if [ $count -gt 20 ]
-  then
-#    echo "class entry: " $i
-    classifications=$classifications' '$i
-  fi
-  count=$((count+1))
-done
-#echo "classifications: " $classifications
-
-#call java program to generate NGCHM viewer files.
-java -jar $tooldir/GalaxyMapGen.jar "${1}" "${2}" "${3}" DataLayer1 $4 linear ${15} ${16} $5 $6 $7 $tdir/ROfile.txt $tdir/RDfile.txt "${17}" "${19}" $8 $9 ${10} $tdir/COfile.txt $tdir/CDfile.txt "${18}" "${20}" ${11} "${21}" $classifications  
+#Call java program to generate NGCHM viewer files.
+java -jar $tooldir/GalaxyMapGen.jar "$parmJson"
 #clean up tempdir
 rm -rf $tdir