view heatmap_advanced.sh @ 6:1f13d304ddbd draft

Uploaded
author insilico-bob
date Thu, 20 Jun 2019 11:36:53 -0400
parents 19382473a76b
children c5489978071a
line wrap: on
line source

#echo "1: " $1  " 2: " $2 " 3: " $3 " 4: " $4  " 5: " $5 " 6: " $6 " 7: " $7 " 8: " $8 " 9: " $9 " 10: " ${10} 
#echo "11: " ${11} " 12: " ${12} 13: " ${13} 14: " ${14} " 15: " ${15} " 16: " ${16} " 17: " ${17} " 18: " ${18} " 19: " ${19} " 20: " ${20} 
#echo "21: "${21}" 22: "${22}" 23: "${23}" 24: "${24}" 25: "${25}" 26: "${26}" 27: "${27}" 28: "${28}" 29: "${29}" 30: "${30}

#Count total number of parameters, dataLayer parameters, and classification parameters

# Tally the given arguments by the keyword in front of their first '|'.
# Sets the globals parmSize (every argument), classSize ("classification"),
# dataLayerSize ("matrix_files") and attribSize ("attribute").
countParms() {
	local arg
	parmSize=0
	classSize=0
	dataLayerSize=0
	attribSize=0
	for arg in "$@"; do
		parmSize=$((parmSize+1))
		# case performs no word splitting, so an empty argument or a keyword
		# containing blanks is simply not counted instead of breaking a '[' test.
		case "$(cut -d'|' -f1 <<< "$arg")" in
			classification) classSize=$((classSize+1)) ;;
			matrix_files) dataLayerSize=$((dataLayerSize+1)) ;;
			attribute) attribSize=$((attribSize+1)) ;;
		esac
	done
}
countParms "$@"

# Report an error and return 1 when the matrix count in $1 is below one.
# The message goes to stdout, where this script writes its other messages.
requireDataLayer() {
	if [ "$1" -lt 1 ]
	then
		echo "ERROR: No Heat Map Matrices provided.  Please add at least one Heat Map Matrix to your request and try again."
		return 1
	fi
}
# exit only accepts a numeric status; the message is printed by the helper.
requireDataLayer "$dataLayerSize" || exit 1

#Get tool data and tool install directories
tooldir=$1
tooldata=$2
#create temp directory for row and col order and dendro files.
# mktemp creates a uniquely named directory atomically.  The former timestamp
# name (date +%y%m%d%M%S, which has no hour field) could be shared by two jobs,
# and the first job to finish would then delete the other job's files.
# When no tool data directory is given, the system temp directory is used.
tdir=$(mktemp -d "${tooldata:-${TMPDIR:-/tmp}}/chm.XXXXXX") || {
	echo "ERROR: Unable to create a temporary working directory under: ${tooldata:-${TMPDIR:-/tmp}}"
	exit 1
}
#echo "tdir: "$tdir

#Extract parameters for row and column order and dendro files
rowOrderFile="$tdir/ROfile.txt"
rowDendroFile="$tdir/RDfile.txt"
colOrderFile="$tdir/COfile.txt"
colDendroFile="$tdir/CDfile.txt"
# JSON fragments naming those files; each ends with a comma so it can be
# concatenated directly in front of further keys.
rowOrderJson="\"order_file\": \"$rowOrderFile\","
rowDendroJson="\"dendro_file\": \"$rowDendroFile\","
colOrderJson="\"order_file\": \"$colOrderFile\","
colDendroJson="\"dendro_file\": \"$colDendroFile\","

#BEGIN: Construct JSON for all non-repeating parameters

# Print field number $2 of the pipe-delimited string $1.
pipeField() {
	cut -d'|' -f"$2" <<< "$1"
}

# Build the JSON body for one row_configuration/col_configuration argument and
# leave it in the global axisJson (closing brace and trailing comma included).
#   $1 - the pipe-delimited configuration argument
#   $2 - axis name used in the gap-location warning ("Row" or "Column")
#   $3 - order_file JSON fragment, emitted only for Hierarchical ordering
#   $4 - dendro_file JSON fragment, emitted only for Hierarchical ordering
# A warning is written to stdout when positional gap locations are not numeric.
buildAxisJson() {
	local spec="$1" axisName="$2" orderJson="$3" dendroJson="$4"
	local itemSep='","'
	# Optional leading minus followed by digits and commas only.
	local gapListRe='^-?[0-9,]+$'
	local order topItems cutType cutValues
	local cutsJson='' topItemsJson=''

	order=$(pipeField "$spec" 3)
	axisJson='"'$(pipeField "$spec" 10)'":["'$(pipeField "$spec" 11)'"],'

	# Top items: a comma separated list turned into a JSON string array.
	# All tests are quoted so an empty list, or one containing blanks, no
	# longer makes '[' fail.
	topItems=$(pipeField "$spec" 13)
	if [ -n "$topItems" ] && [ "$topItems" != "None" ]
	then
		topItems=${topItems//,/$itemSep}
		topItemsJson='"'$(pipeField "$spec" 12)'": ["'$topItems'"],'
	fi

	# Gaps: either a number of tree cuts or a list of fixed positions.
	cutType=$(pipeField "$spec" 16)
	cutValues=$(pipeField "$spec" 15)
	if [ -n "$cutType" ] && [ "$cutType" != "none" ] && [ -n "$cutValues" ] && [ "$cutValues" != "None" ] && [ "$cutValues" != "0" ]
	then
		case "$cutType" in
			treecuts)
				cutsJson='"tree_cuts": "'$cutValues'","cut_width": "5",'
				;;
			positional)
				if [[ $cutValues =~ $gapListRe ]]
				then
					cutsJson='"cut_locations": ['$cutValues'],"cut_width": "5",'
				else
					echo "GALAXY PARAMETER WARNING: Non-numeric values found for $axisName Fixed Gap Locations. Ignoring parameter value: $cutValues"
				fi
				;;
		esac
	fi

	axisJson=$axisJson$cutsJson$topItemsJson
	if [ "$order" = 'Hierarchical' ]
	then
		axisJson=$axisJson$orderJson$dendroJson
	fi
	axisJson=$axisJson' "'$(pipeField "$spec" 2)'":"'$order'","'$(pipeField "$spec" 4)'":"'$(pipeField "$spec" 5)'","'$(pipeField "$spec" 6)'":"'$(pipeField "$spec" 7)'","'$(pipeField "$spec" 17)'":"'$(pipeField "$spec" 18)'","'$(pipeField "$spec" 19)'":"'$(pipeField "$spec" 20)'"},'
}

# Walk the script arguments, skipping the first two (tool and tool data
# directories), and fill the globals
#   parmJson                       - opening brace plus every simple "key":"value" parameter
#   rowConfigJson / colConfigJson  - the two axis configuration objects
#   rowOrder, rowDistance, rowAgglomeration, rowCuts, rowLabels (and col*)
# Repeating parameters (matrix_files, classification, attribute) and
# arguments with an empty keyword are skipped here.
# Reads the globals rowOrderJson, rowDendroJson, colOrderJson, colDendroJson.
parseConfigParms() {
	local arg
	parmJson='{'
	rowConfigJson='"row_configuration": {'
	colConfigJson='"col_configuration": {'
	for arg in "${@:3}"; do
		case "$(pipeField "$arg" 1)" in
			'' | matrix_files | classification | attribute)
				;;
			row_configuration)
				rowOrder=$(pipeField "$arg" 3)
				rowDistance=$(pipeField "$arg" 5)
				rowAgglomeration=$(pipeField "$arg" 7)
				rowCuts=$(pipeField "$arg" 9)
				rowLabels=$(pipeField "$arg" 11)
				buildAxisJson "$arg" "Row" "$rowOrderJson" "$rowDendroJson"
				rowConfigJson=$rowConfigJson$axisJson
				;;
			col_configuration)
				colOrder=$(pipeField "$arg" 3)
				colDistance=$(pipeField "$arg" 5)
				colAgglomeration=$(pipeField "$arg" 7)
				colCuts=$(pipeField "$arg" 9)
				colLabels=$(pipeField "$arg" 11)
				buildAxisJson "$arg" "Column" "$colOrderJson" "$colDendroJson"
				colConfigJson=$colConfigJson$axisJson
				;;
			*)
				#Parse pipe-delimited two-part parameter
				parmJson=$parmJson' "'$(pipeField "$arg" 1)'":"'$(pipeField "$arg" 2)'",'
				;;
		esac
	done
}
parseConfigParms "$@"

#END: Construct JSON for all non-repeating parameters
#echo "rowOrder: "$rowOrder
#echo "rowDistance: "$rowDistance
#echo "rowAgglomeration: "$rowAgglomeration
#echo "rowCuts: "$rowCuts
#echo "rowLabels: "$rowLabels
#echo "ROW CONFIG JSON: "$rowConfigJson
#echo "colOrder: "$colOrder
#echo "colDistance: "$colDistance
#echo "colAgglomeration: "$colAgglomeration
#echo "colCuts: "$colCuts
#echo "colLabels: "$colLabels
#echo "COL CONFIG JSON: "$colConfigJson

#BEGIN: Construct JSON for data layers

# Print breakpoint $1 with a leading zero added when its integer part is
# missing: ".5" becomes "0.5" and "-.5" becomes "-0.5".  Other values are
# printed unchanged.
normalizeBreakpoint() {
	local bp="$1" whole
	whole=$(cut -d'.' -f1 <<< "$bp")
	if [ "$whole" = "-" ]
	then
		bp="-0."$(cut -d'.' -f2 <<< "$bp")
	elif [ -z "$whole" ]
	then
		bp="0$bp"
	fi
	printf '%s\n' "$bp"
}

# Build the "matrix_files" JSON array from every matrix_files argument.
# Sets the globals
#   matrixJson  - the complete array, with a trailing comma
#   inputMatrix - field 3 of the first matrix_files argument
# A layer whose name is empty or was already used gets its 1-based layer
# number appended.  Names are compared exactly; the previous unanchored regex
# missed duplicates containing regex characters such as "log2(x)" and renamed
# names that were merely a substring of an earlier one.
# A warning is written to stdout when user-defined breakpoints are not numeric.
buildMatrixJson() {
	local numberRe='^[+-]?([0-9]+\.?|[0-9]*\.[0-9]+)$'
	local arg layerName colorMapJson layerJson b1 b2 b3 keyField
	# usedNames is a '|'-delimited list; '|' cannot occur inside a name
	# because names are themselves cut out of '|'-delimited arguments.
	local usedNames='|' layerIter=0 sep=''
	matrixJson='"matrix_files": [ '
	inputMatrix=''
	for arg in "$@"; do
		[ "$(cut -d'|' -f1 <<< "$arg")" = "matrix_files" ] || continue
		if [ "$layerIter" -lt 1 ]
		then
			inputMatrix=$(cut -d'|' -f3 <<< "$arg")
		fi
		layerIter=$((layerIter+1))
		layerName=$(cut -d'|' -f5 <<< "$arg")
		if [ -z "$layerName" ] || [[ $usedNames == *"|$layerName|"* ]]
		then
			layerName=$layerName$layerIter
		fi
		usedNames=$usedNames$layerName'|'

		colorMapJson=''
		if [ "$(cut -d'|' -f16 <<< "$arg")" = "defined" ]
		then
			#validations to place leading zero on the breakpoints (if necessary)
			b1=$(normalizeBreakpoint "$(cut -d'|' -f20 <<< "$arg")")
			b2=$(normalizeBreakpoint "$(cut -d'|' -f21 <<< "$arg")")
			b3=$(normalizeBreakpoint "$(cut -d'|' -f22 <<< "$arg")")
			#validation to ensure that all entered breakpoints are numeric values
			if [[ $b1 =~ $numberRe ]] && [[ $b2 =~ $numberRe ]] && [[ $b3 =~ $numberRe ]]
			then
				colorMapJson='"color_map": {"colors": ["'$(cut -d'|' -f17 <<< "$arg")'","'$(cut -d'|' -f18 <<< "$arg")'","'$(cut -d'|' -f19 <<< "$arg")'"],'
				colorMapJson=$colorMapJson'"thresholds": ['$b1','$b2','$b3'],'
				colorMapJson=$colorMapJson'"missing":"'$(cut -d'|' -f23 <<< "$arg")'"},'
			else
				echo "GALAXY PARAMETER WARNING: Not all user-defined breakpoints are numbers. Defined breakpoints and colors will be ignored."
			fi
		fi

		#Parse pipe-delimited parameter: fields 2/3 and 4 first, then the
		#key/value pairs held in fields 6/7 through 14/15.
		layerJson='"'$(cut -d'|' -f2 <<< "$arg")'":"'$(cut -d'|' -f3 <<< "$arg")'","'$(cut -d'|' -f4 <<< "$arg")'":"'$layerName'"'
		for keyField in 6 8 10 12 14; do
			layerJson=$layerJson',"'$(cut -d'|' -f"$keyField" <<< "$arg")'":"'$(cut -d'|' -f"$((keyField+1))" <<< "$arg")'"'
		done
		# sep is empty for the first layer and ',' afterwards, so no
		# separate layer count is needed to place the commas.
		matrixJson=$matrixJson$sep' {'$colorMapJson$layerJson'}'
		sep=','
	done
	matrixJson=$matrixJson'],'
}
buildMatrixJson "$@"
#END: Construct JSON for data layers
#echo "DATA LAYER JSON: "$matrixJson
#echo "INPUT MATRIX: "$inputMatrix

#BEGIN: Construct JSON for attributes

# Build the "chm_attributes" JSON array (global attribJson, trailing comma
# included) from every argument of the form attribute|key:value.
# The pair is split at its FIRST colon only, so values that contain ':'
# themselves (for example URLs) are kept whole.  A pair without any colon
# yields the same text as both key and value.
buildAttribJson() {
	local arg pair sep=''
	attribJson='"chm_attributes": [ '
	for arg in "$@"; do
		[ "$(cut -d'|' -f1 <<< "$arg")" = "attribute" ] || continue
		pair=$(cut -d'|' -f2 <<< "$arg")
		# sep is empty for the first entry and ',' afterwards.
		attribJson="$attribJson$sep {\"${pair%%:*}\":\"${pair#*:}\"}"
		sep=','
	done
	attribJson=$attribJson'],'
}
buildAttribJson "$@"
#END: Construct JSON for attributes
#echo "ATTRIB JSON: "$attribJson

#BEGIN: Construct JSON for classification files
classJson='"classification_files": [ '
colCutClass=''
rowCutClass=''
if [ $rowCuts -gt 1 ]
then
	rowCutClass='{"name": "Class", "path": "'$tdir'/ROfile.txt.cut","position": "row", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
fi

if [ $colCuts -gt 1 ]
then
	if [ $rowCuts -gt 1 ] 
	then
		rowCutClass=$rowCutClass','
	fi
	colCutClass='{"name": "Class", "path": "'$tdir'/COfile.txt.cut","position": "column", "color_map": {"type": "discrete"}, "bar_type": "color_plot"}'
	if [ $classSize -gt 0 ] 
	then
		colCutClass=$colCutClass','
	fi
else
	if [ $rowCuts -gt 1 ] && [ $classSize -gt 0 ] 
	then
		rowCutClass=$rowCutClass','
	fi
fi

classJson=$classJson$rowCutClass$colCutClass
classIter=0
for i in "$@"; do
	currParm=$(cut -d'|' -f1 <<< $i)
	if [ $currParm = "classification" ]
	then
		classIter=$((classIter+1))
		className=$(cut -d'|' -f3 <<< $i)
		#Parse pipe-delimited 3-part classification bar parameter
		classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f8 <<< $i)'":"'$(cut -d'|' -f9 <<< $i)'","'$(cut -d'|' -f12 <<< $i)'":"'$(cut -d'|' -f13 <<< $i)'","'$(cut -d'|' -f14 <<< $i)'":"'$(cut -d'|' -f15 <<< $i)'"'
		classCat=$(cut -d'|' -f7 <<< $i)
		classColorType=$(cut -d'_' -f2 <<< $classCat)
		classJson=$classJson','
		classHeight=$(cut -d'|' -f11 <<< $i)
		heightErrorVal=0
		[[ $classHeight != ?(-)+([0-9]) ]] && heightErrorVal=$((heightErrorVal+1))
		if [ $heightErrorVal -gt 0 ]
		then
			echo 'GALAXY PARAMETER WARNING: Non-numeric values found for covariate bar ('$className') height.  Height value ignored and default of 15 used: '$classHeight
		else
			classJson=$classJson'"height": "'$classHeight'",' 
		fi
		classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}'
		if [ $classIter -lt $classSize ]		
		then
			classJson=$classJson','
		fi
  	fi
done
classJson=$classJson']'
#END: Construct JSON for classification files
#echo "CLASSIFICATION JSON: "$classJson

#Assemble the final parameter JSON: the simple parameters collected so far,
#followed by the row and column configuration, attribute, data layer and
#classification sections, closed with the brace that ends the top-level object.
parmJson="${parmJson}${rowConfigJson}${colConfigJson}${attribJson}${matrixJson}${classJson}}"
#echo "COMPLETED PARAMETER JSON: "$parmJson

#clean up tempdir on every exit path, including a failed R or java step
cleanupTempDir() {
	if [ -n "$tdir" ]
	then
		rm -rf -- "$tdir"
	fi
}
trap cleanupTempDir EXIT

#run R to cluster matrix
# Every argument is quoted so that an empty value keeps its position in the
# argument list and paths containing blanks stay in one piece.
output="$(R --slave --vanilla --file="$tooldir/CHM_Advanced.R" --args "$inputMatrix" "$rowOrder" "$rowDistance" "$rowAgglomeration" "$colOrder" "$colDistance" "$colAgglomeration" "$rowOrderFile" "$colOrderFile" "$rowDendroFile" "$colDendroFile" "$rowCuts" "$colCuts" "$rowLabels" "$colLabels" 2>&1)"
# Check for errors from R step, log them if found, and exit script
rc=$?
if [ "$rc" -ne 0 ]
then
  # Quoted so the R messages keep their line breaks.
  printf '%s\n' "$output"
  if grep -q "Inf in foreign function call" <<< "$output"
  then
    echo ""
    echo "NOTE 1: This error can occur when a covariate file has inadvertently been selected as an Input Matrix.  Check your Input Matrix entry."
    echo "NOTE 2: This error can occur when there is no variation in a data rows or columns in the input matrix.  Try a different distance measure or remove rows/columns without variation."
  fi
  exit "$rc"
fi

#Call java program to generate NGCHM viewer files.
java -jar "$tooldir/GalaxyMapGen.jar" "$parmJson"
# Exit with the generator's status; previously the final rm decided the
# script's exit status, which hid a failed java step.
exit $?