Mercurial > repos > md-anderson-bioinformatics > heat_map_creation
changeset 38:605ec840a06b draft
Uploaded
author | insilico-bob |
---|---|
date | Thu, 20 Jun 2019 11:30:12 -0400 |
parents | f2272e907f1b |
children | 436f03b71cf6 |
files | CHM.R GalaxyMapGen.jar heatmap.sh mda_heatmap_gen.xml mda_heatmap_viz.zip |
diffstat | 5 files changed, 146 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/CHM.R Thu Nov 08 14:59:04 2018 -0500 +++ b/CHM.R Thu Jun 20 11:30:12 2019 -0400 @@ -17,7 +17,7 @@ performDataOrdering<-function(dataFile, rowOrderMethod, rowDistanceMeasure, rowAgglomerationMethod, colOrderMethod, colDistanceMeasure, colAgglomerationMethod,rowOrderFile, colOrderFile, rowDendroFile, colDendroFile, rowCut, colCut) { - dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) + dataMatrix = read.table(dataFile, header=TRUE, sep = "\t", check.names = FALSE, row.names = 1, as.is=TRUE, na.strings=c("NA","N/A","-","?")) rowOrder <- createOrdering(dataMatrix, rowOrderMethod, "row", rowDistanceMeasure, rowAgglomerationMethod) if (rowOrderMethod == "Hierarchical") { writeHCDataTSVs(rowOrder, rowDendroFile, rowOrderFile) @@ -39,6 +39,7 @@ data=matrix(,length(uDend$labels),2); for (i in 1:length(uDend$labels)) { + print(uDend$labels[i]) data[i,1] = uDend$labels[i]; data[i,2] = which(uDend$order==i); }
--- a/heatmap.sh Thu Nov 08 14:59:04 2018 -0500 +++ b/heatmap.sh Thu Jun 20 11:30:12 2019 -0400 @@ -1,16 +1,133 @@ -#echo $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} ${18} +#echo "1: " $1" 2: " $2" 3: " $3" 4: "$4" 5: "$5 " 6: "$6 "7: "$7" 8: "$8 " 9: "$9" 10: "${10}" 11: "${11} "12: "${12} +#echo " 13: "${13}" 14: "${14}" 15: "${15}" 16: "${16} "17: "${17}" 18: "${18}" 19: "${19}" 20: "${20}" 21: "${21} " 22: "${22}" 23:" ${23} -#get tool data and tool install directories -tooldir=$(cut -d';' -f1 <<< ${12}) -tooldata=$(cut -d';' -f2 <<< ${12}) +#Count total number of parameters and classification parameters +parmSize=0 +classSize=0 +matrixSize=0 +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + parmSize=$((parmSize+1)) + if [ $currParm = "classification" ] + then + classSize=$((classSize+1)) + fi +done +#Get tool data and tool install directories +tooldir=$1 +tooldata=$2 #create temp directory for row and col order and dendro files. tdir=$tooldata/$(date +%y%m%d%M%S) -echo $tdir mkdir $tdir +#echo "tdir: "$tdir + +#Extract parameters for row and column order and dendro files +rowOrderFile=$tdir/ROfile.txt +rowDendroFile=$tdir/RDfile.txt +colOrderFile=$tdir/COfile.txt +colDendroFile=$tdir/CDfile.txt +rowOrderJson='"order_file": "'$rowOrderFile'",' +rowDendroJson='"dendro_file": "'$rowDendroFile'",' +colOrderJson='"order_file": "'$colOrderFile'",' +colDendroJson='"dendro_file": "'$colDendroFile'",' + +#BEGIN: Construct JSON for all non-repeating parameters +parmJson='{' +rowConfigJson='"row_configuration": {' +colConfigJson='"col_configuration": {' + +ctr=0 +for i in "$@"; do + if [ $ctr -gt 1 ] + then + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm != "matrix_files" ] && [ $currParm != "row_configuration" ] && [ $currParm != "col_configuration" ] && [ $currParm != "classification" ] + then + #Parse pipe-delimited parameter parameter + parmJson=$parmJson' "'$(cut -d'|' -f1 <<< $i)'":"'$(cut -d'|' -f2 <<< $i)'",' + fi + if [ 
$currParm = "row_configuration" ] + then + rowOrder=$(cut -d'|' -f3 <<< $i) + rowDistance=$(cut -d'|' -f5 <<< $i) + rowAgglomeration=$(cut -d'|' -f7 <<< $i) + rowCuts=$(cut -d'|' -f9 <<< $i) + rowLabels=$(cut -d'|' -f11 <<< $i) + dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$rowLabels'"]' + if [ $rowOrder = 'Hierarchical' ] + then + rowConfigJson=$rowConfigJson$rowOrderJson$rowDendroJson + fi + rowConfigJson=$rowConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},' + fi + if [ $currParm = "col_configuration" ] + then + colOrder=$(cut -d'|' -f3 <<< $i) + colDistance=$(cut -d'|' -f5 <<< $i) + colAgglomeration=$(cut -d'|' -f7 <<< $i) + colCuts=$(cut -d'|' -f9 <<< $i) + colLabels=$(cut -d'|' -f11 <<< $i) + dataTypeJson='"'$(cut -d'|' -f10 <<< $i)'":["'$colLabels'"]' + if [ $colOrder = 'Hierarchical' ] + then + colConfigJson=$colConfigJson$colOrderJson$colDendroJson + fi + colConfigJson=$colConfigJson'"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'",'$dataTypeJson'},' + fi + fi + ctr=$((ctr+1)) +done +#END: Construct JSON for all non-repeating parameters +#echo "rowCuts: "$rowCuts +#echo "colCuts: "$colCuts +#echo "ROW CONFIG JSON: "$rowConfigJson +#echo "COL CONFIG JSON: "$colConfigJson + +#BEGIN: Construct JSON for data layers +matrixJson='"matrix_files": [ ' +inputMatrix='' +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm = "matrix_files" ] + then + #Parse pipe-delimited parameter parameter + matrixJson=$matrixJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'","'$(cut -d'|' -f6 <<< $i)'":"'$(cut -d'|' -f7 <<< $i)'"}' + inputMatrix=$(cut -d'|' -f3 <<< $i) + fi +done +matrixJson=$matrixJson"]," +#END: Construct JSON for 
data layers + +#BEGIN: Construct JSON for classification files +classJson='"classification_files": [ ' +classIter=0 +for i in "$@"; do + currParm=$(cut -d'|' -f1 <<< $i) + if [ $currParm = "classification" ] + then + classIter=$((classIter+1)) + #Parse pipe-delimited 3-part classification bar parameter + classJson=$classJson' {"'$(cut -d'|' -f2 <<< $i)'":"'$(cut -d'|' -f3 <<< $i)'","'$(cut -d'|' -f4 <<< $i)'":"'$(cut -d'|' -f5 <<< $i)'"' + classCat=$(cut -d'|' -f7 <<< $i) + classColorType=$(cut -d'_' -f2 <<< $classCat) + classJson=$classJson',' + classJson=$classJson' "position":"'$(cut -d'_' -f1 <<< $classCat)'","color_map": {"type":"'$classColorType'"}}' + if [ $classIter -lt $classSize ] + then + classJson=$classJson',' + fi + fi +done +classJson=$classJson']' +#END: Construct JSON for classification files + +parmJson=$parmJson$matrixJson$rowConfigJson$colConfigJson$classJson +parmJson=$parmJson'}' +#echo "HEATMAP PARAMETERS JSON: "$parmJson #run R to cluster matrix -output="$(R --slave --vanilla --file=$tooldir/CHM.R --args $4 $5 $6 $7 $8 $9 ${10} $tdir/ROfile.txt $tdir/COfile.txt $tdir/RDfile.txt $tdir/CDfile.txt ${13} ${14} ${15} ${16} 2>&1)" +output="$(R --slave --vanilla --file=$tooldir/CHM.R --args $inputMatrix $rowOrder $rowDistance $rowAgglomeration $colOrder $colDistance $colAgglomeration $rowOrderFile $colOrderFile $rowDendroFile $colDendroFile $rowCuts $colCuts $rowLabels $colLabels 2>&1)" rc=$?; if [ $rc != 0 ] then @@ -24,33 +141,7 @@ exit $rc; fi -#there are a variable number of triplicate parameters for classification bars -count=0 -classifications='' - -#if row cut was done, add that autogenerated classification -if [ ${13} -gt 1 ] -then - classifications="Class $tdir/ROfile.txt.cut row_categorical" -fi - -#if col cut was done, add that autogenerated classification -if [ ${14} -gt 1 ] -then - classifications="$classifications Class $tdir/COfile.txt.cut col_categorical" -fi - -#now add the user provided classification files -for i in "$@"; do - if 
[ $count -gt 16 ] - then - classifications=$classifications' '$i - fi - count=$((count+1)) -done -echo "classifications " $classifications -echo "${11} " ${11} #call java program to generate NGCHM viewer files. -java -jar $tooldir/GalaxyMapGen.jar "${1}" "${2}" "${3}" DataLayer1 $4 linear ${15} ${16} $5 $6 $7 $tdir/ROfile.txt $tdir/RDfile.txt $8 $9 ${10} $tdir/COfile.txt $tdir/CDfile.txt ${11} $classifications +java -jar $tooldir/GalaxyMapGen.jar "$parmJson" #clean up tempdir rm -rf $tdir
--- a/mda_heatmap_gen.xml Thu Nov 08 14:59:04 2018 -0500 +++ b/mda_heatmap_gen.xml Thu Jun 20 11:30:12 2019 -0400 @@ -2,16 +2,17 @@ <tool id="mda_heatmap_gen" name="NG-CHM Generator" version="2.3"> <requirements> <requirement type="package" version="3.4.1">r-base</requirement> - <requirement type="package" version="8">openjdk</requirement> + <requirement type="package" version="8.0.144">openjdk</requirement> </requirements> <description>Create Clustered Heat Maps</description> - <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh "standard" "Heat_Map_$hmname" "$hmdesc" '$inputmatrix' ${d_rows.rowOrderMethod} ${d_rows.rowDistanceMeasure} ${d_rows.rowAgglomerationMethod} ${d_cols.columnOrderMethod} ${d_cols.columnDistanceMeasure} ${d_cols.columnAgglomerationMethod} $summarymethod '$__tool_directory__;$__tool_data_path__/' 0 0 labels labels 'None' + <command interpreter="bash" detect_errors="aggressive">$__tool_directory__/heatmap.sh "$__tool_directory__" "$__tool_data_path__/" "chm_name|Heat_Map_$hmname" "chm_description|$hmdesc" + "matrix_files|path|$inputmatrix|name|datalayer|summary_method|$summarymethod" + "row_configuration|order_method|${d_rows.rowOrderMethod}|distance_metric|${d_rows.rowDistanceMeasure}|agglomeration_method|${d_rows.rowAgglomerationMethod}|tree_covar_cuts|0|data_type|labels" + "col_configuration|order_method|${d_cols.columnOrderMethod}|distance_metric|${d_cols.columnDistanceMeasure}|agglomeration_method|${d_cols.columnAgglomerationMethod}|tree_covar_cuts|0|data_type|labels" #for $op in $operations - '${op.class_name}' - '${op.repeatinput.file_name}' - '${op.cat}' - #end for - '$output' + 'classification|name|${op.class_name}|path|${op.repeatinput.file_name}|category|${op.cat}' + #end for + 'output_location|$output' </command> <stdio> <exit_code range="1:" level="fatal" /> @@ -22,11 +23,6 @@ <sanitizer> <valid> <add preset="string.printable"/> -<!-- <add value="string.letters"/> - <add value="string.digits"/> - 
<add value="-"/> - <add value="_"/> ---> <remove value="&quot;"/> <remove value="'"/> <remove value=" "/> @@ -59,12 +55,12 @@ <when value="Hierarchical"> <param name="rowDistanceMeasure" type="select" label="Row Distance Metric" help="For clustering, select the method of determining distance between rows"> <option value="euclidean">Euclidean</option> - <option value="binary">Binary</option> + <!-- <option value="binary">Binary</option> ** breaks dendrogram --> <option value="manhattan">Manhattan</option> <option value="maximum">Maximum</option> - <option value="canberra">Canberra</option> + <!-- <option value="canberra">Canberra</option> ** breaks dendrogram --> <option value="minkowski">Minkowski</option> - <option value="correlation">Correlation</option> + <!-- <option value="correlation">Correlation</option> ** breaks dendrogram --> </param> <param name="rowAgglomerationMethod" type="select" label="Row Clustering Method" help="For clustering, select algorithm for building clusters."> <option value="average">Average Linkage</option> @@ -72,8 +68,8 @@ <option value="single">Single Linkage</option> <option value="ward" selected="true">Ward</option> <option value="mcquitty">Mcquitty</option> - <option value="median">Median</option> - <option value="centroid">Centroid</option> + <!-- <option value="median">Median</option> ** breaks dendrogram + <option value="centroid">Centroid</option> ** breaks dendrogram --> </param> </when> <when value="Original"> @@ -94,12 +90,12 @@ <when value="Hierarchical"> <param name="columnDistanceMeasure" type="select" label="Column Distance Metric" help="For clustering, select the method of determining distance between columns"> <option value="euclidean">Euclidean</option> - <option value="binary">Binary</option> + <!-- <option value="binary">Binary</option> ** breaks dendrogram --> <option value="manhattan">Manhattan</option> <option value="maximum">Maximum</option> - <option value="canberra">Canberra</option> + <!-- <option
value="canberra">Canberra</option> ** breaks dendrogram --> <option value="minkowski">Minkowski</option> - <option value="correlation">Correlation</option> + <!-- <option value="correlation">Correlation</option> ** breaks dendrogram --> </param> <param name="columnAgglomerationMethod" type="select" label="Column Clustering Method" help="For clustering, select algorithm for building clusters."> <option value="average">Average Linkage</option> @@ -107,8 +103,8 @@ <option value="single">Single Linkage</option> <option value="ward" selected="true">Ward</option> <option value="mcquitty">Mcquitty</option> - <option value="median">Median</option> - <option value="centroid">Centroid</option> + <!-- <option value="median">Median</option> ** breaks dendrogram + <option value="centroid">Centroid</option> ** breaks dendrogram --> </param> </when> <when value="Original"> @@ -133,9 +129,9 @@ </param> <param name="repeatinput" type="data" format="Tabular" label="Covariate File" help="Tab delimited text file with row or column label and covariate value on each line."/> <param name="cat" type="select" label="Axis Covariate Type" help="Identify the covariate as belonging to rows or columns and containing categorical or continuous values."> - <option value="row_categorical" >Row Categorical</option> + <option value="row_discrete" >Row Categorical</option> <option value="row_continuous" >Row Continuous</option> - <option value="column_categorical" >Column Categorical</option> + <option value="column_discrete" >Column Categorical</option> <option value="column_continuous" >Column Continuous</option> </param> </repeat>