Mercurial > repos > bornea > prohits_dotplot_generator
comparison Dotplot_Release/dotplot.bash @ 3:bc752a05f16d draft
Uploaded
| author | bornea |
|---|---|
| date | Tue, 15 Mar 2016 15:25:15 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:cfe2edb1c5d8 | 3:bc752a05f16d |
|---|---|
#!/bin/bash
#
# Dot plot pipeline driver: parses the command-line options, optionally
# converts and normalizes the SAINT input, then runs the selected clustering
# and plotting helpers that sit in the same directory as this script.

# Earlier readlink-based lookup, kept for reference:
#SCRIPT=$(readlink -e $0)
#SCRIPTPATH=`dirname $SCRIPT`

# Absolute path of the directory holding this script; the helper programs are
# invoked relative to it. The lookup runs in a command substitution so the
# caller's working directory is never changed, and every expansion is quoted
# so an install path containing spaces still resolves.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null 2>&1 && pwd)
if [ -z "${SCRIPTPATH}" ]; then
  # Without this guard a failed lookup would make every helper path start
  # at "/", so stop here instead.
  printf 'Unable to determine the directory containing %s\n' "$0" >&2
  exit 1
fi

#######################################
# Print the command-line synopsis to stderr and terminate the script.
# Arguments: none ($0 is used as the program name in the message)
# Outputs:   usage text on stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  # The program name is passed as printf data rather than spliced into the
  # format string, so a '%' or backslash in the path cannot corrupt the output.
  printf 'Usage: %s\n' "$0" >&2
  # Quoted delimiter: the option list is emitted literally, with no expansion.
  cat >&2 <<'EOF'
[-f <saint_file_name.txt>]
[-i <0 for SaintExpress format, 1 for other>]
[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
[-n <clustering type to be performed if option -c is set to "h">]
[-d <distance metric to use if option -c is set to "h">]
[-b <list of bait proteins in display order (see option -c n)>]
[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]
[-s <primary FDR cutoff [0-1, recommended=0.01]>]
[-t <secondary FDR cutoff [must not be below the primary, recommended=0.025]>]
[-x <spectral count minimum. Only preys with >= this will be used>]
[-m <maximum spectral count>]
[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]
EOF
  exit 1
}

# Option values. The optional settings carry their defaults; every other
# option variable is reset to empty so that a same-named variable inherited
# from the caller's environment cannot pass for a supplied option.
N=0          # -N normalization mode
n="ward"     # -n hierarchical clustering method
d="canberra" # -d distance metric
x=0          # -x minimum spectral count
i=0          # -i input format flag
f="" s="" t="" m="" c="" b="" p="" C=""

# The leading ':' in the option string selects getopts' silent mode, so an
# unknown flag or a missing argument falls through to the catch-all arm,
# which prints the usage text and exits.
while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
  case "${o}" in
    f) f=${OPTARG} ;;
    i) i=${OPTARG} ;;
    s) s=${OPTARG} ;;
    t) t=${OPTARG} ;;
    x) x=${OPTARG} ;;
    m) m=${OPTARG} ;;
    c) c=${OPTARG} ;;
    n) n=${OPTARG} ;;
    d) d=${OPTARG} ;;
    b) b=${OPTARG} ;;
    p) p=${OPTARG} ;;
    N) N=${OPTARG} ;;
    C) C=${OPTARG} ;;
    *) usage ;;
  esac
done
# Drop the parsed options from the positional parameters.
shift $((OPTIND-1))

#######################################
# Derive the stem used to name the output directory and intermediate files.
# Leading directories are dropped first, so a '.' in the path (for example
# "./saint.txt") cannot truncate the stem and the stem never contains '/'.
# The remaining name is cut at its first '.', as before; if that leaves
# nothing (a name starting with '.'), the whole base name is used.
# Arguments: $1 - SAINT input file name, optionally with leading directories
# Outputs:   the stem on stdout
#######################################
output_stem() {
  local base=${1##*/}
  local stem=${base%%.*}
  printf '%s\n' "${stem:-${base}}"
}

filename=$(output_stem "${f}")

# Echo the main settings back to the user.
printf 'Saint input file = %s\n' "${f}"
printf 'Primary FDR cutoff = %s\n' "${s}"
printf 'Secondary FDR cutoff for dotplot = %s\n' "${t}"
printf 'Minimum spectral count for significant preys = %s\n' "${x}"
printf 'Maximum spectral count for dot plot = %s\n' "${m}"

# -f, -s, -t, -m and -c are mandatory; show the usage text when any is missing.
if [[ -z "${f}" || -z "${s}" || -z "${t}" || -z "${m}" || -z "${c}" ]]; then
  usage
fi

# Input that is not in SaintExpress format (-i 1) is converted first.
# mockSaintExpress.txt is presumably what the converter writes; it replaces
# the original file as the pipeline input from here on.
if [ "${i}" == 1 ]; then
  # Stop on a failed conversion instead of continuing with an input file
  # that may not exist.
  if ! "$SCRIPTPATH/SaintConvert.pl" -i "${f}"; then
    echo "SaintConvert.pl failed for input file ${f}"
    exit 1
  fi
  f="mockSaintExpress.txt"
fi

#######################################
# Validate the spectral count window held in the globals x and m.
# A non-integer value makes the numeric tests themselves fail, in which case
# neither branch is taken and the values are left untouched.
# Globals:   x (read; reset to 0 when negative), m (read)
# Outputs:   a diagnostic on stdout when x is rejected or adjusted
# Returns:   normally when the window is usable; exits the script with
#            status 1 when the minimum is not below the maximum
#######################################
check_spectral_window() {
  if [ "${x}" -ge "${m}" ]; then
    echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
    exit 1
  elif [ "${x}" -lt 0 ]; then
    echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
    x=0
  fi
}

check_spectral_window

###Check for normalization

# -N 1 runs Normalization.R on the input. -N 2 runs Normalization_sigpreys.R
# with an FDR cutoff, which falls back to the secondary cutoff (-t) when -C
# was not given. Either way norm_saint.txt (presumably written by the R
# script) becomes the input for the remaining steps. Any other -N value
# skips normalization.
case "${N}" in
  1 | 2)
    printf "\nNormalization is being performed\n"
    if [ "${N}" == 1 ]; then
      "$SCRIPTPATH/Normalization.R" "${f}"
    else
      C=${C:-${t}}
      "$SCRIPTPATH/Normalization_sigpreys.R" "${f}" "${C}"
    fi
    # $? here is the status of whichever normalization script just ran.
    norm_status=$?
    if [ "${norm_status}" -ne 0 ]; then
      # Do not continue with a norm_saint.txt that may be missing or stale.
      echo "Normalization failed (exit status ${norm_status})"
      exit 1
    fi
    f="norm_saint.txt"
    ;;
esac

###Check for clustering etc

# Succeeds when $1 is one of the clustering methods accepted for -n.
is_hclust_method() {
  case "$1" in
    average | centroid | complete | mcquitty | median | single | ward) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 is one of the distance metrics accepted for -d.
is_distance_metric() {
  case "$1" in
    binary | canberra | euclidean | manhattan | maximum | minkowski) return 0 ;;
    *) return 1 ;;
  esac
}

# Hierarchical clustering: an empty method falls back to "ward", an
# unrecognised one is fatal.
if [ "${c}" == "h" ]; then
  if [ -z "${n}" ]; then
    printf '\nHierarchical clustering was selected (-c = h), but no clustering method (-n) was chosen.\n'
    printf 'The input parameter -n must be set to one of "average", "centroid", "complete", "mcquitty",\n'
    printf '"median", "single" or "ward". "ward" will be selected as default.\n\n'
    n="ward"
  elif is_hclust_method "${n}"; then
    printf '\nHierarchical clustering (method = %s) will be performed\n\n' "${n}"
  else
    printf '\n%s is not a valid Hierarchical clustering method.\n' "${n}"
    printf 'Choose one of "average", "centroid", "complete", "mcquitty", "median", "single" or "ward"\n\n'
    exit 1
  fi
fi

# p_c is handed to R_dotPlot_nc.R; it is set to 1 only when all preys are to
# be clustered (-c n together with -p all).
p_c=0

# Same policy for the distance metric: empty falls back to "canberra",
# unrecognised is fatal.
if [ "${c}" == "h" ]; then
  if [ -z "${d}" ]; then
    printf '\nHierarchical clustering was selected (-c = h), but no distance metric (-d) was chosen.\n'
    printf 'The input parameter -d must be set to one of "binary", "canberra", "euclidean",\n'
    printf '"manhattan", "maximum" or "minkowski". "canberra" will be selected as default.\n\n'
    d="canberra"
  elif is_distance_metric "${d}"; then
    printf '\nHierarchical clustering (distance metric = %s) will be performed\n\n' "${d}"
  else
    printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "${d}"
    printf 'Choose one of "binary", "canberra", "euclidean", "manhattan", "maximum" or "minkowski"\n\n'
    exit 1
  fi
fi

# No clustering of baits: both ordering lists are required.
if [ "${c}" == "n" ]; then
  if [ -z "${b}" ]; then
    printf '\n"No Clustering" option was selected (-c = n), but no bait list was included (option -b).\n'
    printf 'Bait list must be in .txt format.\n\n'
    exit 1
  elif [ -z "${p}" ]; then
    printf '\n"No Clustering" option was selected (-c = n), but no prey list was included (option -p).\n'
    printf 'Prey list must be in .txt format.\n\n'
    exit 1
  elif [ "${p}" == "all" ]; then
    printf '\n"No Clustering" option was selected (-c = n) for baits, but preys will still be clustered.\n'
    printf 'using "ward" and "canberra" as defaults or options as supplied on command line.\n\n'
    # "empty" stands in for the prey list argument in this mode.
    p="empty"
    p_c=1
    # Keep a valid -n/-d given on the command line, as the message above
    # promises. Anything else falls back to the default rather than aborting,
    # so invocations that worked before keep working.
    if ! is_hclust_method "${n}"; then
      printf '"%s" is not a valid clustering method; using "ward".\n' "${n}"
      n="ward"
    fi
    if ! is_distance_metric "${d}"; then
      printf '"%s" is not a valid distance metric; using "canberra".\n' "${d}"
      d="canberra"
    fi
  fi
fi

###Check number of baits

# BaitCheck.pl is expected to print the number of baits in the input file.
bait_n=$(perl "$SCRIPTPATH/BaitCheck.pl" -i "${f}")
echo "Number of baits = ${bait_n}"
printf "\n\n"

# With exactly two baits, biclustering is replaced by hierarchical clustering
# using the "ward" method. bait_n is quoted so that an empty result from
# BaitCheck.pl compares as unequal instead of breaking the test expression.
if [ "${c}" == "b" ] && [ "${bait_n}" == 2 ]; then
  printf "\nWarning only 2 baits are present. Biclustering will not be performed.\n"
  printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
  c="h"
  n="ward"
fi

###Generate plots

# Reject an unsupported clustering mode before running anything.
case "${c}" in
  b | h | n) ;;
  *)
    printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering)\n"
    exit 1
    ;;
esac

# Results go to Output_<name>; intermediate files go to a TempData_<name>
# directory inside it.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

#######################################
# Move each named item into a directory, one mv per item so that a missing
# item only reports its own error and the remaining items are still moved.
# Arguments: $1 - destination directory; remaining arguments - items to move
#######################################
move_into() {
  local dest=$1 item
  shift
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# Biclustering needs the four preparatory steps before the shared ones.
if [ "${c}" == "b" ]; then
  printf "\nBiclustering will be performed\n\n"
  "$SCRIPTPATH/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
  "$SCRIPTPATH/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
  # GSL_RNG_SEED is set for this command only (presumably to make the run
  # repeatable).
  GSL_RNG_SEED=123 "$SCRIPTPATH/Step3_nestedcluster" "${filename}.dat" "$SCRIPTPATH/biclust_param.txt"
  "$SCRIPTPATH/Step4_biclustering.R" "${filename}.dat"
fi

# Every mode runs SOFD.pl before its plotting script.
"$SCRIPTPATH/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"

# Run the mode's plotting script and note which files it leaves behind.
# SC_data.txt and FDR_data.txt are kept as temporary data in all modes.
declare -a results=()
declare -a temp_data=(SC_data.txt FDR_data.txt)
case "${c}" in
  b)
    "$SCRIPTPATH/R_dotPlot.R" "${s}" "${t}" "${m}"
    results=(dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf)
    temp_data+=(
      bait_lists Clusters MCMCparameters NestedClusters NestedMu NestedSigma2
      OPTclusters "${filename}_matrix.txt" "${filename}.dat"
      clustered_matrix.txt singletons.txt bait2bait_matrix.txt
      baitClusters clusteredData
    )
    ;;
  h)
    "$SCRIPTPATH/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "$SCRIPTPATH"
    results=(dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf)
    ;;
  n)
    # Show the exact command line before running it.
    echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
    "$SCRIPTPATH/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "$p_c" "${p}" "${n}" "${d}" "$SCRIPTPATH"
    results=(dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf)
    ;;
esac

# -p creates both levels and does not complain when a previous run already
# created them.
mkdir -p -- "${tmp_dir}"
move_into "${out_dir}" "${results[@]}"
move_into "${tmp_dir}" "${temp_data[@]}"
cp -- "$SCRIPTPATH/legend.pdf" "${out_dir}"

# When normalization was requested (-N 1 or 2), keep the normalized input
# with the other temporary data.
if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then
  mv -- norm_saint.txt "${tmp_dir}"
fi

