#!/bin/bash
# SCRIPTPATH: absolute path of the directory that holds this script. The helper
# programs launched further down are looked up in this directory.
# Symlinks in the path are not resolved (plain `pwd`, no readlink).
# The quotes and "--" keep a script path with spaces or a leading dash intact,
# and CDPATH is cleared so that `cd` cannot land in an unrelated directory.
SCRIPTPATH=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) || {
  printf 'Cannot determine the directory containing %s\n' "$0" >&2
  exit 1
}

# Print the command-line synopsis on stderr and terminate with exit status 1.
# NOTE(review): the -t text says the secondary cutoff must be "less than" the
# primary, yet the recommended values (0.01 primary, 0.025 secondary) point the
# other way - confirm the intended wording before changing it.
usage() {
  # A here-document is used instead of a printf format string so that a "%"
  # or a backslash in the script path ($0) is printed literally.
  cat >&2 <<EOF
Usage: $0
[-f <saint_file_name.txt>]
[-i <0 for SaintExpress format, 1 for other>]
[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
[-n <clustering type to be performed if option -c is set to "h">]
[-d <distance metric to use if option -c is set to "h">]
[-b <list of bait proteins in display order (see option -c n)>]
[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]
[-s <primary FDR cutoff [0-1, recommended=0.01]>]
[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>]
[-x <spectral count minimum. Only preys with >= this will be used>]
[-m <maximum spectral count>]
[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]
EOF
  exit 1
}

# Defaults for the optional settings; every other option variable starts unset.
N=0          # -N: normalization mode (0 = none)
n="ward"     # -n: hierarchical clustering method
d="canberra" # -d: distance metric
x=0          # -x: minimum spectral count
i=0          # -i: input format (0 = SaintExpress)

# Parse the given command-line options into the single-letter globals of the
# same name (-f sets f, -N sets N, and so on).
# Arguments: the script's argument list ("$@").
# Globals:   writes f i s t x m c n d b p N C; leaves OPTIND pointing at the
#            first non-option argument so the caller can `shift`.
# A missing option argument or an unknown option is reported on stderr before
# usage (defined elsewhere in this script) prints the synopsis and exits.
parse_args() {
  local o
  OPTIND=1 # restart getopts so the function can be called more than once
  while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
    case "${o}" in
      f) f=${OPTARG} ;;
      i) i=${OPTARG} ;;
      s) s=${OPTARG} ;;
      t) t=${OPTARG} ;;
      x) x=${OPTARG} ;;
      m) m=${OPTARG} ;;
      c) c=${OPTARG} ;;
      n) n=${OPTARG} ;;
      d) d=${OPTARG} ;;
      b) b=${OPTARG} ;;
      p) p=${OPTARG} ;;
      N) N=${OPTARG} ;;
      C) C=${OPTARG} ;;
      :)
        # Silent mode (leading ":" in the optstring): OPTARG holds the option
        # letter whose argument is missing.
        printf 'Option -%s requires an argument.\n' "${OPTARG}" >&2
        usage
        ;;
      \?)
        printf 'Unknown option: -%s\n' "${OPTARG}" >&2
        usage
        ;;
      *)
        usage
        ;;
    esac
  done
}

parse_args "$@"
shift $((OPTIND - 1))

# Print the stem used to name output files and directories for input path $1:
# the directory part is dropped first, then everything from the first "." of
# the file name onward. Stripping "%%.*" from the whole path (as before) turned
# "./saint.txt" into an empty string and let directory names leak into
# "Output_<stem>". A name that would reduce to nothing (e.g. ".hidden") is
# kept unchanged.
output_stem() {
  local name="${1##*/}"
  local stem="${name%%.*}"
  printf '%s\n' "${stem:-${name}}"
}

filename=$(output_stem "${f}")
echo "Saint input file = ${f}"
echo "Primary FDR cutoff = ${s}"
echo "Secondary FDR cutoff for dotplot = ${t}"
echo "Minimum spectral count for significant preys = ${x}"
echo "Maximum spectral count for dot plot = ${m}"

# -f, -s, -t, -m and -c have no defaults, so all of them are mandatory.
if [[ -z "${f}" || -z "${s}" || -z "${t}" || -z "${m}" || -z "${c}" ]]; then
  usage
fi

# -i 1: pass the input through SaintConvert.pl and continue with
# mockSaintExpress.txt (presumably the file the converter writes - confirm).
if [[ "${i}" == 1 ]]; then
  "${SCRIPTPATH}/SaintConvert.pl" -i "${f}"
  f="mockSaintExpress.txt"
fi

# Succeeds when $1 is an optionally signed whole number.
is_integer() {
  [[ "$1" =~ ^[+-]?[0-9]+$ ]]
}

# The spectral count minimum must stay below the maximum; a negative minimum
# is clamped to 0. Values that are not whole numbers cannot be compared with
# -ge/-lt, so they are reported once and the check is skipped instead of
# letting `[` emit "integer expression expected" errors.
if ! is_integer "${x}" || ! is_integer "${m}"; then
  printf 'spectral count limits should be whole numbers (minimum = %s, maximum = %s); range check skipped\n' \
    "${x}" "${m}" >&2
elif [ "${x}" -ge "${m}" ]; then
  echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
  exit 1
elif [ "${x}" -lt 0 ]; then
  echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
  x=0
fi

###Check for normalization

# -N 1 runs Normalization.R on the input; -N 2 runs Normalization_sigpreys.R
# with the cutoff from -C, falling back to the secondary FDR cutoff (-t) when
# -C was not given. In both cases the following steps read norm_saint.txt
# (presumably written by the normalization script - confirm) instead of the
# original input. Any other value of -N leaves the input untouched.
# Every argument is quoted so that file names containing spaces or glob
# characters reach the helper scripts as a single word.
if [[ "${N}" == 1 ]]; then
  printf '\nNormalization is being performed\n'
  "${SCRIPTPATH}/Normalization.R" "${f}"
  f="norm_saint.txt"
elif [[ "${N}" == 2 ]]; then
  printf '\nNormalization is being performed\n'
  C=${C:-${t}}
  "${SCRIPTPATH}/Normalization_sigpreys.R" "${f}" "${C}"
  f="norm_saint.txt"
fi

###Check for clustering etc

# Succeeds when $1 is one of the clustering methods accepted for -n.
is_valid_hclust_method() {
  case "$1" in
    average | centroid | complete | mcquitty | median | single | ward) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when $1 is one of the distance metrics accepted for -d.
is_valid_distance_metric() {
  case "$1" in
    binary | canberra | euclidean | manhattan | maximum | minkowski) return 0 ;;
    *) return 1 ;;
  esac
}

# Report an unusable -n value and stop the script with status 1.
reject_hclust_method() {
  # The value is passed as a printf argument, never as part of the format.
  printf '\n%s is not a valid Hierarchical clustering method.\n' "${n}"
  printf 'Choose one of "average", "centroid", "complete", "mcquitty", "median", "single" or "ward"\n\n'
  exit 1
}

# Report an unusable -d value and stop the script with status 1.
reject_distance_metric() {
  printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "${d}"
  printf 'Choose one of  "binary", "canberra", "euclidean", "manhattan", "maximum" or "minkowski"\n\n'
  exit 1
}

# Check the option combination selected with -c and settle the values used
# for clustering.
# Globals read:    c b p n d
# Globals written: n d (defaults filled in), p, p_c
#   -c h : an empty -n / -d falls back to "ward" / "canberra" with a notice;
#          an unknown value ends the script with status 1.
#   -c n : -b and -p are required (exit 1 otherwise). With -p all the preys are
#          still clustered: p becomes "empty", p_c becomes 1, and -n / -d keep
#          the values given on the command line ("ward" / "canberra" only when
#          empty), as the printed message states. They are validated here
#          because the -c h checks do not run in this mode.
validate_clustering_options() {
  if [[ "${c}" == "h" ]]; then
    if [[ -z "${n}" ]]; then
      printf '\nHierarchial clustering was selected (-c = h), but no clustering method (-n) was chosen.\n'
      printf 'The input parameter -n must be set to one of "average", "centroid", "complete", "mcquitty",\n'
      printf '"median", "single" or "ward". "ward" will be selected as default.\n\n'
      n="ward"
    elif is_valid_hclust_method "${n}"; then
      printf '\nHierarchical clustering (method = %s) will be performed\n\n' "${n}"
    else
      reject_hclust_method
    fi
  fi

  p_c=0
  if [[ "${c}" == "h" ]]; then
    if [[ -z "${d}" ]]; then
      printf '\nHierarchial clustering was selected (-c = h), but no distance metric (-d) was chosen.\n'
      printf 'The input parameter -d must be set to one of  "binary", "canberra", "euclidean",\n'
      printf '"manhattan", "maximum" or "minkowski". "canberra" will be selected as default.\n\n'
      d="canberra"
    elif is_valid_distance_metric "${d}"; then
      printf '\nHierarchical clustering (distance metric = %s) will be performed\n\n' "${d}"
    else
      reject_distance_metric
    fi
  fi

  if [[ "${c}" == "n" ]]; then
    if [[ -z "${b}" ]]; then
      printf '\n"No Clustering" option was selected (-c = n), but no bait list was included (option -b).\n'
      printf 'Bait list must be in .txt formart.\n\n'
      exit 1
    elif [[ -z "${p}" ]]; then
      printf '\n"No Clustering" option was selected (-c = n), but no prey list was included (option -p).\n'
      printf 'Prey list must be in .txt formart.\n\n'
      exit 1
    elif [[ "${p}" == "all" ]]; then
      printf '\n"No Clustering" option was selected (-c = n) for baits, but preys will still be clustered.\n'
      printf 'using "ward" and "canberra" as defaults or options as supplied on command line.\n\n'
      p="empty"
      p_c=1
      n=${n:-ward}
      d=${d:-canberra}
      if ! is_valid_hclust_method "${n}"; then
        reject_hclust_method
      fi
      if ! is_valid_distance_metric "${d}"; then
        reject_distance_metric
      fi
    fi
  fi
}

validate_clustering_options


###Check number of baits

# Whatever BaitCheck.pl prints on stdout is taken as the number of baits.
bait_n=$(perl "${SCRIPTPATH}/BaitCheck.pl" -i "${f}")
echo "Number of baits = ${bait_n}"
printf "\n\n"

# With exactly two baits, biclustering is replaced by hierarchical clustering
# using the "ward" method; -d keeps whatever value it already has.
# ${bait_n} is quoted so that an empty result (BaitCheck.pl failed or printed
# nothing) makes the test false rather than a "unary operator expected" error.
if [[ "${c}" == "b" && "${bait_n}" == 2 ]]; then
  printf '\nWarning only 2 baits are present. Biclustering will not performed.\n'
  printf 'Hierarchical clustering (method = ward) will be performed instead.\n\n'
  c="h"
  n="ward"
fi


###Generate plots

# Final plots are collected in Output_<name>/, intermediate data files in
# Output_<name>/TempData_<name>/.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

# Move every argument after the first into directory $1. One mv per item, so
# an artefact that was not produced only triggers its own error message and
# the remaining files are still collected.
move_into() {
  local dest=$1 item
  shift
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# All expansions are quoted so that names containing spaces or glob characters
# reach the helper programs unchanged. `mkdir -p` creates both directory
# levels and stays quiet when they are left over from an earlier run.
if [[ "${c}" == "b" ]]; then
  printf '\nBiclustering will be performed\n\n'
  "${SCRIPTPATH}/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
  "${SCRIPTPATH}/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
  # Fixed GSL_RNG_SEED for the nested clustering step (presumably to make the
  # run reproducible - confirm).
  GSL_RNG_SEED=123 "${SCRIPTPATH}/Step3_nestedcluster" "${filename}.dat" "${SCRIPTPATH}/biclust_param.txt"
  "${SCRIPTPATH}/Step4_biclustering.R" "${filename}.dat"

  "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  "${SCRIPTPATH}/R_dotPlot.R" "${s}" "${t}" "${m}"

  mkdir -p -- "${tmp_dir}"
  move_into "${tmp_dir}" \
    bait_lists Clusters MCMCparameters NestedClusters NestedMu NestedSigma2 \
    OPTclusters "${filename}_matrix.txt" "${filename}.dat" SC_data.txt \
    FDR_data.txt clustered_matrix.txt singletons.txt bait2bait_matrix.txt \
    baitClusters clusteredData
  move_into "${out_dir}" dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf
  cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
elif [[ "${c}" == "h" ]]; then
  "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  "${SCRIPTPATH}/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "${SCRIPTPATH}"

  mkdir -p -- "${tmp_dir}"
  move_into "${out_dir}" \
    dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf
  move_into "${tmp_dir}" SC_data.txt FDR_data.txt
  cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
elif [[ "${c}" == "n" ]]; then
  "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  # Show the plotting command before running it.
  echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
  "${SCRIPTPATH}/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "${p_c}" "${p}" "${n}" "${d}" "${SCRIPTPATH}"

  mkdir -p -- "${tmp_dir}"
  move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf
  move_into "${tmp_dir}" SC_data.txt FDR_data.txt
  cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
else
  printf -- '-c must be one of [b, h, n]:  b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering>\n'
  exit 1
fi

# When normalization was requested, the normalized input that replaced the
# original file is stored with the other intermediate data.
if [[ "${N}" == "1" || "${N}" == "2" ]]; then
  mv -- norm_saint.txt "${tmp_dir}"
fi
