#!/bin/bash
# Locate the directory this script lives in; the helper programs it runs
# are addressed relative to that directory through SCRIPTPATH.
#
# Previous approach, kept for reference:
#SCRIPT=$(readlink -e $0)
#SCRIPTPATH=`dirname $SCRIPT`
#
# The lookup runs inside a command substitution, so the caller's working
# directory is never changed, and $0 is quoted so that a path containing
# spaces resolves correctly. If the directory cannot be entered the script
# stops here instead of silently using the current directory.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null 2>&1 && pwd)
if [ -z "${SCRIPTPATH}" ]; then
	printf 'Cannot determine the directory containing %s\n' "$0" 1>&2
	exit 1
fi

     8 usage() { printf "Usage: $0 
 | 
| 
 | 
     9 [-f <saint_file_name.txt>]
 | 
| 
 | 
    10 [-i <0 for SaintExpress format, 1 for other>]
 | 
| 
 | 
    11 [-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
 | 
| 
 | 
    12 [-n <clustering type to be performed if option -c is set to \"h\">]
 | 
| 
 | 
    13 [-d <distance metric to use if option -c is set to \"h\">]
 | 
| 
 | 
    14 [-b <list of bait proteins in display order (see option -c n)>]
 | 
| 
 | 
    15 [-p <list of prey proteins in display order (see option -c n). Set this to \"all\" if you want to include all preys and cluster them>]
 | 
| 
 | 
    16 [-s <primary FDR cutoff [0-1, recommended=0.01]>]
 | 
| 
 | 
    17 [-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>
 | 
| 
 | 
    18 [-x <spectral count minimum. Only preys with >= this will be used]>
 | 
| 
 | 
    19 [-m <maximum spectral count>]
 | 
| 
 | 
    20 [-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
 | 
| 
 | 
    21 [-C <FDR cutoff for normalization if using option -N 2 (deafult is -t)>]\n"
 | 
| 
 | 
    22 1>&2; exit 1; }
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 N=0
 | 
| 
 | 
    25 n="ward"
 | 
| 
 | 
    26 d="canberra"
 | 
| 
 | 
    27 x=0
 | 
| 
 | 
    28 i=0
 | 
| 
 | 
    29 while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
 | 
| 
 | 
    30     case "${o}" in
 | 
| 
 | 
    31         f)
 | 
| 
 | 
    32             f=${OPTARG}
 | 
| 
 | 
    33             ;;
 | 
| 
 | 
    34         i)
 | 
| 
 | 
    35 	    i=${OPTARG}
 | 
| 
 | 
    36             ;;
 | 
| 
 | 
    37         s)
 | 
| 
 | 
    38             s=${OPTARG}
 | 
| 
 | 
    39             ;;
 | 
| 
 | 
    40 	t)
 | 
| 
 | 
    41             t=${OPTARG}
 | 
| 
 | 
    42             ;;
 | 
| 
 | 
    43         x)
 | 
| 
 | 
    44 	    x=${OPTARG}
 | 
| 
 | 
    45             ;;
 | 
| 
 | 
    46 	m)
 | 
| 
 | 
    47             m=${OPTARG}
 | 
| 
 | 
    48             ;;
 | 
| 
 | 
    49 	c)
 | 
| 
 | 
    50             c=${OPTARG}
 | 
| 
 | 
    51 	    ;;
 | 
| 
 | 
    52 	n)
 | 
| 
 | 
    53 	    n=${OPTARG}
 | 
| 
 | 
    54 	    ;;
 | 
| 
 | 
    55 	d)
 | 
| 
 | 
    56 	    d=${OPTARG}
 | 
| 
 | 
    57 	    ;;
 | 
| 
 | 
    58 	b)
 | 
| 
 | 
    59             b=${OPTARG}
 | 
| 
 | 
    60 	    ;;
 | 
| 
 | 
    61 	p)
 | 
| 
 | 
    62 	    p=${OPTARG}
 | 
| 
 | 
    63 	    ;;
 | 
| 
 | 
    64 	N)
 | 
| 
 | 
    65 	    N=${OPTARG}
 | 
| 
 | 
    66 	    ;;
 | 
| 
 | 
    67 	C)
 | 
| 
 | 
    68 	    C=${OPTARG}
 | 
| 
 | 
    69 	    ;;
 | 
| 
 | 
    70         *)
 | 
| 
 | 
    71             usage
 | 
| 
 | 
    72             ;;
 | 
| 
 | 
    73     esac
 | 
| 
 | 
    74 done
 | 
| 
 | 
    75 shift $((OPTIND-1))
 | 
| 
 | 
    76 
 | 
| 
 | 
    77 filename=${f%%.*}
 | 
| 
 | 
    78 echo "Saint input file = ${f}"
 | 
| 
 | 
    79 echo "Primary FDR cutoff = ${s}"
 | 
| 
 | 
    80 echo "Secondary FDR cutoff for dotplot = ${t}"
 | 
| 
 | 
    81 echo "Minimum spectral count for significant preys = ${x}"
 | 
| 
 | 
    82 echo "Maximum spectral count for dot plot = ${m}"
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then
 | 
| 
 | 
    85     usage
 | 
| 
 | 
    86 fi
 | 
| 
 | 
    87 
 | 
| 
 | 
    88 if [ "${i}" == 1 ]; then
 | 
| 
 | 
    89 	$SCRIPTPATH/SaintConvert.pl -i ${f}
 | 
| 
 | 
    90 	f="mockSaintExpress.txt"
 | 
| 
 | 
    91 fi
 | 
| 
 | 
    92 
 | 
| 
 | 
    93 if [ "${x}" -ge "${m}" ]; then
 | 
| 
 | 
    94 	echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
 | 
| 
 | 
    95 	exit 1;
 | 
| 
 | 
    96 elif [ "${x}" -lt 0 ]; then
 | 
| 
 | 
    97 	echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
 | 
| 
 | 
    98 	x=0
 | 
| 
 | 
    99 fi
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 ###Check for normalization
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 if [ "${N}" == 1 ]; then
 | 
| 
 | 
   104 	printf "\nNormalization is being performed\n"
 | 
| 
 | 
   105 	$SCRIPTPATH/Normalization.R ${f}
 | 
| 
 | 
   106 	f="norm_saint.txt"
 | 
| 
 | 
   107 elif [ "${N}" == 2 ]; then
 | 
| 
 | 
   108 	printf "\nNormalization is being performed\n"
 | 
| 
 | 
   109 	if [ -z "${C}" ]; then
 | 
| 
 | 
   110 		C=${t}
 | 
| 
 | 
   111 	fi
 | 
| 
 | 
   112 	$SCRIPTPATH/Normalization_sigpreys.R ${f} ${C}
 | 
| 
 | 
   113 	f="norm_saint.txt"
 | 
| 
 | 
   114 fi
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 ###Check for clustering etc
 | 
| 
 | 
   118 
 | 
| 
 | 
   119 if [ "${c}" == "h" ] && [ -z "${n}" ]; then
 | 
| 
 | 
   120 	printf "\nHierarchial clustering was selected (-c = h), but no clustering method (-n) was chosen.\n"
 | 
| 
 | 
   121 	printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n"
 | 
| 
 | 
   122 	printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n"
 | 
| 
 | 
   123 	n="ward"
 | 
| 
 | 
   124 elif [ "${c}" == "h" ] && [ -n "${n}" ]; then
 | 
| 
 | 
   125 	if [ "${n}" == "average" ] || [ "${n}" == "centroid" ] || [ "${n}" == "complete" ] || [ "${n}" == "mcquitty" ] || [ "${n}" == "median" ] || [  "${n}" == "single" ] || [ "${n}" == "ward" ]; then
 | 
| 
 | 
   126 		printf "\nHierarchical clustering (method = ${n}) will be performed\n\n"
 | 
| 
 | 
   127 	else
 | 
| 
 | 
   128 		printf "\n${n} is not a valid Hierarchical clustering method.\n"
 | 
| 
 | 
   129 		printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n"
 | 
| 
 | 
   130 		exit 1
 | 
| 
 | 
   131 	fi
 | 
| 
 | 
   132 fi
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 p_c=0
 | 
| 
 | 
   135 if [ "${c}" == "h" ] && [ -z "${d}" ]; then
 | 
| 
 | 
   136 	printf "\nHierarchial clustering was selected (-c = h), but no distance metric (-d) was chosen.\n"
 | 
| 
 | 
   137 	printf "The input parameter -d must be set to one of  \"binary\", \"canberra\", \"euclidean\",\n"
 | 
| 
 | 
   138 	printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n"
 | 
| 
 | 
   139 	d="canberra"
 | 
| 
 | 
   140 elif [ "${c}" == "h" ] && [ -n "${d}" ]; then
 | 
| 
 | 
   141 	if [ "${d}" == "binary" ] || [ "${d}" == "canberra" ] || [ "${d}" == "euclidean" ] || [ "${d}" == "manhattan" ] || [ "${d}" == "maximum" ] || [  "${d}" == "minkowski" ]; then
 | 
| 
 | 
   142 		printf "\nHierarchical clustering (distance metric = ${d}) will be performed\n\n"
 | 
| 
 | 
   143 	else
 | 
| 
 | 
   144 		printf "\n${d} is not a valid Hierarchical clustering distance metric.\n"
 | 
| 
 | 
   145 		printf "Choose one of  \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n"
 | 
| 
 | 
   146 		exit 1
 | 
| 
 | 
   147 	fi
 | 
| 
 | 
   148 fi
 | 
| 
 | 
   149 
 | 
| 
 | 
   150 if [ "${c}" == "n" ] && [ -z "${b}" ]; then
 | 
| 
 | 
   151 	printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n"
 | 
| 
 | 
   152 	printf "Bait list must be in .txt formart.\n\n"
 | 
| 
 | 
   153 	exit 1
 | 
| 
 | 
   154 elif [ "${c}" == "n" ] && [ -z "${p}" ]; then
 | 
| 
 | 
   155 	printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n"
 | 
| 
 | 
   156 	printf "Prey list must be in .txt formart.\n\n"
 | 
| 
 | 
   157 	exit 1
 | 
| 
 | 
   158 elif [ "${c}" == "n" ] && [ "${p}" == "all" ]; then
 | 
| 
 | 
   159 	printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n"
 | 
| 
 | 
   160 	printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n"
 | 
| 
 | 
   161 	p="empty"
 | 
| 
 | 
   162 	p_c=1
 | 
| 
 | 
   163 	n="ward"
 | 
| 
 | 
   164 	d="canberra"
 | 
| 
 | 
   165 fi
 | 
| 
 | 
   166 
 | 
| 
 | 
   167 
 | 
| 
 | 
   168 ###Check number of baits
 | 
| 
 | 
   169 
 | 
| 
 | 
   170 bait_n=$(perl $SCRIPTPATH/BaitCheck.pl -i ${f})
 | 
| 
 | 
   171 echo "Number of baits = "$bait_n
 | 
| 
 | 
   172 printf "\n\n"
 | 
| 
 | 
   173 
 | 
| 
 | 
   174 if [ "${c}" == "b" ] && [ $bait_n == 2 ]; then
 | 
| 
 | 
   175 	printf "\nWarning only 2 baits are present. Biclustering will not performed.\n"
 | 
| 
 | 
   176 	printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
 | 
| 
 | 
   177 	c="h"
 | 
| 
 | 
   178 	n="ward"
 | 
| 
 | 
   179 fi
 | 
| 
 | 
   180 
 | 
| 
 | 
   181 
 | 
| 
 | 
   182 ###Generate plots
 | 
| 
 | 
   183 
 | 
| 
 | 
   184 if [ "${c}" == "b" ]; then
 | 
| 
 | 
   185 	printf "\nBiclustering will be performed\n\n"
 | 
| 
 | 
   186 	$SCRIPTPATH/Step1_data_reformating.R ${f} ${s} ${filename}
 | 
| 
 | 
   187 	$SCRIPTPATH/Step2_data_filtering.R ${filename}_matrix.txt ${x} ${filename}
 | 
| 
 | 
   188 	GSL_RNG_SEED=123  $SCRIPTPATH/Step3_nestedcluster ${filename}.dat $SCRIPTPATH/biclust_param.txt
 | 
| 
 | 
   189 	$SCRIPTPATH/Step4_biclustering.R ${filename}.dat
 | 
| 
 | 
   190 
 | 
| 
 | 
   191 	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
 | 
| 
 | 
   192 	$SCRIPTPATH/R_dotPlot.R ${s} ${t} ${m}
 | 
| 
 | 
   193 	mkdir Output_${filename}
 | 
| 
 | 
   194 	mkdir Output_${filename}/TempData_${filename}
 | 
| 
 | 
   195 	mv bait_lists Output_${filename}/TempData_${filename}
 | 
| 
 | 
   196 	mv Clusters Output_${filename}/TempData_${filename}
 | 
| 
 | 
   197 	mv MCMCparameters Output_${filename}/TempData_${filename}
 | 
| 
 | 
   198 	mv NestedClusters Output_${filename}/TempData_${filename}
 | 
| 
 | 
   199 	mv NestedMu Output_${filename}/TempData_${filename}
 | 
| 
 | 
   200 	mv NestedSigma2 Output_${filename}/TempData_${filename}
 | 
| 
 | 
   201 	mv OPTclusters Output_${filename}/TempData_${filename}
 | 
| 
 | 
   202 	mv ${filename}_matrix.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   203 	mv ${filename}.dat Output_${filename}/TempData_${filename}
 | 
| 
 | 
   204 	mv SC_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   205 	mv FDR_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   206 	mv clustered_matrix.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   207 	mv singletons.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   208 	mv bait2bait_matrix.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   209 	mv baitClusters Output_${filename}/TempData_${filename}
 | 
| 
 | 
   210 	mv clusteredData Output_${filename}/TempData_${filename}
 | 
| 
 | 
   211 	mv dotplot.pdf Output_${filename}
 | 
| 
 | 
   212 	mv bait2bait.pdf Output_${filename} 
 | 
| 
 | 
   213 	mv estimated.pdf Output_${filename} 
 | 
| 
 | 
   214 	mv stats.pdf Output_${filename}
 | 
| 
 | 
   215 	cp $SCRIPTPATH/legend.pdf Output_${filename}
 | 
| 
 | 
   216 elif [ "${c}" == "h" ]; then
 | 
| 
 | 
   217 
 | 
| 
 | 
   218 	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
 | 
| 
 | 
   219 	$SCRIPTPATH/R_dotPlot_hc.R ${s} ${t} ${m} ${n} ${d} $SCRIPTPATH
 | 
| 
 | 
   220 
 | 
| 
 | 
   221 	mkdir Output_${filename}
 | 
| 
 | 
   222 	mkdir Output_${filename}/TempData_${filename}
 | 
| 
 | 
   223 	mv dotplot.pdf Output_${filename}
 | 
| 
 | 
   224 	mv heatmap_borders.pdf Output_${filename}
 | 
| 
 | 
   225 	mv heatmap_no_borders.pdf Output_${filename}
 | 
| 
 | 
   226 	mv bait2bait.pdf Output_${filename}
 | 
| 
 | 
   227 	mv SC_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   228 	mv FDR_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   229 	cp $SCRIPTPATH/legend.pdf Output_${filename}
 | 
| 
 | 
   230 elif [ "${c}" == "n" ]; then
 | 
| 
 | 
   231 	
 | 
| 
 | 
   232 	$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}
 | 
| 
 | 
   233 	echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
 | 
| 
 | 
   234 	$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH
 | 
| 
 | 
   235 
 | 
| 
 | 
   236 	mkdir Output_${filename}
 | 
| 
 | 
   237 	mkdir Output_${filename}/TempData_${filename}
 | 
| 
 | 
   238 	mv dotplot.pdf Output_${filename}
 | 
| 
 | 
   239 	mv heatmap_borders.pdf Output_${filename}
 | 
| 
 | 
   240 	mv heatmap_no_borders.pdf Output_${filename}
 | 
| 
 | 
   241 	mv SC_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   242 	mv FDR_data.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   243 	cp $SCRIPTPATH/legend.pdf Output_${filename}
 | 
| 
 | 
   244 else
 | 
| 
 | 
   245 	printf -- "-c must be one of [b, h, n]:  b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering>\n"
 | 
| 
 | 
   246 	exit 1;
 | 
| 
 | 
   247 fi
 | 
| 
 | 
   248 
 | 
| 
 | 
   249 if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then
 | 
| 
 | 
   250 	mv norm_saint.txt Output_${filename}/TempData_${filename}
 | 
| 
 | 
   251 fi
 | 
| 
 | 
   252 
 |