comparison Dotplot_Release/dotplot.bash @ 0:dfa3436beb67 draft

Uploaded
author bornea
date Fri, 29 Jan 2016 09:56:02 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3436beb67
#!/bin/bash
# Resolve the absolute directory that contains this script; the helper
# programs invoked further down are all addressed relative to it.
# An earlier (commented-out) approach used `readlink -e $0` + `dirname`.
# Here the lookup is done with cd/pwd inside a command substitution: it runs
# in a subshell, so the caller's working directory and directory stack are
# left untouched, and every expansion is quoted so an install path that
# contains spaces or glob characters still resolves correctly.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd)
if [ -z "${SCRIPTPATH}" ]; then
  echo "Unable to determine the directory containing $0" 1>&2
  exit 1
fi
7
# Print the command-line synopsis to stderr and terminate with status 1.
# Takes no arguments and never returns.
# The script name is handed to printf as an argument instead of being
# interpolated into the format string, so a "%" or "\" in $0 cannot corrupt
# the output. The option lines are likewise passed as plain %s arguments.
# NOTE(review): the -t text says the secondary cutoff "must be less than the
# primary", yet the recommended values are 0.01 (primary) and 0.025
# (secondary) - confirm which is intended; the wording is left unchanged.
usage() {
  printf 'Usage: %s\n' "$0" 1>&2
  printf '%s\n' \
    '[-f <saint_file_name.txt>]' \
    '[-i <0 for SaintExpress format, 1 for other>]' \
    '[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]' \
    '[-n <clustering type to be performed if option -c is set to "h">]' \
    '[-d <distance metric to use if option -c is set to "h">]' \
    '[-b <list of bait proteins in display order (see option -c n)>]' \
    '[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]' \
    '[-s <primary FDR cutoff [0-1, recommended=0.01]>]' \
    '[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>]' \
    '[-x <spectral count minimum. Only preys with >= this will be used>]' \
    '[-m <maximum spectral count>]' \
    '[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]' \
    '[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]' \
    1>&2
  exit 1
}
23
# Defaults for the options that have one; every other option variable stays
# unset until it is supplied on the command line.
i=0
x=0
N=0
n="ward"
d="canberra"

# Each accepted flag stores its argument in the shell variable that has the
# same name as the flag letter (-f -> $f, -N -> $N, ...), so one assignment
# covers all of them. The leading ":" in the optstring selects getopts'
# silent mode: an unknown flag or a missing argument sets $o to "?" or ":"
# respectively, and both end up in the catch-all arm that prints the usage.
# Keep the bracket list below in sync with the optstring when adding a flag.
while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
  case "${o}" in
    [fistxmcndbpNC])
      printf -v "${o}" '%s' "${OPTARG}"
      ;;
    *)
      usage
      ;;
  esac
done

# Drop the parsed options so only positional arguments (if any) remain.
shift $((OPTIND-1))
76
# Print the prefix used to name intermediate files and the Output_<prefix>
# directory for the input path $1: its base name cut at the first ".".
# The directory part is removed before cutting; without that, an input such
# as "./saint.txt" or "run.1/saint.txt" would be cut at a "." inside the
# directory part and yield an empty or truncated prefix.
_saint_output_prefix() {
  local base=${1##*/}
  printf '%s\n' "${base%%.*}"
}

filename=$(_saint_output_prefix "${f}")

# Echo the main settings back to the user.
echo "Saint input file = ${f}"
echo "Primary FDR cutoff = ${s}"
echo "Secondary FDR cutoff for dotplot = ${t}"
echo "Minimum spectral count for significant preys = ${x}"
echo "Maximum spectral count for dot plot = ${m}"

# -f, -s, -t, -m and -c are mandatory; usage exits when any one is missing.
if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then
  usage
fi
87
# -i 1 means the input is not in SaintExpress format: run the bundled
# converter on it and continue with "mockSaintExpress.txt" as the input
# (presumably the file the converter writes into the working directory -
# confirm against SaintConvert.pl).
# Both the helper path and the input path are quoted so that directories or
# file names containing spaces reach the converter as single arguments.
if [ "${i}" == 1 ]; then
  "$SCRIPTPATH/SaintConvert.pl" -i "${f}"
  f="mockSaintExpress.txt"
fi
92
# Sanity-check the spectral-count window (-x minimum, -m maximum).
# A minimum at or above the maximum is fatal.
if [ "${x}" -ge "${m}" ]; then
  echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
  exit 1;
fi

# Reaching this point means the script did not exit above; a negative
# minimum is not fatal, it is clamped to zero with a notice.
if [ "${x}" -lt 0 ]; then
  echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
  x=0
fi
100
###Check for normalization

# -N 1 runs Normalization.R on the input; -N 2 runs Normalization_sigpreys.R
# with an FDR cutoff (-C, falling back to the secondary cutoff -t when -C was
# not given). In both cases processing continues with "norm_saint.txt" as
# the input file. Any other value of -N leaves the input untouched.
# Helper paths and arguments are quoted so paths with spaces stay intact.
if [ "${N}" == 1 ]; then
  printf "\nNormalization is being performed\n"
  "$SCRIPTPATH/Normalization.R" "${f}"
  f="norm_saint.txt"
elif [ "${N}" == 2 ]; then
  printf "\nNormalization is being performed\n"
  if [ -z "${C}" ]; then
    C=${t}
  fi
  "$SCRIPTPATH/Normalization_sigpreys.R" "${f}" "${C}"
  f="norm_saint.txt"
fi
115
116
###Check for clustering etc

# Succeed when $1 is one of the linkage methods accepted for -n.
_is_hclust_method() {
  case "$1" in
    average|centroid|complete|mcquitty|median|single|ward) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeed when $1 is one of the distance metrics accepted for -d.
_is_distance_metric() {
  case "$1" in
    binary|canberra|euclidean|manhattan|maximum|minkowski) return 0 ;;
    *) return 1 ;;
  esac
}

# Validate the clustering-related options and fill in defaults.
# Globals read:    c, b
# Globals written: n, d, p, p_c
#   -c h : -n and -d must be valid (empty values fall back to "ward" /
#          "canberra"); an invalid value exits with status 1.
#   -c n : -b and -p are required (exit 1 otherwise). With -p all the preys
#          are still clustered: p becomes "empty" and p_c becomes 1.
# User-supplied values are printed through %s rather than spliced into the
# printf format string, so a "%" in them cannot break the message.
check_clustering_options() {
  if [ "${c}" == "h" ]; then
    if [ -z "${n}" ]; then
      printf "\nHierarchial clustering was selected (-c = h), but no clustering method (-n) was chosen.\n"
      printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n"
      printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n"
      n="ward"
    elif _is_hclust_method "${n}"; then
      printf '\nHierarchical clustering (method = %s) will be performed\n\n' "${n}"
    else
      printf '\n%s is not a valid Hierarchical clustering method.\n' "${n}"
      printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n"
      exit 1
    fi
  fi

  # p_c tells the no-clustering plot script whether preys are clustered
  # anyway (1) or shown in the supplied order (0).
  p_c=0
  if [ "${c}" == "h" ]; then
    if [ -z "${d}" ]; then
      printf "\nHierarchial clustering was selected (-c = h), but no distance metric (-d) was chosen.\n"
      printf "The input parameter -d must be set to one of \"binary\", \"canberra\", \"euclidean\",\n"
      printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n"
      d="canberra"
    elif _is_distance_metric "${d}"; then
      printf '\nHierarchical clustering (distance metric = %s) will be performed\n\n' "${d}"
    else
      printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "${d}"
      printf "Choose one of \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n"
      exit 1
    fi
  fi

  if [ "${c}" == "n" ]; then
    if [ -z "${b}" ]; then
      printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n"
      printf "Bait list must be in .txt formart.\n\n"
      exit 1
    elif [ -z "${p}" ]; then
      printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n"
      printf "Prey list must be in .txt formart.\n\n"
      exit 1
    elif [ "${p}" == "all" ]; then
      printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n"
      printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n"
      p="empty"
      p_c=1
      # Honour -n / -d given on the command line, as the message above
      # promises; only a missing or unrecognised value falls back to the
      # default (previously both were overwritten unconditionally).
      _is_hclust_method "${n}" || n="ward"
      _is_distance_metric "${d}" || d="canberra"
    fi
  fi
}

check_clustering_options
166
167
###Check number of baits

# BaitCheck.pl prints the bait count for the input file. Whitespace is
# stripped from the captured value so that the quoted comparison below
# behaves like the old unquoted one for padded output, while an empty
# result (e.g. the helper failed) no longer turns the test into a syntax
# error.
bait_n=$(perl "$SCRIPTPATH/BaitCheck.pl" -i "${f}")
bait_n=${bait_n//[[:space:]]/}
echo "Number of baits = ${bait_n}"
printf "\n\n"

# With exactly two baits, biclustering is replaced by hierarchical
# clustering using the "ward" method; the distance metric is left as is.
# NOTE(review): a single bait is not covered by this check - confirm whether
# the condition should be "2 or fewer".
if [ "${c}" == "b" ] && [ "${bait_n}" == 2 ]; then
  printf "\nWarning only 2 baits are present. Biclustering will not performed.\n"
  printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
  c="h"
  n="ward"
fi
180
181
###Generate plots

# Move every item named in $2... into directory $1. One mv per item, so a
# missing item only produces its own diagnostic and the rest still move.
_stash_outputs() {
  local dest=$1 item
  shift
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# Final plots go to Output_<prefix>; intermediate data goes to its
# TempData_<prefix> subdirectory.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

# One branch per clustering mode (-c). Each runs its pipeline of bundled
# helper programs in the working directory, then files the results away.
# All expansions are quoted so paths and values containing spaces survive,
# and the directories are created with `mkdir -p` so a re-run over an
# existing output directory does not emit "File exists" errors.
if [ "${c}" == "b" ]; then
  printf "\nBiclustering will be performed\n\n"
  "$SCRIPTPATH/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
  "$SCRIPTPATH/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
  # Fixed seed passed through the environment to the nested-clustering step.
  GSL_RNG_SEED=123 "$SCRIPTPATH/Step3_nestedcluster" "${filename}.dat" "$SCRIPTPATH/biclust_param.txt"
  "$SCRIPTPATH/Step4_biclustering.R" "${filename}.dat"

  "$SCRIPTPATH/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  "$SCRIPTPATH/R_dotPlot.R" "${s}" "${t}" "${m}"

  mkdir -p "${tmp_dir}"
  _stash_outputs "${tmp_dir}" \
    bait_lists Clusters MCMCparameters NestedClusters NestedMu NestedSigma2 \
    OPTclusters "${filename}_matrix.txt" "${filename}.dat" SC_data.txt \
    FDR_data.txt clustered_matrix.txt singletons.txt bait2bait_matrix.txt \
    baitClusters clusteredData
  _stash_outputs "${out_dir}" dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf
  cp "$SCRIPTPATH/legend.pdf" "${out_dir}"
elif [ "${c}" == "h" ]; then

  "$SCRIPTPATH/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  "$SCRIPTPATH/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "$SCRIPTPATH"

  mkdir -p "${tmp_dir}"
  _stash_outputs "${out_dir}" \
    dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf
  _stash_outputs "${tmp_dir}" SC_data.txt FDR_data.txt
  cp "$SCRIPTPATH/legend.pdf" "${out_dir}"
elif [ "${c}" == "n" ]; then

  "$SCRIPTPATH/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
  # Echo the plotting command before running it.
  echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
  "$SCRIPTPATH/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "$p_c" "${p}" "${n}" "${d}" "$SCRIPTPATH"

  mkdir -p "${tmp_dir}"
  _stash_outputs "${out_dir}" \
    dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf
  _stash_outputs "${tmp_dir}" SC_data.txt FDR_data.txt
  cp "$SCRIPTPATH/legend.pdf" "${out_dir}"
else
  # Leading "--" stops printf from parsing the message's "-c" as an option.
  printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering>\n"
  exit 1;
fi
248
# When normalization was requested (-N 1 or -N 2), move norm_saint.txt in
# with the other intermediate data. The destination is quoted so an output
# prefix containing spaces or glob characters stays a single path.
if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then
  mv norm_saint.txt "Output_${filename}/TempData_${filename}"
fi
252