comparison pal_finder_wrapper.sh @ 0:3f908e7fff4f draft

Uploaded first version to toolshed.
author pjbriggs
date Thu, 11 Dec 2014 09:23:24 -0500
parents
children 771ebe02636f
comparison
equal deleted inserted replaced
-1:000000000000 0:3f908e7fff4f
1 #!/bin/sh
2 #
3 # pal_finder_wrapper.sh: run pal_finder perl script as a Galaxy tool
4 #
5 # Usage: pal_finder_wrapper.sh FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]
6 # pal_finder_wrapper.sh --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]
7 #
8 # Options:
9 #
10 # --primer-prefix PREFIX: prefix added to the beginning of all primer names (prPrefixName)
11 # --2merMinReps N: minimum number of 2-mer repeat units to detect (0=ignore units of this size)
12 # --3merMinReps N
13 # --4merMinReps N
14 # --5merMinReps N
15 # --6merMinReps N
16 # --primer-mispriming-library FASTA: specify a Fasta file with sequences to avoid amplifying
17 # --primer-opt-size VALUE: optimum primer length
18 # --primer-min-size VALUE: minimum acceptable primer length
19 # --primer-max-size VALUE: maximum acceptable primer length
20 # --primer-min-gc VALUE: minimum allowable percentage of Gs and Cs in any primer
21 # --primer-max-gc VALUE: maximum allowable percentage of Gs and Cs
22 # --primer-gc-clamp VALUE: number of consecutive Gs and Cs at 3' end of both left and right primer
23 # --primer-max-end-gc VALUE: max number of Gs or Cs in last five 3' bases of left or right primer
24 # --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo
25 # --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius)
26 # --primer-opt-tm VALUE: optimum melting temperature (Celsius)
27 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
28 # --output_config_file FNAME: write a copy of the config.txt file to FNAME
29 # --filter_microsats FNAME: run Graeme Fox's Perl script to filter and sort the
30 # microsatellites from pal_finder and write to FNAME
31 #
32 # pal_finder is available from http://sourceforge.net/projects/palfinder/
33 #
34 # primer3 is available from http://primer3.sourceforge.net/releases.php
35 # (nb needs version 2.0.0-alpha)
36 #
37 # Explicitly set the locations of the pal_finder script, data files and the primer3
38 # executable by setting the following variables in the environment:
39 #
40 # * PALFINDER_SCRIPT_DIR: location of the pal_finder Perl script (defaults to
41 # /usr/bin)
42 # * PALFINDER_DATA_DIR: location of the pal_finder data files (specifically
43 # config.txt and simple.ref; defaults to /usr/share/pal_finder_v0.02.04)
44 # * PRIMER3_CORE_EXE: name of the primer3_core program, which should include the
45 # full path if it's not on the Galaxy user's PATH (defaults to primer3_core)
46 #
# Log the command line the wrapper was invoked with (quoted so that glob
# characters in the arguments are not expanded by the shell)
echo "$*"
#
# Initialise locations of scripts, data and executables
#
# Set these in the environment to override at execution time; ':=' only
# assigns the default when the variable is unset or empty
: "${PALFINDER_SCRIPT_DIR:=/usr/bin}"
: "${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04}"
: "${PRIMER3_CORE_EXE:=primer3_core}"
#
# Filter script is in the same directory as this script
# (quoted so an install path containing spaces still resolves)
PALFINDER_FILTER_PL="$(dirname "$0")/pal_finder_filter.pl"
if [ ! -f "$PALFINDER_FILTER_PL" ] ; then
  echo No pal_finder_filter.pl script >&2
  exit 1
fi
62 #
63 # Check that we have all the components
# have_program PROGRAM
#
# Report whether PROGRAM (a bare command name or a path to an executable)
# can be run from the current environment.
#
# Arguments: $1 - program name or path
# Outputs:   writes "yes" to stdout if the program was found, "no" otherwise
#
# Uses the exit status of 'command -v' rather than parsing the text printed
# by 'which': not every 'which' prints a "no X in ..." message for a missing
# program, in which case the text match would wrongly report "yes".
have_program() {
  local program=$1
  if command -v "$program" >/dev/null 2>&1 ; then
    echo yes
  else
    echo no
  fi
}
# Abort early if any required component is missing: the primer3_core
# executable, pal_finder's default config.txt, or the pal_finder Perl script.
# '=' is used for string comparison because '==' is not understood by every
# /bin/sh implementation of '['.
if [ "$(have_program "$PRIMER3_CORE_EXE")" = "no" ] ; then
  echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
  exit 1
fi
if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
  echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
  exit 1
fi
if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
  echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
  exit 1
fi
85 #
86 # Initialise parameters used in the config.txt file
# Prefix used when naming primers
PRIMER_PREFIX=test
# Minimum repeat counts per repeat-unit size (0 = ignore units of that size)
MIN_2_MER_REPS=6
MIN_3_MER_REPS=0 MIN_4_MER_REPS=0 MIN_5_MER_REPS=0 MIN_6_MER_REPS=0
# Default mispriming library is the one shipped with the pal_finder data files
PRIMER_MISPRIMING_LIBRARY="${PALFINDER_DATA_DIR}/simple.ref"
# Primer3 settings start out empty; an empty value is reported by
# set_config_value as "left as default" and config.txt is not modified
PRIMER_OPT_SIZE="" PRIMER_MAX_SIZE="" PRIMER_MIN_SIZE=""
PRIMER_MAX_GC="" PRIMER_MIN_GC=""
PRIMER_GC_CLAMP="" PRIMER_MAX_END_GC=""
PRIMER_OPT_TM="" PRIMER_MAX_TM="" PRIMER_MIN_TM=""
PRIMER_PAIR_MAX_DIFF_TM=""
# Optional extra outputs, only produced when a destination is supplied
OUTPUT_CONFIG_FILE=""
FILTERED_MICROSATS=""
107 #
108 # Collect command line arguments
# Both invocation forms supply four leading arguments ($1..$4 are all read
# below and then shifted away), so anything shorter is a usage error: report
# it on stderr and exit with a non-zero status.
if [ $# -lt 4 ] ; then
  echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  exit 1
fi
if [ "$1" = "--454" ] ; then
  # 454 mode: a single FASTA file follows the flag
  PLATFORM="454"
  FNA=$2
else
  # Illumina mode: the first two arguments are the R1/R2 FASTQ pair
  PLATFORM="Illumina"
  FASTQ_R1=$1
  FASTQ_R2=$2
fi
MICROSAT_SUMMARY=$3
PAL_SUMMARY=$4
# Drop the four leading arguments so that only the options remain
shift 4
125 #
126 # Collect command line options
# Walk the remaining arguments as "--option VALUE" pairs until the next
# argument is empty or missing. Each arm stores the value that follows the
# option name and shifts the name away; the shift after 'esac' then drops
# the value itself.
until [ -z "$1" ] ; do
  case "$1" in
    # Primer naming
    --primer-prefix)             PRIMER_PREFIX=$2 ; shift ;;
    # Minimum repeat counts per unit size
    --2merMinReps)               MIN_2_MER_REPS=$2 ; shift ;;
    --3merMinReps)               MIN_3_MER_REPS=$2 ; shift ;;
    --4merMinReps)               MIN_4_MER_REPS=$2 ; shift ;;
    --5merMinReps)               MIN_5_MER_REPS=$2 ; shift ;;
    --6merMinReps)               MIN_6_MER_REPS=$2 ; shift ;;
    # Primer3 settings
    --primer-mispriming-library) PRIMER_MISPRIMING_LIBRARY=$2 ; shift ;;
    --primer-opt-size)           PRIMER_OPT_SIZE=$2 ; shift ;;
    --primer-max-size)           PRIMER_MAX_SIZE=$2 ; shift ;;
    --primer-min-size)           PRIMER_MIN_SIZE=$2 ; shift ;;
    --primer-max-gc)             PRIMER_MAX_GC=$2 ; shift ;;
    --primer-min-gc)             PRIMER_MIN_GC=$2 ; shift ;;
    --primer-gc-clamp)           PRIMER_GC_CLAMP=$2 ; shift ;;
    --primer-max-end-gc)         PRIMER_MAX_END_GC=$2 ; shift ;;
    --primer-opt-tm)             PRIMER_OPT_TM=$2 ; shift ;;
    --primer-max-tm)             PRIMER_MAX_TM=$2 ; shift ;;
    --primer-min-tm)             PRIMER_MIN_TM=$2 ; shift ;;
    --primer-pair-max-diff-tm)   PRIMER_PAIR_MAX_DIFF_TM=$2 ; shift ;;
    # Optional extra outputs
    --output_config_file)        OUTPUT_CONFIG_FILE=$2 ; shift ;;
    --filter_microsats)          FILTERED_MICROSATS=$2 ; shift ;;
    *)
      echo Unknown option: $1 >&2
      exit 1
      ;;
  esac
  shift
done
216 #
217 # Check that primer3_core is available
# 'command -v' signals availability through its exit status, so this check
# works for whatever name or path PRIMER3_CORE_EXE holds and does not depend
# on the wording of the message 'which' prints for a missing program
if ! command -v "$PRIMER3_CORE_EXE" >/dev/null 2>&1 ; then
  echo ERROR primer3_core not found >&2
  exit 1
fi
223 #
224 # Set up the working dir
# Link the input files into the current directory and remember their
# basenames, which are the names later written into config.txt.
# All paths are quoted so that names containing spaces are handled.
if [ "$PLATFORM" = "Illumina" ] ; then
  # Paired end Illumina data as input
  if [ "$FASTQ_R1" = "$FASTQ_R2" ] ; then
    echo ERROR R1 and R2 fastqs are the same file >&2
    exit 1
  fi
  ln -s "$FASTQ_R1"
  ln -s "$FASTQ_R2"
  fastq_r1=$(basename "$FASTQ_R1")
  fastq_r2=$(basename "$FASTQ_R2")
else
  # 454 data as input
  ln -s "$FNA"
  fna=$(basename "$FNA")
fi
# The mispriming library is linked in too and then referred to by basename
ln -s "$PRIMER_MISPRIMING_LIBRARY"
PRIMER_MISPRIMING_LIBRARY=$(basename "$PRIMER_MISPRIMING_LIBRARY")
# Directory for the output paths that are written into config.txt
mkdir Output
#
# Copy in the default config.txt file
/bin/cp "$PALFINDER_DATA_DIR/config.txt" .
246 #
247 # Update the config.txt file with new values
# set_config_value KEY VALUE CONFIG_TXT
#
# Rewrite the "KEY <anything>" line of CONFIG_TXT in place as "KEY VALUE".
#
# Arguments: $1 - parameter name as it appears at the start of a line
#            $2 - new value; when empty the file is left untouched
#            $3 - path of the config file to edit
# Outputs:   a one-line progress message on stdout
#
# VALUE is escaped before being handed to sed so that characters which are
# special in an s-command replacement (backslash, '&' and the ',' delimiter
# used here) are written literally instead of corrupting the line or making
# sed fail. KEY is inserted into the pattern unescaped, so it must not
# contain regular expression metacharacters or commas.
set_config_value() {
  local key=$1
  local value=$2
  local config_txt=$3
  local escaped_value
  if [ -z "$value" ] ; then
    echo "No value for $key, left as default"
  else
    echo Setting "$key" to "$value"
    escaped_value=$(printf '%s\n' "$value" | sed -e 's/[\\&,]/\\&/g')
    sed -i 's,^'"$key"' .*,'"$key"' '"$escaped_value"',' "$config_txt"
  fi
}
259 # Input files
# Every value is quoted: an unquoted empty value would vanish from the
# argument list, making set_config_value treat the file name as the value.
set_config_value platform "$PLATFORM" config.txt
if [ "$PLATFORM" = "Illumina" ] ; then
  set_config_value inputFormat fastq config.txt
  set_config_value pairedEnd 1 config.txt
  set_config_value inputReadFile "$fastq_r1" config.txt
  set_config_value pairedReadFile "$fastq_r2" config.txt
else
  set_config_value inputFormat fasta config.txt
  set_config_value pairedEnd 0 config.txt
  set_config_value input454reads "$fna" config.txt
fi
# Output files
set_config_value MicrosatSumOut Output/microsat_summary.txt config.txt
set_config_value PALsummaryOut Output/PAL_summary.txt config.txt
# Microsat info
set_config_value 2merMinReps "$MIN_2_MER_REPS" config.txt
set_config_value 3merMinReps "$MIN_3_MER_REPS" config.txt
set_config_value 4merMinReps "$MIN_4_MER_REPS" config.txt
set_config_value 5merMinReps "$MIN_5_MER_REPS" config.txt
set_config_value 6merMinReps "$MIN_6_MER_REPS" config.txt
# Primer3 settings
set_config_value primer3input Output/pr3in.txt config.txt
set_config_value primer3output Output/pr3out.txt config.txt
set_config_value primer3executable "$PRIMER3_CORE_EXE" config.txt
# An underscore is appended to the user-supplied primer name prefix
set_config_value prNamePrefix "${PRIMER_PREFIX}_" config.txt
set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt
set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt
set_config_value PRIMER_MAX_SIZE "$PRIMER_MAX_SIZE" config.txt
set_config_value PRIMER_MIN_GC "$PRIMER_MIN_GC" config.txt
set_config_value PRIMER_MAX_GC "$PRIMER_MAX_GC" config.txt
set_config_value PRIMER_GC_CLAMP "$PRIMER_GC_CLAMP" config.txt
set_config_value PRIMER_MAX_END_GC "$PRIMER_MAX_END_GC" config.txt
set_config_value PRIMER_MIN_TM "$PRIMER_MIN_TM" config.txt
set_config_value PRIMER_MAX_TM "$PRIMER_MAX_TM" config.txt
set_config_value PRIMER_OPT_TM "$PRIMER_OPT_TM" config.txt
set_config_value PRIMER_PAIR_MAX_DIFF_TM "$PRIMER_PAIR_MAX_DIFF_TM" config.txt
297 #
298 # Run pal_finder
# stdout and stderr are merged and passed through 'tee' so the output is
# both shown and captured in pal_finder.log for the completion check below
perl "$PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl" config.txt 2>&1 | tee pal_finder.log
#
# Check that log ends with "Done!!" message
# (the pipeline's status is that of tee, so the log's last line is what
# tells us whether pal_finder completed)
if ! tail -n 1 pal_finder.log | grep -q -F 'Done!!' ; then
  echo ERROR pal_finder failed to complete successfully >&2
  exit 1
fi
306 #
307 # Run the pal_finder_filter.pl script from Graeme Fox
# Only run the filter when a destination for its output was requested via
# --filter_microsats. The script is given Output/PAL_summary.txt and is
# expected to leave pal_finder_filter_output.txt in the current directory.
if [ -n "$FILTERED_MICROSATS" ] ; then
  if ! perl "$PALFINDER_FILTER_PL" Output/PAL_summary.txt 2>&1 ; then
    echo ERROR pal_finder_filter.pl exited with non-zero status >&2
    exit 1
  elif [ ! -f pal_finder_filter_output.txt ] ; then
    echo ERROR no output from pal_finder_filter.pl >&2
    exit 1
  fi
fi
318 #
319 # Clean up
# Move each result that exists to the destination named on the command
# line. Destinations are quoted so paths containing spaces are handled.
if [ -f Output/microsat_summary.txt ] ; then
  /bin/mv Output/microsat_summary.txt "$MICROSAT_SUMMARY"
fi
if [ -f Output/PAL_summary.txt ] ; then
  /bin/mv Output/PAL_summary.txt "$PAL_SUMMARY"
fi
# The filtered microsatellites and the config copy are optional outputs
if [ -n "$FILTERED_MICROSATS" ] && [ -f pal_finder_filter_output.txt ] ; then
  echo Moving pal_finder_filter_output.txt to "$FILTERED_MICROSATS"
  /bin/mv pal_finder_filter_output.txt "$FILTERED_MICROSATS"
fi
if [ -n "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
  /bin/mv config.txt "$OUTPUT_CONFIG_FILE"
fi
333 ##
334 #