Mercurial > repos > pjbriggs > pal_finder
annotate pal_finder_wrapper.sh @ 9:52dbe2089d14 draft default tip
Version 0.02.04.8 (update fastq subsetting).
author | pjbriggs |
---|---|
date | Wed, 04 Jul 2018 06:05:52 -0400 |
parents | 4e625d3672ba |
children |
rev | line source |
---|---|
0 | 1 #!/bin/sh |
2 # | |
3 # pal_finder_wrapper.sh: run pal_finder perl script as a Galaxy tool | |
4 # | |
5 # Usage: run_palfinder.sh FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS] | |
6 # run_palfinder.sh --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS] | |
7 # | |
8 # Options: | |
9 # | |
10 # --primer-prefix PREFIX: prefix added to the beginning of all primer names (prPrefixName) | |
11 # --2merMinReps N: minimum number of 2-mer repeat units to detect (0=ignore units of this size) | |
12 # --3merMinReps N | |
13 # --4merMinReps N | |
14 # --5merMinReps N | |
15 # --6merMinReps N | |
16 # --primer-mispriming-library FASTA: specify a Fasta file with sequences to avoid amplifying | |
17 # --primer-opt-size VALUE: optimum primer length | |
18 # --primer-min-size VALUE: minimum acceptable primer length | |
19 # --primer-max-size VALUE: maximum acceptable primer length | |
20 # --primer-min-gc VALUE: minimum allowable percentage of Gs and Cs in any primer | |
21 # --primer-max-gc VALUE: maximum allowable percentage of Gs and Cs | |
22 # --primer-gc-clamp VALUE: number of consecutive Gs and Cs at 3' end of both left and right primer | |
23 # --primer-max-end-gc VALUE: max number of Gs or Cs in last five 3' bases of left or right primer | |
24 # --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo | |
25 # --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius) | |
26 # --primer-opt-tm VALUE: optimum melting temperature (Celsius) | |
27 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers | |
28 # --output_config_file FNAME: write a copy of the config.txt file to FNAME | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
29 # --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
30 # --filter_microsats FNAME: write output of filter options to FNAME |
2 | 31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME |
32 # -primers: run the 'primers' filter option | |
33 # -occurrences: run the 'occurrences' filter option | |
34 # -rankmotifs: run the 'rankmotifs' filter option | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
35 # --subset N: use a subset of reads of size N |
0 | 36 # |
37 # pal_finder is available from http://sourceforge.net/projects/palfinder/ | |
38 # | |
39 # primer3 is available from http://primer3.sourceforge.net/releases.php | |
40 # (nb needs version 2.0.0-alpha) | |
41 # | |
42 # Explicitly set the locations of the pal_finder script, data files and the primer3 | |
43 # executable by setting the following variables in the environment: | |
44 # | |
45 # * PALFINDER_SCRIPT_DIR: location of the pal_finder Perl script (defaults to | |
46 # /usr/bin) | |
47 # * PALFINDER_DATA_DIR: location of the pal_finder data files (specifically | |
48 # config.txt and simple.ref; defaults to /usr/share/pal_finder_v0.02.04) | |
49 # * PRIMER3_CORE_EXE: name of the primer3_core program, which should include the | |
50 # full path if it's not on the Galaxy user's PATH (defaults to primer3_core) | |
51 # | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
52 echo "### $(basename $0) ###" |
0 | 53 echo $* |
54 # | |
6 | 55 # Maximum number of lines to report from log file contents |
56 MAX_LINES=500 | |
57 # | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
58 # Get helper functions |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
59 . $(dirname $0)/pal_finder_wrapper_utils.sh |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
60 # |
0 | 61 # Initialise locations of scripts, data and executables |
62 # | |
63 # Set these in the environment to override at execution time | |
64 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} | |
65 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} | |
66 : ${PRIMER3_CORE_EXE:=primer3_core} | |
67 # | |
68 # Filter script is in the same directory as this script | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
69 PALFINDER_FILTER=$(dirname $0)/pal_filter.py |
2 | 70 if [ ! -f $PALFINDER_FILTER ] ; then |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
71 fatal No $PALFINDER_FILTER script |
0 | 72 fi |
73 # | |
74 # Check that we have all the components | |
75 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
76 fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" |
0 | 77 fi |
78 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
79 fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" |
0 | 80 fi |
81 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
82 fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" |
0 | 83 fi |
84 # | |
85 # Initialise parameters used in the config.txt file | |
86 PRIMER_PREFIX="test" | |
87 MIN_2_MER_REPS=6 | |
88 MIN_3_MER_REPS=0 | |
89 MIN_4_MER_REPS=0 | |
90 MIN_5_MER_REPS=0 | |
91 MIN_6_MER_REPS=0 | |
92 PRIMER_MISPRIMING_LIBRARY=$PALFINDER_DATA_DIR/simple.ref | |
93 PRIMER_OPT_SIZE= | |
94 PRIMER_MAX_SIZE= | |
95 PRIMER_MIN_SIZE= | |
96 PRIMER_MAX_GC= | |
97 PRIMER_MIN_GC= | |
98 PRIMER_GC_CLAMP= | |
99 PRIMER_MAX_END_GC= | |
100 PRIMER_OPT_TM= | |
101 PRIMER_MAX_TM= | |
102 PRIMER_MIN_TM= | |
103 PRIMER_PAIR_MAX_DIFF_TM= | |
104 OUTPUT_CONFIG_FILE= | |
2 | 105 OUTPUT_ASSEMBLY= |
0 | 106 FILTERED_MICROSATS= |
2 | 107 FILTER_OPTIONS= |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
108 SUBSET= |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
109 RANDOM_SEED=568765 |
0 | 110 # |
111 # Collect command line arguments | |
112 if [ $# -lt 2 ] ; then | |
113 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | |
114 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
115 fatal "Bad command line" |
0 | 116 fi |
117 if [ "$1" == "--454" ] ; then | |
118 PLATFORM="454" | |
119 FNA=$2 | |
120 else | |
121 PLATFORM="Illumina" | |
122 FASTQ_R1=$1 | |
123 FASTQ_R2=$2 | |
124 fi | |
125 MICROSAT_SUMMARY=$3 | |
126 PAL_SUMMARY=$4 | |
127 shift; shift; shift; shift | |
128 # | |
129 # Collect command line options | |
130 while [ ! -z "$1" ] ; do | |
131 case "$1" in | |
132 --primer-prefix) | |
133 shift | |
2 | 134 # Convert all non-alphanumeric characters to underscores in prefix |
135 PRIMER_PREFIX=$(echo -n $1 | tr -s -c "[:alnum:]" "_") | |
0 | 136 ;; |
137 --2merMinReps) | |
138 shift | |
139 MIN_2_MER_REPS=$1 | |
140 ;; | |
141 --3merMinReps) | |
142 shift | |
143 MIN_3_MER_REPS=$1 | |
144 ;; | |
145 --4merMinReps) | |
146 shift | |
147 MIN_4_MER_REPS=$1 | |
148 ;; | |
149 --5merMinReps) | |
150 shift | |
151 MIN_5_MER_REPS=$1 | |
152 ;; | |
153 --6merMinReps) | |
154 shift | |
155 MIN_6_MER_REPS=$1 | |
156 ;; | |
157 --primer-mispriming-library) | |
158 shift | |
159 PRIMER_MISPRIMING_LIBRARY=$1 | |
160 ;; | |
161 --primer-opt-size) | |
162 shift | |
163 PRIMER_OPT_SIZE=$1 | |
164 ;; | |
165 --primer-max-size) | |
166 shift | |
167 PRIMER_MAX_SIZE=$1 | |
168 ;; | |
169 --primer-min-size) | |
170 shift | |
171 PRIMER_MIN_SIZE=$1 | |
172 ;; | |
173 --primer-max-gc) | |
174 shift | |
175 PRIMER_MAX_GC=$1 | |
176 ;; | |
177 --primer-min-gc) | |
178 shift | |
179 PRIMER_MIN_GC=$1 | |
180 ;; | |
181 --primer-gc-clamp) | |
182 shift | |
183 PRIMER_GC_CLAMP=$1 | |
184 ;; | |
185 --primer-max-end-gc) | |
186 shift | |
187 PRIMER_MAX_END_GC=$1 | |
188 ;; | |
189 --primer-opt-tm) | |
190 shift | |
191 PRIMER_OPT_TM=$1 | |
192 ;; | |
193 --primer-max-tm) | |
194 shift | |
195 PRIMER_MAX_TM=$1 | |
196 ;; | |
197 --primer-min-tm) | |
198 shift | |
199 PRIMER_MIN_TM=$1 | |
200 ;; | |
201 --primer-pair-max-diff-tm) | |
202 shift | |
203 PRIMER_PAIR_MAX_DIFF_TM=$1 | |
204 ;; | |
205 --output_config_file) | |
206 shift | |
207 OUTPUT_CONFIG_FILE=$1 | |
208 ;; | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
209 --bad_primer_ranges) |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
210 shift |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
211 BAD_PRIMER_RANGES=$1 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
212 ;; |
0 | 213 --filter_microsats) |
214 shift | |
215 FILTERED_MICROSATS=$1 | |
216 ;; | |
2 | 217 -primers|-occurrences|-rankmotifs) |
218 FILTER_OPTIONS="$FILTER_OPTIONS $1" | |
219 ;; | |
220 -assembly) | |
221 FILTER_OPTIONS="$FILTER_OPTIONS $1" | |
222 shift | |
223 OUTPUT_ASSEMBLY=$1 | |
224 ;; | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
225 --subset) |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
226 shift |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
227 SUBSET=$1 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
228 ;; |
0 | 229 *) |
230 echo Unknown option: $1 >&2 | |
231 exit 1 | |
232 ;; | |
233 esac | |
234 shift | |
235 done | |
236 # | |
237 # Check that primer3_core is available | |
238 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` | |
239 if [ -z "$got_primer3" ] ; then | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
240 fatal "primer3_core not found" |
0 | 241 fi |
242 # | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
243 # Check the n-mers specification |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
244 if [ $MIN_6_MER_REPS -ne 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
245 if [ $MIN_5_MER_REPS -eq 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
246 fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
247 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
248 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
249 if [ $MIN_5_MER_REPS -ne 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
250 if [ $MIN_4_MER_REPS -eq 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
251 fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
252 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
253 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
254 if [ $MIN_4_MER_REPS -ne 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
255 if [ $MIN_3_MER_REPS -eq 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
256 fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
257 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
258 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
259 if [ $MIN_2_MER_REPS -eq 0 ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
260 fatal "Minimum number of 2-mer repeats cannot be zero" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
261 fi |
0 | 262 # Set up the working dir |
263 if [ "$PLATFORM" == "Illumina" ] ; then | |
264 # Paired end Illumina data as input | |
265 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
266 fatal ERROR R1 and R2 fastqs are the same file |
0 | 267 fi |
268 ln -s $FASTQ_R1 | |
269 ln -s $FASTQ_R2 | |
270 fastq_r1=$(basename $FASTQ_R1) | |
271 fastq_r2=$(basename $FASTQ_R2) | |
272 else | |
273 # 454 data as input | |
274 ln -s $FNA | |
275 fna=$(basename $FNA) | |
276 fi | |
277 ln -s $PRIMER_MISPRIMING_LIBRARY | |
278 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) | |
279 mkdir Output | |
280 # | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
281 # Use a subset of reads |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
282 if [ ! -z "$SUBSET" ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
283 echo "### Extracting subset of reads ###" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
284 $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
285 fastq_r1="subset_r1.fq" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
286 fastq_r2="subset_r2.fq" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
287 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
288 # |
0 | 289 # Copy in the default config.txt file |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
290 echo "### Creating config.txt file for pal_finder run ###" |
0 | 291 /bin/cp $PALFINDER_DATA_DIR/config.txt . |
292 # | |
293 # Update the config.txt file with new values | |
294 # Input files | |
295 set_config_value platform $PLATFORM config.txt | |
296 if [ "$PLATFORM" == "Illumina" ] ; then | |
297 set_config_value inputFormat fastq config.txt | |
298 set_config_value pairedEnd 1 config.txt | |
299 set_config_value inputReadFile $fastq_r1 config.txt | |
300 set_config_value pairedReadFile $fastq_r2 config.txt | |
301 else | |
302 set_config_value inputFormat fasta config.txt | |
303 set_config_value pairedEnd 0 config.txt | |
304 set_config_value input454reads $fna config.txt | |
305 fi | |
306 # Output files | |
307 set_config_value MicrosatSumOut Output/microsat_summary.txt config.txt | |
308 set_config_value PALsummaryOut Output/PAL_summary.txt config.txt | |
309 # Microsat info | |
310 set_config_value 2merMinReps $MIN_2_MER_REPS config.txt | |
311 set_config_value 3merMinReps $MIN_3_MER_REPS config.txt | |
312 set_config_value 4merMinReps $MIN_4_MER_REPS config.txt | |
313 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt | |
314 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt | |
315 # Primer3 settings | |
316 set_config_value primer3input Output/pr3in.txt config.txt | |
317 set_config_value primer3output Output/pr3out.txt config.txt | |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
318 set_config_value keepPrimer3files 1 config.txt |
0 | 319 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt |
320 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt | |
321 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt | |
322 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt | |
323 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt | |
324 set_config_value PRIMER_MAX_SIZE "$PRIMER_MAX_SIZE" config.txt | |
325 set_config_value PRIMER_MIN_GC "$PRIMER_MIN_GC" config.txt | |
326 set_config_value PRIMER_MAX_GC "$PRIMER_MAX_GC" config.txt | |
327 set_config_value PRIMER_GC_CLAMP "$PRIMER_GC_CLAMP" config.txt | |
328 set_config_value PRIMER_MAX_END_GC "$PRIMER_MAX_END_GC" config.txt | |
329 set_config_value PRIMER_MIN_TM "$PRIMER_MIN_TM" config.txt | |
330 set_config_value PRIMER_MAX_TM "$PRIMER_MAX_TM" config.txt | |
331 set_config_value PRIMER_OPT_TM "$PRIMER_OPT_TM" config.txt | |
332 set_config_value PRIMER_PAIR_MAX_DIFF_TM "$PRIMER_PAIR_MAX_DIFF_TM" config.txt | |
333 # | |
334 # Run pal_finder | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
335 echo "### Running pal_finder ###" |
6 | 336 perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 1>pal_finder.log 2>&1 |
337 echo "### pal_finder finished ###" | |
338 # | |
339 # Handle the pal_finder log file | |
340 echo "### Output from pal_finder ###" | |
341 if [ $(wc -l pal_finder.log | cut -d" " -f1) -gt $MAX_LINES ] ; then | |
342 echo WARNING output too long, truncated to last $MAX_LINES lines: | |
343 echo ... | |
344 fi | |
345 tail -$MAX_LINES pal_finder.log | |
0 | 346 # |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
347 # Check for success/failure |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
348 if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
349 # No microsatellites found |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
350 fatal ERROR pal_finder failed to locate any microsatellites |
0 | 351 exit 1 |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
352 elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
353 # Log doesn't end with "Done!!" (indicates failure) |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
354 fatal ERROR pal_finder failed to complete successfully |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
355 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
356 echo "### pal_finder finished ###" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
357 # |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
358 # Check for errors in pal_finder output |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
359 echo "### Checking for errors ###" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
360 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
361 echo WARNING primer3 terminated prematurely due to bad product size ranges |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
362 $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
363 N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
364 if [ -z "$BAD_PRIMER_RANGES" ] ; then |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
365 # No output file so report to stderr |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
366 cat <<EOF |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
367 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
368 Pal_finder generated bad ranges for the following read IDs: |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
369 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
370 EOF |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
371 cat bad_primer_ranges.txt |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
372 cat <<EOF |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
373 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
374 This error can occur when input data contains short R1 reads and has |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
375 has not been properly trimmed and filtered. |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
376 |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
377 EOF |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
378 else |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
379 # Move the bad ranges to the specified file |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
380 echo "### Writing read IDs with bad primer ranges ###" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
381 /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
382 fi |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
383 else |
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
384 N_BAD_PRIMERS=0 |
0 | 385 fi |
386 # | |
7
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
387 # Sort microsat_summary output |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
388 echo "### Sorting microsat summary output ###" |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
389 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
390 echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted |
7
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
391 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
392 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
393 tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted |
7
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
394 mv microsat_summary.sorted Output/microsat_summary.txt |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
395 # |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
396 # Sort PAL_summary output |
5e133b7b79a6
Uploaded version 0.02.04.6 (uses conda dependency resolution).
pjbriggs
parents:
6
diff
changeset
|
397 echo "### Sorting PAL summary output ###" |
2 | 398 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt |
399 if [ "$PLATFORM" == "Illumina" ] ; then | |
400 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | |
401 else | |
402 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | |
403 fi | |
404 mv Output/PAL_summary.sorted.txt Output/PAL_summary.txt | |
405 # | |
406 # Run the filtering & assembly script | |
407 if [ ! -z "$FILTERED_MICROSATS" ] || [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | |
408 echo "### Running filtering & assembly script ###" | |
6 | 409 python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 1>pal_filter.log 2>&1 |
410 echo "### Output from pal_filter ###" | |
411 if [ $(wc -l pal_filter.log | cut -d" " -f1) -gt $MAX_LINES ] ; then | |
412 echo WARNING output too long, truncated to last $MAX_LINES lines: | |
413 echo ... | |
414 fi | |
415 tail -$MAX_LINES pal_filter.log | |
0 | 416 if [ $? -ne 0 ] ; then |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
417 fatal $PALFINDER_FILTER exited with non-zero status |
2 | 418 elif [ ! -f PAL_summary.filtered ] ; then |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
419 fatal no output from $PALFINDER_FILTER |
0 | 420 fi |
421 fi | |
422 # | |
423 # Clean up | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
424 echo "### Handling output files ###" |
0 | 425 if [ -f Output/microsat_summary.txt ] ; then |
426 /bin/mv Output/microsat_summary.txt $MICROSAT_SUMMARY | |
427 fi | |
428 if [ -f Output/PAL_summary.txt ] ; then | |
429 /bin/mv Output/PAL_summary.txt $PAL_SUMMARY | |
430 fi | |
2 | 431 if [ ! -z "$FILTERED_MICROSATS" ] && [ -f PAL_summary.filtered ] ; then |
432 /bin/mv PAL_summary.filtered $FILTERED_MICROSATS | |
433 fi | |
434 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
435 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt |
2 | 436 if [ -f "$assembly" ] ; then |
437 /bin/mv $assembly "$OUTPUT_ASSEMBLY" | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
438 else |
8
4e625d3672ba
Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
7
diff
changeset
|
439 fatal no assembly output found |
2 | 440 fi |
0 | 441 fi |
442 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then | |
443 /bin/mv config.txt $OUTPUT_CONFIG_FILE | |
444 fi | |
6 | 445 # |
446 echo "### Pal_finder tool completed ###" | |
0 | 447 ## |
448 # |