Mercurial > repos > nml > abacas
comparison abacas.1.1.pl @ 0:a1fdc6925620 draft default tip
"planemo upload for repository https://github.com/phac-nml/abacas commit f6856961094e89e4cad0ee7df6c2a49bf005e4bf"
author | nml |
---|---|
date | Thu, 21 Nov 2019 12:53:20 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a1fdc6925620 |
---|---|
1 #!/usr/bin/env perl | |
2 # Copyright (C) 2008 Genome Research Limited. All Rights Reserved. | |
3 # | |
4 # This program is free software; you can redistribute it and/or | |
5 # modify it under the terms of the GNU General Public License | |
6 # as published by the Free Software Foundation; either version 2 | |
7 # of the License, or (at your option) any later version. | |
8 #This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public License | |
14 # along with this program; if not, write to the Free Software | |
15 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
16 | |
17 | |
18 use strict; | |
19 use warnings; | |
20 use POSIX qw(ceil floor); | |
21 use Getopt::Std; | |
22 | |
23 #------------------------------------------------------------------------------- | |
24 | |
25 if (@ARGV < 1) { usage();} | |
26 | |
27 my ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns, | |
28 $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer, | |
29 $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix) | |
30 =checkUserInput( @ARGV ); | |
31 | |
32 my $ref_inline; | |
33 if ($escapeToPrimers ==1) | |
34 { | |
35 pickPrimers ($reference, $query_file, $flank, $chk_uniq); | |
36 exit; | |
37 } | |
38 #BEGIN | |
39 #------------------------------------------------------------------------------- | |
40 print_header(); | |
41 | |
42 $ref_inline = Ref_Inline($reference); | |
43 #Get length of the reference sequence | |
44 my $ref_len = length($ref_inline); | |
45 print "PREPARING DATA FOR $choice \n"; | |
46 | |
47 ################### | |
48 # Running MUMmer # | |
49 ################### | |
50 my ($path_dir, $run_mum, $path_toPass); | |
51 if ($debug) | |
52 { | |
53 print "the seed is $seed \n"; | |
54 print "RedoMummer= ",$redoMummer."\n"; | |
55 } | |
56 my @do_mum_return; | |
57 if ($redoMummer) | |
58 { | |
59 @do_mum_return = doMummer($reference, $query_file, $choice,$sen,$seed,$min_id, $min_cov, $diff_cov,$min_len, $debug, $is_circular) or die "Couldn't run MUMmer\n"; | |
60 } | |
61 | |
62 #################################### | |
63 # Processing tiling output # | |
64 #################################### | |
65 if ($debug) { | |
66 print "Do tiling...\n"; | |
67 } | |
68 | |
69 #-------------------------------- | |
70 my $mummer_tiling = $do_mum_return[0]; | |
71 $path_dir = $do_mum_return[2]; | |
72 $path_toPass = $do_mum_return[1]; | |
73 ################## | |
74 #Do Tiling | |
75 #------------------------------------------- | |
76 doTiling ($mummer_tiling, $path_toPass, $path_dir,$reference, $query_file, $choice, $prefix,$mlfas, $fasta_bin, $avoid_Ns, $ref_len, $gaps_2file, $ref_inline, $add_bin_2ps, $pick_primer, $flank, $chk_uniq, $tbx); | |
77 | |
78 | |
79 | |
80 | |
81 | |
82 ########################################################################################################################################################## | |
83 #################################Contig ordering ########################################################## | |
84 ########################### | |
85 | |
86 sub help | |
87 { | |
88 | |
89 die <<EOF | |
90 | |
91 *********************************************************************************** | |
92 * ABACAS: Algorithm Based Automatic Contiguation of Assembled Sequences * | |
93 * * | |
94 * * | |
95 * Copyright (C) 2009 The Wellcome Trust Sanger Institute, Cambridge, UK. * | |
96 * All Rights Reserved. * | |
97 * * | |
98 *********************************************************************************** | |
99 | |
100 USAGE | |
101 abacas.pl -r <reference file: single fasta> -q <query sequence file: fasta> -p <nucmer/promer> [OPTIONS] | |
102 | |
103 -r reference sequence in a single fasta file | |
104 -q contigs in multi-fasta format | |
105 -p MUMmer program to use: 'nucmer' or 'promer' | |
106 OR | |
107 abacas.pl -r <reference file: single fasta> -q <pseudomolecule/ordered sequence file: fasta> -e | |
108 OPTIONS | |
109 -h print usage | |
110 -d use default nucmer/promer parameters | |
111 -s int minimum length of exact matching word (nucmer default = 12, promer default = 4) | |
112 -m print ordered contigs to file in multifasta format | |
113 -b print contigs in bin to file | |
114 -N print a pseudomolecule without "N"s | |
115 -i int mimimum percent identity [default 40] | |
116 -v int mimimum contig coverage [default 40] | |
117 -V int minimum contig coverage difference [default 1] | |
118 -l int minimum contig length [default 1] | |
119 -t run tblastx on contigs that are not mapped | |
120 -g string (file name) print gaps on reference to file name | |
121 -a append contigs in bin to the pseudomolecule | |
122 -o prefix output files will have this prefix | |
123 -P pick primer sets to close gaps | |
124 -f int number of flanking bases on either side of a gap for primer design (default 350) | |
125 -R Redo mummer [default 1, use -R 0 to avoid running mummer] | |
126 -e Escape contig ordering i.e. go to primer design | |
127 -c Reference sequence is circular | |
128 | |
129 EOF | |
130 } | |
131 ######## | |
132 sub usage | |
133 { | |
134 | |
135 die <<EOF | |
136 | |
137 USAGE | |
138 abacas.pl -r <reference file: single fasta> -q <query sequence file: fasta> -p <nucmer/promer> [OPTIONS] | |
139 -r reference sequence in a single fasta file | |
140 -q contigs in multi-fasta format | |
141 -p MUMmer program to use: 'nucmer' or 'promer' | |
142 for contig ordering and primer design | |
143 | |
144 OR | |
145 abacas.pl -r <reference file: single fasta> -q <pseudomolecule/ordered sequence file: fasta> -e 1 | |
146 to escape contig ordering and go directly to primer design | |
147 | |
148 OR | |
149 abacas.pl -h for help | |
150 | |
151 EOF | |
152 } | |
153 ######## | |
154 ################## | |
155 sub print_header | |
156 { | |
157 print " | |
158 *********************************************************************************** | |
159 * ABACAS: Algorithm Based Automatic Contiguation of Assembled Sequences * | |
160 * * | |
161 * * | |
162 * Copyright (C) 2009 The Wellcome Trust Sanger Institute, Cambridge, UK. * | |
163 * All Rights Reserved. * | |
164 * * | |
165 *********************************************************************************** | |
166 \n"; | |
167 } | |
168 ######################################### | |
169 sub checkUserInput{ | |
170 my %options; | |
171 getopts('hr:q:p:ds:mbNi:v:V:l:tg:ao:Pf:R:u:ecD', \%options); | |
172 | |
173 my ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns, | |
174 $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer, | |
175 $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix); | |
176 | |
177 if($options{h}) { | |
178 $help = $options{h}; | |
179 help(); | |
180 } | |
181 else{ | |
182 $help =0; | |
183 } | |
184 if ($options{r} && $options{q} ){ | |
185 ($reference, $query_file) = ($options{r},$options{q}); | |
186 | |
187 } | |
188 else | |
189 { | |
190 usage() unless $options{e}; | |
191 } | |
192 | |
193 if ($options{p}) | |
194 { | |
195 $choice = $options{p}; | |
196 unless ($choice eq "nucmer" || $choice eq "promer"){ | |
197 print "Unknown MuMmer function\n Please use nucmer or promer\n"; | |
198 exit; | |
199 } | |
200 } | |
201 else | |
202 { | |
203 usage() unless $options{e}; | |
204 } | |
205 if ($options{e}) #$escapeToPrimers) | |
206 { | |
207 print_header(); | |
208 print "Primer design selected,... escaping contig ordering\n"; | |
209 $escapeToPrimers = 1; | |
210 $chk_uniq = "nucmer"; | |
211 $choice = ""; | |
212 } | |
213 else | |
214 { | |
215 $escapeToPrimers = 0; | |
216 } | |
217 if ($options{d}) {$sen =1;} else {$sen =0;} #print $sen , " ---sen\n"; | |
218 #print $options{t}, "\n"; exit; | |
219 if($options{t}) {$tbx = 1;} else {$tbx = 0;} #print $tbx, " ---tbx\n"; # | |
220 unless ($options{s}){ | |
221 if ($choice eq "nucmer"){ | |
222 $seed = 12; | |
223 } | |
224 else{ | |
225 $seed =4; | |
226 } | |
227 } | |
228 if ($options{m}){$mlfas =1;} else { $mlfas =0; } #print $mlfas , " ---mlfasta\n"; | |
229 if ($options{b}) { $fasta_bin =1;} else {$fasta_bin =0;} | |
230 if ($options {N}) {$avoid_Ns =1;} else {$avoid_Ns=0;} | |
231 | |
232 unless($options{i}) {$min_id =40;} | |
233 unless ($options{v}) {$min_cov =40;} | |
234 unless ($options{V}) {$diff_cov =1;} | |
235 unless ($options {l}) {$min_len = 1;} | |
236 if ($options{a}) {$add_bin_2ps = 1; }else {$add_bin_2ps =0;} | |
237 if ($options{P}) {$pick_primer=1;} else {$pick_primer =0;} | |
238 if ($options{f}) {$flank = $options{f};} else {$flank = 1000;} | |
239 if ($options{u}) {$chk_uniq = $options{u}; } else {$chk_uniq = "nucmer";} | |
240 | |
241 if($options{R}) {$redoMummer = $options{R}; }else {$redoMummer = 1;} | |
242 unless ($options{c}) {$is_circular = 0;} | |
243 if ($options{g}) {$gaps_2file = $options{g};} else {$gaps_2file ="";} | |
244 if($options{o}) {$prefix = $options{o};} else {$prefix = "";} | |
245 unless ($options{D}) {$debug =0}; | |
246 if ($tbx ==1 && $fasta_bin !=1) | |
247 { | |
248 print "ERROR: Please use -t -b if you want to run tblastx on contigs in bin\n"; | |
249 exit; | |
250 } | |
251 #print $tbx, "\n"; exit; | |
252 return ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns, | |
253 $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer, | |
254 $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix); | |
255 | |
256 } | |
257 ############# | |
258 ## get the reference sequence in one line | |
259 #-------------------------------------------------- | |
260 sub Ref_Inline | |
261 { | |
262 my $ref = shift; | |
263 open (refFH, $ref) or die "Could not open file\n"; | |
264 my $seq =""; | |
265 my @r = <refFH>; | |
266 my $num_chr =0; | |
267 foreach(@r){ | |
268 if ($_ =~ /\>/){ | |
269 $num_chr +=1; | |
270 } | |
271 } | |
272 if ($num_chr > 1){ | |
273 print "\nERROR: Please use a single fasta reference file. You can simply merge chromosomes in to a union fasta file.\n\n"; | |
274 exit; | |
275 } | |
276 shift @r; | |
277 foreach(@r){ | |
278 chomp; | |
279 $seq = $seq.$_; | |
280 } | |
281 return $seq; | |
282 } | |
283 ################ | |
284 # Run mummer | |
285 #-------------------------------------------- | |
286 sub doMummer | |
287 { | |
288 my ($reference, $query_file, $choice, $sen,$seed,$min_id, $min_cov, $diff_cov,$min_len, $debug, $is_circular ) = @_; | |
289 | |
290 my $df = 'delta-filter'; | |
291 my $st = 'show-tiling'; | |
292 my $ask = 'which'; | |
293 my ($path_toPass, $run_mum); # params to return... | |
294 my ($command, $Path, $dir) = checkProg($choice); | |
295 my ($run_df, $df_path, $df_dir) = checkProg($df); | |
296 my ($run_st, $st_path, $st_dir) = checkProg($st); | |
297 my (@running, @deltaRes, @coordsRes); | |
298 if ($choice eq "nucmer") | |
299 { | |
300 if ($sen ==0) | |
301 { | |
302 @running = `$command --maxmatch -l $seed -p $choice $reference $query_file &> /dev/null`; | |
303 @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`; | |
304 if ($is_circular == 1) | |
305 { | |
306 @coordsRes = `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
307 } | |
308 else | |
309 { | |
310 @coordsRes = `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
311 | |
312 } | |
313 } | |
314 else | |
315 { | |
316 @running = `$command -p $choice $reference $query_file &> /dev/null`; | |
317 @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`; | |
318 if ($is_circular ==1) {@coordsRes = `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;} | |
319 else { @coordsRes = `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;} | |
320 } | |
321 | |
322 } | |
323 else | |
324 { | |
325 if ($sen ==0) | |
326 { | |
327 @running = `$command --maxmatch -l $seed -x 1 -p $choice $reference $query_file &> /dev/null`; | |
328 @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`; | |
329 if ($is_circular == 1) | |
330 { | |
331 @coordsRes= `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
332 } | |
333 else | |
334 { | |
335 @coordsRes= `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
336 } | |
337 } | |
338 else | |
339 { | |
340 @running = `$command -l $seed -p $choice $reference $query_file &> /dev/null`; | |
341 @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`; | |
342 if ($is_circular == 1) { | |
343 @coordsRes= `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
344 } | |
345 else | |
346 { | |
347 @coordsRes= `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`; | |
348 } | |
349 } | |
350 } | |
351 my $Coordsfull= "$choice.tiling"; | |
352 return ($Coordsfull,$Path, $dir); | |
353 } | |
354 | |
355 ## ############################################################################# | |
356 sub checkProg{ #checks if a given excutable is in the path... | |
357 my $prog = shift; | |
358 my $ask = 'which'; | |
359 my @check_prog = `$ask $prog`; | |
360 my $path_toPass; | |
361 my $path_dir; | |
362 my $command; | |
363 if (defined $check_prog[0] && $check_prog[0] =~ /$prog$/) | |
364 { | |
365 $path_toPass = $prog; | |
366 $command = $prog; | |
367 } | |
368 else | |
369 { | |
370 print "\nENTER the directory for your ", $prog, " executables [default ./]: "; | |
371 my $path=<STDIN>; | |
372 chomp $path; | |
373 $path_dir = $path; | |
374 if ($path_dir =~/\/$/) | |
375 { | |
376 $path_dir = $path_dir; | |
377 } | |
378 else | |
379 { | |
380 $path_dir = $path_dir."/"; | |
381 } | |
382 my @final_check = `$ask $command`; | |
383 if (exists $final_check[0] && $final_check[0] =~ /$prog$/) | |
384 { | |
385 $command = $path_dir.$prog; | |
386 $path_toPass = $command; | |
387 } | |
388 else | |
389 { | |
390 print "ERROR: Could not run ", $prog, ", please check if it is installed in your path\n or provide the directory \n"; | |
391 exit; | |
392 } | |
393 } | |
394 | |
395 return ($command, $path_toPass, $path_dir); | |
396 } | |
397 | |
398 ############################## | |
399 # converts a fasta file to an ordered single line | |
400 #-------------------------------------------------- | |
401 sub Fasta2ordered | |
402 { | |
403 if( @_ != 1 ) | |
404 { | |
405 print "Usage: Fasta2ordered <fasta_file>\n"; | |
406 exit; | |
407 } | |
408 my $fast = shift; #print $fast; exit; | |
409 my @fasta = split (/\n/, $fast); | |
410 if ($fasta[0] =~ /\>/ ) #remove chromosome name if exists in the onput sequence. | |
411 { | |
412 my $ch_name = $fasta[0]; | |
413 shift @fasta; | |
414 } | |
415 #print $fasta[0]; exit; | |
416 foreach(@fasta){chomp;} | |
417 my $num_lines = scalar(@fasta); | |
418 my $dna = ''; | |
419 for(my $i=0; $i< $num_lines; $i+=1) | |
420 { | |
421 $dna = $dna.$fasta[$i]; | |
422 } | |
423 my $ordered_dna = $dna; | |
424 return $ordered_dna; | |
425 } | |
426 ############################################################## | |
427 # Hash input contigs | |
428 #------------------------------------------------ | |
429 | |
430 sub hash_contigs { | |
431 if( @_ != 1 ) | |
432 { | |
433 print "Usage: hash_contigs contigs_file"; | |
434 exit; | |
435 } | |
436 my $contigs_file = shift; | |
437 if( $contigs_file =~ /\.gz$/ ) | |
438 { | |
439 open(CONTIGS, $contigs_file, "gunzip -c $contigs_file |" ) or die "Cant open contigs file: $!\n"; | |
440 } | |
441 else | |
442 { | |
443 open( CONTIGS, $contigs_file) or die "Cant open contigs file: $!\n"; | |
444 } | |
445 | |
446 my %contigs_hash; # hash to store contig names and sequences | |
447 my $contigName; | |
448 | |
449 | |
450 while (<CONTIGS>) ##add error checking... | |
451 { | |
452 if (/^>(\S+)/) { | |
453 $contigName=$1; | |
454 } | |
455 else { | |
456 chomp; | |
457 $contigs_hash{$contigName} .= $_; | |
458 } | |
459 } | |
460 close(CONTIGS); | |
461 #tdo | |
462 ## check if qual exists | |
463 my %contigs_qual_hash; | |
464 | |
465 | |
466 if (-r "$contigs_file.qual" or -r "$contigs_file.qual.gz") { | |
467 if( -r "$contigs_file.qual.gz" ) | |
468 { | |
469 open(CONTIGS, "$contigs_file.qual.gz", "gunzip -c $contigs_file.qual.gz |" ) or die "Cant open contigs file: $!\n"; | |
470 } | |
471 else | |
472 { | |
473 open( CONTIGS, "$contigs_file.qual" ) or die "Cant open contigs file: $!\n"; | |
474 } | |
475 | |
476 | |
477 while (<CONTIGS>) { | |
478 if (/^>(\S+)/) { | |
479 $contigName=$1; | |
480 } | |
481 else { | |
482 chomp; | |
483 $contigs_qual_hash{$contigName} .= $_; | |
484 } | |
485 } | |
486 | |
487 } # end tdo # end if exist | |
488 | |
489 | |
490 return (\%contigs_hash,\%contigs_qual_hash); | |
491 } | |
492 ####################### | |
493 ############################## | |
494 ### it gets a delta name | |
495 sub getMummerComparison{ | |
496 my $deltaName = shift; | |
497 | |
498 | |
499 ### transform the delta file to coords | |
500 my $call ="show-coords -H -T -q $deltaName > $deltaName.coords "; | |
501 !system(" $call") or die "Problems doing the show-coords comparison: $call $!\n"; | |
502 | |
503 | |
504 ### willh old results | |
505 my %h; | |
506 | |
507 ### has as index the postion with the max hits | |
508 my %h_max; | |
509 my $tmp=0; | |
510 my $tmp_index; | |
511 my $key=''; | |
512 my $is_promer =0; | |
513 if ($deltaName =~/^promer/) | |
514 { | |
515 $is_promer =1; | |
516 } | |
517 | |
518 open (F,"$deltaName.coords") or die "Problem in getComparisonFile to open file $deltaName.coords\n"; | |
519 my @File=<F>; | |
520 my @a; | |
521 @a=split(/\s+/,$File[0]); | |
522 $tmp=$a[5]; | |
523 $tmp_index=$a[0]; | |
524 if ($is_promer ==1) | |
525 { | |
526 $key=$a[12]; ## nucmer: $key = $a[8] | |
527 } | |
528 else | |
529 { | |
530 $key = $a[8]; | |
531 } | |
532 | |
533 foreach (@File) { | |
534 @a=split(/\s+/); | |
535 | |
536 if ($is_promer ==1) | |
537 { | |
538 push @{ $h{$a[12]}}, "$a[12]\t$a[11]\t$a[7]\t$a[5]\t0\t0\t$a[2]\t$a[3]\t$a[0]\t$a[1]\t1\t$a[5]\n"; | |
539 if ($key eq $a[12] and $a[5]>$tmp) | |
540 { | |
541 $tmp=$a[5]; # length | |
542 $tmp_index=$a[0]; # position reference | |
543 } | |
544 elsif ($key ne $a[12]) { | |
545 ### here possible bugg... | |
546 $h_max{$tmp_index}=$key; | |
547 $key=$a[12]; | |
548 $tmp=$a[5]; # length | |
549 $tmp_index=$a[0]; # position reference | |
550 | |
551 } | |
552 } #end if | |
553 #else i.e. if nucmer | |
554 else | |
555 { | |
556 push @{ $h{$a[8]}}, "$a[8]\t$a[6]\t$a[6]\t$a[5]\t0\t0\t$a[2]\t$a[3]\t$a[0]\t$a[1]\t1\t$a[5]\n"; | |
557 if ($key eq $a[8] and $a[5]>$tmp) | |
558 { | |
559 $tmp=$a[5]; # length | |
560 $tmp_index=$a[0]; # position reference | |
561 } | |
562 elsif ($key ne $a[8]) { | |
563 ### here possible bugg... | |
564 $h_max{$tmp_index}=$key; | |
565 $key=$a[8]; | |
566 $tmp=$a[5]; # length | |
567 $tmp_index=$a[0]; # position reference | |
568 | |
569 } | |
570 } | |
571 | |
572 } | |
573 $h_max{$tmp_index}=$key; | |
574 # print Dumper %h_max; | |
575 | |
576 return (\%h,\%h_max); | |
577 } | |
578 ################################## | |
579 ########################### | |
580 sub writeBinContigs2Ref{ | |
581 my $nameBin = shift; | |
582 my $name = shift; | |
583 | |
584 open (F, "$nameBin") or die "Couldn't find file $nameBin: $!\n"; | |
585 | |
586 my @ar; | |
587 | |
588 my $count=0; | |
589 | |
590 while (<F>) { | |
591 push @ar, $_; | |
592 $count++; | |
593 } | |
594 #### sa4: added error checking:- if file is empty | |
595 if (scalar(@ar) < 1) | |
596 { | |
597 print "No contigs in unusedcontigs file\n"; | |
598 $count = 0; | |
599 | |
600 } | |
601 else | |
602 { | |
603 open (F, "> $name.notMapped.contigs.tab") or die "Couldn't write file $name.tab: $!\n"; | |
604 print F doArt(\@ar); | |
605 close(F); | |
606 } | |
607 return $count; | |
608 | |
609 } | |
610 | |
611 | |
612 ############################## | |
613 sub doArt{ | |
614 my ($ref) = @_; | |
615 | |
616 | |
617 ## hash of array with all positions of the contig | |
618 my %Pos; | |
619 | |
620 ## Hash with note of result line of nucmer | |
621 my %lines; | |
622 | |
623 foreach (@$ref) { | |
624 chomp; | |
625 my @ar=split(/\t/); | |
626 push @{ $Pos{$ar[12]}}, "$ar[0]..$ar[1]"; | |
627 $lines{$ar[12]} .= "FT $_\n"; | |
628 } | |
629 | |
630 my $res; | |
631 | |
632 foreach my $contig (keys %lines) { | |
633 | |
634 if (scalar(@{ $Pos{$contig} } >1)) { | |
635 my $tmp; | |
636 | |
637 foreach (@{ $Pos{$contig} }) { | |
638 $tmp.="$_,"; | |
639 } | |
640 $tmp =~ s/,$//g; # get away last comma | |
641 $res .= "FT contig join($tmp)\n"; | |
642 } | |
643 else { | |
644 $res .= "FT contig $Pos{$contig}[0]\n"; | |
645 } | |
646 $res .= "FT /systematic_id=\"$contig\"\n"; | |
647 $res .= "FT /note=\"Contig $contig couldn't map perfectly.\n"; | |
648 $res .= $lines{$contig}; | |
649 $res .= "FT \"\n"; | |
650 | |
651 } | |
652 | |
653 return $res; | |
654 } | |
655 | |
656 ########################################################################## | |
657 #--------------------------- | |
658 sub makeN #creates a string of Ns | |
659 { | |
660 my $n = shift; | |
661 my $Ns= "N"; | |
662 for (my $i =1; $i < $n; $i+=1) | |
663 { | |
664 $Ns = $Ns."N"; | |
665 } | |
666 return $Ns; | |
667 } | |
668 | |
669 ########################################################################### | |
670 ## reverse complement a sequence | |
671 #--------------------------------------------- | |
672 sub revComp { | |
673 my $dna = shift; | |
674 my $revcomp = reverse($dna); | |
675 | |
676 $revcomp =~ tr/ACGTacgt/TGCAtgca/; | |
677 | |
678 return $revcomp; | |
679 } | |
680 | |
681 ################################################################################ | |
682 ### function to visualize rep. regions in Reference genome. | |
683 sub findRepeats | |
684 { | |
685 my $reference = shift; | |
686 my $name = shift; | |
687 my $path_prog = shift; | |
688 | |
689 # get path | |
690 my ($path_coords) = $path_prog; | |
691 | |
692 $path_coords =~ s/nucmer/show-coords/; | |
693 | |
694 my $call = "$path_prog --maxmatch -c 100 -b 40 -p $name.repeats -l 25 $reference $reference &> /dev/null "; | |
695 !system("$call") or die "Problems doing the nucmer comparison: $call $!\n"; | |
696 $call ="$path_coords -r -c -l $name.repeats.delta > $name.repeats.coords "; | |
697 !system(" $call") or die "Problems doing the show-coords comparison: $call $!\n"; | |
698 | |
699 my @Res; | |
700 open (F, "$name.repeats.coords" ) or die "Problems to open file $reference.repeats.coords: Is MUMmer installed correctly and inserted in the PATH environment variable? ($!)\n"; | |
701 $_=<F>; $_=<F>; $_=<F>; $_=<F>;$_=<F>; | |
702 while (<F>) { | |
703 my @ar = split(/\s+/); | |
704 if (!($ar[1] == $ar[4] or $ar[2] == $ar[5] or $ar[7] > 100000)) { # to exclude self alignment | |
705 | |
706 foreach ($ar[1]..$ar[2]) { | |
707 $Res[($_-1)]++; | |
708 } | |
709 } | |
710 } | |
711 | |
712 ### write the result to the plot file | |
713 | |
714 my $res; | |
715 foreach (@Res){ | |
716 if (defined($_)) { | |
717 $res .= "$_\n"; | |
718 } | |
719 else { | |
720 $res .= "0\n"; | |
721 } | |
722 } | |
723 | |
724 open (F, "> $name.Repeats.plot") or die "Couldn't open file $name.plot to write: $! \n"; | |
725 print F $res; | |
726 close(F); | |
727 | |
728 ### delete files | |
729 unlink("$name.repeats.delta"); | |
730 unlink("$name.repeats.coords"); | |
731 unlink("$name.repeats.cluster"); | |
732 } | |
733 | |
734 ################################################################################ | |
735 # reverse a list of qualities | |
736 #------------------------------ | |
737 sub reverseQual{ | |
738 my $str = shift; | |
739 | |
740 $str =~ s/\s+$//g; | |
741 | |
742 my @ar = split(/\s/,$str); | |
743 my $res; | |
744 | |
745 for (my $i=(scalar(@ar)-1);$i>=0;$i--) { | |
746 $res.="$ar[$i] "; | |
747 } | |
748 return $res; | |
749 } | |
750 | |
751 ############################################################################## | |
752 # ---------------- | |
753 sub getPosCoords{ | |
754 my $ref_ar = shift; | |
755 my $contig = shift; | |
756 | |
757 my $offset = shift; | |
758 my $res; | |
759 # print Dumper $$ref_ar{$contig}; | |
760 foreach (@{$$ref_ar{$contig}}) { | |
761 print "in getPos Coords: $_\n";; | |
762 | |
763 my @ar=split(/\t/); | |
764 $ar[6]+=$offset; | |
765 $ar[7]+=$offset; | |
766 $res .= join("\t",@ar);; | |
767 } | |
768 | |
769 return $res; | |
770 | |
771 } | |
772 ############################################################################# | |
773 # ----------------------------- | |
774 sub getPosCoordsTurn{ | |
775 my $ref_ar = shift; | |
776 my $contig = shift; | |
777 | |
778 my $offset = shift; | |
779 | |
780 | |
781 my $res; | |
782 | |
783 # print Dumper $$ref_ar{$contig}; | |
784 foreach (@{$$ref_ar{$contig}}) { | |
785 my @ar=split(/\t/); | |
786 my $tmp_8=$ar[8]; | |
787 my $tmp_9=$ar[9]; | |
788 | |
789 $ar[8]=$ar[6]+$offset; | |
790 $ar[9]=$ar[7]+$offset; | |
791 $ar[6]=$tmp_8; | |
792 $ar[7]=$tmp_9; | |
793 | |
794 ## change query subject | |
795 | |
796 $res .= join("\t",@ar);; | |
797 } | |
798 | |
799 return $res; | |
800 | |
801 } | |
802 | |
803 ############################################################################ | |
804 #------------------------ | |
805 sub printStats{ | |
806 | |
807 my ($num_fortillingcontigs,$num_notsetTilling,$num_mapped, $num_contigs, $num_inComparisoncontigs, $ref_len, $total_bases_mpd) = @_; | |
808 $num_fortillingcontigs=$num_notsetTilling+$num_mapped; | |
809 my $res; | |
810 $res.= "Short statistics of run.\n"; | |
811 $res.= "$num_contigs\t\tcontigs entered to be mapped against the reference.\n"; | |
812 $res.= sprintf("$num_inComparisoncontigs\t%0.2f \%\tmade a hit against the reference to the given parameter (-s -d etc)\n",($num_inComparisoncontigs*100/$num_contigs)); | |
813 $res.= sprintf("$num_fortillingcontigs\t%0.2f \%\twere considered for the tilling graph (-s -d etc)\n",($num_fortillingcontigs*100/$num_contigs)); | |
814 $res.= sprintf("$num_mapped\t%0.2f \%\tare mapped in the tilling graph (-s -d etc)\n",($num_mapped*100/$num_contigs)); | |
815 $res.= sprintf("\nCoverage: The reference is $ref_len long. Up to $total_bases_mpd bp (%0.2f \%) are covered by the contigs (This doesn't mean that these regions are really similar...)\n",($total_bases_mpd*100/$ref_len)); | |
816 | |
817 print $res; | |
818 | |
819 } | |
820 ################################################## | |
821 #### Do Tiling | |
822 #---------------------------------------------------- | |
823 sub doTiling { | |
824 | |
825 my ($mummer_tiling, $path_toPass, $path_dir,$reference, $query_file, $choice, $prefix,$mlfas, $fasta_bin, $avoid_Ns, $ref_len, $gaps_2file, $ref_inline, $add_bin_2ps, $pick_primer, $flank, $chk_uniq, $run_blast) = @_; | |
826 | |
827 ### these are also defined in the main script.... to be changed!! | |
828 my ($num_contigs , $num_inbincontigs, $avg_cont_size,$num_overlaps , $num_gaps, | |
829 $num_mapped,$total_bases_mpd,$p_ref_covered,$num_ambigus,$num_inComparisoncontigs, | |
830 $num_fortillingcontigs,$num_notsetTilling)= (0,0,0,0,0,0,0,0,0,0,0,0); | |
831 | |
832 my ($href, $ref_contigs_qual) = hash_contigs($query_file); | |
833 my $qualAvailable=0; | |
834 my %contigs_hash = %{$href}; | |
835 my @c_keys = keys %contigs_hash; | |
836 $num_contigs = scalar(@c_keys); | |
837 my @cont_lens; | |
838 my (@ids ,$id, $id_len); | |
839 my (@Rs, @Re, @G, @Len, @cov, @pid, @orient, @Cid); | |
840 my (@Ps, @Pe); | |
841 my ($total); | |
842 my $g; #define gap size between contigs | |
843 my $tiling_gap; #gap size from tiling graph | |
844 open (TIL, $mummer_tiling) or die "Could not open $mummer_tiling: $!"; | |
845 while (<TIL>) | |
846 { | |
847 chomp; | |
848 if ($_ =~/^>/) | |
849 { | |
850 my $line = substr $_, 1; | |
851 my @splits = split /\s+/, $line; | |
852 $id = $splits[0]; | |
853 push @ids, $id; | |
854 $id_len= $splits[1]; | |
855 } | |
856 else | |
857 { | |
858 my @splits = split /\s+/, $_; | |
859 push @Rs, $splits[0]; | |
860 push @Re, $splits[1]; | |
861 push @G, $splits[2]; | |
862 push @Len, $splits[3]; | |
863 push @cov, $splits[4]; | |
864 push @pid, $splits[5]; | |
865 push @orient, $splits[6]; | |
866 push @Cid, $splits[7]; | |
867 } | |
868 | |
869 } | |
870 close (TIL); | |
871 if (scalar(@Rs) != scalar(@Re)) | |
872 { | |
873 print "ERROR: unequal array size\n"; | |
874 exit; | |
875 } | |
876 else | |
877 { | |
878 $total = scalar(@Rs); | |
879 $num_mapped = scalar(@Rs); | |
880 } | |
881 my $ref_loc = $reference; # get locations of reference and query files | |
882 my $qry_loc = $query_file; | |
883 my $dif_dir =0; #assume query and reference are in the working directory | |
884 my @splits_reference = split (/\//, $reference); | |
885 my $new_reference_file = $splits_reference[(scalar(@splits_reference)-1)]; | |
886 my @splits_query = split (/\//, $query_file); | |
887 my $new_query_file = $splits_query[(scalar(@splits_query)-1)]; | |
888 if ($prefix eq "") | |
889 { | |
890 $prefix = $new_query_file."_".$new_reference_file; | |
891 } | |
892 #------------------------------------------------------------------- | |
893 #define file handles for output files and open files to write output | |
894 #------------------------------------------------------------------- | |
895 my ($seqFH,$tabFH,$binFH,$crunchFH, $gapFH, $mlFH, $dbinFH, $avoidNFH); | |
896 open ($seqFH, '>', $prefix . '.fasta') or die "Could not open file $prefix.fasta for write: $!\n"; | |
897 open ($tabFH, '>', $prefix . '.tab') or die "Could not open file $prefix.tab for write: $!\n"; | |
898 open ($binFH, '>', $prefix . '.bin') or die "Could not open file $prefix.bin for write: $!\n"; | |
899 open ($crunchFH, '>', $prefix . '.crunch') or die "Could not open file $prefix.crunch for write: $!\n"; | |
900 open ($gapFH, '>', $prefix . '.gaps') or die "Could not open file $prefix.gaps for write: $!\n"; | |
901 if ($mlfas ==1) | |
902 { | |
903 open ($mlFH, '>', $prefix . '.contigs.fas') or die "Could not open file $prefix.contigs.fas for write: $!\n"; | |
904 } | |
905 if ($fasta_bin ==1) | |
906 { | |
907 open ($dbinFH, '>', $prefix . '.contigsInbin.fas') or die "Could not open file $prefix.contigsInbin.fas for write: $!\n"; | |
908 } | |
909 if ($avoid_Ns ==1) | |
910 { | |
911 open ($avoidNFH, '>', $prefix .'.NoNs.fasta') or die "Could not open file $prefix.NoNs.fasta for write: $!\n"; | |
912 } | |
913 #------------------------------------------------------------------------- | |
914 # Writing tiling graph and generating a pseudomolecule | |
915 # Note use use ps for pseudomolecule | |
916 #@Ps = start of ps, and @Pe = end of ps | |
917 my $ps_start =1; | |
918 $Ps[0] = 1; | |
919 $Pe[0] = $Ps[0] + $Len[0] -1; | |
920 my $tmp_qual; | |
921 my $tmp_nqual; | |
922 my $tmp_seq =""; | |
923 my $tmp_nseq =""; | |
924 print "Total contigs = $total \n"; | |
925 | |
926 #------------------------------------------------------------ | |
927 # The 'for loop' loops over each contig in the Tiling output | |
928 #Writing to file is done for each contig to speed up the process | |
929 #This part could potentially be a separate subroutine | |
930 | |
931 print $tabFH "ID ",$id, "\n"; | |
932 print $seqFH ">", "ordered_", $id, "\n"; | |
933 for (my $i=1; $i <= $total; $i+=1) | |
934 { | |
935 my $covv =sprintf("%.0f",$cov[$i -1]); #ROUNDING | |
936 my $pidd = sprintf("%.0f", $pid[$i -1]); | |
937 my ($contig_coord, $color, $contig_seq); | |
938 my $contig_qual=''; | |
939 $tiling_gap = $G[$i -1]; | |
940 if ($tiling_gap <= 5){ #insert 100Ns for overlaps and gaps of size less than or equal to 5bp | |
941 $g = 99; # default gap size to | |
942 } | |
943 else{ | |
944 $g = $tiling_gap; | |
945 } | |
946 if (defined($Len[$i])) | |
947 { | |
948 $Ps[$i] = $Pe[$i-1] +$g +1; | |
949 $Pe[$i] = $Ps[$i] + $Len[$i] -1; | |
950 $total_bases_mpd+=$Len[$i]; | |
951 } | |
952 | |
953 if ($Rs[$i -1] <0) #check if a reference starting position is less than 0 | |
954 { | |
955 $Rs[$i -1] =1; | |
956 } | |
957 | |
958 if($orient[$i-1] eq "+") | |
959 { | |
960 $contig_coord = $Ps[$i -1]."..".$Pe[$i-1]; | |
961 $color = 4; | |
962 $contig_seq = $contigs_hash{$Cid[$i-1]}; | |
963 } | |
964 else | |
965 { | |
966 $contig_coord = "complement(".$Ps[$i -1]."..".$Pe[$i-1].")"; | |
967 $color =3; | |
968 $contig_seq = revComp($contigs_hash{$Cid[$i-1]}); #REVERSE COMPLEMENT A SEQUENCE | |
969 | |
970 } | |
971 push (@cont_lens, length($contig_seq)); | |
972 | |
973 # tdo | |
974 if (defined($$ref_contigs_qual{$Cid[$i-1]})) { | |
975 ## flag to know, that the qual exists | |
976 $qualAvailable=1; | |
977 $contig_qual = $$ref_contigs_qual{$Cid[$i-1]}; | |
978 } | |
979 $tmp_qual .= $contig_qual; | |
980 $tmp_seq .= $contig_seq; | |
981 if ($avoid_Ns ==1) | |
982 { | |
983 $tmp_nseq.= $contig_seq; | |
984 #tdo | |
985 $tmp_nqual .= $contig_qual; | |
986 } | |
987 if ($mlfas ==1) | |
988 { | |
989 my $head = $Cid[$i-1]; | |
990 my $multifasta_seq = write_Fasta_headers ($contig_seq,$head); | |
991 print $mlFH $multifasta_seq; | |
992 } | |
993 if ($Re[$i -1] > $ref_len) | |
994 { | |
995 $Re[$i -1] = $ref_len -1; | |
996 } | |
997 if ($Pe[$i -1] > length($tmp_seq)) | |
998 { | |
999 $Pe[$i -1] = length($tmp_seq); | |
1000 } | |
1001 | |
1002 #----------------------------------------- | |
1003 print $crunchFH $covv, " ", $pidd, " ", $Ps[$i -1], " ", $Pe[$i -1], " ", $Cid[$i -1], " ", $Rs[$i -1], " ", $Re[$i-1], " ", "unknown NONE\n"; | |
1004 | |
1005 #WRITE FEATURE FILE | |
1006 print $tabFH "FT contig ",$contig_coord, "\n"; | |
1007 print $tabFH "FT ", "/systematic_id=\"", $Cid[$i-1],"\"","\n"; | |
1008 print $tabFH "FT ", "/method=\"", "mummer\"", "\n"; | |
1009 print $tabFH "FT ", "/Contig_coverage=\"",$cov[$i -1], "\"", "\n"; | |
1010 print $tabFH "FT ", "/Percent_identity=\"",$pid[$i -1], "\"", "\n"; | |
1011 if ($tiling_gap > 1) #WRITE GAP LOCATIONS AND SIZE TO FILE | |
1012 { | |
1013 my $gap_start = $Pe[$i -1] +1; | |
1014 my $gap_end = ""; | |
1015 if (defined $Ps[$i]) | |
1016 { | |
1017 $gap_end = $Ps[$i] -1; | |
1018 } | |
1019 my $ref_start = $Re[$i -1] +1; | |
1020 my $ref_end; | |
1021 if (defined $Rs[$i]) | |
1022 { | |
1023 $ref_end =$Rs[$i]-1; | |
1024 } | |
1025 else | |
1026 { | |
1027 $ref_end = "END"; | |
1028 } | |
1029 print $gapFH "Gap\t",$tiling_gap, "\t", $gap_start, "\t", $gap_end, "\t", $ref_start, "\t", $ref_end,"\n"; | |
1030 if ($gaps_2file ne "" && $ref_start < $ref_len) | |
1031 { | |
1032 my $ref_gapsFH; | |
1033 open ($ref_gapsFH, '>', $gaps_2file.'.Gaps_onRef') or die "Could not open file $gaps_2file.Gaps_onRef for write: $!\n"; | |
1034 my $gapOnref = substr ($ref_inline, $ref_start, $g); | |
1035 print $ref_gapsFH ">",$g,"_",$ref_start, "\n"; | |
1036 my $file_toPrint = write_Fasta ($gapOnref); | |
1037 print $ref_gapsFH $file_toPrint; | |
1038 } | |
1039 } | |
1040 else | |
1041 { | |
1042 $color = 5; | |
1043 print $tabFH "FT ", "/Overlapping=\"", "YES\"", "\n"; | |
1044 } | |
1045 my $ns = makeN($g); | |
1046 $tmp_seq = $tmp_seq.$ns; | |
1047 #tdo | |
1048 for (1..length($ns)) { | |
1049 $tmp_qual .= "0 "; | |
1050 } | |
1051 print $tabFH "FT ", "/colour=\"",$color, "\"", "\n"; | |
1052 } | |
1053 #------------------------------------------------------------------ | |
1054 #tdo | |
1055 my @Quality_Array; | |
1056 if ($qualAvailable) { | |
1057 @Quality_Array = split(/\s/,$tmp_qual); | |
1058 my $res; | |
1059 foreach (@Quality_Array) { | |
1060 $res .= "$_\n"; | |
1061 } | |
1062 ## get name | |
1063 my @splits_query = split (/\//, $query_file); | |
1064 $new_query_file = $splits_query[(scalar(@splits_query)-1)]; | |
1065 open (F,"> $new_query_file.qual.plot") or die "problems\n"; | |
1066 print F $res; | |
1067 close(F); | |
1068 } | |
1069 ##WRITE PSEUDOMOLECULE WITHOUT 'N's | |
1070 #-------------------------------------- | |
1071 if ($avoid_Ns ==1) | |
1072 { | |
1073 print $avoidNFH ">", "ordered_", $id, "without 'N's","\n"; | |
1074 my $toWrite = write_Fasta ($tmp_nseq); | |
1075 print $avoidNFH $toWrite; | |
1076 } | |
1077 #################################### | |
1078 #WRITE CONTIGS WITH NO HIT TO FILE # | |
1079 ################################# | |
1080 my %Cids; | |
1081 | |
1082 foreach(@Cid) | |
1083 { | |
1084 chomp; | |
1085 $Cids{$_} = 1; | |
1086 } | |
1087 my @contigs_2bin = (); | |
1088 my %h_contigs_2bin; | |
1089 | |
1090 foreach (@c_keys) | |
1091 { | |
1092 push(@contigs_2bin, $_) unless exists $Cids{$_}; | |
1093 | |
1094 } | |
1095 foreach(@contigs_2bin) | |
1096 { | |
1097 $h_contigs_2bin{$_}=1; | |
1098 | |
1099 print $binFH "$_ \n"; | |
1100 | |
1101 } | |
1102 $num_inbincontigs= scalar(@contigs_2bin); | |
1103 | |
1104 ######## | |
1105 # WRITE PSEUDOMOLECULE TO FILE | |
1106 #---------------------------------- | |
1107 my $new_seq = $tmp_seq; | |
1108 my $prev_len = length($tmp_seq); | |
1109 my $total_len = $prev_len; | |
1110 foreach (@contigs_2bin) | |
1111 { | |
1112 chomp; | |
1113 #my $binseq = $contigs_hash{$contigs_2bin[$i]}; | |
1114 my $l = length ($contigs_hash{$_}); | |
1115 $total_len +=$l; | |
1116 } | |
1117 if ($add_bin_2ps ==1) #appending unmapped contigs to pseudomolecule | |
1118 { | |
1119 | |
1120 for (my $i =0; $i < scalar(@contigs_2bin); $i+=1) | |
1121 { | |
1122 my $binseq = $contigs_hash{$contigs_2bin[$i]}; | |
1123 $new_seq .=$contigs_hash{$contigs_2bin[$i]}; | |
1124 my $len_current_contig = length($binseq); | |
1125 my $start = $prev_len +1; | |
1126 my $end = $start + $len_current_contig -1; | |
1127 my $col = 7; | |
1128 if ($start > $total_len) | |
1129 { | |
1130 $start = $total_len; | |
1131 } | |
1132 if ($end >$total_len) | |
1133 { | |
1134 $end = $total_len; | |
1135 } | |
1136 my $co_cord = $start."..".$end; | |
1137 my $note = "NO_HIT"; | |
1138 print $tabFH "FT contig ",$co_cord, "\n"; | |
1139 print $tabFH "FT ", "/systematic_id=\"", $contigs_2bin[$i],"\"","\n"; | |
1140 print $tabFH "FT ", "/method=\"", "mummer\"", "\n"; | |
1141 print $tabFH "FT ", "/colour=\"",$col, "\"", "\n"; | |
1142 print $tabFH "FT ", "/", $note, "\n"; | |
1143 $prev_len= $end; | |
1144 } | |
1145 } | |
1146 my $to_write = write_Fasta ($new_seq); | |
1147 print $seqFH $to_write; | |
1148 ######## | |
1149 #WRITE CONTIGS IN BIN TO FILE # | |
1150 #------------------------------------------------------ | |
1151 if ($fasta_bin ==1) | |
1152 { | |
1153 foreach(@contigs_2bin) | |
1154 { | |
1155 print $dbinFH ">", $_, "\n"; | |
1156 my $to_write = write_Fasta($contigs_hash{$_}); | |
1157 print $dbinFH $to_write; | |
1158 } | |
1159 } | |
1160 #unlink ("$choice.delta"); | |
1161 #unlink ("$choice.filtered.delta"); | |
1162 #unlink ("$choice.cluster"); | |
1163 #unlink ("$choice.tiling"); | |
1164 #PRINT FINAL MESSAGE | |
1165 print " FINISHED CONTIG ORDERING\n"; | |
1166 print "\nTo view your results in ACT\n\t\t Sequence file 1: $new_reference_file\n\t\t Comparison file 1: $prefix.crunch\n\t\t Sequence file 2: $prefix.fasta\n | |
1167 \t\tACT feature file is: $prefix.tab\n | |
1168 \t\tContigs bin file is: $prefix.bin\n | |
1169 \t\tGaps in pseudomolecule are in: $prefix.gaps\n\n"; | |
1170 | |
1171 #Run tblastx.... | |
1172 if ($run_blast ==1) | |
1173 { | |
1174 print "Running tblastx on contigs in bin...\nThis may take several minutes ...\n"; | |
1175 my $formatdb = 'formatdb -p F -i' ; | |
1176 # my @formating = ` | |
1177 !system("$formatdb $new_reference_file") or die "ERROR: Could not find 'formatdb' for blast\n"; | |
1178 my $blast_opt = 'blastall -m 9 -p tblastx -d '; | |
1179 my $contigs_inBin = $prefix.'.contigsInbin.fas'; | |
1180 # my @bigger_b = ` | |
1181 !system("$blast_opt $new_reference_file -i $contigs_inBin -o blast.out") or die "ERROR: Could not find 'blastall' , please install blast in your working path (other dir==0)\n$blast_opt $new_reference_file -i $contigs_inBin -o blast.out\n \n"; | |
1182 } | |
1183 | |
1184 | |
1185 if ($pick_primer == 1) | |
1186 { | |
1187 print " DESIGNING PRIMERS FOR GAP CLOSURE...\n"; | |
1188 my $qq = "$prefix.fasta"; | |
1189 pickPrimers($qq, $reference, $flank, $path_toPass, $chk_uniq,$qualAvailable,@Quality_Array); | |
1190 } | |
1191 } | |
1192 | |
1193 | |
1194 #------------------------------ | |
1195 sub write_Fasta { | |
1196 my $sequence = shift; | |
1197 my $fasta_seq =""; | |
1198 my $length = length($sequence); | |
1199 if ($length <= 60) | |
1200 { | |
1201 $fasta_seq = $sequence ; | |
1202 } | |
1203 elsif ($length> 60 ) | |
1204 { | |
1205 for (my $i =0; $i < $length; $i+=60) | |
1206 { | |
1207 my $tmp_s = substr $sequence, $i, 60; | |
1208 $fasta_seq .= $tmp_s."\n"; | |
1209 } | |
1210 } | |
1211 | |
1212 return $fasta_seq; | |
1213 } | |
1214 | |
1215 sub write_Fasta_headers { | |
1216 my $sequence = shift; | |
1217 my $header = shift; | |
1218 my $fasta_seq =">$header\n"; | |
1219 my $length = length($sequence); | |
1220 if ($length <= 60) | |
1221 { | |
1222 $fasta_seq.= $sequence ; | |
1223 } | |
1224 elsif ($length> 60 ) | |
1225 { | |
1226 for (my $i =0; $i < $length; $i+=60) | |
1227 { | |
1228 my $tmp_s = substr $sequence, $i, 60; | |
1229 $fasta_seq .= $tmp_s."\n"; | |
1230 } | |
1231 } | |
1232 | |
1233 return $fasta_seq; | |
1234 } | |
1235 | |
1236 #---------------------------------------------- END OF CONTIG ORDERING SUBROUTINES---------------------------------------------------- | |
1237 | |
1238 #----------------------------------------------- PRIMER DESIGN --------------------------------------------------- | |
1239 sub pickPrimers | |
1240 { | |
1241 #$ps = pseudo molecule,$rf = reference, $flan = flanking region size | |
1242 my ($ps,$rf, $flan, $passed_path, $chk_uniq,$qualAvailable, @Quality_Array); | |
1243 if (@_==4){ | |
1244 ($rf,$ps, $flan, $chk_uniq) = @_; | |
1245 print "Primers without ordering..\n"; | |
1246 print $rf; | |
1247 $passed_path = "nucmer"; | |
1248 $qualAvailable =0; | |
1249 @Quality_Array = []; | |
1250 } | |
1251 else #(@_ == 7) | |
1252 { | |
1253 ($ps,$rf, $flan, $passed_path, $chk_uniq, $qualAvailable, @Quality_Array) = @_; | |
1254 } | |
1255 | |
1256 my $dna=''; | |
1257 my @gappedSeq; | |
1258 my $records=''; | |
1259 my @sequence; | |
1260 my $input=''; | |
1261 #tdo | |
1262 my @gappedQual; | |
1263 #my $quality=''; | |
1264 my $path_toPass = $passed_path; | |
1265 my @fasta; | |
1266 my $query=''; | |
1267 my @exc_regions; | |
1268 my $ch_name; | |
1269 #my $flank = $flan; | |
1270 open (FH, $rf) or die "Could not open reference file\n"; | |
1271 open (FH2, $ps) or die "Could not open query/pseudomolecule file\n"; | |
1272 my $ref; #print ".... ", $rf; exit; | |
1273 my @r = <FH>; | |
1274 my @qry = <FH2>; | |
1275 my $dn = join ("", @qry); | |
1276 $ref = join ("", @r); | |
1277 $dna = Fasta2ordered ($dn); | |
1278 #check if primer3 is installed | |
1279 my $pr3 = "primer3_core"; | |
1280 my ($pr3_path, $pr3_Path, $pr3_path_dir) = checkProg ($pr3); | |
1281 #my @check_prog = `which primer3_core`; | |
1282 open (PRI, '>primer3.summary.out') or die "Could not open file for write\n"; | |
1283 | |
1284 # | |
1285 | |
1286 #print $ref; exit; | |
1287 #PARSING FOR PRIMER3 INPUT | |
1288 my ($opt,$min,$max,$optTemp,$minTemp,$maxTemp,$flank,$lowRange,$maxRange,$gcMin,$gcOpt,$gcMax,$gclamp,$exclude,$quality) = getPoptions($qualAvailable); | |
1289 my ($gap_position,@positions, %seq_hash); | |
1290 | |
1291 my $exc1 = $flank -$exclude; #start of left exclude | |
1292 print "Please wait... extracting target regions ...\n"; | |
1293 #regular expression extracts dna sequence before and after gaps in sequence (defined by N) | |
1294 while($dna=~ /([atgc]{$flank,$flank}N+[atgc]{$flank,$flank})/gi) | |
1295 { | |
1296 $records= $1; | |
1297 push (@gappedSeq, $records); | |
1298 $gap_position = index($dna, $records); | |
1299 push @positions, $gap_position; | |
1300 $seq_hash{$gap_position}=$records; | |
1301 #dna | |
1302 if ($qualAvailable) { | |
1303 my $res; | |
1304 for (my $nn=($gap_position-1); $nn <= ($gap_position-1+length($records)-1); $nn++) { | |
1305 $res.="$Quality_Array[$nn] "; | |
1306 } | |
1307 push @gappedQual, $res; | |
1308 } | |
1309 } | |
1310 #loop prints out primer targets into a file format accepted by primer3 | |
1311 my $count=1; | |
1312 my $identify=''; | |
1313 my $seq_num = scalar @gappedSeq; | |
1314 my $name= " "; | |
1315 | |
1316 my ($totalp, @left_name, @right_name, @left_seq, @right_seq); | |
1317 | |
1318 my ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$left_Exclude,$right_Exclude, $primers_toExc, $prod_size)= | |
1319 ("","","","","","","","","","", "", ""); | |
1320 | |
1321 print $seq_num, " gaps found in target sequence\n"; | |
1322 print "Please wait...\nLooking for primers...\n"; | |
1323 print "Running Primer3 and checking uniquness of primers...\nCounting left and right primer hits from a nucmer mapping (-c 15 -l 15)\n"; | |
1324 | |
1325 for (my $i=0; $i<$seq_num; $i+=1) | |
1326 { | |
1327 $identify = $count++; | |
1328 if (defined $ch_name) | |
1329 { | |
1330 $name = $ch_name; | |
1331 } | |
1332 my $len = length($gappedSeq[$i]); | |
1333 my $exc2 = $len - $flank; | |
1334 open(FILE, '>data') or die "Could not open file\n"; | |
1335 #tdo | |
1336 my $qual=''; | |
1337 if ($qualAvailable) { | |
1338 $qual="PRIMER_SEQUENCE_QUALITY=$gappedQual[$i]\nPRIMER_MIN_QUALITY=$quality\n"; | |
1339 } | |
1340 | |
1341 #WARNING: indenting the following lines may cause problems in primer3 | |
1342 print FILE "PRIMER_SEQUENCE_ID=Starting_Pos $positions[$i] | |
1343 SEQUENCE=$gappedSeq[$i] | |
1344 PRIMER_OPT_SIZE=$opt | |
1345 PRIMER_MIN_SIZE=$min | |
1346 PRIMER_MAX_SIZE=$max | |
1347 PRIMER_OPT_TM=$optTemp | |
1348 PRIMER_MIN_TM=$minTemp | |
1349 PRIMER_MAX_TM=$maxTemp | |
1350 PRIMER_NUM_NS_ACCEPTED=1 | |
1351 PRIMER_PRODUCT_SIZE_RANGE=$lowRange-$maxRange | |
1352 PRIMER_MIN_GC=$gcMin | |
1353 PRIMER_GC_CLAMP =$gclamp | |
1354 PRIMER_OPT_GC_PERCENT=$gcOpt | |
1355 PRIMER_MAX_GC=$gcMax | |
1356 PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION=$exc1,$exclude $exc2,$exclude | |
1357 ".$qual."Number To Return=1 | |
1358 =\n"; | |
1359 close FILE; | |
1360 | |
1361 #runs primer3 from commandline | |
1362 | |
1363 ################# NOTE: PRIMER3 SHOULD BE IN YOUR WORKING PATH ######### | |
1364 | |
1365 my @Pr3_output = `$pr3_path -format_output <data`; | |
1366 #print $positions[$i], "\t", $i, " ", $path_toPass, " ", $rf, $exc1, " ",$exc2, "\n"; | |
1367 my $fil = join (":%:", @Pr3_output); | |
1368 my ($uniq_primer, $string,$left_nm,$right_nm,$left_sq, $right_sq,$left_strt,$left_ln, $right_End,$right_ln,$primers_toExclude, $product_size) | |
1369 = check_Primers ($fil, $positions[$i], $i,$path_toPass, $rf, $exc1, $exc2); | |
1370 | |
1371 print PRI $string; | |
1372 if ($uniq_primer ==1) | |
1373 { | |
1374 $leftP_names.=$left_nm."\n"; | |
1375 $rightP_names.=$right_nm."\n"; | |
1376 $leftP_seqs.=$left_sq."\n"; | |
1377 $rightP_seqs.=$right_sq."\n"; | |
1378 $leftP_start.=$left_strt."\n"; | |
1379 $leftP_lens.=$left_ln."\n"; | |
1380 $rightP_ends.=$right_End."\n"; | |
1381 $rightP_lens.=$right_ln."\n"; | |
1382 $left_Exclude.=$exc1."\n"; | |
1383 $right_Exclude.=$exc2."\n"; | |
1384 $prod_size.=$product_size."\n"; | |
1385 } | |
1386 if ($primers_toExclude ne "") | |
1387 { | |
1388 $primers_toExc.= $primers_toExclude; #."\n"; | |
1389 } | |
1390 | |
1391 } | |
1392 write_Primers ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$primers_toExc,$left_Exclude,$right_Exclude, $prod_size); | |
1393 #write_Primers (@left_name, @right_name, @left_seq, @right_seq,@left_start, @left_len, @right_end, @right_len, @left_exclude, @right_exclude, $primers_toExclude); | |
1394 | |
1395 } | |
1396 | |
1397 #checks the uniqueness of primers | |
1398 #input an array with promer3 output for each gap | |
1399 sub check_Primers | |
1400 { | |
1401 | |
1402 my ($fil, $position, $index,$path_toPass, $rf, $exc1, $exc2) = @_; | |
1403 my @Pr3_output = split /:%:/, $fil; | |
1404 my ($left_name, $right_name, $left_seq, $right_seq, $left_start,$left_len,$right_end,$right_len,$left_exclude,$right_exclude) = ("", "", "", "", "", "", "", "", "", ""); | |
1405 my $primers_toExclude =""; | |
1406 my $product_size =""; | |
1407 my $string =""; | |
1408 my $uniq_primer = 0; | |
1409 $string.="=========================================================================================\n"; | |
1410 $string.="Primer set for region starting at ".$position."\n"; | |
1411 | |
1412 if (defined $Pr3_output[5] && defined $Pr3_output[6]) | |
1413 { | |
1414 if ($Pr3_output[5]=~ /LEFT PRIMER/) | |
1415 { | |
1416 # print $Pr3_output[5]; | |
1417 #check uniquness of primer against the genome | |
1418 my @splits_1 = split (/\s+/, $Pr3_output[5]); | |
1419 my $left_primer = $splits_1[8]; | |
1420 my $left_st = $splits_1[2]; | |
1421 my $left_length = $splits_1[3]; | |
1422 | |
1423 my @splits_2 = split (/\s+/, $Pr3_output[6]); | |
1424 my $right_primer = $splits_2[8]; | |
1425 my $right_st = $splits_2[2]; | |
1426 my $right_length = $splits_2[3]; | |
1427 | |
1428 open (QRY_1, '>./left_query'); # open a file for left primers | |
1429 print QRY_1 ">left_01\n"; # | |
1430 print QRY_1 $left_primer,"\n"; | |
1431 open (QRY_2, '>./right_query'); | |
1432 print QRY_2 ">right_01\n"; | |
1433 print QRY_2 $right_primer,"\n"; | |
1434 | |
1435 my ($left_count, $right_count); | |
1436 #if ($chk_uniq eq "nucmer") | |
1437 #{ | |
1438 my $options = "-c 15 --coords -l 15 "; | |
1439 my $rq = "right_query"; | |
1440 my $lq = "left_query"; | |
1441 my (@right_ps, @left_ps); | |
1442 # print $path_toPass, "\t", $options, "\n"; | |
1443 | |
1444 | |
1445 my @Rrun = `$path_toPass $options -p R $rf $rq &> /dev/null`; | |
1446 print "."; | |
1447 my $f1 = "R.coords"; | |
1448 open (RP, $f1) or die "Could not open file $f1 while checking uniqueness of right primer\n"; | |
1449 while (<RP>) | |
1450 { | |
1451 chomp; | |
1452 if ($_ =~ /right_01$/) | |
1453 { | |
1454 push @right_ps, $_; | |
1455 } | |
1456 } | |
1457 close (RP); | |
1458 my @Lrun = `$path_toPass $options -p L $rf $lq &> /dev/null`; | |
1459 print "."; | |
1460 my $f2 = "L.coords"; | |
1461 open (LQ, $f2) or die "Could not open file $f2\n"; | |
1462 while (<LQ>) | |
1463 { | |
1464 chomp; | |
1465 if ($_ =~ /left_01$/) | |
1466 { | |
1467 push @left_ps, $_; | |
1468 } | |
1469 } | |
1470 close (LQ); | |
1471 $right_count = scalar (@right_ps); | |
1472 $left_count = scalar(@left_ps); | |
1473 #check if a primer is not in the excluded region:: | |
1474 my $primer_NearEnd =0; | |
1475 if ($left_st > $exc1 || $right_st < $exc2) | |
1476 { | |
1477 $primer_NearEnd = 1; | |
1478 } | |
1479 | |
1480 if ($left_count < 2 && $right_count<2 && $primer_NearEnd ==0) | |
1481 { | |
1482 $string.=$left_count."\t".$Pr3_output[5]."\n"; | |
1483 $string.=$right_count."\t".$Pr3_output[6]."\n"; | |
1484 $string.="***************************** PRIMER3 OUTPUT **************************\n"; | |
1485 foreach (@Pr3_output) {$string.=$_;} | |
1486 | |
1487 my @prod_size_split = split /\s+/, $Pr3_output[10]; | |
1488 | |
1489 $product_size = substr($prod_size_split[2], 0, -1); | |
1490 $left_name = $position; | |
1491 $right_name = $position; | |
1492 my $lp_uc = uc ($left_primer); | |
1493 my $rp_uc = uc($right_primer); | |
1494 #print $left_count, "..", $right_count, "\t"; | |
1495 $left_seq = $lp_uc; | |
1496 $right_seq= $rp_uc; | |
1497 | |
1498 $left_start= $left_st; | |
1499 $left_len = $left_length; | |
1500 | |
1501 $right_end = $right_st; | |
1502 $right_len = $right_length; | |
1503 | |
1504 $left_exclude = $exc1; | |
1505 $right_exclude =$exc2; | |
1506 $uniq_primer =1; | |
1507 } | |
1508 else | |
1509 { | |
1510 if ($primer_NearEnd ==1) | |
1511 { | |
1512 $string.="One of the oligos is near the end of a contig\n"; | |
1513 } | |
1514 else | |
1515 { | |
1516 $string.="Primer set not unique\n"; | |
1517 } | |
1518 $primers_toExclude.=">L.".$position."\n".$left_primer."\n"; | |
1519 $primers_toExclude.=">R.".$position."\n".$right_primer."\n"; | |
1520 } | |
1521 | |
1522 } | |
1523 else | |
1524 { | |
1525 $string.="No Primers found\n"; | |
1526 } | |
1527 } | |
1528 | |
1529 return ($uniq_primer, $string,$left_name,$right_name,$left_seq, $right_seq,$left_start,$left_len, $right_end,$right_len,$primers_toExclude, $product_size); | |
1530 | |
1531 | |
1532 } | |
1533 | |
1534 ###------------------------------------ | |
1535 # Writes primers and their regions to file | |
1536 sub write_Primers { | |
1537 my ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$primers_toExclude,$left_Exclude,$right_Exclude, $product_sizes) = @_; | |
1538 my (@left_name, @right_name, @left_seq, @right_seq, @left_start, @left_len, @right_end, @right_len, @left_exclude, @right_exclude, @product_size); | |
1539 | |
1540 #open files to read | |
1541 @left_name = split /\n/, $leftP_names; | |
1542 @right_name= split /\n/, $rightP_names; | |
1543 @left_seq = split /\n/, $leftP_seqs; | |
1544 @right_seq = split /\n/, $rightP_seqs; | |
1545 | |
1546 @left_start = split /\n/, $leftP_start; | |
1547 @left_len = split /\n/, $leftP_lens; | |
1548 @right_end = split/\n/, $rightP_ends; | |
1549 @right_len = split /\n/, $rightP_lens; | |
1550 @left_exclude = split /\n/, $left_Exclude; | |
1551 @right_exclude = split /\n/,$right_Exclude; | |
1552 @product_size = split /\n/, $product_sizes; | |
1553 | |
1554 my $primers_withSize =""; | |
1555 open (SEN, '>sense_primers.out') or die "Could not open file for write\n"; | |
1556 open (ASEN, '>antiSense_primers.out') or die "Could not open file for write\n"; | |
1557 open (REG_1, '>sense_regions.out') or die "Could not open file for write\n"; | |
1558 open (REG_2, '>antiSense_regions.out') or die "Could not open file for write\n"; | |
1559 | |
1560 if ($primers_toExclude ne "") | |
1561 { | |
1562 open (PEX, '>primers_toExclude.out') or die "Could not open file for write\n"; | |
1563 print PEX $primers_toExclude; | |
1564 } | |
1565 | |
1566 | |
1567 my $totalp = scalar (@left_name); | |
1568 | |
1569 #print $totalp, "\n"; exit; | |
1570 | |
1571 my $well_pos; | |
1572 my $max_plates = ceil($totalp/96); | |
1573 #print "MAX Ps ", $max_plates, "\n"; | |
1574 my $plate=1; | |
1575 my $sen =""; | |
1576 my $asen =""; | |
1577 my $plate_counter =0; | |
1578 my $wells = 96; | |
1579 for (my $index =0; $index < $totalp; $index += $wells) | |
1580 { | |
1581 my $do = $index; | |
1582 my $upper_bound= $index + $wells; | |
1583 if ($upper_bound > $totalp) | |
1584 { | |
1585 $upper_bound = $totalp; | |
1586 } | |
1587 | |
1588 for (my $j=$index; $j <= ($upper_bound-1); $j+=1) | |
1589 { | |
1590 my $i = $j; | |
1591 if ($j < 96) | |
1592 { | |
1593 $well_pos = get_WellPosition ($j); | |
1594 } | |
1595 else | |
1596 { | |
1597 $well_pos = get_WellPosition ($j - $wells) | |
1598 } | |
1599 | |
1600 #$primers_withSize.=$product_size[$i]."\t"."Plate_".$plate. "\t\tS.".$i."\tS.".$left_name[$i]."\t". | |
1601 print SEN "Plate_".$plate, "\t\t","S.", $i, "\tS.", $left_name[$i], "\t", $left_seq[$i], "\t\t+", "\t", $well_pos, "\n"; | |
1602 print ASEN "Plate_".$plate, "\t\t","AS.", $i, "\tAS.", $right_name[$i], "\t", $right_seq[$i], "\t\t-","\t", $well_pos,"\n"; | |
1603 print REG_1 "Plate_".$plate, "\t\t","S.", $i, "\t", $left_start[$i], "\t", $left_len[$i], "\n"; | |
1604 print REG_2 "Plate_".$plate, "\t\t","AS.", $i, "\t", $right_end[$i], "\t",$right_len[$i], "\n"; | |
1605 | |
1606 } | |
1607 $plate +=1; | |
1608 } | |
1609 | |
1610 #delete tmp. files | |
1611 #my $rm = "rm -f"; | |
1612 system ("rm -f data left_query right_query R.delta R.cluster R.coords L.delta L.cluster L.coords"); | |
1613 print "\nPRIMER DESIGN DONE\n\n"; | |
1614 # end of primer design program | |
1615 }#// | |
1616 ##### | |
1617 # returns a well position for oligos | |
1618 sub get_WellPosition{ | |
1619 | |
1620 my $j = shift; | |
1621 my $well_pos; | |
1622 if ($j < 12) | |
1623 { | |
1624 $well_pos = "a".($j+1); | |
1625 } | |
1626 elsif ($j>11 && $j<24) { | |
1627 $well_pos = "b". (($j+1) -12); | |
1628 } | |
1629 elsif ($j>23 && $j<36) { | |
1630 $well_pos = "c". (($j+1) -24); | |
1631 } | |
1632 elsif ($j>35 && $j<48) { | |
1633 $well_pos = "d". (($j+1) - 36); | |
1634 } | |
1635 elsif($j>47 && $j<60) { | |
1636 $well_pos = "e". (($j+1) -48); | |
1637 } | |
1638 elsif ($j>59 && $j<72) | |
1639 { | |
1640 $well_pos = "f". (($j+1) - 60); | |
1641 } | |
1642 elsif ($j>71 && $j< 84) | |
1643 { | |
1644 $well_pos = "g". (($j+1) - 72); | |
1645 } | |
1646 elsif ($j>83 && $j<96) | |
1647 { | |
1648 $well_pos = "h". (($j+1) - 84); | |
1649 } | |
1650 return $well_pos; | |
1651 } | |
1652 | |
1653 | |
1654 #################################################################### | |
1655 #get options for primer design | |
1656 #---------------------- | |
1657 sub getPoptions{ | |
1658 | |
1659 my $qualAvailable = shift; | |
1660 #### USER INPUTS ########## | |
1661 #ask for optimum primer size | |
1662 print "\nEnter Optimum Primer size (default 20 bases):"; | |
1663 my $opt=<STDIN>; | |
1664 chomp $opt; | |
1665 if($opt eq '') | |
1666 { | |
1667 $opt = 20; | |
1668 } | |
1669 #ask for minimum primer size | |
1670 print "\nEnter Minimum Primer size (default 18 bases):"; | |
1671 my $min=<STDIN>; | |
1672 chomp $min; | |
1673 if($min eq '') | |
1674 { | |
1675 $min = 18; | |
1676 } | |
1677 #ask for maximum primer size | |
1678 print "\nEnter Maximum Primer size (default 27 bases):"; | |
1679 my $max= <STDIN>; | |
1680 chomp $max; | |
1681 if($max eq '') | |
1682 { | |
1683 $max= 27; | |
1684 } | |
1685 #ask for optimum primer temperature | |
1686 print "\nEnter Optimum melting temperature (Celcius) for a primer oligo (default 60.0C):"; | |
1687 my $optTemp=<STDIN>; | |
1688 chomp $optTemp; | |
1689 if($optTemp eq '') | |
1690 { | |
1691 $optTemp = 60.0; | |
1692 } | |
1693 #ask for minimum primer temperature | |
1694 print "\nEnter Minimum melting temperature (Celcius) for a primer oligo (default 57.0C):"; | |
1695 my $minTemp=<STDIN>; | |
1696 chomp $minTemp; | |
1697 if($minTemp eq '') | |
1698 { | |
1699 $minTemp = 57.0; | |
1700 } | |
1701 #ask for maximum primer temperature | |
1702 print "\nEnter Maximum melting temperature (Celcius) for a primer oligo (default 63.0C):"; | |
1703 my $maxTemp=<STDIN>; | |
1704 chomp $maxTemp; | |
1705 if($maxTemp eq '') | |
1706 { | |
1707 $maxTemp = 63.0; | |
1708 } | |
1709 print "\nEnter flanking region size (default 1000 bases): "; | |
1710 my $flank=<STDIN>; | |
1711 chomp $flank; | |
1712 if ($flank eq '') | |
1713 { | |
1714 $flank = 1000; | |
1715 } | |
1716 #ask for primer product range | |
1717 print "\nEnter minimum product size produced by primers (default =flanking size):"; | |
1718 my $lowRange=<STDIN>; | |
1719 chomp $lowRange; | |
1720 if($lowRange eq '') | |
1721 { | |
1722 $lowRange = $flank; | |
1723 } | |
1724 print "\nEnter maxmimum product size produced by primers (default 7000):"; | |
1725 my $maxRange=<STDIN>; | |
1726 chomp $maxRange; | |
1727 if($maxRange eq '') | |
1728 { | |
1729 $maxRange = 7000; | |
1730 } | |
1731 #ask for minimum GC content in primers | |
1732 print "\nEnter minimum GC content in primers (default 20%):"; | |
1733 my $gcMin=<STDIN>; | |
1734 chomp $gcMin; | |
1735 if($gcMin eq '') | |
1736 { | |
1737 $gcMin = 20.0; | |
1738 } | |
1739 #ask for optimum GC content in primers | |
1740 print "\nEnter optimum GC content in primers (default 50%):"; | |
1741 my $gcOpt=<STDIN>; | |
1742 chomp $gcOpt; | |
1743 if($gcOpt eq '') | |
1744 { | |
1745 $gcOpt = 50.0; | |
1746 } | |
1747 #ask for maximum GC content in primers | |
1748 print "\nEnter maximum GC content in primers (default 80%):"; | |
1749 my $gcMax=<STDIN>; | |
1750 chomp $gcMax; | |
1751 if($gcMax eq '') | |
1752 { | |
1753 $gcMax = 80.0; | |
1754 } | |
1755 print "\nEnter GC clamp (default 1):"; | |
1756 my $gclamp=<STDIN>; | |
1757 chomp $gclamp; | |
1758 if($gclamp eq '') | |
1759 { | |
1760 $gclamp = 1; | |
1761 } | |
1762 print "\nEnter size of region to exclude at the end of contigs (default 100 bases):"; | |
1763 my $exclude=<STDIN>; | |
1764 chomp $exclude; | |
1765 if ($exclude eq '') | |
1766 { | |
1767 $exclude = 100; | |
1768 } | |
1769 | |
1770 | |
1771 #tdo | |
1772 my $quality=''; | |
1773 if ($qualAvailable) | |
1774 { | |
1775 | |
1776 print "\nEnter minimum quality for primer pick (default 40):"; | |
1777 $quality=<STDIN>; | |
1778 chomp $quality; | |
1779 if($quality eq '') | |
1780 { | |
1781 $quality = 40; | |
1782 } | |
1783 } | |
1784 | |
1785 | |
1786 return ($opt,$min,$max,$optTemp,$minTemp,$maxTemp,$flank,$lowRange,$maxRange,$gcMin,$gcOpt,$gcMax,$gclamp,$exclude, $quality); | |
1787 | |
1788 } | |
1789 ############### | |
1790 #-----------------------------------------------------END of PRIMER DESIGN ---------------------------------------------------------------- | |
1791 #-----------------------------------------------------END OF ABACAS ----------------------------------------------------------------------- | |
1792 | |
1793 |