Mercurial > repos > nml > abacas

#!/usr/bin/env perl
# Copyright (C) 2008 Genome Research Limited. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


use strict;
use warnings;
use POSIX qw(ceil floor);
use Getopt::Std;

#-------------------------------------------------------------------------------

if (@ARGV < 1) { usage();}

my ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns,
   $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer,
   $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix)
   =checkUserInput( @ARGV );

my $ref_inline;
if ($escapeToPrimers ==1)
{
    pickPrimers ($reference, $query_file, $flank, $chk_uniq);
    exit;
}
#BEGIN
#-------------------------------------------------------------------------------
print_header();

$ref_inline = Ref_Inline($reference);
#Get length of the reference sequence
my $ref_len =  length($ref_inline);
print "PREPARING DATA FOR   $choice \n";

###################
# Running MUMmer   #
###################
my ($path_dir, $run_mum, $path_toPass);
if ($debug)
{
    print "the seed is  $seed \n";
    print "RedoMummer= ",$redoMummer."\n";
}
my @do_mum_return;
if ($redoMummer)
{
    @do_mum_return = doMummer($reference, $query_file, $choice,$sen,$seed,$min_id, $min_cov, $diff_cov,$min_len, $debug, $is_circular) or die "Couldn't run MUMmer\n";
}

####################################
# Processing tiling output         #
####################################
if ($debug) {
  print "Do tiling...\n";
}

#--------------------------------
my $mummer_tiling = $do_mum_return[0];
$path_dir = $do_mum_return[2];
$path_toPass = $do_mum_return[1];
##################
#Do Tiling
#-------------------------------------------
doTiling ($mummer_tiling, $path_toPass, $path_dir,$reference, $query_file, $choice, $prefix,$mlfas, $fasta_bin, $avoid_Ns, $ref_len, $gaps_2file, $ref_inline, $add_bin_2ps, $pick_primer, $flank, $chk_uniq, $tbx);


##########################################################################################################################################################
#################################Contig ordering ##########################################################
###########################

sub help
{

die <<EOF

***********************************************************************************
* ABACAS: Algorithm Based Automatic Contiguation of Assembled Sequences           *
*                                                                                 *
*                                                                                 *
*   Copyright (C) 2009 The Wellcome Trust Sanger Institute, Cambridge, UK.        *
*   All Rights Reserved.                                                          *
*                                                                                 *
***********************************************************************************

USAGE
abacas.pl -r <reference file: single fasta> -q <query sequence file: fasta> -p <nucmer/promer>  [OPTIONS]

	-r	reference sequence in a single fasta file
	-q	contigs in multi-fasta format
	-p	MUMmer program to use: 'nucmer' or 'promer'
OR
abacas.pl -r <reference file: single fasta>  -q <pseudomolecule/ordered sequence file: fasta> -e
OPTIONS
        -h              print usage
	-d		use default nucmer/promer parameters
	-s	int	minimum length of exact matching word (nucmer default = 12, promer default = 4)
	-m		print ordered contigs to file in multifasta format
	-b		print contigs in bin to file
	-N		print a pseudomolecule without "N"s
	-i 	int 	mimimum percent identity [default 40]
	-v	int	mimimum contig coverage [default 40]
	-V	int	minimum contig coverage difference [default 1]
	-l	int	minimum contig length [default 1]
	-t		run tblastx on contigs that are not mapped
	-g 	string (file name)	print gaps on reference to file name
	-a		append contigs in bin to the pseudomolecule
	-o	prefix  output files will have this prefix
	-P		pick primer sets to close gaps
	-f	int	number of flanking bases on either side of a gap for primer design (default 350)
        -R              Redo mummer [default 1, use -R 0 to avoid running mummer]
	-e 		Escape contig ordering i.e. go to primer design
	-c 		Reference sequence is circular

EOF
}
########
sub usage
{

die <<EOF

USAGE
abacas.pl -r <reference file: single fasta> -q <query sequence file: fasta> -p <nucmer/promer>  [OPTIONS]
	-r	reference sequence in a single fasta file
	-q	contigs in multi-fasta format
	-p	MUMmer program to use: 'nucmer' or 'promer'
for contig ordering and  primer design

OR
abacas.pl -r <reference file: single fasta>  -q <pseudomolecule/ordered sequence file: fasta> -e 1
to escape contig ordering and go directly to primer design

OR
abacas.pl -h  for help

EOF
}
########
##################
sub print_header
{
    print "
***********************************************************************************
* ABACAS: Algorithm Based Automatic Contiguation of Assembled Sequences           *
*                                                                                 *
*                                                                                 *
*   Copyright (C) 2009 The Wellcome Trust Sanger Institute, Cambridge, UK.        *
*   All Rights Reserved.                                                          *
*                                                                                 *
***********************************************************************************
\n";
}
#########################################
sub checkUserInput{
    my %options;
    getopts('hr:q:p:ds:mbNi:v:V:l:tg:ao:Pf:R:u:ecD', \%options);

     my ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns,
        $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer,
        $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix);

    if($options{h}) {
        $help = $options{h};
        help();
    }
    else{
        $help =0;
    }
    if ($options{r} && $options{q} ){
        ($reference, $query_file) = ($options{r},$options{q});

    }
    else
    {
        usage() unless $options{e};
    }

    if ($options{p})
    {
        $choice =  $options{p};
        unless ($choice eq "nucmer" || $choice eq "promer"){
        print "Unknown MuMmer function\n Please use nucmer or promer\n";
        exit;
        }
    }
    else
    {
        usage() unless $options{e};
    }
    if ($options{e}) #$escapeToPrimers)
    {
        print_header();
        print "Primer design selected,... escaping contig ordering\n";
        $escapeToPrimers = 1;
        $chk_uniq = "nucmer";
        $choice = "";
    }
    else
    {
        $escapeToPrimers = 0;
    }
    if ($options{d}) {$sen =1;} else {$sen =0;} #print $sen , " ---sen\n";
    #print $options{t}, "\n"; exit;
    if($options{t}) {$tbx = 1;} else {$tbx = 0;}   #print $tbx, " ---tbx\n"; #
    unless ($options{s}){
        if ($choice eq "nucmer"){
            $seed = 12;
        }
        else{
            $seed =4;
        }
    }
    if ($options{m}){$mlfas =1;} else { $mlfas =0; } #print $mlfas , " ---mlfasta\n";
    if ($options{b}) { $fasta_bin =1;} else {$fasta_bin =0;}
    if ($options {N}) {$avoid_Ns =1;} else {$avoid_Ns=0;}

    unless($options{i}) {$min_id =40;}
    unless ($options{v}) {$min_cov =40;}
    unless ($options{V}) {$diff_cov =1;}
    unless ($options {l}) {$min_len = 1;}
    if ($options{a}) {$add_bin_2ps = 1; }else {$add_bin_2ps =0;}
    if ($options{P}) {$pick_primer=1;} else {$pick_primer =0;}
    if ($options{f}) {$flank = $options{f};} else {$flank = 1000;}
    if ($options{u}) {$chk_uniq = $options{u}; } else {$chk_uniq = "nucmer";}

    if($options{R}) {$redoMummer = $options{R}; }else {$redoMummer = 1;}
    unless ($options{c}) {$is_circular = 0;}
    if ($options{g}) {$gaps_2file = $options{g};} else {$gaps_2file ="";}
    if($options{o}) {$prefix = $options{o};} else {$prefix = "";}
    unless ($options{D}) {$debug =0};
    if ($tbx ==1 && $fasta_bin !=1)
    {
	print "ERROR:  Please use  -t -b  if you want to run tblastx on contigs in bin\n";
	exit;
    }
    #print $tbx, "\n"; exit;
    return ($help, $reference, $query_file, $choice, $sen, $seed, $mlfas, $fasta_bin, $avoid_Ns,
        $tbx, $min_id, $min_cov, $diff_cov, $min_len,$add_bin_2ps, $pick_primer,
        $flank, $chk_uniq,$redoMummer, $is_circular,$escapeToPrimers, $debug, $gaps_2file, $prefix);

}
#############
## get the reference sequence in one line
#--------------------------------------------------
sub Ref_Inline
{
	my $ref = shift;
	open (refFH, $ref) or die "Could not open file\n";
	my $seq ="";
	my @r = <refFH>;
        my $num_chr =0;
        foreach(@r){
            if ($_ =~ /\>/){
                $num_chr +=1;
            }
        }
        if ($num_chr > 1){
            print "\nERROR: Please use a single fasta reference file. You can simply merge chromosomes in to a union fasta file.\n\n";
            exit;
        }
	shift @r;
	foreach(@r){
	    chomp;
	    $seq = $seq.$_;
	}
	return $seq;
}
################
# Run mummer
#--------------------------------------------
sub doMummer
{
    my ($reference, $query_file, $choice, $sen,$seed,$min_id, $min_cov, $diff_cov,$min_len, $debug, $is_circular ) = @_;

    my $df = 'delta-filter';
    my $st = 'show-tiling';
    my $ask = 'which';
    my ($path_toPass, $run_mum); # params to return...
    my ($command, $Path, $dir) = checkProg($choice);
    my ($run_df, $df_path, $df_dir) = checkProg($df);
    my ($run_st, $st_path, $st_dir) = checkProg($st);
    my (@running, @deltaRes, @coordsRes);
    if ($choice eq "nucmer")
    {
	if ($sen ==0)
	{
	    @running = `$command --maxmatch -l $seed -p $choice $reference $query_file &> /dev/null`;
	    @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`;
	    if ($is_circular == 1)
	    {
		@coordsRes = `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;
	    }
	    else
	    {
		@coordsRes = `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;

	    }
	}
	else
	{
	    @running = `$command -p $choice $reference $query_file &> /dev/null`;
	    @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`;
	    if ($is_circular ==1) {@coordsRes = `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;}
	    else { @coordsRes = `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;}
	}

    }
    else
    {
	if ($sen ==0)
	{
	    @running = `$command  --maxmatch -l $seed -x 1 -p $choice $reference $query_file &> /dev/null`;
	    @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`;
	    if ($is_circular == 1)
	    {
		@coordsRes= `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;
	    }
	    else
	    {
		@coordsRes= `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;
	    }
	}
	else
	{
	    @running = `$command -l $seed -p $choice $reference $query_file &> /dev/null`;
	    @deltaRes = `$run_df -q $choice.delta >$choice.filtered.delta`;
	    if ($is_circular == 1) {
		@coordsRes= `$run_st -c -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;
	    }
	    else
	    {
		@coordsRes= `$run_st -i $min_id -v $min_cov -V $diff_cov -l $min_len -R -u unused_contigs.out $choice.filtered.delta > $choice.tiling`;
	    }
	}
    }
    my $Coordsfull= "$choice.tiling";
    return ($Coordsfull,$Path, $dir);
}

## #############################################################################
sub checkProg{ #checks if a given excutable is in the path...
    my $prog = shift;
    my $ask = 'which';
    my @check_prog = `$ask $prog`;
    my $path_toPass;
    my $path_dir;
    my $command;
    if (defined $check_prog[0] && $check_prog[0] =~ /$prog$/)
    {
	$path_toPass = $prog;
	$command = $prog;
    }
    else
    {
	print "\nENTER the directory for your ", $prog, " executables [default ./]:  ";
        my $path=<STDIN>;
        chomp $path;
	$path_dir = $path;
	if ($path_dir =~/\/$/)
	{
	    $path_dir = $path_dir;
	}
	else
	{
	    $path_dir = $path_dir."/";
	}
	my @final_check = `$ask $command`;
	if (exists $final_check[0] && $final_check[0] =~ /$prog$/)
	{
	    $command = $path_dir.$prog;
	    $path_toPass = $command;
	}
	else
	{
	   print "ERROR: Could not run  ", $prog, ", please check if it is installed in your path\n or provide the directory \n";
	   exit;
	}
    }

    return ($command, $path_toPass, $path_dir);
}

##############################
# converts a fasta file to an ordered single line
#--------------------------------------------------
sub Fasta2ordered
{
	if(  @_ != 1 )
	{
		print "Usage: Fasta2ordered <fasta_file>\n";
		exit;
	}
	my $fast = shift; #print $fast; exit;
	my @fasta = split (/\n/, $fast);
	if ($fasta[0] =~ /\>/ ) #remove chromosome name if exists in the onput sequence.
	{
        	my $ch_name = $fasta[0];
        	shift @fasta;
	}
	#print $fasta[0]; exit;
	foreach(@fasta){chomp;}
	my $num_lines = scalar(@fasta);
	my $dna = '';
	for(my $i=0; $i< $num_lines; $i+=1)
	{
                $dna = $dna.$fasta[$i];
	}
	my $ordered_dna = $dna;
	return $ordered_dna;
}
##############################################################
# Hash input contigs
#------------------------------------------------

sub hash_contigs {
  if(  @_ != 1 )
	{
	  print "Usage: hash_contigs contigs_file";
	  exit;
	}
  my $contigs_file = shift;
  if( $contigs_file =~ /\.gz$/ )
	{
	  open(CONTIGS, $contigs_file, "gunzip -c $contigs_file |" ) or die "Cant open contigs file: $!\n";
	}
  else
	{
	  open( CONTIGS, $contigs_file) or die "Cant open contigs file: $!\n";
	}

  my %contigs_hash; # hash to store contig names and sequences
  my $contigName;


  while (<CONTIGS>) ##add error checking...
	{
	  if (/^>(\S+)/) {
		$contigName=$1;
	  }
	  else {
		chomp;
		$contigs_hash{$contigName} .= $_;
	  }
	}
  close(CONTIGS);
  #tdo
  ## check if qual exists
  my %contigs_qual_hash;


  if (-r "$contigs_file.qual" or -r "$contigs_file.qual.gz") {
	if( -r "$contigs_file.qual.gz" )
	  {
		open(CONTIGS, "$contigs_file.qual.gz", "gunzip -c $contigs_file.qual.gz |" ) or die "Cant open contigs file: $!\n";
	  }
	else
	  {
		open( CONTIGS, "$contigs_file.qual" ) or die "Cant open contigs file: $!\n";
	  }


	while (<CONTIGS>) {
	  if (/^>(\S+)/) {
		$contigName=$1;
	  }
	  else {
		chomp;
		$contigs_qual_hash{$contigName} .= $_;
	  }
	}

  } # end tdo # end if exist


  return (\%contigs_hash,\%contigs_qual_hash);
}
#######################
##############################
### it gets a delta name
sub getMummerComparison{
  my $deltaName = shift;


  ### transform the delta file to coords
  my $call ="show-coords -H  -T -q  $deltaName >  $deltaName.coords ";
  !system(" $call") or die "Problems doing the show-coords comparison: $call $!\n";


  ### willh old results
  my %h;

  ### has as index the postion with the max hits
  my %h_max;
  my $tmp=0;
  my $tmp_index;
  my $key='';
  my $is_promer =0;
  if ($deltaName =~/^promer/)
  {
     $is_promer =1;
  }

  open (F,"$deltaName.coords") or die "Problem in getComparisonFile to open file $deltaName.coords\n";
  my @File=<F>;
  my @a;
  @a=split(/\s+/,$File[0]);
  $tmp=$a[5];
  $tmp_index=$a[0];
  if ($is_promer ==1)
  {
    $key=$a[12]; ## nucmer:  $key = $a[8]
  }
  else
  {
    $key = $a[8];
  }

  foreach (@File) {
	@a=split(/\s+/);

        if ($is_promer ==1)
        {
            push @{ $h{$a[12]}}, "$a[12]\t$a[11]\t$a[7]\t$a[5]\t0\t0\t$a[2]\t$a[3]\t$a[0]\t$a[1]\t1\t$a[5]\n";
            if ($key eq $a[12] and $a[5]>$tmp)
            {
              $tmp=$a[5];       # length
              $tmp_index=$a[0]; # position reference
            }
            elsif ($key ne $a[12]) {
              ### here possible bugg...
              $h_max{$tmp_index}=$key;
              $key=$a[12];
              $tmp=$a[5];       # length
              $tmp_index=$a[0]; # position reference

            }
        } #end if
        #else i.e. if nucmer
        else
        {
            push @{ $h{$a[8]}}, "$a[8]\t$a[6]\t$a[6]\t$a[5]\t0\t0\t$a[2]\t$a[3]\t$a[0]\t$a[1]\t1\t$a[5]\n";
            if ($key eq $a[8] and $a[5]>$tmp)
            {
              $tmp=$a[5];       # length
              $tmp_index=$a[0]; # position reference
            }
            elsif ($key ne $a[8]) {
              ### here possible bugg...
              $h_max{$tmp_index}=$key;
              $key=$a[8];
              $tmp=$a[5];       # length
              $tmp_index=$a[0]; # position reference

            }
        }

  }
  $h_max{$tmp_index}=$key;
#  print Dumper %h_max;

  return (\%h,\%h_max);
}
##################################
###########################
sub writeBinContigs2Ref{
  my $nameBin = shift;
  my $name    = shift;

  open (F, "$nameBin") or die "Couldn't find file $nameBin: $!\n";

  my @ar;

  my $count=0;

  while (<F>) {
	push @ar, $_;
	$count++;
  }
    #### sa4: added error checking:- if file is empty
    if (scalar(@ar) < 1)
    {
        print "No contigs in unusedcontigs file\n";
        $count = 0;

    }
    else
    {
        open (F, "> $name.notMapped.contigs.tab") or die "Couldn't write file $name.tab: $!\n";
        print F doArt(\@ar);
        close(F);
    }
    return $count;

}


##############################
sub doArt{
  my ($ref) = @_;


  ## hash of array with all positions of the contig
  my %Pos;

  ## Hash with note of result line of nucmer
  my %lines;

  foreach (@$ref) {
	chomp;
	my @ar=split(/\t/);
	push @{ $Pos{$ar[12]}}, "$ar[0]..$ar[1]";
	$lines{$ar[12]} .= "FT                   $_\n";
  }

  my $res;

  foreach my $contig (keys %lines) {

	if (scalar(@{ $Pos{$contig} } >1)) {
	  my $tmp;

	  foreach (@{ $Pos{$contig} }) {
		$tmp.="$_,";
	  }
	  $tmp =~ s/,$//g; # get away last comma
	  $res .= "FT   contig          join($tmp)\n";
	}
	else {
	  $res .= "FT   contig          $Pos{$contig}[0]\n";
	}
	$res .=   "FT                   /systematic_id=\"$contig\"\n";
	$res .=   "FT                   /note=\"Contig $contig couldn't map perfectly.\n";
	$res .= $lines{$contig};
	$res .= "FT                   \"\n";

  }

  return $res;
}

##########################################################################
#---------------------------
sub makeN #creates a string of Ns
{
	my $n = shift;
	my $Ns= "N";
	for (my $i =1; $i < $n; $i+=1)
	{
		$Ns = $Ns."N";
	}
	return $Ns;
}

###########################################################################
## reverse complement a sequence
#---------------------------------------------
sub revComp {
  my $dna = shift;
  my $revcomp = reverse($dna);

  $revcomp =~ tr/ACGTacgt/TGCAtgca/;

  return $revcomp;
}

################################################################################
### function to visualize rep. regions in Reference genome.
sub findRepeats
{
    my $reference        = shift;
    my $name             = shift;
    my $path_prog        = shift;

    # get path
    my ($path_coords) = $path_prog;

    $path_coords =~ s/nucmer/show-coords/;

    my $call = "$path_prog --maxmatch -c 100 -b 40 -p $name.repeats -l 25 $reference $reference  &> /dev/null ";
    !system("$call") or die "Problems doing the nucmer comparison: $call $!\n";
    $call ="$path_coords -r -c -l  $name.repeats.delta >  $name.repeats.coords ";
    !system(" $call") or die "Problems doing the show-coords comparison: $call $!\n";

    my @Res;
    open (F, "$name.repeats.coords" ) or die "Problems to open file $reference.repeats.coords: Is MUMmer installed correctly and inserted in the PATH environment variable? ($!)\n";
    $_=<F>; $_=<F>; $_=<F>; $_=<F>;$_=<F>;
    while (<F>) {
          my @ar = split(/\s+/);
          if (!($ar[1] == $ar[4] or $ar[2] == $ar[5] or $ar[7] > 100000)) { # to exclude self alignment

            foreach ($ar[1]..$ar[2]) {
                  $Res[($_-1)]++;
            }
          }
    }

    ### write the result to the plot file

    my $res;
    foreach (@Res){
          if (defined($_)) {
            $res .= "$_\n";
          }
          else {
            $res .= "0\n";
          }
    }

    open (F, "> $name.Repeats.plot") or die "Couldn't open file $name.plot to write: $! \n";
    print F $res;
    close(F);

    ### delete files
    unlink("$name.repeats.delta");
    unlink("$name.repeats.coords");
    unlink("$name.repeats.cluster");
}

################################################################################
# reverse a list of qualities
#------------------------------
sub reverseQual{
  my $str = shift;

  $str =~ s/\s+$//g;

  my @ar = split(/\s/,$str);
  my $res;

  for (my $i=(scalar(@ar)-1);$i>=0;$i--) {
	$res.="$ar[$i] ";
  }
  return $res;
}

##############################################################################
# ----------------
sub getPosCoords{
  my $ref_ar = shift;
  my $contig = shift;

  my $offset = shift;
  my $res;
  #  print Dumper $$ref_ar{$contig};
  foreach (@{$$ref_ar{$contig}}) {
	print "in getPos Coords: $_\n";;

	my @ar=split(/\t/);
	$ar[6]+=$offset;
	$ar[7]+=$offset;
	$res .= join("\t",@ar);;
  }

  return $res;

}
#############################################################################
# -----------------------------
sub getPosCoordsTurn{
  my $ref_ar = shift;
  my $contig = shift;

  my $offset = shift;


  my $res;

  #  print Dumper $$ref_ar{$contig};
  foreach (@{$$ref_ar{$contig}}) {
	my @ar=split(/\t/);
	my $tmp_8=$ar[8];
	my $tmp_9=$ar[9];

	$ar[8]=$ar[6]+$offset;
	$ar[9]=$ar[7]+$offset;
	$ar[6]=$tmp_8;
	$ar[7]=$tmp_9;

	## change query subject

	$res .= join("\t",@ar);;
  }

  return $res;

}

############################################################################
#------------------------
sub printStats{

  my ($num_fortillingcontigs,$num_notsetTilling,$num_mapped, $num_contigs, $num_inComparisoncontigs, $ref_len, $total_bases_mpd) = @_;
  $num_fortillingcontigs=$num_notsetTilling+$num_mapped;
  my $res;
  $res.= "Short statistics of run.\n";
  $res.= "$num_contigs\t\tcontigs entered to be mapped against the reference.\n";
  $res.= sprintf("$num_inComparisoncontigs\t%0.2f \%\tmade a hit against the reference to the given parameter (-s -d etc)\n",($num_inComparisoncontigs*100/$num_contigs));
  $res.= sprintf("$num_fortillingcontigs\t%0.2f \%\twere considered for the tilling graph (-s -d etc)\n",($num_fortillingcontigs*100/$num_contigs));
  $res.= sprintf("$num_mapped\t%0.2f \%\tare mapped in the tilling graph (-s -d etc)\n",($num_mapped*100/$num_contigs));
  $res.= sprintf("\nCoverage: The reference is $ref_len long. Up to $total_bases_mpd bp (%0.2f \%) are covered by the contigs (This doesn't mean that these regions are really similar...)\n",($total_bases_mpd*100/$ref_len));

  print $res;

}
##################################################
#### Do Tiling
#----------------------------------------------------
sub doTiling {

    my ($mummer_tiling, $path_toPass, $path_dir,$reference, $query_file, $choice, $prefix,$mlfas, $fasta_bin, $avoid_Ns, $ref_len, $gaps_2file, $ref_inline, $add_bin_2ps, $pick_primer, $flank, $chk_uniq, $run_blast) = @_;

    ### these are also defined in the main script.... to be changed!!
    my ($num_contigs , $num_inbincontigs, $avg_cont_size,$num_overlaps , $num_gaps,
    $num_mapped,$total_bases_mpd,$p_ref_covered,$num_ambigus,$num_inComparisoncontigs,
    $num_fortillingcontigs,$num_notsetTilling)= (0,0,0,0,0,0,0,0,0,0,0,0);

    my ($href, $ref_contigs_qual) = hash_contigs($query_file);
    my $qualAvailable=0;
    my %contigs_hash = %{$href};
    my @c_keys = keys %contigs_hash;
    $num_contigs = scalar(@c_keys);
    my @cont_lens;
    my (@ids ,$id, $id_len);
    my (@Rs, @Re, @G, @Len, @cov, @pid, @orient, @Cid);
    my (@Ps, @Pe);
    my ($total);
    my $g; #define gap size between contigs
    my $tiling_gap; #gap size from tiling graph
    open (TIL, $mummer_tiling) or die "Could not open $mummer_tiling: $!";
    while (<TIL>)
    {
            chomp;
            if ($_ =~/^>/)
            {
                    my $line = substr $_, 1;
                    my @splits = split /\s+/, $line;
                    $id = $splits[0];
                    push @ids, $id;
                    $id_len= $splits[1];
            }
            else
            {
                    my @splits = split /\s+/, $_;
                    push @Rs, $splits[0];
                    push @Re, $splits[1];
                    push @G, $splits[2];
                    push @Len, $splits[3];
                    push @cov, $splits[4];
                    push @pid, $splits[5];
                    push @orient, $splits[6];
                    push @Cid, $splits[7];
            }

    }
    close (TIL);
    if (scalar(@Rs) != scalar(@Re))
    {
            print "ERROR: unequal array size\n";
            exit;
    }
    else
    {
            $total = scalar(@Rs);
            $num_mapped = scalar(@Rs);
    }
    my $ref_loc = $reference;  # get locations of reference and query files
    my $qry_loc = $query_file;
    my $dif_dir =0; 	#assume query and reference are in the working directory
    my @splits_reference = split (/\//, $reference);
    my $new_reference_file = $splits_reference[(scalar(@splits_reference)-1)];
    my @splits_query = split (/\//, $query_file);
    my $new_query_file = $splits_query[(scalar(@splits_query)-1)];
    if ($prefix eq "")
    {
        $prefix = $new_query_file."_".$new_reference_file;
    }
    #-------------------------------------------------------------------
    #define file handles for output files and open files to write output
    #-------------------------------------------------------------------
    my ($seqFH,$tabFH,$binFH,$crunchFH, $gapFH, $mlFH, $dbinFH, $avoidNFH);
    open ($seqFH, '>', $prefix . '.fasta') or die "Could not open file $prefix.fasta for write: $!\n";
    open ($tabFH, '>', $prefix . '.tab') or die "Could not open file $prefix.tab for write: $!\n";
    open ($binFH, '>', $prefix . '.bin') or die "Could not open file  $prefix.bin for write: $!\n";
    open ($crunchFH, '>', $prefix . '.crunch') or die "Could not open file $prefix.crunch for write: $!\n";
    open ($gapFH, '>', $prefix . '.gaps') or die "Could not open file $prefix.gaps for write: $!\n";
    if ($mlfas ==1)
    {
        open ($mlFH, '>', $prefix . '.contigs.fas') or die "Could not open file $prefix.contigs.fas for write: $!\n";
    }
    if ($fasta_bin ==1)
    {
        open ($dbinFH, '>', $prefix . '.contigsInbin.fas') or die "Could not open file $prefix.contigsInbin.fas for write: $!\n";
    }
    if ($avoid_Ns ==1)
    {
        open ($avoidNFH, '>', $prefix .'.NoNs.fasta') or die "Could not open file $prefix.NoNs.fasta for write: $!\n";
    }
    #-------------------------------------------------------------------------
    # Writing tiling graph and generating a pseudomolecule
    # Note use use ps for pseudomolecule
    #@Ps = start of ps, and @Pe = end of ps
    my $ps_start =1;
    $Ps[0] = 1;
    $Pe[0] = $Ps[0] + $Len[0] -1;
    my $tmp_qual;
    my $tmp_nqual;
    my $tmp_seq ="";
    my $tmp_nseq ="";
    print "Total contigs = $total \n";

    #------------------------------------------------------------
    # The 'for loop' loops over each contig in the Tiling output
    #Writing to file is done for each contig to speed up the process
    #This part could potentially be a separate subroutine

    print $tabFH "ID   ",$id, "\n";
    print $seqFH ">", "ordered_", $id, "\n";
     for (my $i=1; $i <= $total; $i+=1)
    {
        my $covv =sprintf("%.0f",$cov[$i -1]); #ROUNDING
        my $pidd = sprintf("%.0f", $pid[$i -1]);
        my ($contig_coord, $color, $contig_seq);
        my $contig_qual='';
        $tiling_gap = $G[$i -1];
        if ($tiling_gap <= 5){ #insert 100Ns for overlaps and gaps of size less than or equal to 5bp
            $g = 99; # default gap size to
        }
        else{
            $g = $tiling_gap;
        }
        if (defined($Len[$i]))
	{
		  $Ps[$i] =  $Pe[$i-1] +$g +1;
		  $Pe[$i] = $Ps[$i] + $Len[$i] -1;
		  $total_bases_mpd+=$Len[$i];
	}

        if ($Rs[$i -1] <0) #check if a reference starting position is less than 0
	{
            $Rs[$i -1] =1;
	}

        if($orient[$i-1] eq "+")
        {
            $contig_coord = $Ps[$i -1]."..".$Pe[$i-1];
            $color = 4;
            $contig_seq = $contigs_hash{$Cid[$i-1]};
        }
        else
        {
            $contig_coord = "complement(".$Ps[$i -1]."..".$Pe[$i-1].")";
	    $color =3;
	    $contig_seq = revComp($contigs_hash{$Cid[$i-1]}); #REVERSE COMPLEMENT A SEQUENCE

        }
        push (@cont_lens, length($contig_seq));

        # tdo
        if (defined($$ref_contigs_qual{$Cid[$i-1]})) {
            ## flag to know, that the qual exists
            $qualAvailable=1;
            $contig_qual = $$ref_contigs_qual{$Cid[$i-1]};
        }
        $tmp_qual .= $contig_qual;
        $tmp_seq .= $contig_seq;
        if ($avoid_Ns ==1)
        {
            $tmp_nseq.= $contig_seq;
            #tdo
            $tmp_nqual .= $contig_qual;
        }
        if ($mlfas ==1)
        {
		my $head = $Cid[$i-1];
            my $multifasta_seq = write_Fasta_headers ($contig_seq,$head);
            print $mlFH $multifasta_seq;
        }
        if ($Re[$i -1] > $ref_len)
	{
		$Re[$i -1] = $ref_len -1;
	}
	if ($Pe[$i -1] > length($tmp_seq))
	{
		$Pe[$i -1] = length($tmp_seq);
	}

        #-----------------------------------------
        print $crunchFH $covv, " ", $pidd, " ", $Ps[$i -1], " ", $Pe[$i -1], " ", $Cid[$i -1], " ", $Rs[$i -1], " ", $Re[$i-1], " ", "unknown NONE\n";

        #WRITE FEATURE FILE
	print $tabFH "FT   contig          ",$contig_coord, "\n";
	print $tabFH "FT                   ", "/systematic_id=\"", $Cid[$i-1],"\"","\n";
	print $tabFH "FT                   ", "/method=\"", "mummer\"", "\n";
        print $tabFH "FT                   ", "/Contig_coverage=\"",$cov[$i -1], "\"", "\n";
        print $tabFH "FT                   ", "/Percent_identity=\"",$pid[$i -1], "\"", "\n";
        if ($tiling_gap > 1)       #WRITE GAP LOCATIONS AND SIZE TO FILE
	{
            my $gap_start = $Pe[$i -1] +1;
	    my $gap_end = "";
            if (defined $Ps[$i])
            {
                $gap_end = $Ps[$i] -1;
            }
            my $ref_start = $Re[$i -1] +1;
            my $ref_end;
            if (defined $Rs[$i])
            {
                $ref_end =$Rs[$i]-1;
            }
            else
            {
                $ref_end = "END";
            }
            print $gapFH "Gap\t",$tiling_gap, "\t", $gap_start, "\t", $gap_end, "\t", $ref_start, "\t", $ref_end,"\n";
            if ($gaps_2file ne "" && $ref_start < $ref_len)
	    {
                    my $ref_gapsFH;
                    open ($ref_gapsFH, '>', $gaps_2file.'.Gaps_onRef') or die "Could not open file $gaps_2file.Gaps_onRef for write: $!\n";
		    my $gapOnref = substr ($ref_inline, $ref_start, $g);
		    print $ref_gapsFH ">",$g,"_",$ref_start, "\n";
                    my $file_toPrint = write_Fasta ($gapOnref);
                    print $ref_gapsFH $file_toPrint;
            }
        }
        else
        {
            $color = 5;
	    print $tabFH "FT                   ", "/Overlapping=\"", "YES\"", "\n";
        }
        my $ns = makeN($g);
        $tmp_seq = $tmp_seq.$ns;
        #tdo
        for (1..length($ns)) {
          $tmp_qual .= "0 ";
        }
        print $tabFH "FT                   ", "/colour=\"",$color, "\"", "\n";
    }
    #------------------------------------------------------------------
    #tdo
    my @Quality_Array;
    if ($qualAvailable) {
        @Quality_Array = split(/\s/,$tmp_qual);
        my $res;
        foreach (@Quality_Array) {
            $res .= "$_\n";
        }
        ## get name
        my @splits_query = split (/\//, $query_file);
        $new_query_file = $splits_query[(scalar(@splits_query)-1)];
        open (F,"> $new_query_file.qual.plot") or die "problems\n";
        print F $res;
        close(F);
    }
    ##WRITE PSEUDOMOLECULE WITHOUT 'N's
    #--------------------------------------
    if ($avoid_Ns ==1)
    {
        print $avoidNFH ">", "ordered_", $id, "without 'N's","\n";
	my $toWrite = write_Fasta ($tmp_nseq);
        print $avoidNFH $toWrite;
    }
    ####################################
    #WRITE CONTIGS WITH NO HIT TO FILE #
    #################################
    my %Cids;

    foreach(@Cid)
    {
            chomp;
            $Cids{$_} = 1;
    }
    my @contigs_2bin = ();
    my %h_contigs_2bin;

    foreach (@c_keys)
    {
        push(@contigs_2bin, $_) unless exists $Cids{$_};

    }
    foreach(@contigs_2bin)
    {
      $h_contigs_2bin{$_}=1;

      print $binFH "$_ \n";

    }
    $num_inbincontigs= scalar(@contigs_2bin);

    ########
    # WRITE PSEUDOMOLECULE TO FILE
    #----------------------------------
    my $new_seq = $tmp_seq;
    my $prev_len = length($tmp_seq);
    my $total_len = $prev_len;
    foreach (@contigs_2bin)
    {
        chomp;
        #my $binseq = $contigs_hash{$contigs_2bin[$i]};
        my $l = length ($contigs_hash{$_});
        $total_len +=$l;
    }
    if ($add_bin_2ps ==1) #appending unmapped contigs to pseudomolecule
    {

        for (my $i =0; $i < scalar(@contigs_2bin); $i+=1)
        {
            my $binseq = $contigs_hash{$contigs_2bin[$i]};
            $new_seq .=$contigs_hash{$contigs_2bin[$i]};
            my $len_current_contig = length($binseq);
            my $start = $prev_len +1;
            my $end = $start + $len_current_contig -1;
            my $col = 7;
            if ($start > $total_len)
            {
                    $start = $total_len;
            }
            if ($end >$total_len)
            {
                    $end = $total_len;
            }
            my $co_cord = $start."..".$end;
            my $note = "NO_HIT";
            print $tabFH "FT   contig          ",$co_cord, "\n";
            print $tabFH "FT                   ", "/systematic_id=\"", $contigs_2bin[$i],"\"","\n";
            print $tabFH "FT                   ", "/method=\"", "mummer\"", "\n";
            print $tabFH "FT                   ", "/colour=\"",$col, "\"", "\n";
            print $tabFH "FT                   ", "/", $note, "\n";
            $prev_len= $end;
        }
    }
    my $to_write = write_Fasta ($new_seq);
    print $seqFH $to_write;
    ########
    #WRITE CONTIGS IN BIN TO FILE   #
    #------------------------------------------------------
    if ($fasta_bin ==1)
    {
        foreach(@contigs_2bin)
        {
            print $dbinFH ">", $_, "\n";
            my $to_write = write_Fasta($contigs_hash{$_});
            print $dbinFH $to_write;
        }
    }
        #unlink ("$choice.delta");
        #unlink ("$choice.filtered.delta");
        #unlink ("$choice.cluster");
        #unlink ("$choice.tiling");
    #PRINT FINAL MESSAGE
    print " FINISHED CONTIG ORDERING\n";
    print "\nTo view your results in ACT\n\t\t Sequence file 1: $new_reference_file\n\t\t Comparison file 1: $prefix.crunch\n\t\t Sequence file 2: $prefix.fasta\n
    \t\tACT feature file is: $prefix.tab\n
    \t\tContigs bin file is: $prefix.bin\n
    \t\tGaps in pseudomolecule are in: $prefix.gaps\n\n";

    #Run tblastx....
    if ($run_blast ==1)
        {
            print "Running tblastx on contigs in bin...\nThis may take several minutes ...\n";
            my $formatdb = 'formatdb -p F -i' ;
#		my @formating = `
            !system("$formatdb $new_reference_file") or die "ERROR: Could not find 'formatdb' for blast\n";
            my $blast_opt = 'blastall  -m 9 -p tblastx -d ';
            my $contigs_inBin = $prefix.'.contigsInbin.fas';
#			my @bigger_b = `
                !system("$blast_opt $new_reference_file -i $contigs_inBin -o blast.out") or die "ERROR: Could not find 'blastall' , please install blast in your working path (other dir==0)\n$blast_opt $new_reference_file -i $contigs_inBin -o blast.out\n \n";
        }


    if ($pick_primer == 1)
    {
        print " DESIGNING PRIMERS FOR GAP CLOSURE...\n";
        my $qq = "$prefix.fasta";
        pickPrimers($qq, $reference, $flank, $path_toPass, $chk_uniq,$qualAvailable,@Quality_Array);
    }
}


#------------------------------
sub write_Fasta {
    my $sequence = shift;
    my $fasta_seq ="";
    my $length = length($sequence);
    if ($length <= 60)
    {
      $fasta_seq = $sequence ;
    }
    elsif ($length> 60 )
    {
        for (my $i =0; $i < $length; $i+=60)
        {
            my $tmp_s = substr $sequence, $i, 60;
            $fasta_seq .= $tmp_s."\n";
        }
    }

    return $fasta_seq;
}

sub write_Fasta_headers {
    my $sequence = shift;
	my $header = shift;
	    my $fasta_seq =">$header\n";
    my $length = length($sequence);
    if ($length <= 60)
    {
      $fasta_seq.= $sequence ;
    }
    elsif ($length> 60 )
    {
        for (my $i =0; $i < $length; $i+=60)
        {
            my $tmp_s = substr $sequence, $i, 60;
            $fasta_seq .= $tmp_s."\n";
        }
    }

    return $fasta_seq;
}

#---------------------------------------------- END OF CONTIG ORDERING SUBROUTINES----------------------------------------------------

#----------------------------------------------- PRIMER DESIGN ---------------------------------------------------
sub pickPrimers
{
          #$ps = pseudo molecule,$rf = reference, $flan = flanking region size
          my ($ps,$rf, $flan, $passed_path, $chk_uniq,$qualAvailable, @Quality_Array);
          if (@_==4){
                  ($rf,$ps, $flan, $chk_uniq) = @_;
                   print "Primers without ordering..\n";
                   print $rf;
                   $passed_path = "nucmer";
                   $qualAvailable =0;
                   @Quality_Array = [];
          }
          else #(@_ == 7)
          {
                ($ps,$rf, $flan, $passed_path, $chk_uniq, $qualAvailable, @Quality_Array) = @_;
          }

          my $dna='';
          my @gappedSeq;
          my $records='';
          my @sequence;
          my $input='';
	  #tdo
	  my @gappedQual;
	  #my $quality='';
          my $path_toPass = $passed_path;
          my @fasta;
          my $query='';
          my @exc_regions;
          my $ch_name;
          #my $flank = $flan;
          open (FH, $rf) or die "Could not open reference file\n";
          open (FH2, $ps) or die "Could not open query/pseudomolecule file\n";
          my $ref; #print ".... ", $rf; exit;
          my @r = <FH>;
          my @qry = <FH2>;
          my $dn = join ("", @qry);
          $ref = join ("", @r);
          $dna = Fasta2ordered ($dn);
          #check if primer3 is installed
          my $pr3 = "primer3_core";
          my ($pr3_path, $pr3_Path, $pr3_path_dir) = checkProg ($pr3);
          #my @check_prog = `which primer3_core`;
          open (PRI, '>primer3.summary.out') or die "Could not open file for write\n";

         #

          #print $ref; exit;
          #PARSING FOR PRIMER3 INPUT
          my ($opt,$min,$max,$optTemp,$minTemp,$maxTemp,$flank,$lowRange,$maxRange,$gcMin,$gcOpt,$gcMax,$gclamp,$exclude,$quality) = getPoptions($qualAvailable);
          my ($gap_position,@positions, %seq_hash);

          my $exc1 = $flank -$exclude;  #start of left exclude
          print "Please wait... extracting target regions ...\n";
          #regular expression extracts dna sequence before and after gaps in sequence (defined by N)
          while($dna=~ /([atgc]{$flank,$flank}N+[atgc]{$flank,$flank})/gi)
          {
	        $records= $1;
			push (@gappedSeq, $records);
			$gap_position = index($dna, $records);
			push @positions, $gap_position;
			$seq_hash{$gap_position}=$records;
			#dna
			if ($qualAvailable) {
			  my $res;
			  for (my $nn=($gap_position-1); $nn <= ($gap_position-1+length($records)-1); $nn++) {
				$res.="$Quality_Array[$nn] ";
			  }
			  push @gappedQual, $res;
			}
          }
        #loop prints out primer targets into a file format accepted by primer3
        my $count=1;
        my $identify='';
        my $seq_num = scalar @gappedSeq;
        my $name= " ";

        my ($totalp, @left_name, @right_name, @left_seq, @right_seq);

        my ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$left_Exclude,$right_Exclude, $primers_toExc, $prod_size)=
        ("","","","","","","","","","", "", "");

        print $seq_num, " gaps found in target sequence\n";
        print "Please wait...\nLooking for primers...\n";
        print "Running Primer3 and checking uniquness of primers...\nCounting left and right primer hits from a nucmer mapping (-c 15 -l 15)\n";

        for (my $i=0; $i<$seq_num; $i+=1)
        {
                    $identify = $count++;
                    if (defined $ch_name)
                    {
                            $name = $ch_name;
                    }
                    my $len = length($gappedSeq[$i]);
                    my $exc2 = $len - $flank;
                    open(FILE, '>data') or die "Could not open file\n";
                    #tdo
                    my $qual='';
                    if ($qualAvailable) {
                      $qual="PRIMER_SEQUENCE_QUALITY=$gappedQual[$i]\nPRIMER_MIN_QUALITY=$quality\n";
                    }

#WARNING: indenting the following lines may cause problems in primer3
print FILE "PRIMER_SEQUENCE_ID=Starting_Pos $positions[$i]
SEQUENCE=$gappedSeq[$i]
PRIMER_OPT_SIZE=$opt
PRIMER_MIN_SIZE=$min
PRIMER_MAX_SIZE=$max
PRIMER_OPT_TM=$optTemp
PRIMER_MIN_TM=$minTemp
PRIMER_MAX_TM=$maxTemp
PRIMER_NUM_NS_ACCEPTED=1
PRIMER_PRODUCT_SIZE_RANGE=$lowRange-$maxRange
PRIMER_MIN_GC=$gcMin
PRIMER_GC_CLAMP =$gclamp
PRIMER_OPT_GC_PERCENT=$gcOpt
PRIMER_MAX_GC=$gcMax
PRIMER_INTERNAL_OLIGO_EXCLUDED_REGION=$exc1,$exclude $exc2,$exclude
".$qual."Number To Return=1
=\n";
close FILE;

        #runs primer3 from commandline

        ################# NOTE: PRIMER3 SHOULD BE IN YOUR WORKING PATH #########

                    my  @Pr3_output = `$pr3_path -format_output <data`;
                    #print $positions[$i], "\t", $i, " ", $path_toPass, "  ", $rf, $exc1, " ",$exc2, "\n";
                    my $fil = join (":%:", @Pr3_output);
                   my ($uniq_primer, $string,$left_nm,$right_nm,$left_sq, $right_sq,$left_strt,$left_ln, $right_End,$right_ln,$primers_toExclude, $product_size)
                    = check_Primers ($fil, $positions[$i], $i,$path_toPass, $rf, $exc1, $exc2);

                    print PRI $string;
                    if ($uniq_primer ==1)
                    {
                              $leftP_names.=$left_nm."\n";
                              $rightP_names.=$right_nm."\n";
                              $leftP_seqs.=$left_sq."\n";
                              $rightP_seqs.=$right_sq."\n";
                              $leftP_start.=$left_strt."\n";
                              $leftP_lens.=$left_ln."\n";
                              $rightP_ends.=$right_End."\n";
                              $rightP_lens.=$right_ln."\n";
                              $left_Exclude.=$exc1."\n";
                              $right_Exclude.=$exc2."\n";
                              $prod_size.=$product_size."\n";
                    }
                    if ($primers_toExclude ne "")
                    {
                              $primers_toExc.= $primers_toExclude; #."\n";
                    }

          }
        write_Primers ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$primers_toExc,$left_Exclude,$right_Exclude, $prod_size);
        #write_Primers (@left_name, @right_name, @left_seq, @right_seq,@left_start, @left_len, @right_end, @right_len, @left_exclude, @right_exclude, $primers_toExclude);

}

#checks the uniqueness of primers
#input an array with promer3 output for each gap
sub check_Primers
{

          my ($fil, $position, $index,$path_toPass, $rf, $exc1, $exc2) = @_;
          my  @Pr3_output = split /:%:/, $fil;
          my ($left_name, $right_name, $left_seq, $right_seq, $left_start,$left_len,$right_end,$right_len,$left_exclude,$right_exclude) = ("", "", "", "", "", "", "", "", "", "");
          my $primers_toExclude ="";
          my $product_size ="";
          my $string ="";
          my $uniq_primer = 0;
          $string.="=========================================================================================\n";
          $string.="Primer set for region starting at ".$position."\n";

          if (defined $Pr3_output[5] && defined $Pr3_output[6])
          {
                    if ($Pr3_output[5]=~ /LEFT PRIMER/)
                    {
                           # print $Pr3_output[5];
                            #check uniquness of primer against the genome
                            my @splits_1 = split (/\s+/, $Pr3_output[5]);
                            my $left_primer = $splits_1[8];
                            my $left_st = $splits_1[2];
                            my $left_length = $splits_1[3];

                            my @splits_2 = split (/\s+/, $Pr3_output[6]);
                            my $right_primer = $splits_2[8];
                            my $right_st = $splits_2[2];
                            my $right_length = $splits_2[3];

                            open (QRY_1, '>./left_query'); # open a file for left primers
                            print QRY_1 ">left_01\n";    #
                            print QRY_1 $left_primer,"\n";
                            open (QRY_2, '>./right_query');
                            print QRY_2 ">right_01\n";
                            print QRY_2 $right_primer,"\n";

                            my ($left_count, $right_count);
                            #if ($chk_uniq eq "nucmer")
                            #{
                                my $options = "-c 15 --coords  -l 15 ";
                                my $rq = "right_query";
                                my $lq = "left_query";
                                my (@right_ps, @left_ps);
                               # print $path_toPass, "\t", $options, "\n";


                                my @Rrun = `$path_toPass $options -p R $rf  $rq &> /dev/null`;
                                print ".";
                                my $f1 = "R.coords";
                                open (RP, $f1) or die "Could not open file $f1 while checking uniqueness of right primer\n";
                                  while (<RP>)
                                {
                                    chomp;
                                    if ($_ =~ /right_01$/)
                                    {
                                        push @right_ps, $_;
                                    }
                                }
                                close (RP);
                                my @Lrun = `$path_toPass $options -p L $rf  $lq &> /dev/null`;
                                print ".";
                                my $f2 = "L.coords";
                                open (LQ, $f2) or die "Could not open file $f2\n";
                                while (<LQ>)
                                {
                                    chomp;
                                    if ($_ =~ /left_01$/)
                                    {
                                        push @left_ps, $_;
                                    }
                                }
                                close (LQ);
                                $right_count = scalar (@right_ps);
                                $left_count = scalar(@left_ps);
                              #check if a primer is not in the excluded region::
                              my $primer_NearEnd =0;
                              if ($left_st > $exc1 || $right_st < $exc2)
                              {
                                        $primer_NearEnd = 1;
                              }

                            if ($left_count < 2 && $right_count<2 && $primer_NearEnd ==0)
                            {
                                    $string.=$left_count."\t".$Pr3_output[5]."\n";
                                    $string.=$right_count."\t".$Pr3_output[6]."\n";
                                    $string.="***************************** PRIMER3 OUTPUT **************************\n";
                                    foreach (@Pr3_output) {$string.=$_;}

                                    my @prod_size_split = split /\s+/, $Pr3_output[10];

                                    $product_size = substr($prod_size_split[2], 0, -1);
                                    $left_name = $position;
                                    $right_name = $position;
                                    my $lp_uc = uc ($left_primer);
                                    my $rp_uc = uc($right_primer);
                                        #print $left_count, "..", $right_count, "\t";
                                    $left_seq = $lp_uc;
                                    $right_seq= $rp_uc;

                                    $left_start= $left_st;
                                    $left_len = $left_length;

                                    $right_end = $right_st;
                                    $right_len = $right_length;

                                    $left_exclude = $exc1;
                                    $right_exclude =$exc2;
                                    $uniq_primer =1;
                            }
                            else
                            {
                                        if ($primer_NearEnd ==1)
                                        {
                                             $string.="One of the oligos is near the end of a contig\n";
                                        }
                                        else
                                        {
                                                  $string.="Primer set not unique\n";
                                        }
                                    $primers_toExclude.=">L.".$position."\n".$left_primer."\n";
                                    $primers_toExclude.=">R.".$position."\n".$right_primer."\n";
                            }

                    }
                    else
                    {
                            $string.="No Primers found\n";
                    }
          }

          return ($uniq_primer, $string,$left_name,$right_name,$left_seq, $right_seq,$left_start,$left_len, $right_end,$right_len,$primers_toExclude, $product_size);


}

###------------------------------------
# Writes primers and their regions to file
sub  write_Primers {
          my ($leftP_names, $rightP_names, $leftP_seqs, $rightP_seqs, $leftP_start, $leftP_lens, $rightP_ends, $rightP_lens,$primers_toExclude,$left_Exclude,$right_Exclude, $product_sizes) = @_;
          my (@left_name, @right_name, @left_seq, @right_seq, @left_start, @left_len, @right_end, @right_len, @left_exclude, @right_exclude, @product_size);

          #open files to read
          @left_name = split /\n/, $leftP_names;
          @right_name= split /\n/, $rightP_names;
          @left_seq = split /\n/, $leftP_seqs;
          @right_seq = split /\n/, $rightP_seqs;

          @left_start = split /\n/, $leftP_start;
          @left_len = split /\n/, $leftP_lens;
          @right_end = split/\n/, $rightP_ends;
          @right_len = split /\n/, $rightP_lens;
          @left_exclude = split /\n/, $left_Exclude;
          @right_exclude = split /\n/,$right_Exclude;
          @product_size = split /\n/, $product_sizes;

          my $primers_withSize ="";
          open (SEN, '>sense_primers.out') or die "Could not open file for write\n";
          open (ASEN, '>antiSense_primers.out') or die "Could not open file for write\n";
          open (REG_1, '>sense_regions.out') or die "Could not open file for write\n";
          open (REG_2, '>antiSense_regions.out') or die "Could not open file for write\n";

          if ($primers_toExclude ne "")
          {
                    open (PEX, '>primers_toExclude.out') or die "Could not open file for write\n";
                    print PEX $primers_toExclude;
          }


          my $totalp = scalar (@left_name);

          #print $totalp, "\n"; exit;

          my $well_pos;
          my $max_plates = ceil($totalp/96);
          #print "MAX Ps ", $max_plates, "\n";
          my $plate=1;
          my $sen ="";
          my $asen ="";
          my $plate_counter =0;
          my $wells = 96;
          for (my $index =0; $index < $totalp; $index += $wells)
          {
                   my $do = $index;
                   my $upper_bound= $index + $wells;
                   if ($upper_bound > $totalp)
                   {
                              $upper_bound = $totalp;
                   }

                   for (my $j=$index; $j <= ($upper_bound-1); $j+=1)
                    {
                       my $i = $j;
                       if ($j < 96)
                       {
                              $well_pos = get_WellPosition ($j);
                       }
                       else
                       {
                            $well_pos = get_WellPosition ($j - $wells)
                       }

                           #$primers_withSize.=$product_size[$i]."\t"."Plate_".$plate. "\t\tS.".$i."\tS.".$left_name[$i]."\t".
                           print SEN "Plate_".$plate, "\t\t","S.", $i, "\tS.", $left_name[$i], "\t", $left_seq[$i], "\t\t+", "\t", $well_pos, "\n";
                           print ASEN "Plate_".$plate, "\t\t","AS.", $i, "\tAS.", $right_name[$i], "\t", $right_seq[$i], "\t\t-","\t", $well_pos,"\n";
                           print REG_1 "Plate_".$plate, "\t\t","S.", $i, "\t", $left_start[$i], "\t", $left_len[$i], "\n";
                           print REG_2 "Plate_".$plate, "\t\t","AS.", $i, "\t", $right_end[$i], "\t",$right_len[$i], "\n";

                    }
                    $plate +=1;
          }

        #delete tmp. files
        #my $rm = "rm -f";
        system ("rm -f data left_query right_query R.delta R.cluster R.coords L.delta L.cluster L.coords");
        print "\nPRIMER DESIGN DONE\n\n";
	# end of primer design program
}#//
#####
# returns a well position for oligos
sub get_WellPosition{

          my $j = shift;
          my $well_pos;
          if ($j < 12)
          {
                    $well_pos = "a".($j+1);
          }
          elsif ($j>11 && $j<24) {
                    $well_pos = "b". (($j+1) -12);
          }
          elsif ($j>23 && $j<36) {
                    $well_pos = "c". (($j+1) -24);
          }
          elsif ($j>35 && $j<48) {
                    $well_pos = "d". (($j+1) - 36);
          }
          elsif($j>47 && $j<60) {
                    $well_pos = "e". (($j+1) -48);
          }
          elsif ($j>59 && $j<72)
          {
                    $well_pos = "f". (($j+1) - 60);
          }
          elsif ($j>71 && $j< 84)
          {
                    $well_pos = "g". (($j+1) - 72);
          }
          elsif ($j>83 && $j<96)
          {
                    $well_pos = "h". (($j+1) - 84);
          }
          return $well_pos;
}


####################################################################
#get options for primer design
#----------------------
sub getPoptions{

          my $qualAvailable = shift;
          #### USER INPUTS ##########
        #ask for optimum primer size
        print "\nEnter Optimum Primer size (default 20 bases):";
        my $opt=<STDIN>;
        chomp $opt;
        if($opt eq '')
        {
                $opt = 20;
        }
        #ask for minimum primer size
        print "\nEnter Minimum Primer size (default 18 bases):";
        my $min=<STDIN>;
        chomp $min;
        if($min eq '')
        {
                $min = 18;
        }
        #ask for maximum primer size
        print "\nEnter Maximum Primer size (default 27 bases):";
        my $max= <STDIN>;
        chomp $max;
        if($max eq '')
        {
                $max= 27;
        }
        #ask for optimum primer temperature
        print "\nEnter Optimum melting temperature (Celcius) for a primer oligo (default 60.0C):";
        my $optTemp=<STDIN>;
        chomp $optTemp;
        if($optTemp eq '')
        {
                $optTemp = 60.0;
        }
        #ask for minimum primer temperature
        print "\nEnter Minimum melting temperature (Celcius) for a primer oligo (default 57.0C):";
        my $minTemp=<STDIN>;
        chomp $minTemp;
        if($minTemp eq '')
        {
                $minTemp = 57.0;
        }
        #ask for maximum primer temperature
        print "\nEnter Maximum melting temperature (Celcius) for a primer oligo (default 63.0C):";
        my $maxTemp=<STDIN>;
        chomp $maxTemp;
        if($maxTemp eq '')
        {
                $maxTemp = 63.0;
        }
        print "\nEnter flanking region size (default 1000 bases): ";
        my $flank=<STDIN>;
        chomp $flank;
        if ($flank eq '')
        {
          $flank = 1000;
        }
        #ask for primer product range
        print "\nEnter minimum product size produced by primers (default =flanking size):";
        my $lowRange=<STDIN>;
        chomp $lowRange;
        if($lowRange eq '')
        {
                $lowRange = $flank;
        }
        print "\nEnter maxmimum product size produced by primers (default 7000):";
        my $maxRange=<STDIN>;
        chomp $maxRange;
        if($maxRange eq '')
        {
                $maxRange = 7000;
        }
        #ask for minimum GC content in primers
        print "\nEnter minimum GC content in primers (default 20%):";
        my $gcMin=<STDIN>;
        chomp $gcMin;
        if($gcMin eq '')
        {
                $gcMin = 20.0;
        }
        #ask for optimum GC content in primers
        print "\nEnter optimum GC content in primers (default 50%):";
        my $gcOpt=<STDIN>;
        chomp $gcOpt;
        if($gcOpt eq '')
        {
                $gcOpt = 50.0;
        }
        #ask for maximum GC content in primers
        print "\nEnter maximum GC content in primers (default 80%):";
        my $gcMax=<STDIN>;
        chomp $gcMax;
        if($gcMax eq '')
        {
                $gcMax = 80.0;
        }
        print "\nEnter GC clamp  (default 1):";
        my $gclamp=<STDIN>;
        chomp $gclamp;
        if($gclamp eq '')
        {
                $gclamp = 1;
        }
	print "\nEnter size of region to exclude at the end of contigs (default 100 bases):";
	my $exclude=<STDIN>;
	chomp $exclude;
	if ($exclude eq '')
	{
		$exclude = 100;
	}


	  #tdo
          my $quality='';
	  if ($qualAvailable)
          {

                    print "\nEnter minimum quality for primer pick (default 40):";
                    $quality=<STDIN>;
                    chomp $quality;
                    if($quality eq '')
		  {
			$quality = 40;
		  }
	  }


return ($opt,$min,$max,$optTemp,$minTemp,$maxTemp,$flank,$lowRange,$maxRange,$gcMin,$gcOpt,$gcMax,$gclamp,$exclude, $quality);

}
###############
#-----------------------------------------------------END of PRIMER DESIGN ----------------------------------------------------------------
#-----------------------------------------------------END OF ABACAS -----------------------------------------------------------------------