diff multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl @ 0:275433d3a395 draft

Uploaded tool tarball.
author devteam
date Wed, 25 Sep 2013 11:26:57 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multispecies_MicrosatDataGenerator_interrupted_GALAXY.pl	Wed Sep 25 11:26:57 2013 -0400
@@ -0,0 +1,5606 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Term::ANSIColor;
+use File::Basename;
+use IO::Handle;
+use Cwd;
+use File::Path;
+use vars qw($distance @thresholds @tags $species_set @allspecies $printer $treeSpeciesNum $focalspec $mergestarts $mergeends $mergemicros $interrtypecord $microscanned $interrcord $interr_poscord $no_of_interruptionscord $infocord $typecord $startcord $strandcord $endcord $microsatcord $motifcord $sequencepos $no_of_species $gapcord $prinkter);
+use File::Path qw(make_path remove_tree);
+use File::Temp qw/ tempfile tempdir /;
+my $tdir = tempdir( CLEANUP => 1 );
+chdir $tdir;
+my $dir = getcwd;  
+#print "dir = $dir\n";
+
+#$ENV{'PATH'} .= ':' . dirname($0);
+my $date = `date`;
+
+my ($mafile, $mafile_sputt, $orthfile, $threshold_array,  $allspeciesin, $tree_definition_all, $separation) = @ARGV;
+if (!$mafile or !$mafile_sputt or !$orthfile or !$threshold_array or !$separation or !$tree_definition_all or !$allspeciesin) { die "missing arguments\n"; }
+
+$tree_definition_all =~ s/\s+//g;
+$threshold_array =~ s/\s+//g;
+$allspeciesin =~ s/\s+//g;
+#-------------------------------------------------------------------------------
+# WHICH SPUTNIK USED?
+my $sputnikpath = ();
+$sputnikpath = "sputnik_lowthresh_MATCH_MIN_SCORE3" ;
+#$sputnikpath = "/Users/ydk/work/rhesus_microsat/codes/./sputnik_Mac-PowerPC";
+#print "sputnik_Mac-PowerPC non-existant\n" if !-e $sputnikpath;
+#exit if !-e $sputnikpath;
+#$sputnikpath = "bx-sputnik" ;
+#print "ARGV input = @ARGV\n";
+#print "ARGV input :\n mafile=$mafile\n orthfile=$orthfile\n threshold_array=$threshold_array\n  species_set=$species_set\n tree_definition=$tree_definition\n separation=$separation\n";
+#-------------------------------------------------------------------------------
+# RUNFILE
+#-------------------------------------------------------------------------------
+$distance = 1; #bp
+$distance++;
+my @tree_definitions=MakeTrees($tree_definition_all);
+my $allspeciesset = $tree_definition_all;
+$allspeciesset =~ s/[\(\) ]+//g;
+@allspecies = split(/,/,$allspeciesset);
+
+my @outputfiles = ();
+my $round = 0;
+#my $tdir = tempdir( CLEANUP => 0 );
+#chdir $tdir;
+
+foreach my $tree_definition (@tree_definitions){
+	my @commas = ($tree_definition =~ /,/g) ;
+	#print "commas = @commas\n"; <STDIN>;
+	next if scalar(@commas) <= 1;
+	#print "species_set = $species_set\n";
+	$treeSpeciesNum = scalar(@commas) + 1;
+	$species_set = $tree_definition;
+	$species_set =~ s/[\)\( ;]+//g;
+	#print "species_set = $species_set\n"; <STDIN>;
+
+	$round++;
+	#-------------------------------------------------------------------------------
+	# MICROSATELLITE THRESHOLD SETTINGS (LENGTH, BP)
+	$threshold_array=~ s/,/_/g;
+	my @thresharr = split("_",$threshold_array);
+	@thresholds=@thresharr;
+	#my $threshold_array = join("_",($mono_threshold, $di_threshold, $tri_threshold, $tetra_threshold));
+	#print "current dit=$dir\n";
+	#-------------------------------------------------------------------------------
+	# CREATE AXT FILES IN FORWARD AND REVERSE ORDERS IF NECESSARY
+	my @chrfiles=();
+	
+	#my $mafile =  "/Users/ydk/work/rhesus_microsat/results/galay/align.txt"; #$ARGV[0];
+	my $chromt=int(rand(10000));
+	my $p_chr=$chromt;
+	
+	
+	my @exactspeciesset_unarranged = split(/,/,$species_set);
+	$tree_definition=~s/[\)\(, ]/\t/g;
+	my @treespecies=split(/\t+/,$tree_definition);
+	my @exactspecies=();
+	
+	foreach my $spec (@treespecies){
+		foreach my $espec (@exactspeciesset_unarranged){
+			push @exactspecies, $spec if $spec eq $espec;
+		}
+	}
+	#print "exactspecies=@exactspecies\n";
+	$focalspec = $exactspecies[0];
+	my $arranged_species_set=join(".",@exactspecies);
+	my $chr_name = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt");
+	my $chr_name_sputt = join(".",("chr".$p_chr),$arranged_species_set, "net", "axt_sputt");
+	#print "sending to maftoAxt_multispecies: $mafile, $tree_definition, $chr_name, $species_set .. focalspec=$focalspec \n"; 
+	maftoAxt_multispecies($mafile, $tree_definition, $chr_name, $species_set);
+	maftoAxt_multispecies($mafile_sputt, $tree_definition, $chr_name_sputt, $species_set);
+	#print "done maf to axt conversion\n";
+	my $reverse_chr_name = join(".",("chr".$p_chr."r"),$arranged_species_set, "net", "axt");
+	artificial_axdata_inverter ($chr_name, $reverse_chr_name);
+	#print "reverse_chr_name=$reverse_chr_name\n"; 
+	#-------------------------------------------------------------------------------
+	# FIND THE CORRESPONDING CHIMP CHROMOSOME FROM FILE ORTp_chrS.TXT
+	foreach my $direct ("reverse_direction","forward_direction"){
+		$p_chr=$chromt;
+		#print "direction = $direct\n";
+		$p_chr = $p_chr."r" if $direct eq "reverse_direction";
+		$p_chr = $p_chr if $direct eq "forward_direction";
+		my $config = $species_set;
+		$config=~s/,/./g;
+		my @orgs = split(/\./,$arranged_species_set);
+		#print "ORGS= @orgs\n";
+		my @tag=@orgs;
+			
+		
+		my $tags = join(",", @tag);
+		my @tags=@tag;
+		chomp $p_chr;
+		$tags = join("_", split(/,/, $tags));
+		my $pchr = "chr".$p_chr;
+		
+		my $ptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1])."-".$threshold_array;
+		my @sp_tags = ();
+		
+#		print "$ptag _ orthfile\n"; <STDIN>;
+		#print "orgs=@orgs, pchr=$pchr, hence, ptag = $ptag\n";
+		foreach my $sp (@tag){
+			push(@sp_tags, ($sp.".".$ptag));
+		}
+	
+		my $preptag = $orgs[0]."-".$pchr.".".join(".",@orgs[1 ... scalar(@orgs)-1]);
+		my @presp_tags = ();
+		
+		foreach my $sp (@tag){
+			push(@presp_tags, ($sp.".".$preptag));
+		}
+	
+		my $resultdir = "";
+		my $orthdir = "";
+		my $filtereddir = "";
+		my $pipedir = "";
+		
+		my @title_queries = ();
+		push(@title_queries, "^[0-9]+");
+		my $sep="\\s";
+		for my $or (0 ... $#orgs){
+			my $title =  join($sep, ($orgs[$or],  "[A-Za-z_]+[0-9a-zA-Z]+", "[0-9]+", "[0-9]+", "[\\-\\+]"));
+			#$title =~ s/chr\\+\\s+\+/chr/g;
+			push(@title_queries, $title);
+		}
+		my $title_query = join($sep, @title_queries);
+		#print "title_queries=@title_queries\n";
+		#print "query = >$title_query<\n"; 
+		#print "orgs = @orgs\n"; 
+		#-------------------------------------------------------------------------------
+		# GET AXTNET FILES, EDIT THEM AND SPLIT THEM INTO HUMAN AND CHIMP INPUT FILES
+		my $t1input = $pchr.".".$arranged_species_set.".net.axt";
+		
+		my @t1outputs = ();
+		
+		foreach my $sp (@presp_tags){
+			push(@t1outputs, $sp."_gap_op");
+		}
+		
+		
+		
+		multi_species_t1($t1input,$tags,(join(",", @t1outputs)), $title_query); 
+		#print "t1outputs=@t1outputs\n";
+		#print "done t1\n"; <STDIN>;
+		#-------------------------------------------------------------------------------
+		#START T2.PL
+		
+		my $stag  = (); my $tag1 = (); my $tag2 = ();  my $schrs = ();
+		
+		for my $t (0 ... scalar(@tags)-1){
+			multi_species_t2($t1outputs[$t], $tag[$t]);		
+		}
+		#-------------------------------------------------------------------------------
+		#START T2.2.PL
+		
+		my @temp_tags = @tag;
+		
+		foreach my $sp (@presp_tags){
+			my $t2input =  $sp."_nogap_op_unrand";
+			multi_species_t2_2($t2input, shift(@temp_tags));
+		}
+		undef (@temp_tags);
+		
+		#-------------------------------------------------------------------------------
+		#START SPUTNIK
+		
+		my @jobIDs = ();
+		@temp_tags = @tag;
+		my @sput_filelist = ();
+		
+		foreach my $sp (@presp_tags){
+			#print "sp = $sp\n";
+			my $sputnikoutput = $pipedir.$sp."_sput_op0";
+			my $sputnikinput = $pipedir.$sp."_nogap_op_unrand";
+			push(@sput_filelist, $sputnikinput);
+			my $sputnikcommand = $sputnikpath." ".$sputnikinput." > ".$sputnikoutput;
+		#	print "$sputnikcommand\n";
+			my @sputnikcommand_system = $sputnikcommand;
+			system(@sputnikcommand_system);
+		}
+	
+		#-------------------------------------------------------------------------------
+		#START SPUTNIK OUTPUT CORRECTOR
+		
+		foreach my $sp (@presp_tags){
+			my $corroutput = $pipedir.$sp."_sput_op1";
+			my $corrinput = $pipedir.$sp."_sput_op0";
+			sputnikoutput_corrector($corrinput,$corroutput);
+			
+			my $t4output = $pipedir.$sp."_sput_op2";
+			multi_species_t4($corroutput,$t4output);
+		
+			my $t5output = $pipedir.$sp."_sput_op3";
+			multi_species_t5($t4output,$t5output);
+			#print "done t5.pl for $sp\n";
+		
+			my $t6output = $pipedir.$sp."_sput_op4";
+			multi_species_t6($t5output,$t6output,scalar(@orgs));
+		}
+		#-------------------------------------------------------------------------------
+		#START T9.PL FOR T10.PL AND FOR INTERRUPTED HUNTING
+		
+		foreach my $sp (@presp_tags){
+			my $t9output = $pipedir.$sp."_gap_op_unrand_match";
+			my $t9sequence = $pipedir.$sp."_gap_op_unrand2";
+			my $t9micro = $pipedir.$sp."_sput_op4";
+			t9($t9micro,$t9sequence,$t9output);
+			
+			my $t9output2 = $pipedir.$sp."_nogap_op_unrand2_match";
+			my $t9sequence2 = $pipedir.$sp."_nogap_op_unrand2";
+			t9($t9micro,$t9sequence2,$t9output2);
+		}
+		#print "done both t9.pl for all orgs\n";
+	
+		#-------------------------------------------------------------------------------
+		# FIND COMPOUND MICROSATELLITES
+		
+		@jobIDs = ();
+		my $species_counter = 0;
+		
+		foreach my $sp (@presp_tags){
+			my $simple_microsats=$pipedir.$sp."_sput_op4_simple";
+			my $compound_microsats=$pipedir.$sp."_sput_op4_compound";
+			my $input_micro = $pipedir.$sp."_sput_op4";
+			my $input_seq = $pipedir.$sp."_nogap_op_unrand2_match";
+			multiSpecies_compound_microsat_hunter3($input_micro,$input_seq,$simple_microsats,$compound_microsats,$orgs[$species_counter], scalar(@sp_tags), $threshold_array );
+			$species_counter++;
+		}
+		
+		#-------------------------------------------------------------------------------
+		# READING  AND FILTERING SIMPLE MICROSATELLITES
+		my $spcounter2=0;
+		foreach my $sp (@sp_tags){
+			my $presp = $presp_tags[$spcounter2];
+			$spcounter2++;
+			my $simple_microsats=$pipedir.$presp."_sput_op4_simple";
+			my $simple_filterout = $pipedir.$sp."_sput_op4_simple_filtered";
+			my $simple_residue = $pipedir.$sp."_sput_op4_simple_residue";
+			multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
+		}
+		
+		#-------------------------------------------------------------------------------
+		# ANALYZE  COMPOUND MICROSATELLITES FOR BEING INTERRUPTED MICROSATS
+		
+		$species_counter = 0;
+		foreach my $sp (@sp_tags){
+			my $presp = $presp_tags[$species_counter];
+			my $compound_microsats = $pipedir.$presp."_sput_op4_compound";
+			my $analyzed_simple_microsats=$pipedir.$presp."_sput_op4_compound_interrupted";
+			my $analyzed_compound_microsats=$pipedir.$presp."_sput_op4_compound_pure";
+			my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
+			multiSpecies_compound_microsat_analyzer($compound_microsats,$seq_file,$analyzed_simple_microsats,$analyzed_compound_microsats,$orgs[$species_counter], scalar(@sp_tags));
+			$species_counter++;
+		}
+		#-------------------------------------------------------------------------------
+		# REANALYZE COMPOUND MICROSATELLITES FOR PRESENCE OF SIMPLE ONES WITHIN THEM.. 
+		$species_counter = 0;
+		
+		foreach my $sp (@sp_tags){
+			my $presp = $presp_tags[$species_counter];
+			my $compound_microsats = $pipedir.$presp."_sput_op4_compound_pure";
+			my $compound_interrupted = $pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
+			my $compound_compound = $pipedir.$presp."_sput_op4_compound_compound";
+			my $seq_file = $pipedir.$presp."_nogap_op_unrand2_match";
+			multiSpecies_compoundClarifyer($compound_microsats,$seq_file,$compound_interrupted,$compound_compound,$orgs[$species_counter], scalar(@sp_tags), "2_4_6_8", "3_4_6_8", "2_4_6_8");
+			$species_counter++;
+		}
+		#-------------------------------------------------------------------------------
+		# READING  AND FILTERING SIMPLE AND COMPOUND MICROSATELLITES
+		$species_counter = 0;
+		
+		foreach my $sp (@sp_tags){
+			my $presp = $presp_tags[$species_counter];
+		
+			my $simple_microsats=$pipedir.$presp."_sput_op4_compound_clarifiedInterrupted";
+			my $simple_filterout = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
+			my $simple_residue = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_residue";
+			multiSpecies_filtering_interrupted_microsats($simple_microsats, $simple_filterout, $simple_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
+		
+			my $simple_microsats2 = $pipedir.$presp."_sput_op4_compound_interrupted";
+			my $simple_filterout2 = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
+			my $simple_residue2 = $pipedir.$sp."_sput_op4_compound_interrupted_residue";
+			multiSpecies_filtering_interrupted_microsats($simple_microsats2, $simple_filterout2, $simple_residue2,$threshold_array,$threshold_array,scalar(@sp_tags));
+		
+			my $compound_microsats=$pipedir.$presp."_sput_op4_compound_compound";
+			my $compound_filterout = $pipedir.$sp."_sput_op4_compound_compound_filtered";
+			my $compound_residue = $pipedir.$sp."_sput_op4_compound_compound_residue";
+			multispecies_filtering_compound_microsats($compound_microsats, $compound_filterout, $compound_residue,$threshold_array,$threshold_array,scalar(@sp_tags));
+			$species_counter++;
+		}
+		#print "done filtering both simple and compound microsatellites \n";
+		
+		#-------------------------------------------------------------------------------
+		
+		my @combinedarray = ();
+		my @combinedarray_indicators = ("mononucleotide", "dinucleotide", "trinucleotide", "tetranucleotide");
+		my @combinedarray_tags = ("mono", "di", "tri", "tetra");
+		$species_counter = 0;
+		
+		foreach my $sp (@sp_tags){
+			my $simple_interrupted = $pipedir.$sp."_simple_analyzed_simple";
+			push @{$combinedarray[$species_counter]}, $pipedir.$sp."_simple_analyzed_simple_mono", $pipedir.$sp."_simple_analyzed_simple_di", $pipedir.$sp."_simple_analyzed_simple_tri", $pipedir.$sp."_simple_analyzed_simple_tetra";
+			$species_counter++;
+		}
+		
+		#-------------------------------------------------------------------------------
+		# PUT TOGETHER THE INTERRUPTED AND SIMPLE MICROSATELLITES BASED ON THEIR MOTIF SIZE FOR FURTHER EXTENTION	
+		my $sp_counter = 0;
+		foreach my $sp (@sp_tags){	
+			my $analyzed_simple = $pipedir.$sp."_sput_op4_compound_interrupted_filtered";
+			my $clarifyed_simple = $pipedir.$sp."_sput_op4_compound_clarifiedInterrupted_filtered";
+			my $simple = $pipedir.$sp."_sput_op4_simple_filtered";
+			my $simple_analyzed_simple = $pipedir.$sp."_simple_analyzed_simple";
+			`cat $analyzed_simple $clarifyed_simple $simple > $simple_analyzed_simple`;
+			for my $i (0 ... 3){
+				`grep "$combinedarray_indicators[$i]" $simple_analyzed_simple > $combinedarray[$sp_counter][$i]`;	
+			}
+			$sp_counter++;		
+		}
+		#print "\ndone grouping interrupted & simple microsats based on their motif size for further extention\n";
+		
+		#-------------------------------------------------------------------------------
+		# BREAK CHROMOSOME INTO PARTS OF CERTAIN NO. CONTIGS EACH, FOR FUTURE SEARCHING OF INTERRUPTED MICROSATELLITES
+		# ESPECIALLY DI, TRI AND TETRANUCLEOTIDE MICROSATELLITES
+		@temp_tags = @sp_tags;
+		my $increment = 1000000;
+		my @splist = ();
+		my $targetdir = $pipedir;
+		$species_counter=0;
+		
+		foreach my $sp (@sp_tags){
+			my $presp = $presp_tags[$species_counter];
+			$species_counter++;
+			my $localtag = shift @temp_tags;
+			my $locallist = $targetdir.$localtag."_".$p_chr."_list";
+			push(@splist, $locallist);
+			my $input = $pipedir.$presp."_nogap_op_unrand2_match";
+			chromosome_unrand_breaker($input,$targetdir,$locallist,$increment, $localtag, $pchr);
+		}
+		
+	
+		my @unionarray = ();
+		#print "splist=@splist\n";
+		#-------------------------------------------------------------------------------
+		# FIND INTERRUPTED MICROSATELLITES
+		
+		$species_counter = 0;		
+		
+		for my $i (0 .. $#combinedarray){
+			
+			@jobIDs = ();
+			open (JLIST1, "$splist[$i]") or die "Cannot open file $splist[$i]: $!";
+			
+			while (my $sp1  = <JLIST1>){
+				#print "$splist[$i]: sp1=$sp1\n";
+				chomp $sp1;
+				
+				for my $j (0 ... $#combinedarray_tags){
+					my $interr  = $sp1."_interr_".$combinedarray_tags[$j];
+					my $simple  = $sp1."_simple_".$combinedarray_tags[$j];
+					push @{$unionarray[$i]}, $interr, $simple;	
+					multiSpecies_interruptedMicrosatHunter($combinedarray[$i][$j],$sp1,$interr ,$simple, $orgs[$species_counter], scalar(@sp_tags), "3_4_6_8"); 
+				}		
+			}
+			$species_counter++;
+		}
+		close JLIST1;	
+		#-------------------------------------------------------------------------------
+		#	REUNION AND ZIPPING BEFORE T10.PL
+		
+		my @allarray = ();
+		
+		for my $i (0 ... $#sp_tags){	
+			my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
+			unlink $localfile if -e $localfile;
+			push(@allarray, $localfile);
+		
+			my $unfiltered_localfile= $localfile."_unfiltered";
+			my $residue_localfile= $localfile."_residue";
+			
+			unlink $unfiltered_localfile;
+			#unlink $unfiltered_localfile;
+			for my $j (0 ... $#{$unionarray[$i]}){
+				#print "listing files for species $i  and list number $j= \n$unionarray[$i][$j] \n";
+				`cat $unionarray[$i][$j] >> $unfiltered_localfile`;
+				unlink $unionarray[$i][$j];
+			}
+	
+			multiSpecies_filtering_interrupted_microsats($unfiltered_localfile, $localfile, $residue_localfile,$threshold_array,$threshold_array,scalar(@sp_tags) );
+			my $analyzed_compound = $pipedir.$sp_tags[$i]."_sput_op4_compound_compound_filtered";
+			my $simple_residue = $pipedir.$sp_tags[$i]."_sput_op4_simple_residue";
+			my $compound_residue = $pipedir.$sp_tags[$i]."_sput_op4_compound_residue";
+			
+			`cat $analyzed_compound >> $localfile`;
+		}
+		#-------------------------------------------------------------------------------
+		# MERGING MICROSATELLITES THAT ARE VERY CLOSE TO EACH OTHER, INCLUDING THOSE FOUND BY SEARCHING IN 2 OPPOSIT DIRECTIONS
+		
+		my $toescape=0;
+		
+		
+		for my $i (0 ... $#sp_tags){	
+			my $localfile = $pipedir.$sp_tags[$i]."_allmicrosats";
+			$localfile =~ /$focalspec\-(chr[0-9a-zA-Z]+)\./;
+			my $direction = $1;
+			#print "localfile = $localfile , direction = $direction\n"; 
+	#		`gzip $reverse_chr_name` if $direction =~ /chr[0-9a-zA-Z]+r/ && $switchboard{"deleting_processFiles"} != 1;
+			$toescape =1 if $direction =~ /chr[0-9a-zA-Z]+r/;
+			last if $direction =~ /chr[0-9a-zA-Z]+r/;
+			my $nogap_sequence = $pipedir.$presp_tags[$i]."_nogap_op_unrand2_match";
+			my $gap_sequence = $pipedir.$presp_tags[$i]."_gap_op_unrand_match";
+			my $reverselocal = $localfile;
+			$reverselocal =~ s/\-chr([0-9a-zA-Z]+)\./-chr$1r./g;
+			merge_interruptedMicrosats($nogap_sequence,$localfile, $reverselocal ,scalar(@sp_tags)); 
+			#-------------------------------------------------------------------------------
+			my $forward_separate = $localfile."_separate";
+			my $reverse_separate = $reverselocal."_separate";
+			my $diff = $forward_separate."_diff";
+			my $miss = $forward_separate."_miss";
+			my $common = $forward_separate."_common";
+			forward_reverse_sputoutput_comparer($nogap_sequence,$forward_separate, $reverse_separate, $diff, $miss, $common ,scalar(@sp_tags)); 
+			#-------------------------------------------------------------------------------
+			my $symmetrical_file = $localfile."_symmetrical";
+			my $merged_file = $localfile."_merged";
+			#print "cating: $merged_file $common  into -> $symmetrical_file \n"; 
+			`cat $merged_file $common > $symmetrical_file`;
+			#-------------------------------------------------------------------------------
+			my $t10output = $symmetrical_file."_fin_hit_all_2";
+			new_multispecies_t10($gap_sequence, $symmetrical_file, $t10output, join(".", @orgs));
+			#-------------------------------------------------------------------------------		
+		}
+		next if $toescape == 1;
+		#------------------------------------------------------------------------------------------------
+		# BRINGING IT ALL TOGETHER: FINDING ORTHOLOGOUS MICROSATELLITES AMONG THE SPECIES
+		
+		
+		my @micros_array = ();
+		my $sampletag = ();
+		for my $i (0 ... $#sp_tags){
+			my $finhitFile = $pipedir.$sp_tags[$i]."_allmicrosats_symmetrical_fin_hit_all_2";
+			push(@micros_array, $finhitFile);
+			$sampletag = $sp_tags[$i];
+		}
+		#$sampletag =~ s/^([A-Z]+\.)/ORTH_/;
+		#$sampletag = $sampletag."_monoThresh-".$mono_threshold."bp";
+		my $orthfiletemp = $ptag."_orthfile";
+		my $orthanswer = multiSpecies_orthFinder4($t1input, join(":",@micros_array), $orthfiletemp, join(":", @orgs), $separation);
+
+		my $maskedorthfiletemp = $ptag."_orthfile_masked";
+		qualityFilter ($orthfiletemp, $chr_name_sputt, $maskedorthfiletemp);
+
+		push @outputfiles , $maskedorthfiletemp;
+	}
+	$date = `date`;
+}
+
+`cat @outputfiles > $orthfile`;
+
+my $rootdir = $dir;
+$rootdir =~ s/\/[A-Za-z0-9\-_]+$//;
+chdir $rootdir;
+remove_tree($dir);
+
+#print "date = $date\n";
+#remove_tree($tdir);
+#------------------------------------------------------------------------------------------------
+#------------------------------------------------------------------------------------------------
+#------------------------------------------------------------------------------------------------
+#------------------------------------------------------------------------------------------------
+
+#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
+
+sub maftoAxt_multispecies {
+	#print "in maftoAxt_multispecies : got @_\n";
+	my $fname=$_[0];
+	open(IN,"<$_[0]") or die "Cannot open $_[0]: $! \n";
+	my $treedefinition = $_[1];
+	open(OUT,">$_[2]") or die "Cannot open $_[2]: $! \n";
+	my $counter = 0;
+	my $exactspeciesset = $_[3];
+	my @exactspeciesset_unarranged = split(/,/,$exactspeciesset);
+	
+	$treedefinition=~s/[\)\(, ]/\t/g;
+	my @species=split(/\t+/,$treedefinition);
+	my @exactspecies=();
+	
+	foreach my $spec (@species){
+		foreach my $espec (@exactspeciesset_unarranged){
+			push @exactspecies, $spec if $spec eq $espec;
+		}
+	}
+	#print "exactspecies=@exactspecies\n";
+	
+	###########
+	my $select = 2;  
+	#select = 1 if all species need sequences to be present for each block otherwise, it is 0
+	#select = 2 only the allowed set make up the alignment. use the removeset
+	# information to detect alignmenets that have other important genomes aligned.
+	###########
+	my @allowedset = ();
+	@allowedset = split(/;/,allowedSetOfSpecies(join("_",@species))) if $select == 0;
+	@allowedset = join("_",0,@species) if $select == 1;
+	#print "species = @species , allowedset =",join("\n", @allowedset) ," \n"; 
+	@allowedset = join("_",0,@exactspecies) if $select == 2;
+	#print "allowedset = @allowedset and exactspecies = @exactspecies\n";
+	
+	my $start = 0;
+	my @sequences = ();
+	my @titles = ();
+	my $species_counter = "0";
+	my $countermatch = 0;
+	my $outsideSpecies=0;
+	
+	while(my $line = <IN>){
+#		print $line;
+		next if $line =~ /^#/;
+		next if $line =~ /^i/;
+		chomp $line;
+		my @fields = split(/\s+/,$line);
+		chomp $line;
+		if ($line =~ /^a /){
+			$start = 1;
+		}
+		
+		if ($line =~ /^s /){
+		
+			foreach my $sp (@allspecies){
+#				print "checking species $sp\n";
+				if ($fields[1] =~ /$sp/){
+					$species_counter = $species_counter."_".$sp;
+					push(@sequences, $fields[6]);
+					my @sp_info = split(/\./,$fields[1]);
+					my $title = join(" ",@sp_info, $fields[2], ($fields[2]+$fields[3]), $fields[4]);
+					push(@titles, $title);				
+#					print "species_counter = $species_counter\n";
+				}
+			}
+		}
+		
+		if (($line !~ /^a/) && ($line !~ /^s/) && ($line !~ /^#/) && ($line !~ /^i/) && ($start = 1)){
+#			print "species_counter = $species_counter\n"; 
+			my $arranged = reorderSpecies($species_counter, @allspecies);
+			my $stopper = 1;
+			my $arrno = 0;
+			
+#			print "checking if ", scalar(@sequences), " match @exactspecies allowedset=@allowedset\n";
+			if (scalar(@sequences) == scalar(@exactspecies)){
+				foreach my $set (@allowedset){
+#					print "testing $arranged against $set\n";
+					if ($arranged eq $set){
+						$stopper = 0; last;
+					}
+					$arrno++;
+				}
+			}
+			else{
+				$stopper = 1;
+			}
+			
+			
+			if ($stopper == 0) {
+				@titles = split ";", orderInfo(join(";", @titles), $species_counter, $arranged) if $species_counter ne $arranged;				
+				@sequences = split ";", orderInfo(join(";", @sequences), $species_counter, $arranged) if $species_counter ne $arranged;				
+				my $filteredseq = filter_gaps(@sequences);
+				
+				if ($filteredseq ne "SHORT"){
+					#print "printing"; <STDIN>;
+					$counter++;
+					print OUT join (" ",$counter, @titles), "\n";
+					print OUT $filteredseq, "\n";
+					print OUT "\n"; 
+					$countermatch++;
+				}
+			}
+			else{ #print "nexting\n";<STDIN>;
+			}
+	
+			@sequences = (); @titles = (); $start = 0;$species_counter = "0";
+			next;		
+			
+		}
+	}
+#	print "countermatch = $countermatch\n";
+}
+
+sub reorderSpecies{
+	my @inarr=@_;
+	my $currSpecies = shift (@inarr);
+	my $ordered_species = 0;
+	my @species=@inarr;
+	#print "species = @species\n";
+	foreach my $order (@species){
+		$ordered_species = $ordered_species."_".$order	if	$currSpecies=~ /$order/;
+	}
+	return $ordered_species;
+
+}
+
+sub filter_gaps{
+	my @sequences = @_;
+#	print "sequences sent are @sequences\n";
+	my $seq_length = length($sequences[0]);
+	my $seq_no = scalar(@sequences);
+	my $allgaps = ();
+	for (1 ... $seq_no){
+		$allgaps = $allgaps."-";
+	}
+	
+	my @seq_array = ();
+	my $seq_counter = 0;
+	foreach my $seq (@sequences){
+#		my @sequence = split(/\s*/,$seq);
+		$seq_array[$seq_counter] = [split(/\s*/,$seq)];
+#		push @seq_array, [@sequence];
+		$seq_counter++;
+	}
+	my $g = 0;
+	while ( $g < $seq_length){
+		last if (!exists $seq_array[0][$g]);
+		my $bases = ();
+		for my $u (0 ... $#seq_array){
+			$bases = $bases.$seq_array[$u][$g];
+		}	
+#		print $bases, "\n";
+		if ($bases eq $allgaps){
+#			print "bases are $bases, position is $g \n";
+			for my $seq (@seq_array){
+				splice(@$seq , $g, 1);
+			}
+		}
+		else {
+			$g++;
+		}
+	}
+	
+	my @outs = ();
+	
+	foreach my $seq (@seq_array){
+		push(@outs, join("",@$seq));
+	}
+	return "SHORT" if length($outs[0]) <=100;
+	return (join("\n", @outs));	
+}
+
+
+sub allowedSetOfSpecies{
+	my @allowed_species = split(/_/,$_[0]);
+	unshift @allowed_species, 0;
+#	print "allowed set = @allowed_species \n";
+	my @output = ();
+	for (0 ... scalar(@allowed_species) - 4){
+		push(@output, join("_",@allowed_species));
+		pop @allowed_species;
+	}
+	return join(";",reverse(@output));
+
+}
+
+
+sub orderInfo{
+	my @info = split(/;/,$_[0]);
+#	print "info = @info";
+	my @old = split(/_/,$_[1]);
+	my @new = split(/_/,$_[2]);
+	shift @old; shift @new;
+	my @outinfo = ();
+	foreach my $spe (@new){
+		for my $no (0 ... $#old){
+			if ($spe eq $old[$no]){
+				push(@outinfo, $info[$no]);
+			}
+		}
+	}
+#	print "outinfo = @outinfo \n"; 
+	return join(";", @outinfo);
+}
+
+#xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx xxxxxxx maftoAxt_multispecies xxxxxxx 
+
+#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 
+sub artificial_axdata_inverter{
+	open(IN,"<$_[0]") or die "Cannot open file $_[0]: $!";
+	open(OUT,">$_[1]") or die "Cannot open file $_[1]: $!";
+	my $linecounter=0;
+	while (my $line = <IN>){
+		$linecounter++;
+		#print "$linecounter\n";
+		chomp $line;	
+		my $final_line = $line;	
+		my $trycounter = 0;
+		if ($line =~ /^[a-zA-Z\-]/){
+		#	while ($final_line eq $line){
+				my @fields = split(/\s*/,$line);
+				
+				$final_line = join("",reverse(@fields));
+		#		print colored ['red'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
+		#		$trycounter++;
+		#		print "trying again....$trycounter : $final_line\n" if $final_line eq $line;
+		#	}
+		}
+		
+	#	print colored ['yellow'], "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
+		if ($line =~ /^[0-9]/){
+			$line =~ s/chr([A-Z0-9a-b]+)/chr$1r/g;
+			$final_line = $line;
+		}
+		print OUT $final_line,"\n";
+		#print "$line\n$final_line\n" if $final_line eq $line && $line !~ /chr/ && $line =~ /[a-zA-Z]/;
+	}
+	close OUT;
+}
+#xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx xxxxxxx artificial_axdata_inverter xxxxxxx 
+
+
+#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 
+
+sub multi_species_t1 {
+
+	my $input1 = $_[0];
+	#print "@_\n"; <STDIN>;
+	my @tags = split(/_/, $_[1]);
+	my @outputs = split(/,/, $_[2]);
+	my $title_query = $_[3];
+	my @handles = ();
+	
+	open(FILEB,"<$input1")or die "Cannot open file: $input1 $!";
+	my $i = 0;
+	foreach my $path (@outputs){
+		$handles[$i] = IO::Handle->new();
+		open ($handles[$i], ">$path") or die "Can't open $path : $!";
+		$i++;
+	}
+	
+	my $curdef;
+	my $start = 0;
+	
+	while (my $line = <FILEB> ) {
+		if ($line =~ /^\d/){
+			$line =~ s/ +/\t/g;
+			my @fields = split(/\s+/, $line);
+			if (($line =~ /$title_query/)){
+				my $title = $line;
+				my $counter = 0;
+				foreach my $tag (@tags){
+					$line = <FILEB>;
+					print {$handles[$counter]} ">",$tag,"\t",$title, " ",$line;  	
+					$counter++;
+				}
+			}
+			else{
+					foreach my $tag (@tags){
+					my $tine = <FILEB>;
+				}		
+			}
+		
+		}
+	}
+	
+	foreach my $hand (@handles){
+		$hand->close(); 
+	}
+	
+	close FILEB;
+}
+
+#xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx xxxxxxx multi_species_t1 xxxxxxx 
+
+#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 
+
+sub multi_species_t2{
+	
+	my $input = $_[0];
+	my $species = $_[1];
+	my $output1 = $input."_unr";
+	
+	#------------------------------------------------------------------------------------------
+	open (FILEF1, "<$input") or die "Cannot open file $input :$!";
+	open (FILEF2, ">$output1") or die "Cannot open file $output1 :$!";
+	
+	my $line1 = <FILEF1>;
+	
+	while($line1){
+	{
+	#    chomp($line);
+		if ($line1 =~ (m/^\>$species/)){
+		chomp($line1);
+		print FILEF2 $line1;
+		$line1 = <FILEF1>;
+		chomp($line1);
+		print FILEF2 "\t", $line1,"\n";
+	   }   
+	}
+	$line1 = <FILEF1>;
+	}
+	
+	close FILEF1;
+	close FILEF2;
+	#------------------------------------------------------------------------------------------
+	
+	my $output2 = $output1."and";
+	my $output3 = $output1."and2";
+	open(IN,"<$output1");
+	open (FILEF3, ">$output2");
+	open (FILEF4, ">$output3");
+	
+	
+	while (<IN>){
+		my $line = $_;
+		chomp($line);
+		my @fields=split (/\t/, $line);
+	  #  print $line,"\n"; <STDIN>;
+		if($line !~ /random/){
+			print FILEF3 join ("\t",@fields[0 ... scalar(@fields)-2]), "\n", $fields[scalar(@fields)-1], "\n";
+			print FILEF4 join ("\t",@fields[0 ... scalar(@fields)-2]), "\t", $fields[scalar(@fields)-1], "\n";	
+		}
+	}
+	
+	
+	close IN;
+	close FILEF3;
+	close FILEF4;
+	unlink $output1;
+	
+	#------------------------------------------------------------------------------------------
+	# OLD T3.PL RUDIMENT
+	
+	my $t3output = $output2;
+	$t3output =~ s/gap_op_unrand/nogap_op_unrand/g;
+	
+	open(IN,"<$output2");
+	open(OUTA,">$t3output");
+	
+	
+	while (<IN>){
+		s/-//g unless /^>/;
+		print OUTA;
+	}
+	
+	close IN;
+	close OUTA;
+	#------------------------------------------------------------------------------------------
+}
+#xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx xxxxxxx multi_species_t2 xxxxxxx 
+
+
+#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxxmulti_species_t2_2 xxxxxxx 
+sub multi_species_t2_2{
+	#print "IN multi_species_t2_2 : @_\n";
+	my $input = $_[0];
+	my $species = $_[1];
+	my $output1 = $input."2";
+	
+	
+	open (FILEF1, "<$input");
+	open (FILEF2, ">$output1");
+	
+	my $line1 = <FILEF1>;
+	
+	while($line1){
+	{
+	#    chomp($line);
+		if ($line1 =~ (m/^\>$species/)){
+		chomp($line1);
+		print FILEF2 $line1;
+		$line1 = <FILEF1>;
+		chomp($line1);
+		print FILEF2 "\t", $line1,"\n";
+	   }   
+	}
+	$line1 = <FILEF1>;
+	}
+	
+	close FILEF1;
+	close FILEF2;
+}
+
+#xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx xxxxxxx multi_species_t2_2 xxxxxxx 
+
+
+#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 
+sub sputnikoutput_corrector{
+	my $input = $_[0];
+	my $output = $_[1];
+	open(IN,"<$input") or die "Cannot open file $input :$!";
+	open(OUT,">$output") or die "Cannot open file $output :$!";
+	my $tine;
+	while (my $line=<IN>){
+		if($line =~/length /){
+			$tine = $line;
+			$tine =~ s/\s+/\t/g;
+			my @fields = split(/\t/,$tine);	
+			if ($fields[6] > 60){
+				print OUT $line;
+				$line = <IN>;
+			
+				while (($line !~ /nucleotide/) && ($line !~ /^>/)){
+					chomp $line;
+					print OUT $line;
+					$line = <IN>;
+				}
+				print OUT "\n";
+				print OUT $line;
+			}
+			else{
+				print OUT $line;
+			}
+		}
+		else{
+			print OUT $line;
+		}
+	}
+	close IN;
+	close OUT;
+}
+#xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx xxxxxxx sputnikoutput_corrector xxxxxxx 
+
+
+#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 
+sub multi_species_t4{
+#	print "multi_species_t4 : @_\n";
+	my $input = $_[0];
+	my $output = $_[1];
+	open (FILEA, "<$input");
+	open (FILEB, ">$output");
+	
+	my $line = <FILEA>;
+	
+	while ($line) {
+	   # chomp $line;
+		if ($line =~ />/) {
+			chomp $line;
+			print FILEB $line, "\n"; 
+		}
+	
+		
+		if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
+		chomp $line;
+		print FILEB $line, " " ;
+		$line = <FILEA>;
+		chomp $line;
+		print FILEB $line,"\n";
+		}
+	
+		$line = <FILEA>;
+	}
+	
+	
+	close FILEA;
+	close FILEB;
+
+}
+
+#xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx xxxxxxx multi_species_t4 xxxxxxx 
+
+
+#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 
+sub multi_species_t5{
+	
+	my $input = $_[0];
+	my $output = $_[1];
+	
+	open(FILEB,"<$input");
+	open(FILEC,">$output");
+	
+	my $curdef;
+	
+	while (my $line = <FILEB> ) {
+	
+		if ($line =~ /^>/){
+		chomp $line;
+		$curdef = $line;
+		next;
+	}
+	
+	if ($line =~ /^m/ | $line =~ /^d/ | $line =~ /^t/ | $line =~ /^p/){
+		print  FILEC $curdef," ",$line;
+	}
+	
+	}
+	
+	
+	close FILEB;
+	close FILEC;
+
+}
+#xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx xxxxxxx multi_species_t5 xxxxxxx 
+
+
+#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 
+sub multi_species_t6{
+	my $input = $_[0];
+	my $output = $_[1];	
+	my $focalstrand=$_[3];
+#	print "inpput = @_\n"; 
+	open (FILE, "<$input");
+	open (FILE_MICRO, ">$output");
+	my $linecounter=0;	
+	while (my $line = <FILE>){
+		$linecounter++;
+		chomp $line;
+		#print "line = $line\n";
+		#MONO#
+		$line =~ /$focalspec\s[a-zA-Z]+[0-9a-zA-Z]+\s[0-9]+\s[0-9]+\s([+\-])/;
+		my $strand=$1;
+		my $no_of_species = ($line =~ s/\s+[+\-]\s+/ /g);
+		#print "line = $line\n";
+		my $specfieldsend = 2 + ($no_of_species*4) - 1;
+		my @fields = split(/\s+/, $line);
+		my @speciesdata = @fields[0 ... $specfieldsend];
+		$line =~ /([a-z]+nucleotide)\s([0-9]+)\s:\s([0-9]+)/;
+		my ($tide, $start, $end) = ($1, $2, $3);
+		#print "no_of_species=$no_of_species.. speciesdata = @speciesdata and ($tide, $start, $end)\n";
+		if($line  =~ /mononucleotide/){
+			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  mono($fields[$#fields]),),"\n";
+		}
+		#DI#	
+		elsif($line =~ /dinucleotide/){
+			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  di($fields[$#fields]),),"\n";
+		}
+		#TRI#	
+		elsif($line =~ /trinucleotide/ ){
+			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tri($fields[$#fields]),),"\n";
+		}
+		#TETRA#
+		elsif($line =~ /tetranucleotide/){
+			print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  tetra($fields[$#fields]),),"\n";
+		}
+		#PENTA#
+		elsif($line =~ /pentanucleotide/){
+			#print FILE_MICRO join("\t",@speciesdata, $tide, $start, $strand,$end, $fields[$#fields],  penta($fields[$#fields]),),"\n";
+		}
+		else{
+		#	print "not: @fields\n";
+		}
+	}
+#	print "linecounter=$linecounter\n"; 
+	close FILE;
+	close FILE_MICRO;
+}
+
+sub mono {
+	my $st = $_[0];
+	my $tp = unpack "A1"x(length($st)/1),$st;
+	my $var1 = substr($tp, 0, 1);
+	return join ("\t", $var1);
+}
+sub di {
+	my $st = $_[0];
+	my $tp = unpack "A2"x(length($st)/2),$st;
+	my $var1 = substr($tp, 0, 2);
+	return join ("\t", $var1);
+}
+sub tri {
+	my $st = $_[0];
+	my $tp = unpack "A3"x(length($st)/3),$st;
+	my $var1 = substr($tp, 0, 3);
+	return join ("\t", $var1);
+}
+sub tetra {
+	my $st = $_[0];
+	my $tp = unpack "A4"x(length($st)/4),$st;
+	my $var1 = substr($tp, 0, 4);
+	return join ("\t", $var1);
+}
+sub penta {
+	my $st = $_[0];
+	my $tp = unpack "A5"x(length($st)/5),$st;
+	my $var1 = substr($tp, 0, 5);
+	return join ("\t", $var1);
+}
+
+#xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx xxxxxxx multi_species_t6 xxxxxxx 
+
+
+#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 
+sub t9{
+	my $input1 = $_[0];
+	my $input2 = $_[1];
+	my $output = $_[2];
+	
+	
+	open(IN1,"<$input1") if -e $input1; 
+	open(IN2,"<$input2") or die "cannot open file $_[1] : $!";
+	open(OUT,">$output") or die "cannot open file $_[2] : $!"; 
+	
+	
+	my %seen = ();
+	my $prevkey = 0;
+	
+	if (-e $input1){
+		while (my $line = <IN1>){
+			chomp($line);
+			my @fields = split(/\t/,$line);
+			my $key1 = join ("_K10K1_",@fields[0,1,3,4,5]);
+		#	print "key in t9 = $key1\n";
+			$seen{$key1}++	if ($prevkey ne $key1) ;
+			$prevkey = $key1;
+		}
+#		print "done first hash\n";
+		close IN1;
+	}
+	
+	while (my $line = <IN2>){
+	#	print $line, "**\n";
+		if (-e $input1){
+			chomp($line);
+			my @fields = split(/\t/,$line);
+			my $key2 = join ("_K10K1_",@fields[0,1,3,4,5]);
+			if (exists $seen{$key2}){
+				print OUT "$line\n"	;
+				delete $seen{$key2};
+			}
+		}
+		else {
+			print OUT "$line\n"	;		
+#			print  "$line\n"	;		
+		}
+	}
+	
+	close IN2;
+	close OUT;
+}
+#xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx xxxxxxxxxxxxxx t9 xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 
+
+
+sub multiSpecies_compound_microsat_hunter3{	
+	
+	my $input1 = $_[0];  ###### the *_sput_op4_ii file
+	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
+	my $output1 = $_[2]; ###### plain microsatellite file
+	my $output2 = $_[3]; ###### compound microsatellite file
+	my $org = $_[4]; ###### 1 or 2
+	$no_of_species = $_[5];
+	#print "IN multiSpecies_compound_microsat_hunter3: @_\n"; 
+	#my @tags = split(/\t/,$info);
+	sub compoundify;
+	open(IN,"<$input1") or die "Cannot open file $input1 $!";
+	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
+	open(OUT,">$output1") or die "Cannot open file $output1 $!";
+	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	
+	my @thresholds = ("0");
+	push(@thresholds, split(/_/,$_[6]));
+	sub thresholdCheck;
+	my %micros = ();
+	while (my $line = <IN>){
+	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
+		next if $line =~ /\t\t/;
+		if ($line =~ /^>[A-Za-z0-9_]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $3, $4, $5);
+		#	print $key, "#-#-#-#-#-#-#-#\n";
+			push (@{$micros{$key}},$line);		
+		}
+		else{
+		}
+	}
+	close IN;
+	my @deletedlines = ();
+	
+	my $linecount = 0;
+	
+	while(my $sine = <SEQ>){
+		my %microstart=();
+		my %microend=();
+	
+		my @sields = split(/\t/,$sine);
+	
+		my $key = ();
+	
+		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([a-zA-Z0-9]+)\s([a-zA-Z]+[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$key = join("\t",$1, $2, $3, $4, $5);
+		#	print $key, "<-<-<-<-<-<-<-<\n";		
+		}
+		else{
+		}
+	
+		if (exists $micros{$key}){
+			$linecount++;	
+			my @microstring = @{$micros{$key}};
+			my @tempmicrostring = @{$micros{$key}};
+			
+			foreach my $line (@tempmicrostring){
+				my @fields = split(/\t/,$line);
+				my $start = $fields[$startcord];
+				my $end = $fields[$endcord];
+				push (@{$microstart{$start}},$line);
+				push (@{$microend{$end}},$line);	
+			}
+			my $firstflag = 'down';
+			while( my $line =shift(@microstring)){
+	#			print "-----------\nline = $line ";
+				chomp $line;
+				my @fields = split(/\t/,$line);
+				my $start = $fields[$startcord];
+				my $end = $fields[$endcord];
+				my $startmicro = $line;
+				my $endmicro = $line;
+	
+			#	print "fields=@fields, start = $start end=$end, startcord=$startcord, endcord=$endcord\n";
+	
+				delete ($microstart{$start});
+				delete ($microend{$end});
+				my $flag = 'down';	
+				my $startflag = 'down';
+				my $endflag = 'down';
+				my $prestart = $start - $distance;
+				my $postend = $end + $distance;
+				my @compoundlines = ();
+				my %compoundhash = ();
+				push (@compoundlines, $line);
+				push (@{$compoundhash{$line}},$line);
+				my $startrank = 1;
+				my $endrank = 1;
+				
+				while( ($startflag eq "down") || ($endflag eq "down") ){
+				if ((($prestart < 0) && $firstflag eq "up") || (($postend > length($sields[$sequencepos])) && $firstflag eq "up") ) {
+#					print "coming to the end of sequence,prestart = $prestart &  post end = $postend and sequence length =", length($sields[$sequencepos])," so exiting\n";
+					last;
+				}
+					
+				$firstflag = "up";
+				if ($startflag eq "down"){		
+					for my $i ($prestart ... $start){
+					
+						if(exists $microend{$i}){	
+							chomp $microend{$i}[0];
+							if(exists $compoundhash{$microend{$i}[0]}) {next;}
+	#						print "sending from microend $startmicro, $microend{$i}[0] |||\n";
+							if (identityMatch_thresholdCheck($startmicro, $microend{$i}[0], $startrank) eq "proceed"){
+								push(@compoundlines, $microend{$i}[0]);
+	#							print "accepted\n";
+								my @tields = split(/\t/,$microend{$i}[0]);
+								$startmicro = $microend{$i}[0];
+								chomp $startmicro;
+								$start = $tields[$startcord];
+								$flag = 'down';
+								$startrank++;
+	#							print "startcompund = $microend{$i}[0]\n";
+								delete $microend{$i};
+								delete $microstart{$start};
+								$startflag = 'down';
+								$prestart = $start - $distance;
+								last;
+							}	
+							else{
+								$flag = 'up';
+								$startflag = 'up';							
+							}
+						}
+						else{
+							$flag = 'up';
+							$startflag = 'up';
+						}
+					}
+				}
+					
+				$endrank = $startrank;
+				
+				if ($endflag eq "down"){				
+					for my $i ($end ... $postend){
+					
+						if(exists $microstart{$i} ){
+							chomp $microstart{$i}[0];
+							if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
+	#						print "sending from microstart $endmicro, $microstart{$i}[0] |||\n";
+	
+							if(identityMatch_thresholdCheck($endmicro,$microstart{$i}[0], $endrank) eq "proceed"){
+								push(@compoundlines, $microstart{$i}[0]);
+	#								print "accepted\n";
+								my @tields = split(/\t/,$microstart{$i}[0]);
+								$end = $tields[$endcord]-0;
+								$endmicro = $microstart{$i}[0];
+								$endrank++;
+								chomp $endmicro;
+								$flag = 'down';
+	#							print "endcompund = $microstart{$i}[0]\n";
+								delete $microstart{$i};
+								delete $microend{$end};
+								shift @microstring;
+								$postend = $end + $distance;
+								$endflag = 'down';
+								last;								
+							}
+							else{
+								$flag = 'up';
+								$endflag = 'up';							
+							}
+						}
+						else{
+							$flag = 'up';
+							$endflag = 'up';
+						}
+					}
+				}
+	#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
+			} 														#end while( $flag eq "down")
+	#			print "compoundlines = @compoundlines \n";
+			if (scalar (@compoundlines) == 1){
+				print OUT $line,"\n";			
+			}
+			if (scalar (@compoundlines) > 1){
+				my $compoundline = compoundify(\@compoundlines, $sields[$sequencepos]);
+	#				print $compoundline,"\n";
+				print OUT2 $compoundline,"\n";
+			}
+			} #end foreach my $line (@microstring){
+		}	#if (exists $micros{$key}){
+	
+	
+	}
+	
+	close OUT;
+	close OUT2;
+}
+
+
+#------------------------------------------------------------------------------------------------
+sub compoundify{
+	my ($compoundlines, $sequence)  = @_;
+#	print "\nfound to compound : @$compoundlines and$sequence \n";
+	my $noOfComps = @$compoundlines;
+#	print "Number of elements in hash is $noOfComps\n";
+	my @starts;
+	my @ends;
+	foreach my $line (@$compoundlines){
+#		print "compoundify.. line = $line \n";
+		chomp $line;
+		my @fields = split(/\t/,$line);
+		my $start = $fields[$startcord];
+		my $end = $fields[$endcord];
+	#	print "start = $start, end = $end \n";
+		push(@starts, $start);
+		push(@ends,$end);		
+	}
+	my @temp = @$compoundlines;
+	my $startline=$temp[0];
+	my @mields  = split(/\t/,$startline);
+	my $startcoord = $mields[$startcord];
+	my $startgapsign=$mields[$endcord];
+	my @startsorted = sort { $a <=> $b } @starts;
+	my @endsorted = sort { $a <=> $b } @ends;
+	my @intervals;
+	for my $end (0 ... (scalar(@endsorted)-2)){
+		my $interval = substr($sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1));
+		push(@intervals,$interval);
+	#	print "interval = $interval =\n";
+	#	print "substr(sequence,($endsorted[$end]+1),(($startsorted[$end+1])-($endsorted[$end])-1))\n";
+	}
+	push(@intervals,"");
+	my $compoundmicrosat=();
+	my $multiunit="";
+	foreach my $line (@$compoundlines){
+		my @fields = split(/\t/,$line);
+		my $component="[".$fields[$microsatcord]."]".shift(@intervals);
+		$compoundmicrosat=$compoundmicrosat.$component;
+		$multiunit=$multiunit."[".$fields[$motifcord]."]";
+#		print "multiunit = $multiunit\n";
+	}
+	my $compoundcopy = $compoundmicrosat;
+	$compoundcopy =~ s/\[|\]//g;
+	my $compoundlength = $mields[$startcord] + length($compoundcopy) - 1;
+	
+	
+	my $compoundline = join("\t",(@mields[0 ... $infocord], "compound",@mields[$startcord ... $startcord+1],$compoundlength,$compoundmicrosat, $multiunit));
+	return $compoundline;
+}	
+
+#------------------------------------------------------------------------------------------------
+
+sub identityMatch_thresholdCheck{
+	my $line1 = $_[0];
+	my $line2 = $_[1];
+	my $rank = $_[2];
+	my @lields1 = split(/\t/,$line1);
+	my @lields2 = split(/\t/,$line2);
+#	print "recieved $line1 && $line2\n motif comparison: ", length($lields1[$motifcord])," : ",length($lields2[$motifcord]),"\n";
+	
+	if (length($lields1[$motifcord]) == length($lields2[$motifcord])){
+		my $probe = $lields1[$motifcord].$lields1[$motifcord];
+			#print "$probe :: $lields2[$motifcord]\n";
+		return "proceed" if $probe =~ /$lields2[$motifcord]/;
+			#print "line recieved\n";
+		if ($rank ==1){
+			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
+		}
+		else {
+			return "proceed" if thresholdCheck($line2) eq "proceed";
+			return "stop";
+		}
+	}
+	else{
+		if ($rank ==1){
+			return "proceed" if thresholdCheck($line1) eq "proceed" && thresholdCheck($line2) eq "proceed";
+		}
+		else {
+			return "proceed" if thresholdCheck($line2) eq "proceed";
+			return "stop";
+		}
+	}
+	return "stop";
+}
+#------------------------------------------------------------------------------------------------
+
+sub thresholdCheck{
+	my @checkthresholds=(0,@thresholds);
+	#print "IN thresholdCheck: @_\n";
+	my $line = $_[0];
+	my @lields = split(/\t/,$line);
+	return "proceed" if length($lields[$microsatcord]) >= $checkthresholds[length($lields[$motifcord])];
+	return "stop";
+}
+#xxxxxxxxxxxxxx multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx  multiSpecies_compound_microsat_hunter3 xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 
+
+sub multiSpecies_filtering_interrupted_microsats{
+#	print "IN multiSpecies_filtering_interrupted_microsats: @_\n";
+	my $unfiltered = $_[0];
+	my $filtered = $_[1];
+	my $residue = $_[2];
+	my $no_of_species = $_[5];
+	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
+	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
+	open(RES,">$residue") or die "Cannot open file $residue: $!";
+		
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	
+	
+	my @sub_thresholds = (0);
+	
+	push(@sub_thresholds, split(/_/,$_[3]));
+	my @thresholds = (0);
+	
+	push(@thresholds, split(/_/,$_[4]));
+	
+	while (my $line = <UNF>) {
+		next if $line !~ /[a-z]/;
+		#print $line;
+		chomp $line;
+		my @fields = split(/\t/,$line);
+		my $motif = $fields[$motifcord];
+		my $realmotif = $motif;
+		#print "motif = $motif\n";
+		if ($motif =~ /^\[/){
+			$motif =~ s/^\[//g;
+			my @motifs = split(/\]/,$motif);
+			$realmotif = $motifs[0];
+		}
+#		print "realmotif = $realmotif";
+		my $motif_size = length($realmotif);
+		
+		my $microsat = $fields[$microsatcord];
+#		print "microsat = $microsat\n";
+		$microsat =~ s/^\[|\]$//sg;
+		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);
+		
+		$microsat = join("",@microsats);
+		if (length($microsat) < $thresholds[$motif_size]) {
+		#	print length($microsat)," < ",$thresholds[$motif_size],"\n"; 
+			print RES $line,"\n"; next;
+		}
+		my @lengths = ();
+		foreach my $mic (@microsats){
+			push(@lengths, length($mic));
+		}
+		if (largest_microsat(@lengths) < $sub_thresholds[$motif_size]) {
+	#		print largest_microsat(@lengths)," < ",$sub_thresholds[$motif_size],"\n"; 
+			print RES $line,"\n"; next;}
+			else {print FIL $line,"\n"; next;
+		}
+	}
+	close FIL;
+	close RES;
+
+}
+
+sub largest_microsat{
+	my $counter = 0;
+	my($max) = shift(@_);
+    foreach my $temp (@_) {
+    	#print "finding largest array: $maxcounter \n";
+    	if($temp > $max){
+        	$max = $temp;
+        }
+    }
+    return($max);
+}
+
+#xxxxxxxxxxxxxx multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx  multiSpecies_filtering_interrupted_microsats xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 
+sub multiSpecies_compound_microsat_analyzer{
+	####### PARAMETER ########
+	##########################
+	
+	my $input1 = $_[0];  ###### the *_sput_op4_ii file
+	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
+	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
+	my $output2 = $_[3]; ###### the pure compound microsatellites
+	my $org = $_[4];
+	my $no_of_species = $_[5];
+#	print "IN multiSpecies_compound_microsat_analyzer: $input1\n $input2\n $output1\n $output2\n $org\n $no_of_species\n";
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	
+	open(IN,"<$input1") or die "Cannot open file $input1 $!";
+	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
+	
+	open(OUT,">$output1") or die "Cannot open file $output1 $!";
+	open(OUT2,">$output2") or die "Cannot open file $output2 $!";
+	
+	
+#	print "opened files \n";
+	my %micros = ();
+	my $keycounter=0;
+	my $linecounter=0;
+	while (my $line = <IN>){
+		$linecounter++;
+		if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+			push (@{$micros{$key}},$line);		
+			$keycounter++;
+		}
+		else{
+	#		print "no key\n";
+		}
+	}
+	close IN;
+	my @deletedlines = ();
+#	print "done hash . linecounter=$linecounter, keycounter=$keycounter\n";
+	#---------------------------------------------------------------------------------------------------
+	# NOW READING THE SEQUENCE FILE
+	my $keyfound=0;
+	my $keyexists=0;
+	my $inter=0;
+	my $pure=0;
+	
+	while(my $sine = <SEQ>){
+		my %microstart=();
+		my %microend=();
+		my @sields = split(/\t/,$sine);
+		my $key = 0;
+		if ($sine =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$key = join("\t",$1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+		#	print $sine;
+		#	print $key;
+			$keyfound++;
+		}
+		else{
+		
+		}
+		#<STDIN> if !defined $key;
+
+		if (exists $micros{$key}){
+			$keyexists++;
+			my @microstring = @{$micros{$key}};
+	
+			my @filteredmicrostring;
+			
+			foreach my $line (@microstring){
+				chomp $line;
+				my $copy_line = $line;
+				my @fields = split(/\t/,$line);
+				my $start = $fields[$startcord];
+				my $end = $fields[$endcord];				
+				# FOR COMPOUND MICROSATELLITES	
+				if ($fields[$typecord] eq "compound"){					
+					$line = compound_microsat_analyser($line);	
+					if ($line eq "NULL") {
+						print OUT2 "$copy_line\n";
+						$pure++;
+						next;
+					}
+					else{
+						print OUT "$line\n";
+						$inter++;
+						next;
+					}
+				}
+			}
+			
+		}	#if (exists $micros{$key}){
+	}
+	close OUT;
+	close OUT2;
+#	print "keyfound=$keyfound, keyexists=$keyexists, pure=$pure, inter=$inter\n";
+}
+	
+sub compound_microsat_analyser{
+	my $line = $_[0];
+	my @fields = split(/\t/,$line);
+	my $motifline = $fields[$motifcord];
+	my $microsat = $fields[$microsatcord];
+	$motifline =~ s/^\[|\]$//g;
+	$microsat =~ s/^\[|\]$//g;
+	$microsat =~ s/-//g;
+	my @interruptions = ();
+	my @motields = split(/\]\[/,$motifline);
+	my @microields = split(/\][a-zA-Z|-]*\[/,$microsat);
+	my @inields = split(/[.*]/,$microsat);
+	shift @inields;
+	my @motifcount = scalar(@motields);
+	my $prevmotif = $motields[0];
+	my $prevmicro = $microields[0];
+	my $prevphase = substr($microields[0],-(length($motields[0])),length($motields[0]));
+	my $localflag = 'down';
+	my @infoarray = ();
+	
+	for my $l (1 ... (scalar(@motields)-1)){
+		my $probe = $prevmotif.$prevmotif;
+		if (length $prevmotif != length $motields[$l]) {$localflag = "up"; last;}
+		
+		if ($probe =~ /$motields[$l]/i){ 
+			my $curr_endphase = substr($microields[$l],-length($motields[$l]),length($motields[$l]));
+			my $curr_startphase = substr($microields[$l],0,length($motields[$l]));
+			if ($curr_startphase =~ /$prevphase/i) {
+				$infoarray[$l-1] = "insertion";
+			}
+			else {
+				$infoarray[$l-1] = "indel/substitution";
+			}
+		
+			$prevmotif = $motields[$l]; $prevmicro = $microields[$l]; $prevphase = $curr_endphase;
+			next;
+		}
+		else {$localflag = "up"; last;}
+	}
+	if ($localflag eq 'up') {return "NULL";}
+	
+	if (length($prevmotif) == 1) {$fields[$typecord] = "mononucleotide";}
+	if (length($prevmotif) == 2) {$fields[$typecord] = "dinucleotide";}
+	if (length($prevmotif) == 3) {$fields[$typecord] = "trinucleotide";}
+	if (length($prevmotif) == 4) {$fields[$typecord] = "tetranucleotide";}
+	if (length($prevmotif) == 5) {$fields[$typecord] = "pentanucleotide";}
+
+	@microields = split(/[\[|\]]/,$microsat);
+	my @microsats = ();
+	my @positions = ();
+	my $lengthtracker = 0;
+
+	for my $i (0 ... (scalar(@microields ) - 1)){
+		if ($i%2 == 0){
+			push(@microsats,$microields[$i]);
+			$lengthtracker = $lengthtracker + length($microields[$i]);
+
+		}
+		else{			
+			push(@interruptions,$microields[$i]);
+			push(@positions, $lengthtracker+1);
+			$lengthtracker = $lengthtracker + length($microields[$i]);	
+		}				
+		
+	}
+	my $returnline = join("\t",(join("\t",@fields),join(",",(@infoarray)),join(",",(@interruptions)),join(",",(@positions)),scalar(@interruptions)));
+	return($returnline);
+}
+
+#xxxxxxxxxxxxxx multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx  multiSpecies_compound_microsat_analyzer xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 
+
+sub multiSpecies_compoundClarifyer{	
+#	print "IN multiSpecies_compoundClarifyer: @_\n";
+	my $input1 = $_[0];  ###### the *_sput_compound
+	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
+	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
+	my $output2 = $_[3]; ###### compound file
+	my $org = $_[4];
+	my $no_of_species = $_[5];
+	@thresholds = "0";
+	push(@thresholds, split(/_/,$_[6]));
+	
+	
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	
+	$interr_poscord = $motifcord + 3;
+	$no_of_interruptionscord = $motifcord + 4;
+	$interrcord = $motifcord + 2;
+	$interrtypecord = $motifcord + 1;
+	
+	
+	open(IN,"<$input1") or die "Cannot open file $input1 $!";
+	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
+	
+	open(INT,">$output1") or die "Cannot open file $output2 $!";
+	open(COMP,">$output2") or die "Cannot open file $output2 $!";
+	#open(CH,">changed") or die "Cannot open file changed $!";
+		
+#	print "opened files \n";
+	my $linecounter = 0;
+	my $microcounter = 0;
+	
+	my %micros = ();
+	while (my $line = <IN>){
+	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
+		$linecounter++;
+		if ($line =~ /($focalspec)\s+([0-9a-zA-Z_\-]+)\s+([0-9]+)\s+([0-9]+)/ ) {
+			my $key = join("\t",$1, $2, $3, $4);
+	#		print $key, "#-#-#-#-#-#-#-#\n";
+	#		print "key = $key\n";
+			push (@{$micros{$key}},$line);	
+			$microcounter++;
+		}
+		else {#print $line," key not made\n"; <STDIN>;
+			}
+	}
+#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close IN;
+	my @deletedlines = ();
+#	print "done hash \n";
+	$linecounter = 0;
+	#---------------------------------------------------------------------------------------------------
+	# NOW READING THE SEQUENCE FILE
+	my @microsat_types = qw(_ mononucleotide dinucleotide trinucleotide tetranucleotide);
+	 $printer = 0;
+	
+	while(my $sine = <SEQ>){
+		my %microstart=();
+		my %microend=();
+		my @sields = split(/\t/,$sine);
+		my $key = ();
+		
+#		print "sine = $sine. focalspec = $focalspec \n"; #<STDIN>;
+		
+		if ($sine =~ /($focalspec)\s+([0-9a-zA-Z_\-]+)\s+([0-9]+)\s+([0-9]+)/ ) {
+			
+#			if ($sine =~ /([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s+[\+|\-]\s+([a-z0-9A-Z]+)\s+([0-9a-zA-Z_]+)\s+([0-9]+)\s+([0-9]+)\s/ ) {
+			$key = join("\t",$1, $2, $3, $4);
+#			print "key = $key\n";
+		}
+		else{
+#			print "no key in $sine\nfor pattern ([a-z0-9A-Z]+) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+) [\+|\-] (a-z0-9A-Z) (chr[0-9a-zA-Z]+) ([0-9]+) ([0-9]+)   / \n"; 
+		}
+	
+		if (exists $micros{$key}){
+			my @microstring = @{$micros{$key}};
+			delete $micros{$key};
+			
+			foreach my $line (@microstring){
+#				print "#---------#---------#---------#---------#---------#---------#---------#---------\n" if $printer == 1; 
+#				print "microsat = $line" if $printer == 1; 
+				$linecounter++; 
+				my $copy_line = $line;
+				my @mields = split(/\t/,$line);
+				my @fields = @mields;
+				my $start = $fields[$startcord];
+				my $end = $fields[$endcord];
+				my $microsat = $fields[$microsatcord];
+				my $motifline = $fields[$motifcord];
+				my $microsatcopy = $microsat;
+				my $positioner = $microsat;
+				$positioner =~ s/[a-zA-Z|-]/_/g;
+				$microsatcopy =~ s/^\[|\]$//gs;
+				chomp $microsatcopy;
+				my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
+				my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
+				my $absolutstart = 1; my $absolutend = $absolutstart + ($end-$start);
+#				print "absolut: start = $absolutstart, end = $absolutend\n" if $printer == 1;
+				shift @inields;
+				#print "inields =@inields<\n";
+				$motifline =~ s/^\[|\]$//gs;
+				chomp $motifline;
+				#print "microsat = $microsat, its copy = $microsatcopy motifline = $motifline<\n";
+				my @motields = split(/\]\[/,$motifline);
+				my $seq = $microsatcopy;
+				$seq =~ s/\[|\]//g;
+				my $seqlen = length($seq);
+				$seq = " ".$seq;
+	
+				my $longestmotif_no = longest_array_element(@motields);
+				my $shortestmotif_no = shortest_array_element(@motields);
+				#print "shortest motif = $motields[$shortestmotif_no], longest motif = $motields[$longestmotif_no] \n";
+			
+				my $search = $motields[$longestmotif_no].$motields[$longestmotif_no];
+				if ((length($motields[$longestmotif_no]) == length($motields[$shortestmotif_no])) && ($search !~ /$motields[$shortestmotif_no]/) ){
+					print COMP $line;
+					next;
+				}
+			
+				my @shortestmotif_nos = ();
+				for my $m (0 ... $#motields){
+					push(@shortestmotif_nos, $m) if (length($motields[$m]) == length($motields[$shortestmotif_no]) );
+				}
+				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
+				my $newleft =();
+				my $leftstopper = 0; my $rightstopper = 0;
+				foreach my $shortmotif_no (@shortestmotif_nos){
+					next if $shortmotif_no == 0;
+					my $last_left =  $shortmotif_no; #$#motields;
+					my $last_hitter = 0;
+					for (my $i =($shortmotif_no-1); $i>=0; $i--){	
+						my $search  = $motields[$shortmotif_no];
+						if (length($motields[$shortmotif_no]) == 1){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
+						if( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
+							$last_hitter = 1;
+							$last_left = $i+1; last;				
+						}
+						my $probe = $motields[$i];
+						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
+	
+						if ($probe !~ /$search/){
+							$last_hitter = 1;
+							$last_left = $i+1; 
+	#						print "hit the last match: before $microields[$i]..last left = $last_left.. exiting.\n";
+							last;
+						}
+						$last_left--;$last_hitter = 1;
+	#					print "passed tests, last left = $last_left\n";
+					}
+	#				print "comparing whether $last_left < $shortmotif_no, lasthit = $last_hitter\n";
+					if (($last_left) < $shortmotif_no && $last_hitter == 1) {$leftstopper=0; last;}
+					else {$leftstopper = 1;
+	#					print "leftstopper = 1\n";
+					}
+				}
+				
+				## LOOKING AT LEFT OF THE SHORTEST MOTIF------------------------------------------------
+				my $newright =();
+				foreach my $shortmotif_no (@shortestmotif_nos){
+					next if $shortmotif_no == $#motields;
+					my $last_right =  $shortmotif_no;# -1;
+					for my $i ($shortmotif_no+1 ... $#motields){
+						my $search  = $motields[$shortmotif_no];
+						if (length($motields[$shortmotif_no]) == 1 ){ $search = $motields[$shortmotif_no].$motields[$shortmotif_no] ;}
+						if ( (length($motields[$i]) > length($motields[$shortmotif_no])) && length($microields[$i]) > (2.5 * length($motields[$i])) ){
+							$last_right = $i-1; last;			
+						}
+						my $probe = $motields[$i];
+						if (length($motields[$shortmotif_no]) == length($motields[$i])) {$probe = $motields[$i].$motields[$i];}
+						if (  $probe !~ /$search/){
+							$last_right = $i-1; last;
+						}
+						$last_right++; 
+					}
+					if (($last_right) > $shortmotif_no) {$rightstopper=0; last;# print "rightstopper = 0\n";
+					}
+					else {$rightstopper = 1;
+					}
+				}
+				
+				
+				if ($rightstopper == 1 && $leftstopper == 1){
+					print COMP $line; 
+#					print "rightstopper == 1 && leftstopper == 1\n" if $printer == 1; 
+					next;
+				}
+	
+#				print "pased initial testing phase \n" if $printer == 1; 
+				my @outputs = ();
+				my @orig_starts = ();
+				my @orig_ends = ();
+				for my $mic (0 ... $#microields){
+					my $miclen = length($microields[$mic]);
+					my $microleftlen = 0;
+					#print "\nmic = $mic\n";
+					if($mic > 0){
+						for my $submin (0 ... $mic-1){
+							my $interval = ();
+							if (!exists $inields[$submin]) {$interval = "";}
+							else {$interval = $inields[$submin];}
+							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
+							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
+						}
+					}
+					push(@orig_starts,($microleftlen+1));
+					push(@orig_ends, ($microleftlen+1 + $miclen -1));
+				}				
+	
+	#############  F I N A L L Y   S T U D Y I N G   S E Q U E N C E S  #########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
+	
+	
+				for my $mic (0 ... $#microields){
+					my $miclen = length($microields[$mic]);
+					my $microleftlen = 0;
+					if($mic > 0){
+						for my $submin (0 ... $mic-1){
+						#	if(!exists $inields[$submin]) {$inields[$submin] = "";}
+							my $interval = ();
+							if (!exists $inields[$submin]) {$interval = "";}
+							else {$interval = $inields[$submin];}
+							#print "inield =$interval< and microield =$microields[$submin]<\n  ";
+							$microleftlen = $microleftlen + length($microields[$submin]) + length($interval);
+						}
+					}
+					$fields[$startcord] = $microleftlen+1;
+					$fields[$endcord] = $fields[$startcord] + $miclen -1;
+					$fields[$typecord] = $microsat_types[length($motields[$mic])];
+					$fields[$microsatcord] = $microields[$mic];
+					$fields[$motifcord] = $motields[$mic];		
+					my $templine = join("\t", (@fields[0 .. $motifcord]) );
+					my $orig_templine = join("\t", (@fields[0 .. $motifcord]) );
+					my $newline;
+					my $lefter = 1; my $righter = 1;
+					if ( $fields[$startcord] < 2){$lefter = 0;}
+					if ($fields[$endcord] == $seqlen){$righter = 0;}
+	
+					while($lefter == 1){
+						$newline = left_extender($templine, $seq,$org);
+#						print "returned line from left extender= $newline \n" if $printer == 1; 
+						if ($newline eq $templine){$templine = $newline; last;}
+						else {$templine = $newline;}
+						
+						if (left_extention_permission_giver($templine) eq "no") {last;}
+					}
+					while($righter == 1){
+						$newline = right_extender($templine, $seq,$org);
+#						print "returned line from right extender= $newline \n" if $printer == 1; 
+						if ($newline eq $templine){$templine = $newline; last;}
+						else {$templine = $newline;}
+						if (right_extention_permission_giver($templine) eq "no") {last;}
+					}
+					my @tempfields = split(/\t/,$templine);
+					$tempfields[$microsatcord] =~ s/\]|\[//g;
+					$tempfields[$motifcord] =~ s/^\[|\]$//gs;
+					my @tempmotields = split(/\]\[/,$tempfields[$motifcord]);			
+					
+					if (scalar(@tempmotields) == 1 && $templine eq $orig_templine) { 
+#						print "scalar ( tempmotields) = 1\n" if $printer == 1; 
+						next;
+					} 
+					my $prevmotif = shift(@tempmotields);
+					my $stopper = 0;
+					
+					foreach my $tempmot (@tempmotields){
+						if (length($tempmot) != length($prevmotif)) {$stopper = 1; last;}
+						my $search = $prevmotif.$prevmotif;
+						if ($search !~ /$tempmot/) {$stopper = 1; last;}
+						$prevmotif = $tempmot;
+					}
+					if ( $stopper == 1) { 
+#						print "length tempmot  != length prevmotif\n" if $printer == 1; 
+						next; 
+					} 
+					my $lastend  = 0;
+					#----------------------------------------------------------
+					my $left_captured = (); my $right_captured = ();
+					my $left_bp = (); my $right_bp = ();
+	#				print "new startcord = $tempfields[$startcord] , new endcord  = $tempfields[$endcord].. orig strts = @orig_starts and orig ends = @orig_ends\n";
+					for my $o (0 ... $#orig_starts){
+#						print "we are talking abut tempstart:$tempfields[$startcord] >= origstart:$lastend && tempstart:$tempfields[$startcord] <= origend: $orig_ends[$o] \n" if $printer == 1; 
+#						print "we are talking abut tempend:$tempfields[$endcord] >= origstart:$lastend && tempstart:$tempfields[$endcord] >= origend: $orig_ends[$o] \n" if $printer == 1; 
+	
+						if (($tempfields[$startcord] > $lastend)  && ($tempfields[$startcord] <= $orig_ends[$o])){ # && ($tempfields[$startcord] != $fields[$startcord])
+#							print "motif captured on left is $microields[$o] from $microsat\n" if $printer == 1; 
+							$left_captured  = $o;
+							$left_bp =  $orig_ends[$o] - $tempfields[$startcord] + 1;
+						}
+						elsif ($tempfields[$endcord] > $lastend  && $tempfields[$endcord] <= $orig_ends[$o]){ #&& $tempfields[$endcord] != $fields[$endcord])
+#							print "motif captured on right is $microields[$o] from $microsat\n" if $printer == 1; 
+							$right_captured  = $o;
+							$right_bp = $tempfields[$endcord]  - $orig_starts[$o] + 1;
+						}
+						$lastend = $orig_ends[$o]
+					}
+#					print "leftcaptured = $left_captured, right = $right_captured\n" if $printer==1;
+					my $leftmotif = (); my $left_trashed = ();
+					if ($tempfields[$startcord] != $fields[$startcord]) {
+						$leftmotif = $motields[$left_captured]; 
+#						print "$left_captured in @microields: $motields[$left_captured]\n" if $printer == 1; 
+						if ( $left_captured !~ /[0-9]+/) {#print $line,"\n", $templine,"\n"; 
+						}
+						 $left_trashed = length($microields[$left_captured]) - $left_bp;
+					}
+					my $rightmotif = (); my $right_trashed = ();
+					if ($tempfields[$endcord] != $fields[$endcord]) {
+#						print "$right_captured in @microields: $motields[$right_captured]\n" if $printer == 1; 
+						$rightmotif = $motields[$right_captured];
+						$right_trashed = length($microields[$right_captured]) - $right_bp;
+					} 
+					
+					########## P A R A M S #####################@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@#########@@@@
+					$stopper = 0;
+					my $deletioner = 0;
+					#if($tempfields[$startcord] != $fields[$startcord]){
+#						print "enter left: tempfields,startcord  : $tempfields[$startcord] != $absolutstart && left_captured: $left_captured != 0 \n" if $printer==1;
+						if ($left_captured != 0){ 
+#							print "at line 370, going: 0 ... $left_captured-1 \n" if $printer == 1;
+							for my $e (0 ... $left_captured-1){
+								if( length($motields[$e]) > 2 && length($microields[$e]) > (3* length($motields[$e]) )){
+#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
+									$deletioner++; last;
+								}
+								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
+#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1;  
+									$deletioner++; last;
+								}
+								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
+#									print "motif on left not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
+									$deletioner++; last;
+								}
+							}				
+						}
+					#}
+#					print "after left search, deletioner = $deletioner\n" if $printer == 1;
+					if ($deletioner >= 1) {  
+#						print "deletioner = $deletioner\n" if $printer == 1;
+						next; 
+					} 
+					
+					$deletioner = 0;
+	
+					#if($tempfields[$endcord] != $fields[$endcord]){				
+#						print "if tempfields endcord: $tempfields[$endcord] != absolutend: $absolutend\n and $right_captured != $#microields\n" if $printer==1;
+						if ($right_captured != $#microields){ 
+#							print "at line 394, going: $right_captured+1 ... $#microields \n" if $printer == 1;
+							for my $e ($right_captured+1 ... $#microields){
+								if( length($motields[$e]) > 2 &&  length($microields[$e]) > (3* length($motields[$e])) ){
+#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1; 
+									$deletioner++; last;
+								}
+								if( length($motields[$e]) == 2 && length($microields[$e]) > (3* length($motields[$e]) )){
+#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
+									$deletioner++; last;
+								}
+								if( length($motields[$e]) == 1 && length($microields[$e]) > (4* length($motields[$e]) )){
+#									print "motif on right not included too big to be ignored : $microields[$e] \n" if $printer == 1;
+									$deletioner++; last;
+								}
+							}				
+						}
+					#}
+#					print "deletioner = $deletioner\n" if $printer == 1;
+					if ($deletioner >= 1) {  
+						next; 
+					} 
+					my $leftMotifs_notCaptured = ();
+					my $rightMotifs_notCaptured = ();
+									
+					if ($tempfields[$startcord] != $fields[$startcord] ){
+						#print "in left params: (length($leftmotif) == 1 && $tempfields[$startcord] != $fields[$startcord]) ... and .... $left_trashed > (1.5* length($leftmotif]) && ($tempfields[$startcord] != $fields[$startcord])\n";
+						if (length($leftmotif) == 1 && $left_trashed > 3){
+#							print "invaded left motif is long mononucleotide" if $printer == 1;
+							 next;
+	
+						}
+						elsif ((length($leftmotif) != 1 && $left_trashed > ( thrashallow($leftmotif)) && ($tempfields[$startcord] != $fields[$startcord]) ) ){
+#							print "invaded left motif too long" if $printer == 1; 						
+							 next; 
+						}
+					}
+					if ($tempfields[$endcord] != $fields[$endcord] ){
+						#print "in right params: after $tempfields[$endcord] != $fields[$endcord]  .....   (length($rightmotif)==1 && $tempfields[$endcord] != $fields[$endcord]) ... and ... $right_trashed > (1.5* length($rightmotif))\n";
+						if (length($rightmotif)==1 && $right_trashed){
+#							print "invaded right motif is long mononucleotide" if $printer == 1; 
+							 next; 
+	
+						}
+						elsif (length($rightmotif) !=1 && ($right_trashed > ( thrashallow($rightmotif))  && $tempfields[$endcord] != $fields[$endcord])){
+#							print "invaded right motif too long" if $printer == 1;
+							 next; 
+	
+						}
+					}
+					push @outputs, $templine;
+				}
+				if (scalar(@outputs) == 0){ print COMP $line; next;}
+	#			print "outputs are:", join("\n",@outputs),"\n";
+				if (scalar(@outputs) == 1){ 
+					my @oields = split(/\t/,$outputs[0]);
+					my $start = $oields[$startcord]+$mields[$startcord]-1;
+					my $end = $start+($oields[$endcord]-$oields[$startcord]);
+					$oields[$startcord] = $start; $oields[$endcord] = $end;
+					print INT join("\t",@oields), "\n"; 
+				#	print CH $line,;
+				}
+				if (scalar(@outputs) > 1){ 
+					my $motif_min = 10;
+					my $chosen_one = $outputs[0];
+					foreach my $micro (@outputs){
+						my @oields = split(/\t/,$micro);
+						my $tempmotif = $oields[$motifcord];
+						$tempmotif =~ s/^\[|\]$//gs;
+						my @omots = split(/\]\[/, $tempmotif);
+			#			print "motif_min  = $motif_min, current motif  = $tempmotif\n";
+						my $start = $oields[$startcord]+$mields[$startcord]-1;
+						my $end = $start+($oields[$endcord]-$oields[$startcord]);
+						$oields[$startcord] = $start; $oields[$endcord] = $end;
+						if(length($omots[0]) < $motif_min) {
+							$chosen_one = join("\t",@oields); 
+							$motif_min = length($omots[0]);
+						}
+					}
+					print INT $chosen_one, "\n";
+				#	print "chosen one is ".$chosen_one, "\n";
+				#	print CH $line;
+				
+				
+				}
+				
+			}
+			
+		}	#if (exists $micros{$key}){
+		else{
+		}
+	}
+	close INT;
+	close COMP;
+}
+sub left_extender{
+	#print "left extender\n";
+	my ($line, $seq, $org) = @_;	
+#	print "in left extender... line passed = $line and sequence is $seq\n";
+	chomp $line;
+	my @fields = split(/\t/,$line);
+	my $rstart = $fields[$startcord];
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/\[|\]//g;
+	my $rend = $rstart + length($microsat)-1;
+	$microsat =~ s/-//g;
+	my $motif = $fields[$motifcord];
+	my $firstmotif = ();
+
+	if ($motif =~ /^\[/){
+		$motif =~ s/^\[//g;
+		$motif =~ /([a-zA-Z]+)\].*/;
+		$firstmotif = $1;
+	}
+	else {$firstmotif = $motif;}
+	
+	#print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n";
+	my $leftphase = substr($microsat, 0,length($firstmotif));
+	my $phaser = $leftphase.$leftphase;
+	my @phase = split(/\s*/,$leftphase);
+	my @phases;
+	my @copy_phases = @phases;
+	my $crawler=0;
+	for (0 ... (length($leftphase)-1)){
+		push(@phases, substr($phaser, $crawler, length($leftphase)));
+		$crawler++;
+	}
+
+	my $start = $rstart;
+	my $end = $rend;
+	
+	my $leftseq = substr($seq, 0, $start);
+#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n";	
+	my @extentions = ();
+	my @trappeds = ();
+	my @intervalposs = ();
+	my @trappedposs = ();
+	my @trappedphases = ();
+	my @intervals = ();
+	my $firstmotif_length = length($firstmotif);
+	foreach my $phase (@phases){
+#		print "left phase\t",substr($leftseq, -10),"\t$phase\n";
+#		print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n";
+		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
+#			print "in left pattern\n";
+			my $trapped = $1;
+			my $trappedpos = length($leftseq)-length($trapped);
+			my $interval = $3;
+			my $intervalpos = index($trapped, $interval) + 1;
+#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n";
+
+			my $extention = substr($trapped, 0, length($trapped)-length($interval));
+			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
+			my @passed_overhangs;
+			
+			for my $i (1 ... length($phase)-1){
+				my $overhang = substr($phase, -length($phase)+$i);
+#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n";
+				#TEMPORARY... BETTER METHOD NEEDED
+				$leftpeep =~ s/-//g;
+				if ($leftpeep =~ /$overhang$/i){
+					push(@passed_overhangs,$overhang);
+#					print "l overhang\n";
+				}
+			}
+			
+			if(scalar(@passed_overhangs)>0){
+				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
+				$extention = $overhang.$extention;
+				$trapped = $overhang.$trapped;
+				#print "trapped extended to $trapped \n";
+				$trappedpos = length($leftseq)-length($trapped);
+			}
+			
+			push(@extentions,$extention);
+#			print "extentions = @extentions \n";
+
+			push(@trappeds,$trapped );
+			push(@intervalposs,length($extention)+1);
+			push(@trappedposs, $trappedpos);
+#			print "trappeds = @trappeds\n";
+			push(@trappedphases, substr($extention,0,length($phase)));
+			push(@intervals, $interval);
+		}
+	}
+	if (scalar(@trappeds == 0)) {return $line;}
+	
+	my $nikaal = shortest_array_element(@intervals);
+	
+	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
+	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
+	##print "new fields 9 = $fields[9]\n";
+	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
+
+	if($fields[$microsatcord] !~ /^\[/i){
+		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
+	}
+	
+	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
+	
+	if (exists ($fields[$motifcord+1])){
+		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
+	}
+	else{$fields[$motifcord+1] = "indel/deletion";}
+	##print "new fields 14 = $fields[14]\n";
+	
+	if (exists ($fields[$motifcord+2])){		
+		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
+	}
+	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
+	my @seventeen=();	
+	if (exists ($fields[$motifcord+3])){			
+		@seventeen = split(/,/,$fields[$motifcord+3]);	
+	#	#print "scalarseventeen =@seventeen<-\n";
+		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
+		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
+		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
+	}
+	
+	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
+	
+	##print "new fields 16 = $fields[16]\n";
+	##print "new fields 17 = $fields[17]\n";
+	
+	
+	my $returnline =  join("\t",@fields);
+	my $pastline  = $returnline;
+	if ($fields[$microsatcord] =~ /\[/){
+		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
+	}
+	return $returnline;
+}
+sub right_extender{
+	my ($line, $seq, $org) = @_;	
+	chomp $line;
+	my @fields = split(/\t/,$line);
+	my $rstart = $fields[$startcord];
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/\[|\]//g;
+	my $rend = $rstart + length($microsat)-1;
+	$microsat =~ s/-//g;
+	my $motif = $fields[$motifcord];
+	my $temp_lastmotif = ();
+
+	if ($motif =~ /\]$/s){
+		$motif =~ s/\]$//sg;
+		$motif =~ /.*\[([a-zA-Z]+)/;
+		$temp_lastmotif = $1;
+	}
+	else {$temp_lastmotif = $motif;}
+	my $lastmotif = substr($microsat,-length($temp_lastmotif));
+	##print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n";
+	my $rightphase = substr($microsat, -length($lastmotif));
+	my $phaser = $rightphase.$rightphase;
+	my @phase = split(/\s*/,$rightphase);
+	my @phases;
+	my @copy_phases = @phases;
+	my $crawler=0;
+	for (0 ... (length($rightphase)-1)){
+		push(@phases, substr($phaser, $crawler, length($rightphase)));
+		$crawler++;
+	}
+
+	my $start = $rstart;
+	my $end = $rend;
+	
+	my $rightseq = substr($seq, $end+1);
+	my @extentions = ();
+	my @trappeds = ();
+	my @intervalposs = ();
+	my @trappedposs = ();
+	my @trappedphases = ();
+	my @intervals = ();
+	my $lastmotif_length = length($lastmotif);
+	foreach my $phase (@phases){
+		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
+			my $trapped = $1;
+			my $trappedpos = $end+1;
+			my $interval = $2;
+			my $intervalpos = index($trapped, $interval) + 1;
+
+			my $extention = substr($trapped, length($interval));
+			my $rightpeep = substr($seq, ($end+length($trapped))+1);
+			my @passed_overhangs = "";
+			
+			#TEMPORARY... BETTER METHOD NEEDED
+			$rightpeep =~ s/-//g;
+
+			for my $i (1 ... length($phase)-1){
+				my $overhang = substr($phase,0, $i);
+#				#print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n";
+				if ($rightpeep =~ /^$overhang/i){
+					push(@passed_overhangs, $overhang);
+#					#print "r overhang\n";
+				}
+			}
+			if (scalar(@passed_overhangs) > 0){			
+				my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
+				$extention = $extention.$overhang;
+				$trapped = $trapped.$overhang;
+#				#print "trapped extended to $trapped \n";
+			}
+		
+			push(@extentions,$extention);
+			##print "extentions = @extentions \n";
+
+			push(@trappeds,$trapped );
+			push(@intervalposs,$intervalpos);
+			push(@trappedposs, $trappedpos);
+#			#print "trappeds = @trappeds\n";
+			push(@trappedphases, substr($extention,0,length($phase)));
+			push(@intervals, $interval);
+		}
+	}
+	if (scalar(@trappeds == 0)) {return $line;}
+	
+#	my $nikaal = longest_array_element(@trappeds);
+	my $nikaal = shortest_array_element(@intervals);
+	
+#	#print "longest element found = $nikaal \n";
+	
+	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
+	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
+	##print "new fields 9 = $fields[9]";
+	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
+
+	##print "new fields 11 = $fields[11]\n";
+
+	if($fields[$microsatcord] !~ /^\[/i){
+		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
+	}
+	
+	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
+	##print "new fields 12 = $fields[12]\n";
+	
+	##print "scalar of fields = ",scalar(@fields),"\n";
+	if (exists ($fields[$motifcord+1])){
+#		print " print fields = @fields.. scalar=", scalar(@fields),".. motifcord+1 = $motifcord + 1 \n " if !exists $fields[$motifcord+1];
+#		<STDIN> if !exists $fields[$motifcord+1];
+		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
+	}
+	else{$fields[$motifcord+1] = "indel/deletion";}
+	##print "new fields 14 = $fields[14]\n";
+	
+	if (exists ($fields[$motifcord+2])){
+		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
+	}
+	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
+	##print "new fields 15 = $fields[15]\n";
+
+	my @seventeen=();
+	if (exists ($fields[$motifcord+3])){
+		##print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n";
+#		print " print fields = @fields\n " if !exists $fields[$motifcord+3];
+		<STDIN> if !exists $fields[$motifcord+3];
+		my $currpos = length($microsat)+$intervalposs[$nikaal];
+		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
+		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
+
+	}
+	
+	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
+	
+	##print "new fields 16 = $fields[16]\n";
+	
+	##print "new fields 17 = $fields[17]\n";
+	my $returnline = join("\t",@fields);
+	my $pastline  = $returnline;
+	if ($fields[$microsatcord] =~ /\[/){
+		$returnline = multiSpecies_compoundClarifyer_merge($returnline);
+	}
+	#print "finally right-extended line = ",$returnline,"\n";
+	return $returnline;
+}
+sub longest_array_element{
+	my $counter = 0;
+	my($max) = shift(@_);
+	my $maxcounter = 0;
+    foreach my $temp (@_) {
+    	$counter++;
+    	#print "finding largest array: $maxcounter \n" if $prinkter == 1;
+    	if(length($temp) > length($max)){
+        	$max = $temp;
+        	$maxcounter = $counter;
+        }
+    }
+    return($maxcounter);
+}
+sub shortest_array_element{
+	my $counter = 0;
+	my($min) = shift(@_);
+	my $mincounter = 0;
+    foreach my $temp (@_) {
+    	$counter++;
+    	#print "finding largest array: $mincounter \n" if $prinkter == 1;
+    	if(length($temp) < length($min)){
+        	$min = $temp;
+        	$mincounter = $counter;
+        }
+    }
+    return($mincounter);
+}
+
+
+sub left_extention_permission_giver{
+	my @fields = split(/\t/,$_[0]);
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/(^\[)|-//g;
+	my $motif = $fields[$motifcord];
+	my $firstmotif = ();
+	my $firststretch = ();
+	my @stretches=();
+	if ($motif =~ /^\[/){
+		$motif =~ s/^\[//g;
+		$motif =~ /([a-zA-Z]+)\].*/;
+		$firstmotif = $1;
+		@stretches = split(/\]/,$microsat);
+		$firststretch = $stretches[0];
+		##print "firststretch = $firststretch\n";
+	}
+	else {$firstmotif = $motif;$firststretch = $microsat;}
+	
+	if (length($firststretch) < $thresholds[length($firstmotif)]){
+		return "no";
+	}
+	else {return "yes";}
+
+}
+sub right_extention_permission_giver{
+	my @fields = split(/\t/,$_[0]);
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/-|(\]$)//sg;
+	my $motif = $fields[$motifcord];
+	my $temp_lastmotif = ();
+	my $laststretch = ();
+	my @stretches=();
+
+
+	if ($motif =~ /\]/){
+		$motif =~ s/\]$//gs;
+		$motif =~ /.*\[([a-zA-Z]+)$/;
+		$temp_lastmotif = $1;
+		@stretches = split(/\[/,$microsat);
+		$laststretch = pop(@stretches);
+		##print "last stretch = $laststretch\n";
+	}
+	else {$temp_lastmotif = $motif; $laststretch = $microsat;}
+
+	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
+		return "no";
+	}
+	else { return "yes";}
+
+
+}
+sub multiSpecies_compoundClarifyer_merge{
+	my $line = $_[0];
+	#print "sent for mering: $line \n";
+	my @mields = split(/\t/,$line);
+	my @fields = @mields;
+	my $microsat = $fields[$microsatcord];
+	my $motifline = $fields[$motifcord];
+	my $microsatcopy = $microsat;
+	$microsatcopy =~ s/^\[|\]$//sg;
+	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
+	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
+	shift @inields;
+	#print "inields =@inields<\n";
+	$motifline =~ s/^\[|\]$//sg;
+	my @motields = split(/\]\[/,$motifline);
+	my @firstmotifs = ();
+	my @lastmotifs = ();
+	for my $i  (0 ... $#microields){
+		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
+		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
+	}
+	#print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n";
+	my @mergelist = ();
+	my @inter_poses = split(/,/,$fields[$interr_poscord]);
+	my $no_of_interruptions = $fields[$no_of_interruptionscord];
+	my @interruptions = split(/,/,$fields[$interrcord]);
+	my @interrtypes = split(/,/,$fields[$interrtypecord]);
+	my $stopper = 0;
+	for my $i (0 ... $#motields-1){
+		#print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n";
+		if (($lastmotifs[$i] eq $firstmotifs[$i+1]) && !exists $inields[$i]){
+			$stopper = 1;
+			push(@mergelist, ($i)."_".($i+1));
+		}
+	}
+	
+	return $line if scalar(@mergelist) == 0;
+	
+	foreach my $merging (@mergelist){
+		my @sets = split(/_/, $merging);
+		my @tempmicro = ();
+		my @tempmot = ();
+		for my $i (0 ... $sets[0]-1){
+			push(@tempmicro, "[".$microields[$i]."]");
+			push(@tempmicro, $inields[$i]);
+			push(@tempmot, "[".$motields[$i]."]");
+			#print "adding pre-motifs number $i\n";
+		}
+		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
+		push (@tempmicro, $pusher);
+		push(@tempmot, "[".$motields[$sets[0]]."]");
+		my $outcoming = -2;
+		for my $i ($sets[1]+1 ... $#microields-1){
+			push(@tempmicro, "[".$microields[$i]."]");
+			push(@tempmicro, $inields[$i]);	
+			push(@tempmot, "[".$motields[$i]."]");
+			#print "adding post-motifs number $i\n";
+			$outcoming  = $i;
+		}
+		if ($outcoming != -2){
+			#print "outcoming = $outcoming \n";
+			push(@tempmicro, "[".$microields[$outcoming+1 ]."]");
+			push(@tempmot,"[". $motields[$outcoming+1]."]");
+		}
+		$fields[$microsatcord] = join("",@tempmicro);
+		$fields[$motifcord] = join("",@tempmot);
+		
+		splice(@interrtypes, $sets[0], 1);
+		$fields[$interrtypecord] = join(",",@interrtypes);
+		splice(@interruptions, $sets[0], 1);
+		$fields[$interrcord] = join(",",@interruptions);
+		splice(@inter_poses, $sets[0], 1);
+		$fields[$interr_poscord] = join(",",@inter_poses);
+		$no_of_interruptions = $no_of_interruptions - 1;
+	}	
+
+	if ($no_of_interruptions == 0){
+		$fields[$microsatcord] =~ s/^\[|\]$//sg;
+		$fields[$motifcord] =~ s/^\[|\]$//sg;
+		$line = join("\t", @fields[0 ... $motifcord]);		
+	}
+	else{
+		$line = join("\t", @fields);
+	}
+	return $line;
+}
+
+sub thrashallow{
+	my $motif = $_[0];
+	return 4 if length($motif) == 2;
+	return 6 if length($motif) == 3;
+	return 8 if length($motif) == 4;
+	
+}
+
+#xxxxxxxxxxxxxx multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx  multiSpecies_compoundClarifyer xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 
+sub multispecies_filtering_compound_microsats{
+	my $unfiltered = $_[0];
+	my $filtered = $_[1];
+	my $residue = $_[2];
+	my $no_of_species = $_[5];
+	open(UNF,"<$unfiltered") or die "Cannot open file $unfiltered: $!";
+	open(FIL,">$filtered") or die "Cannot open file $filtered: $!";
+	open(RES,">$residue") or die "Cannot open file $residue: $!";
+	
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	
+	my @sub_thresholds = ("0");
+	push(@sub_thresholds, split(/_/,$_[3]));
+	my @thresholds = ("0");
+	push(@thresholds, split(/_/,$_[4]));
+	
+	while (my $line = <UNF>) {
+		if ($line !~ /compound/){
+			print FIL $line,"\n"; next;
+		}
+		chomp $line;
+		my @fields = split(/\t/,$line);
+		my $motifline = $fields[$motifcord];
+		$motifline =~ s/^\[|\]$//g;
+		my @motifs = split(/\]\[/,$motifline);
+		my $microsat = $fields[$microsatcord];
+		$microsat =~ s/^\[|\]$|-//g;
+		my @microsats = split(/\][a-zA-Z|-]*\[/,$microsat);		
+		
+		my $stopper = 0;
+		for my $i (0 ... $#motifs){
+			my @common = ();
+			my $probe = $motifs[$i].$motifs[$i];
+			my $motif_size = length($motifs[$i]);
+		
+			for my $j (0 ... $#motifs){	
+				next if length($motifs[$i]) != length($motifs[$j]);
+				push(@common, length($microsats[$j])) if $probe =~ /$motifs[$j]/i;
+			}
+			
+			if (largest_microsat(@common) < $sub_thresholds[$motif_size]) {$stopper = 1; last;}
+			else {next;}
+		}
+		
+		if ($stopper  == 1){
+			print RES $line,"\n";
+		}
+		else { print FIL $line,"\n"; }
+	}
+	close FIL;
+	close RES;
+}
+
+#xxxxxxxxxxxxxx multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx  multispecies_filtering_compound_microsats xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 
+
+sub chromosome_unrand_breaker{
+#	print "IN chromosome_unrand_breaker: @_\n ";
+	my $input1 = $_[0];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
+	my $dir = $_[1]; ###### directory where subsets are put
+	my $output2 = $_[2]; ###### list of subset files
+	my $increment = $_[3];
+	my $info = $_[4];
+	my $chr = $_[5];
+	open(SEQ,"<$input1") or die "Cannot open file $input1 $!";
+	
+	open(OUT,">$output2") or die "Cannot open file $output2 $!";
+	
+	#---------------------------------------------------------------------------------------------------
+	# NOW READING THE SEQUENCE FILE
+	
+	my $seed = 0;
+	my $subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
+	print OUT $subset,"\n";
+	open(SUB,">$subset");
+	
+	while(my $sine = <SEQ>){
+		$seed++;
+		print SUB $sine;
+		
+		if ($seed%$increment == 0 ){
+			close SUB;
+			$subset = $dir.$info."_".$chr."_".$seed."_".($seed+$increment);
+			open(SUB,">$subset");
+			print SUB $sine;
+			print OUT $subset,"\n";
+	#		print $subset,"\n";
+		}
+	}
+	close OUT;
+	close SUB;
+}
+#xxxxxxxxxxxxxx chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx  chromosome_unrand_breaker xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 
+sub multiSpecies_interruptedMicrosatHunter{
+#	print "IN multiSpecies_interruptedMicrosatHunter: @_\n";
+	my $input1 = $_[0];  ###### the *_sput_op4_ii file
+	my $input2 = $_[1];  ###### looks like this: my $t8humanoutput = "*_nogap_op_unrand2_match"
+	my $output1 = $_[2]; ###### interrupted microsatellite file, in new .interrupted format
+	my $output2 = $_[3]; ###### uninterrupted microsatellite file
+	my $org = $_[4];
+	my $no_of_species = $_[5];
+	
+	my @thresholds = "0";
+	push(@thresholds, split(/_/,$_[6]));
+	
+#	print "thresholds = @thresholds \n";
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	
+	$interr_poscord = $motifcord + 3;
+	$no_of_interruptionscord = $motifcord + 4;
+	$interrcord = $motifcord + 2;
+	$interrtypecord = $motifcord + 1;
+	
+	
+	$prinkter = 0;
+#	print "prionkytet = $prinkter\n";
+	
+	open(IN,"<$input1") or die "Cannot open file $input1 $!";
+	open(SEQ,"<$input2") or die "Cannot open file $input2 $!";
+	
+	open(INT,">$output1") or die "Cannot open file $output2 $!";
+	open(UNINT,">$output2") or die "Cannot open file $output2 $!";
+	
+#	print "opened files !!\n";
+	my $linecounter = 0;
+	my $microcounter = 0;
+	
+	my %micros = ();
+	while (my $line = <IN>){
+	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
+		$linecounter++;
+		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s+([0-9a-zA-Z_]+)\s([0-9]+)\s+([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $3, $4, $5);
+		#	print $key, "#-#-#-#-#-#-#-#\n" if $prinkter == 1;
+			push (@{$micros{$key}},$line);	
+			$microcounter++;
+		}
+		else {#print $line if $prinkter == 1;
+		}
+	}
+#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close IN;
+	my @deletedlines = ();
+#	print "done hash \n";
+	$linecounter = 0;
+	#---------------------------------------------------------------------------------------------------
+	# NOW READING THE SEQUENCE FILE
+	while(my $sine = <SEQ>){
+		#print $linecounter,"\n" if $linecounter % 1000 == 0;
+		my %microstart=();
+		my %microend=();
+		my @sields = split(/\t/,$sine);
+		my $key = ();
+		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$key = join("\t",$1, $2, $3, $4, $5);
+	#		print $key, "<-<-<-<-<-<-<-<\n";		
+		}
+	
+	#	$prinkter = 1 if $sine =~ /^>H\t499\t/;
+	
+		if (exists $micros{$key}){
+			my @microstring = @{$micros{$key}};
+			delete $micros{$key};
+			my @filteredmicrostring;
+#			print "sequence = $sields[$sequencepos]" if $prinkter == 1;
+			foreach my $line (@microstring){
+				$linecounter++;
+				my $copy_line = $line;
+				my @fields = split(/\t/,$line);
+				my $start = $fields[$startcord];
+				my $end = $fields[$endcord];
+				
+#				print $line if $prinkter == 1;
+				#LOOKING FOR LEFTWARD EXTENTION OF MICROSATELLITE 
+				my $newline;
+				while(1){
+				#	print "\n before left sequence = $sields[$sequencepos]\n" if $prinkter == 1;
+					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
+	
+					$newline = multiSpecies_interruptedMicrosatHunter_left_extender($line, $sields[$sequencepos],$org);
+					if ($newline eq $line){$line = $newline; last;}
+					else {$line = $newline;}
+					
+					if (multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver($line) eq "no") {last;}
+#					print "returned line from left extender= $line \n" if $prinkter == 1;
+				}
+				while(1){
+				#	print "sequence = $sields[$sequencepos]\n" if $prinkter == 1;
+					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
+					
+					$newline = multiSpecies_interruptedMicrosatHunter_right_extender($line, $sields[$sequencepos],$org);
+					if ($newline eq $line){$line = $newline; last;}
+					else {$line = $newline;}
+					
+					if (multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver($line) eq "no") {last;}
+#					print "returned line from right extender= $line \n" if $prinkter == 1;
+				}
+#				print "\n>>>>>>>>>>>>>>>>\n In the end, the line is: \n$line\n<<<<<<<<<<<<<<<<\n" if $prinkter == 1;
+	
+				my @tempfields = split(/\t/,$line);
+				if ($tempfields[$microsatcord] =~ /\[/){
+					print INT $line,"\n";
+				}
+				else{
+					print UNINT $line,"\n";
+				}
+				
+				if ($line =~ /NULL/){ next; }
+				push(@filteredmicrostring, $line);
+				push (@{$microstart{$start}},$line);
+				push (@{$microend{$end}},$line);
+			}
+			
+			my $firstflag = 'down';
+	
+		}	#if (exists $micros{$key}){
+	}
+	close INT;
+	close UNINT;
+#	print "final number of lines = $linecounter\n";
+}
+
+sub multiSpecies_interruptedMicrosatHunter_left_extender{
+	my ($line, $seq, $org) = @_;	
+#	print "left extender, like passed = $line\n" if $prinkter == 1;
+#	print "in left extender... line passed = $line and sequence is $seq\n" if $prinkter == 1;
+	chomp $line;
+	my @fields = split(/\t/,$line);
+	my $rstart = $fields[$startcord];
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/\[|\]//g;
+	my $rend = $rstart + length($microsat)-1;
+	$microsat =~ s/-//g;
+	my $motif = $fields[$motifcord];
+	my $firstmotif = ();
+
+	if ($motif =~ /^\[/){
+		$motif =~ s/^\[//g;
+		$motif =~ /([a-zA-Z]+)\].*/;
+		$firstmotif = $1;
+	}
+	else {$firstmotif = $motif;}
+	
+#	print "hacked microsat = $microsat, motif = $motif, firstmotif = $firstmotif\n" if $prinkter == 1;
+	my $leftphase = substr($microsat, 0,length($firstmotif));
+	my $phaser = $leftphase.$leftphase;
+	my @phase = split(/\s*/,$leftphase);
+	my @phases;
+	my @copy_phases = @phases;
+	my $crawler=0;
+	for (0 ... (length($leftphase)-1)){
+		push(@phases, substr($phaser, $crawler, length($leftphase)));
+		$crawler++;
+	}
+
+	my $start = $rstart;
+	my $end = $rend;
+	
+	my $leftseq = substr($seq, 0, $start);
+#	print "left phases are @phases , start = $start left sequence = ",substr($leftseq, -10),"\n" if $prinkter == 1;	
+	my @extentions = ();
+	my @trappeds = ();
+	my @intervalposs = ();
+	my @trappedposs = ();
+	my @trappedphases = ();
+	my @intervals = ();
+	my $firstmotif_length = length($firstmotif);
+	foreach my $phase (@phases){
+#		print "left phase\t",substr($leftseq, -10),"\t$phase\n" if $prinkter == 1;
+#		print "search patter = (($phase)+([a-zA-Z|-]{0,$firstmotif_length})) \n" if $prinkter == 1;
+		if ($leftseq =~ /(($phase)+([a-zA-Z|-]{0,$firstmotif_length}))$/i){
+#			print "in left pattern\n" if $prinkter == 1;
+			my $trapped = $1;
+			my $trappedpos = length($leftseq)-length($trapped);
+			my $interval = $3;
+			my $intervalpos = index($trapped, $interval) + 1;
+#			print "left trapped = $trapped, interval = $interval, intervalpos = $intervalpos\n" if $prinkter == 1;
+
+			my $extention = substr($trapped, 0, length($trapped)-length($interval));
+			my $leftpeep = substr($seq, 0, ($start-length($trapped)));
+			my @passed_overhangs;
+			
+			for my $i (1 ... length($phase)-1){
+				my $overhang = substr($phase, -length($phase)+$i);
+#				print "current overhang = $overhang, leftpeep = ",substr($leftpeep,-10)," whole sequence = ",substr($seq, ($end - ($end-$start) - 20), (($end-$start)+20)),"\n" if $prinkter == 1;
+				#TEMPORARY... BETTER METHOD NEEDED
+				$leftpeep =~ s/-//g;
+				if ($leftpeep =~ /$overhang$/i){
+					push(@passed_overhangs,$overhang);
+#					print "l overhang\n" if $prinkter == 1;
+				}
+			}
+			
+			if(scalar(@passed_overhangs)>0){
+				my $overhang = $passed_overhangs[longest_array_element(@passed_overhangs)];
+				$extention = $overhang.$extention;
+				$trapped = $overhang.$trapped;
+#				print "trapped extended to $trapped \n" if $prinkter == 1;
+				$trappedpos = length($leftseq)-length($trapped);
+			}
+			
+			push(@extentions,$extention);
+#			print "extentions = @extentions \n" if $prinkter == 1;
+
+			push(@trappeds,$trapped );
+			push(@intervalposs,length($extention)+1);
+			push(@trappedposs, $trappedpos);
+#			print "trappeds = @trappeds\n" if $prinkter == 1;
+			push(@trappedphases, substr($extention,0,length($phase)));
+			push(@intervals, $interval);
+		}
+	}
+	if (scalar(@trappeds == 0)) {return $line;}
+	
+############################	my $nikaal = longest_array_element(@trappeds);
+	my $nikaal = shortest_array_element(@intervals);
+	
+#	print "longest element found = $nikaal \n" if $prinkter == 1;
+	
+	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
+	$fields[$motifcord] = "[".$trappedphases[$nikaal]."]".$fields[$motifcord];
+	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;
+	$fields[$startcord] = $fields[$startcord]-length($trappeds[$nikaal]);
+
+	#print "new fields 9 = $fields[9]\n" if $prinkter == 1;
+
+	if($fields[$microsatcord] !~ /^\[/i){
+		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
+	}
+	
+	$fields[$microsatcord] = "[".$extentions[$nikaal]."]".$intervals[$nikaal].$fields[$microsatcord];
+	#print "new fields 14 = $fields[12]\n" if $prinkter == 1;
+	
+	#print "scalar of fields = ",scalar(@fields),"\n" if $prinkter == 1;
+
+	
+	if (scalar(@fields) > $motifcord+1){
+		$fields[$motifcord+1] = "indel/deletion,".$fields[$motifcord+1];
+	}
+	else{$fields[$motifcord+1] = "indel/deletion";}
+	#print "new fields 14 = $fields[14]\n" if $prinkter == 1;
+	
+	if (scalar(@fields)>$motifcord+2){		
+		$fields[$motifcord+2] = $intervals[$nikaal].",".$fields[$motifcord+2];
+	}
+	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
+	#print "new fields 15 = $fields[15]\n" if $prinkter == 1;
+
+	my @seventeen=();
+	
+	if (scalar(@fields)>$motifcord+3){			
+		@seventeen = split(/,/,$fields[$motifcord+3]);	
+	#	print "scalarseventeen =@seventeen<-\n" if $prinkter == 1;
+		for (0 ... scalar(@seventeen)-1) {$seventeen[$_] = $seventeen[$_]+length($trappeds[$nikaal]);}
+		$fields[$motifcord+3] = ($intervalposs[$nikaal]).",".join(",",@seventeen);
+		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
+	}
+	
+	else {$fields[$motifcord+3] = $intervalposs[$nikaal]; $fields[$motifcord+4]=1}
+	
+	#print "new fields 16 = $fields[16]\n" if $prinkter == 1;
+	#print "new fields 17 = $fields[17]\n" if $prinkter == 1;
+	
+#	return join("\t",@fields);	
+	my $returnline = join("\t",@fields);
+	my $pastline  = $returnline;
+	if ($fields[$microsatcord] =~ /\[/){
+		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
+	}
+#	print "finally left-extended line = ",$returnline,"\n" if $prinkter == 1;
+	return $returnline;
+}
+
+sub multiSpecies_interruptedMicrosatHunter_right_extender{
+#	print "right extender\n" if $prinkter == 1;
+	my ($line, $seq, $org) = @_;	
+#	print "in right extender... line passed = $line\n" if $prinkter == 1;
+#	print "line = $line, sequence = ",$seq, "\n" if $prinkter == 1;
+	chomp $line;
+	my @fields = split(/\t/,$line);
+	my $rstart = $fields[$startcord];
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/\[|\]//g;
+	my $rend = $rstart + length($microsat)-1;
+	$microsat =~ s/-//g;
+	my $motif = $fields[$motifcord];
+	my $temp_lastmotif = ();
+
+	if ($motif =~ /\]$/){
+		$motif =~ s/\]$//g;
+		$motif =~ /.*\[([a-zA-Z]+)/;
+		$temp_lastmotif = $1;
+	}
+	else {$temp_lastmotif = $motif;}
+	my $lastmotif = substr($microsat,-length($temp_lastmotif));
+#	print "hacked microsat = $microsat, motif = $motif, lastmotif = $lastmotif\n" if $prinkter == 1;
+	my $rightphase = substr($microsat, -length($lastmotif));
+	my $phaser = $rightphase.$rightphase;
+	my @phase = split(/\s*/,$rightphase);
+	my @phases;
+	my @copy_phases = @phases;
+	my $crawler=0;
+	for (0 ... (length($rightphase)-1)){
+		push(@phases, substr($phaser, $crawler, length($rightphase)));
+		$crawler++;
+	}
+
+	my $start = $rstart;
+	my $end = $rend;
+	
+	my $rightseq = substr($seq, $end+1);
+#	print "length of sequence  = " ,length($seq), "the coordinate to start from = ", $end+1, "\n" if $prinkter == 1;
+#	print "right phases are @phases , end = $end right sequence = ",substr($rightseq,0,10),"\n" if $prinkter == 1;	
+	my @extentions = ();
+	my @trappeds = ();
+	my @intervalposs = ();
+	my @trappedposs = ();
+	my @trappedphases = ();
+	my @intervals = ();
+	my $lastmotif_length = length($lastmotif);
+	foreach my $phase (@phases){
+#		print "right phase\t$phase\t",substr($rightseq,0,10),"\n" if $prinkter == 1;
+#		print "search patter = (([a-zA-Z|-]{0,$lastmotif_length})($phase)+) \n" if $prinkter == 1;
+		if ($rightseq =~ /^(([a-zA-Z|-]{0,$lastmotif_length}?)($phase)+)/i){
+#			print "in right pattern\n" if $prinkter == 1;
+			my $trapped = $1;
+			my $trappedpos = $end+1;
+			my $interval = $2;
+			my $intervalpos = index($trapped, $interval) + 1;
+#			print "trapped = $trapped, interval = $interval\n" if $prinkter == 1;
+
+			my $extention = substr($trapped, length($interval));
+			my $rightpeep = substr($seq, ($end+length($trapped))+1);
+			my @passed_overhangs = "";
+			
+			#TEMPORARY... BETTER METHOD NEEDED
+			$rightpeep =~ s/-//g;
+
+			for my $i (1 ... length($phase)-1){
+				my $overhang = substr($phase,0, $i);
+#				print "current extention = $extention, overhang = $overhang, rightpeep = ",substr($rightpeep,0,10),"\n" if $prinkter == 1;
+				if ($rightpeep =~ /^$overhang/i){
+					push(@passed_overhangs, $overhang);
+#					print "r overhang\n" if $prinkter == 1;
+				}
+			}
+			if (scalar(@passed_overhangs) > 0){			
+				my $overhang = @passed_overhangs[longest_array_element(@passed_overhangs)];
+				$extention = $extention.$overhang;
+				$trapped = $trapped.$overhang;
+#				print "trapped extended to $trapped \n" if $prinkter == 1;
+			}
+		
+			push(@extentions,$extention);
+			#print "extentions = @extentions \n" if $prinkter == 1;
+
+			push(@trappeds,$trapped );
+			push(@intervalposs,$intervalpos);
+			push(@trappedposs, $trappedpos);
+#			print "trappeds = @trappeds\n" if $prinkter == 1;
+			push(@trappedphases, substr($extention,0,length($phase)));
+			push(@intervals, $interval);
+		}
+	}
+	if (scalar(@trappeds == 0)) {return $line;}
+	
+###################################	my $nikaal = longest_array_element(@trappeds);
+	my $nikaal = shortest_array_element(@intervals);
+	
+#	print "longest element found = $nikaal \n" if $prinkter == 1;
+	
+	if ($fields[$motifcord] !~ /\[/i) {$fields[$motifcord] = "[".$fields[$motifcord]."]";}
+	$fields[$motifcord] = $fields[$motifcord]."[".$trappedphases[$nikaal]."]";
+	$fields[$endcord] = $fields[$endcord] + length($trappeds[$nikaal]);
+
+
+	if($fields[$microsatcord] !~ /^\[/i){
+		$fields[$microsatcord] = "[".$fields[$microsatcord]."]";
+	}
+	
+	$fields[$microsatcord] = $fields[$microsatcord].$intervals[$nikaal]."[".$extentions[$nikaal]."]";
+	
+	
+	if (scalar(@fields) > $motifcord+1){
+		$fields[$motifcord+1] = $fields[$motifcord+1].",indel/deletion";
+	}
+	else{$fields[$motifcord+1] = "indel/deletion";}
+	
+	if (scalar(@fields)>$motifcord+2){		
+		$fields[$motifcord+2] = $fields[$motifcord+2].",".$intervals[$nikaal];
+	}
+	else{$fields[$motifcord+2] =  $intervals[$nikaal];}	
+
+	my @seventeen=();
+	if (scalar(@fields)>$motifcord+3){
+		#print "at 608 we are doing this:length($microsat)+$intervalposs[$nikaal]\n" if $prinkter == 1;
+		my $currpos = length($microsat)+$intervalposs[$nikaal];
+		$fields[$motifcord+3] = $fields[$motifcord+3].",".$currpos;
+		$fields[$motifcord+4] = $fields[$motifcord+4]+1;
+
+	}
+	
+	else {$fields[$motifcord+3] = length($microsat)+$intervalposs[$nikaal]; $fields[$motifcord+4]=1}
+	
+#	print "finally right-extended line = ",join("\t",@fields),"\n" if $prinkter == 1;
+#	return join("\t",@fields);
+
+	my $returnline = join("\t",@fields);
+	my $pastline  = $returnline;
+	if ($fields[$microsatcord] =~ /\[/){
+		$returnline = multiSpecies_interruptedMicrosatHunter_merge($returnline);
+	}
+#	print "finally right-extended line = ",$returnline,"\n" if $prinkter == 1;
+	return $returnline;
+
+}
+
+sub multiSpecies_interruptedMicrosatHunter_left_extention_permission_giver{
+	my @fields = split(/\t/,$_[0]);
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/(^\[)|-//sg;
+	my $motif = $fields[$motifcord];
+	chomp $motif;
+#	print $motif, "\n" if $motif !~ /^\[/;
+	my $firstmotif = ();
+	my $firststretch = ();
+	my @stretches=();
+	
+#	print "motif = $motif, microsat = $microsat\n" if $prinkter == 1;
+	if ($motif =~ /^\[/){
+		$motif =~ s/^\[//sg;
+		$motif =~ /([a-zA-Z]+)\].*/;
+		$firstmotif = $1;
+		@stretches = split(/\]/,$microsat);
+		$firststretch = $stretches[0];
+		#print "firststretch = $firststretch\n" if $prinkter == 1;
+	}
+	else {$firstmotif = $motif;$firststretch = $microsat;}
+#	print "if length:firststretch - length($firststretch) < threshes length :firstmotif ($firstmotif) - $thresholds[length($firstmotif)]\n" if $prinkter == 1; 
+	if (length($firststretch) < $thresholds[length($firstmotif)]){
+		return "no";
+	}
+	else {return "yes";}
+
+}
+sub multiSpecies_interruptedMicrosatHunter_right_extention_permission_giver{
+	my @fields = split(/\t/,$_[0]);
+	my $microsat = $fields[$microsatcord];
+	$microsat =~ s/-|(\]$)//sg;
+	my $motif = $fields[$motifcord];
+	chomp $motif;
+	my $temp_lastmotif = ();
+	my $laststretch = ();
+	my @stretches=();
+
+
+	if ($motif =~ /\]/){
+		$motif =~ s/\]$//sg;
+		$motif =~ /.*\[([a-zA-Z]+)$/;
+		$temp_lastmotif = $1;
+		@stretches = split(/\[/,$microsat);
+		$laststretch = pop(@stretches);
+		#print "last stretch = $laststretch\n" if $prinkter == 1;
+	}
+	else {$temp_lastmotif = $motif; $laststretch = $microsat;}
+
+	if (length($laststretch) < $thresholds[length($temp_lastmotif)]){
+		return "no";
+	}
+	else { return "yes";}
+
+
+}
+sub checking_substitutions{
+	
+	my ($line, $seq, $startprobes, $endprobes) = @_;
+	#print "sequence = $seq \n" if $prinkter == 1;
+	#print "COMMAND  = \n $line, \n $seq, \n $startprobes \n, $endprobes\n";
+		#		<STDIN>;
+	my @seqarray = split(/\s*/,$seq);
+	my @startsubst_probes = split(/\|/,$startprobes); 
+	my @endsubst_probes = split(/\|/,$endprobes);
+	chomp $line;
+	my @fields = split(/\t/,$line);
+	my $start = $fields[11] - $fields[10];
+	my $end = $fields[13] - $fields[10];
+	my $motif = $fields[9]; #IN FUTURE, USE THIS AS A PROBE, LIKE MOTIF = $FIELDS[9].$FIELDS[9]
+	$motif =~ s/\[|\]//g;
+	my $microsat = $fields[14];
+	$microsat =~ s/\[|\]//g;
+	#------------------------------------------------------------------------
+	# GETTING START AND END PHASES
+	my $startphase = substr($microsat,0, length($motif));
+	my $endphase = substr($microsat,-length($motif), length($motif));
+	#print "start and end phases are  - $startphase and $endphase\n";
+	my $startflag = 'down';
+	my $endflag = 'down';
+	my $substitution_distance = length($motif);
+	my $prestart = $start - $substitution_distance;
+	my $postend = $end + $substitution_distance;
+	my @endadds = ();
+	my @startadds = ();
+		if (($prestart < 0) || ($postend > scalar(@seqarray))) {
+			last;
+		}
+	#------------------------------------------------------------------------#------------------------------------------------------------------------
+	# CHECKING FOR SUBSTITUTION PROBES NOW			
+		
+	if ($fields[8] ne "mononucleotide"){				
+		while ($startflag eq "down"){		
+			my $search = join("",@seqarray[$prestart...($start-1)]);
+			#print "search is from $prestart...($start-1) = $search\n";
+			foreach my $probe (@startsubst_probes){
+				#print "\t\tprobe = $probe\n";				
+				if ($search =~ /^$probe/){
+					#print "\tfound addition to the left - $search \n";
+					my $copyprobe = $probe;
+					my $type;
+					my $subspos = 0;
+					my $interruption = "";
+					if ($search eq $startphase) { $type = "NONE";}
+					else{
+						$copyprobe =~ s/\[a-zA-Z\]/^/g;
+						$subspos = index($copyprobe,"^") + 1;
+						$type = "substitution";
+						$interruption  = substr($search, $subspos,1);
+					}
+					my $addinfo = join("\t",$prestart, $start, $search, $type, $interruption, $subspos);
+					#print "adding information: $addinfo \n";
+					push(@startadds, $addinfo);
+					$prestart = $prestart - $substitution_distance;
+					$start = $start-$substitution_distance;
+					$startflag = 'down';
+					
+					last;
+				}
+				else{
+					$startflag = 'up';
+				}
+			}
+		}
+		#<STDIN>;		
+		while ($endflag eq "down"){									
+			my $search = join("",@seqarray[($end+1)...$postend]);
+			#print "search is from ($end+1)...$postend] = $search\n";
+
+			foreach my $probe (@endsubst_probes){
+				#print "\t\tprobe = $probe\n";
+				if ($search =~ /$probe$/){
+					my $copyprobe = $probe;
+					my $type;
+					my $subspos = 0;
+					my $interruption = "";
+					if ($search eq $endphase) { $type = "NONE";}
+					else{
+						$copyprobe =~ s/\[a-zA-Z\]/^/g;
+						$subspos = index($copyprobe,"^") + 1;
+						$type = "substitution";
+						$interruption  = substr($search, $subspos,1);
+					}
+					my $addinfo = join("\t",$end, $postend, $search, $type, $interruption, $subspos);
+					#print "adding information: $addinfo \n";
+					push(@endadds, $addinfo);
+					$postend = $postend + $substitution_distance;
+					$end = $end+$substitution_distance;
+					push(@endadds, $search);
+					$endflag = 'down';
+					last;
+				}
+				else{
+					$endflag = 'up';
+				}
+			}
+		}
+		#print "startadds = @startadds, endadds  = @endadds \n";
+	
+	}
+}
+sub microsat_packer{
+	my $microsat = $_[0];
+	my $addition = $_[1];
+
+
+
+}
+sub multiSpecies_interruptedMicrosatHunter_merge{
+		$prinkter = 0;
+#	print "~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~|||~~~~~~~~\n";
+	my $line = $_[0];
+#	print "sent for mering: $line \n" if $prinkter ==1;
+	my @mields = split(/\t/,$line);
+	my @fields = @mields;
+	my $microsat = allCaps($fields[$microsatcord]);
+	my $motifline = allCaps($fields[$motifcord]);
+	my $microsatcopy = $microsat;
+#	print "microsat = $microsat|\n" if $prinkter ==1;
+	$microsatcopy =~ s/^\[|\]$//sg;
+	chomp $microsatcopy;
+	my @microields = split(/\][a-zA-Z|-]*\[/,$microsatcopy);
+	my @inields = split(/\[[a-zA-Z|-]*\]/,$microsat);
+	shift @inields;
+#	print "inields =",join("|",@inields)," microields = ",join("|",@microields)," and count of microields = ", $#microields,"\n" if $prinkter ==1;
+	$motifline =~ s/^\[|\]$//sg;
+	my @motields = split(/\]\[/,$motifline);
+	my @firstmotifs = ();
+	my @lastmotifs = ();
+	for my $i  (0 ... $#microields){
+		$firstmotifs[$i] =  substr($microields[$i],0,length($motields[$i]));
+		$lastmotifs[$i] = substr($microields[$i],-length($motields[$i]));
+	}
+#	print "firstmotif = @firstmotifs... lastmotif = @lastmotifs\n" if $prinkter ==1;
+	my @mergelist = ();
+	my @inter_poses = split(/,/,$fields[$interr_poscord]);
+	my $no_of_interruptions = $fields[$no_of_interruptionscord];
+	my @interruptions = split(/,/,$fields[$interrcord]);
+	my @interrtypes = split(/,/,$fields[$interrtypecord]);
+	my $stopper = 0;
+	for my $i (0 ... $#motields-1){
+#		print "studying connection of $motields[$i] and $motields[$i+1], i = $i in $microsat\n:$lastmotifs[$i] eq $firstmotifs[$i+1]?\n" if $prinkter ==1;
+		if ((allCaps($lastmotifs[$i]) eq allCaps($firstmotifs[$i+1])) && (!exists $inields[$i] || $inields[$i] !~ /[a-zA-Z]/)){
+			$stopper = 1;
+			push(@mergelist, ($i)."_".($i+1)); #<STDIN> if $prinkter ==1;
+		}
+	}
+	
+#	print "mergelist = @mergelist\n" if $prinkter ==1;
+	return $line if scalar(@mergelist) == 0;
+#	print "merging @mergelist\n" if $prinkter ==1;
+#	<STDIN> if $prinkter ==1;
+	
+	foreach my $merging (@mergelist){
+		my @sets = split(/_/, $merging);
+#		print "sets = @sets\n" if $prinkter ==1;
+		my @tempmicro = ();
+		my @tempmot = ();
+#		print "for loop going from 0 ... ", $sets[0]-1, "\n" if $prinkter ==1;
+		for my $i (0 ... $sets[0]-1){
+#			print " adding pre- i = $i adding: microields= $microields[$i]. motields = $motields[$i], inields = |$inields[$i]|\n" if $prinkter ==1;
+			push(@tempmicro, "[".$microields[$i]."]");
+			push(@tempmicro, $inields[$i]);
+			push(@tempmot, "[".$motields[$i]."]");
+#			print "adding pre-motifs number $i\n" if $prinkter ==1;
+#			print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
+		}
+#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
+#		print "now pushing ", "[",$microields[$sets[0]]," and ",$microields[$sets[1]],"]\n" if $prinkter ==1;
+		my $pusher = "[".$microields[$sets[0]].$microields[$sets[1]]."]";
+#		print "middle is, from @motields -   @sets, number 0 which is  is\n";
+#		print ": $motields[$sets[0]]\n";
+		push (@tempmicro, $pusher);
+		push(@tempmot, "[".$motields[$sets[0]]."]");
+		push (@tempmicro, $inields[$sets[1]]) if $sets[1] != $#microields && exists $sets[1] && exists $inields[$sets[1]];
+		my $outcoming = -2;
+#		print "tempmot = @tempmot, tempmicro = @tempmicro \n" if $prinkter ==1;
+#		print "for loop going from ",$sets[1]+1, " ... ", $#microields, "\n" if $prinkter ==1;
+		for my $i ($sets[1]+1 ... $#microields){
+#			print " adding post- i = $i adding: microields= $microields[$i]. motields = $motields[$i]\n" if $prinkter ==1;
+			push(@tempmicro, "[".$microields[$i]."]") if exists $microields[$i];
+			push(@tempmicro, $inields[$i]) unless $i == $#microields || !exists $inields[$i];
+			push(@tempmot, "[".$motields[$i]."]");
+#			print "adding post-motifs number $i\n" if $prinkter ==1;
+			$outcoming  = $i;
+		}
+#		print "____________________________________________________________________________\n";
+		$prinkter = 0;
+		$fields[$microsatcord] = join("",@tempmicro);
+		$fields[$motifcord] = join("",@tempmot);
+#		print "tempmot = @tempmot, tempmicro = @tempmicro . microsat = $fields[$microsatcord] and motif = $fields[$motifcord] \n" if $prinkter ==1;
+		
+		splice(@interrtypes, $sets[0], 1);
+		$fields[$interrtypecord] = join(",",@interrtypes);
+		splice(@interruptions, $sets[0], 1);
+		$fields[$interrcord] = join(",",@interruptions);
+		splice(@inter_poses, $sets[0], 1);
+		$fields[$interr_poscord] = join(",",@inter_poses);
+		$no_of_interruptions = $no_of_interruptions - 1;
+	}	
+
+	if ($no_of_interruptions == 0 && $line !~ /compound/){
+		$fields[$microsatcord] =~ s/^\[|\]$//sg;
+		$fields[$motifcord] =~ s/^\[|\]$//sg;
+		$line = join("\t", @fields[0 ... $motifcord]);		
+	}
+	else{
+		$line = join("\t", @fields);
+	}
+#	print "post merging, the line is $line\n" if $prinkter ==1;
+	#<STDIN> if $stopper ==1;
+	return $line;
+}
+sub interval_asseser{
+	my $pre_phase = $_[0]; my $post_phase = $_[1]; my $inter = $_[3];
+}
+#---------------------------------------------------------------------------------------------------
+sub allCaps{
+	my $motif = $_[0];
+	$motif =~ s/a/A/g;
+	$motif =~ s/c/C/g;
+	$motif =~ s/t/T/g;
+	$motif =~ s/g/G/g;
+	return $motif;
+}
+
+
+#xxxxxxxxxxxxxx multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx  chromosome_unrand_breamultiSpecies_interruptedMicrosatHunterker xxxxxxxxxxxxxx  multiSpecies_interruptedMicrosatHunter xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 
+sub merge_interruptedMicrosats{
+#	print "IN merge_interruptedMicrosats: @_\n";
+	my $input0 = $_[0];  ######looks like this: my $t8humanoutput = $pipedir.$ptag."_nogap_op_unrand2"
+	my $input1 = $_[1];  ###### the *_sput_op4_ii file
+	my $input2 = $_[2];  ###### the *_sput_op4_ii file
+	$no_of_species = $_[3];
+	
+	my $output1 = $_[1]."_separate";    #$_[3]; ###### plain microsatellite file forward
+	my $output2 = $_[2]."_separate";    ##$_[4]; ###### plain microsatellite file reverse
+	my $output3 = $_[1]."_merged";    ##$_[5]; ###### plain microsatellite file forward
+	#my $output4 = $_[2]."_merged";    ##$_[6]; ###### plain microsatellite file reverse
+	#my $info = $_[4];
+	#my @tags = split(/\t/,$info);
+	
+	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
+	open(INF,"<$input1") or die "Cannot open file $input1 $!";
+	open(INR,"<$input2") or die "Cannot open file $input2 $!";
+	open(OUTF,">$output1") or die "Cannot open file $output1 $!";
+	open(OUTR,">$output2") or die "Cannot open file $output2 $!";
+	open(MER,">$output3") or die "Cannot open file $output3 $!";
+	#open(MERR,">$output4") or die "Cannot open file $output4 $!";
+		
+	
+		
+	 $printer = 0;
+	
+#	print "files opened \n";
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	$typecord = $infocord + 1;
+	my $sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	
+	$interrtypecord = $motifcord + 1;
+	$interrcord = $motifcord + 2;
+	$interr_poscord = $motifcord + 3;
+	$no_of_interruptionscord = $motifcord + 4;
+	$mergestarts  = $no_of_interruptionscord+ 1;
+	$mergeends = $no_of_interruptionscord+ 2;
+	$mergemicros = $no_of_interruptionscord+ 3;
+	
+	# NOW ADDING FORWARD MICROSATELLITES TO HASH
+	my %fmicros = ();
+	my $microcounter=0;
+	my $linecounter = 0;
+	while (my $line = <INF>){
+	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
+		$linecounter++;
+		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $4, $5);
+		#	print $key, "#-#-#-#-#-#-#-#\n";
+			push (@{$fmicros{$key}},$line);	
+			$microcounter++;
+		}
+		else {
+			#print $line;
+		}
+	}
+#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close INF;
+	my @deletedlines = ();
+#	print "done forward hash \n";
+	$linecounter = 0;
+	#---------------------------------------------------------------------------------------------------
+	# NOW ADDING REVERSE MICROSATELLITES TO HASH
+	my %rmicros = ();
+	$microcounter=0;
+	while (my $line = <INR>){
+	#	print "$org\t(chr[0-9a-zA-Z]+)\t([0-9]+)\t([0-9])+\t \n";
+		$linecounter++;
+		if ($line =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $4, $5);
+	#		print $key, "#-#-#-#-#-#-#-#\n";
+			push (@{$rmicros{$key}},$line);	
+			$microcounter++;
+		}
+		else {
+			#print "cant make key\n";
+		}
+	}
+#	print "number of reverse microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close INR;
+#	print "done reverse hash \n";
+	$linecounter = 0;
+	
+	#------------------------------------------------------------------------------------------------
+	
+	while(my $sine = <SEQ>){
+		#<STDIN> if $sine =~ /16349128/;
+		next if $sine !~ /[a-zA-Z0-9]/;
+#		print "-" x 150, "\n"  if $printer == 1;
+		my @sields = split(/\t/,$sine);
+		my @merged = ();
+	
+		my $key = ();
+	
+		if ($sine =~ /^>[A-Za-z0-9]+\s+([0-9]+)\s+([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$key = join("\t",$1, $2, $4, $5);
+		#	print $key, "<-<-<-<-<-<-<-<\n";		
+		}
+	#	print "key = $key\n";
+		
+		my @sets1;
+		my @sets2;
+		chomp $sields[$sequencepos];
+		my $rev_sequence = reverse($sields[$sequencepos]); 
+		$rev_sequence =~ s/ //g;
+		$rev_sequence = " ".$rev_sequence;
+		next if (!exists $fmicros{$key} && !exists $rmicros{$key});
+		
+		if (exists $fmicros{$key}){
+		#	print "line no : $linecount\n";
+			my @raw_microstring = @{$fmicros{$key}};
+			my %starts = (); my %ends = ();
+#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
+			my @microstring=();
+			for my $u (0 ... $#raw_microstring){
+				my @tields = split(/\t/,$raw_microstring[$u]);
+				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
+				push(@microstring, $raw_microstring[$u]);
+				$starts{$tields[$startcord]} = $tields[$startcord];
+				$ends{$tields[$endcord]} = $tields[$endcord];
+			}
+			
+	#		print "founf microstring in forward\n: @microstring\n"; 
+			chomp @microstring;
+			my $clusterresult = (find_clusters(@microstring, $sields[$sequencepos])); 	
+			@sets1 = split("\=", $clusterresult);
+			my @temp = split(/_X0X_/,$sets1[0]) ; $microscanned+= scalar(@temp);
+		#	print "sets = ", join("<all\nmerged>", @sets1), "\n<<-sets1\n"; <STDIN>;
+		}	#if (exists $micros{$key}){
+	
+		if (exists $rmicros{$key}){
+		#	print "line no : $linecount\n";
+			my @raw_microstring = @{$rmicros{$key}};
+			my %starts = (); my %ends = ();
+#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $printer == 1; foreach (@raw_microstring) {print colored ['blue'],$_,"\n" if $printer == 1;}
+			my @microstring=();
+			for my $u (0 ... $#raw_microstring){
+				my @tields = split(/\t/,$raw_microstring[$u]);
+				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
+				push(@microstring, $raw_microstring[$u]);
+				$starts{$tields[$startcord]} = $tields[$startcord];
+				$ends{$tields[$endcord]} = $tields[$endcord];
+			}
+	#		print "founf microstring in reverse\n: @microstring\n"; <STDIN>;
+			chomp @microstring;
+	#		print "sending reversed sequence\n";
+			my $clusterresult = (find_clusters(@microstring, $rev_sequence ) ); 	
+			@sets2 = split("\=", $clusterresult);
+			my @temp = split(/_X0X_/,$sets2[0]) ; $microscanned+= scalar(@temp);
+		}	#if (exists $micros{$key}){
+	
+		my @popout1 = ();
+		my @popout2 = ();
+		my @forwardset = ();
+		if (exists $sets2[1] ){
+			if(exists $sets1[0]) {
+				push (@popout1, $sets1[0],$sets2[1]);
+				my @forwardset = split("=", popOuter(@popout1, $rev_sequence ));#		
+				print OUTF join("\n",split("_X0X_", $forwardset[0])), "\n";
+				my @localmerged = split("_X0X_", $forwardset[1]);
+				my $sequence = $sields[$sequencepos];
+				$sequence =~ s/ //g;
+#				print "\nforwardset = @forwardset\n";
+				for my $j (0 ... $#localmerged){
+					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
+				}
+				
+				push (@merged, @localmerged);
+				
+			}
+			else{
+				my @localmerged = split("_X0X_", $sets2[1]);
+				my $sequence = $sields[$sequencepos];
+				$sequence =~ s/ //g;
+				for my $j (0 ... $#localmerged){
+#					print "\nlocalmerged = @localmerged\n";
+					$localmerged[$j] = 	invert_justCoordinates ($localmerged[$j], length($sequence));
+				}
+				
+				push (@merged, @localmerged);
+			}
+		}
+		elsif (exists $sets1[0]){
+			print OUTF join("\n",split("_X0X_", $sets1[0])), "\n";
+		}
+		
+		my @reverseset= ();
+		if (exists $sets1[1]){
+			if (exists $sets2[0]){
+				push (@popout2, $sets2[0],$sets1[1]);
+			#	print "popout2 = @popout2\n";
+				my @reverseset = split("=", popOuter(@popout2, $sields[$sequencepos]));
+				#print "reverseset = $reverseset[1] < --- reverseset1\n"; 
+				print OUTR join("\n",split("_X0X_", $reverseset[0])), "\n";
+				push(@merged,  (split("_X0X_", $reverseset[1])));
+			}
+			else{
+				push(@merged,  (split("_X0X_", $sets1[1])));
+			}
+		}
+		elsif (exists $sets2[0]){
+			print OUTR join("\n",split("_X0X_", $sets2[0])), "\n";
+		
+		}
+		
+		if (scalar @merged > 0){
+			my @filtered_merged = split("__",(filterDuplicates_merged(@merged)));
+			print MER join("\n", @filtered_merged),"\n"; 
+		}	
+	#		<STDIN> if $sine =~ /16349128/;
+	
+	}
+	close(SEQ);
+	close(INF);
+	close(INR);
+	close(OUTF);
+	close(OUTR);
+	close(MER);
+
+}
+sub find_clusters{
+	my @input = @_;
+	my $sequence = pop(@input);
+	$sequence =~ s/ //g;
+	my @microstring0 = @input;
+#	print "IN: find_clusters:\n";
+	my %microstart=();
+	my %microend=();
+	my @nonmerged = ();
+	my @mergedSet = ();
+#		print "set of microsats = @microstring \n";
+	my @microstring = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @microstring0; 
+#	print "microstring = ", join("\n",@microstring0) ," \n---->\n", join("\n", @microstring),"\n ,,+." if $printer == 1; 
+	#<STDIN> if $printer == 1; 
+	my @tempmicrostring = @microstring;
+	foreach my $line (@tempmicrostring){
+		my @fields = split(/\t/,$line);
+		my $start = $fields[$startcord];
+		my $end = $fields[$endcord];
+		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/;
+	#		print " starts >>> start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
+		push (@{$microstart{$start}},$line);
+		push (@{$microend{$end}},$line);	
+	}
+	my $firstflag = 'down';
+	while( my $line =shift(@microstring)){
+#		print "-----------\nline = $line \n" if $printer == 1;
+		chomp $line;
+		my @fields = split(/\t/,$line);
+		my $start = $fields[$startcord];
+		my $end = $fields[$endcord];
+		next if $start !~ /[0-9]+/ || $end !~ /[0-9]+/ || $distance !~ /[0-9]+/ ;
+		my $startmicro = $line;
+		my $endmicro = $line;
+#		print "start: $start = $fields[11] - $fields[10] || $end = $fields[13] - $fields[10]\n";
+
+		delete ($microstart{$start});
+		delete ($microend{$end});
+		my $flag = 'down';	
+		my $startflag = 'down';
+		my $endflag = 'down';
+		my $prestart = $start - $distance;
+		my $postend = $end + $distance;
+		my @compoundlines = ();
+		my %compoundhash = ();
+		push (@compoundlines, $line);
+		push (@{$compoundhash{$line}},$line);
+		my $startrank = 1;
+		my $endrank = 1;
+
+		while( ($startflag eq "down") || ($endflag eq "down") ){
+#			print "prestart=$prestart, post end =$postend.. seqlen =", length($sequence)," firstflag = $firstflag \n" if $printer == 1;
+			if ( (($prestart < 0) && $firstflag eq "up") || (($postend > length($sequence) && $firstflag eq "up")) ){
+#				print "coming to the end of sequence,post end = $postend and sequence length =", length($sequence)," so exiting\n" if $printer == 1;
+				last;
+			}
+			
+			$firstflag = "up";
+			if ($startflag eq "down"){		
+				for my $i ($prestart ... $end){
+					if(exists $microend{$i}){	
+						chomp $microend{$i}[0];
+						if(exists $compoundhash{$microend{$i}[0]}) {next;}
+							chomp $microend{$i}[0];
+							push(@compoundlines, $microend{$i}[0]);
+							my @tields = split(/\t/,$microend{$i}[0]);
+							$startmicro = $microend{$i}[0];
+							chomp $startmicro;
+							$flag = 'down';
+							$startrank++;
+#							print "deleting $microend{$i}[0] and $microstart{$tields[$startcord]}[0]\n" if $printer == 1;
+							delete $microend{$i};
+							delete $microstart{$tields[$startcord]};
+							$end = $tields[$endcord];
+							$startflag = 'down';
+							$prestart = $tields[$startcord] - $distance;
+							last;
+					}
+					else{
+						$flag = 'up';
+						$startflag = 'up';
+					}
+				}
+			}
+	
+			if ($endflag eq "down"){	
+
+				for my $i ($start ... $postend){
+#					print "$start ----> $i -----> $postend\n" if $printer == 1;
+					if(exists $microstart{$i} ){
+						chomp $microstart{$i}[0];
+						if(exists $compoundhash{$microstart{$i}[0]}) {next;}	
+							chomp $microstart{$i}[0];
+							push(@compoundlines, $microstart{$i}[0]);
+							my @tields = split(/\t/,$microstart{$i}[0]);
+							$endmicro = $microstart{$i}[0];
+							$endrank++;
+							chomp $endmicro;
+							$flag = 'down';
+#							print "deleting $microend{$tields[$endcord]}[0]\n" if $printer == 1;
+							
+							delete $microstart{$i} if exists $microstart{$i} ;
+							delete $microend{$tields[$endcord]} if exists $microend{$tields[$endcord]};
+#							print "done\n" if $printer == 1;
+							
+							shift @microstring;
+							$end = $tields[$endcord];
+							$postend = $tields[$endcord] + $distance;
+							$endflag = 'down';
+							last;								
+					}
+					else{
+						$flag = 'up';
+						$endflag = 'up';
+					}
+#					print "out of the if\n" if $printer == 1;
+				}
+#				print "out of the for\n" if $printer == 1;
+
+			}
+#			print "for next turn, flag status: startflag = $startflag and endflag = $endflag \n";
+		} 														#end while( $flag eq "down")
+#			print "compoundlines = @compoundlines \n" if $printer == 1;
+
+		if (scalar (@compoundlines) == 1){
+			push(@nonmerged, $line);		
+
+		}
+		if (scalar (@compoundlines) > 1){
+#			print "FROM CLUSTERER\n"  if $printer == 1;
+			push(@mergedSet,merge_microsats(@compoundlines, $sequence) );
+		}
+	} #end foreach my $line (@microstring){
+#	print join("\n",@mergedSet),"<-----mergedSet\n"  if $printer == 1;
+#<STDIN> if scalar(@mergedSet) > 0;
+#	print "EXIT: find_clusters\n";
+return (join("_X0X_",@nonmerged). "=".join("_X0X_",@mergedSet));
+}
+
+sub custom {
+	$a->[$startcord+1] <=> $b->[$startcord+1];
+}
+
+sub popOuter {
+#	print "\nIN: popOuter @_\n"; <STDIN>;
+	my @all = split ("_X0X_",$_[0]);
+#	<STDIN> if !defined $_[0];
+	my @merged = split ("_X0X_",$_[1]);
+	my $sequence = $_[2];
+	my $seqlen = length($sequence);
+	my %microstart=();
+	my %microend=();
+	my @mergedSet = ();
+	my @nonmerged = ();
+	
+	foreach my $line (@all){
+		my @fields = split(/\t/,$line);
+		my $start = $seqlen - $fields[$startcord]+ 1;
+		my $end = $seqlen - $fields[$endcord] + 1;
+		push (@{$microstart{$start}},$line);
+		push (@{$microend{$end}},$line);	
+	}
+	my $firstflag = 'down';
+	my %forPopouting = ();
+
+	while( my $line =shift(@merged)){
+	#	print "\n MErgedline: $line .. startcord = $startcord  ... endcord = $endcord\n" ;
+		chomp $line;
+		my @fields = split(/\t/,$line);
+		my $start = $fields[$startcord];
+		my $end = $fields[$endcord];
+		my $startmicro = $line;
+		my $endmicro = $line;
+		
+		
+		delete ($microstart{$start});
+		delete ($microend{$end});
+		my $flag = 'down';	
+		my $startflag = 'down';
+		my $endflag = 'down';
+		my $prestart = $start - $distance;
+		my $postend = $end + $distance;
+		my @compoundlines = ();
+		my %compoundhash = ();
+		push (@compoundlines, $line);
+		my $startrank = 1;
+		my $endrank = 1;
+		
+	#	print "\nstart = $start, end = $end\n";
+	#	<STDIN>;
+		for my $i ($start ... $end){
+			if(exists $microend{$i}){	
+		#		print "\nmicrosat exists: $microend{$i}[0] microsat exists\n"; 
+				chomp $microend{$i}[0];
+				my @fields = split(/\t/,$microend{$i}[0]);
+				delete $microstart{$seqlen - $fields[$startcord] + 1};
+				my $invertseq = $sequence;
+				$invertseq =~ s/ //g;
+				push(@compoundlines, invert_microsat($microend{$i}[0] , $invertseq ));
+				delete $microend{$i};
+				
+			}
+
+			if(exists $microstart{$i} ){
+		#		print "\nmicrosat exists: $microstart{$i}[0] microsat exists\n"; 
+
+				chomp $microstart{$i}[0];
+				my @fields = split(/\t/,$microstart{$i}[0]);
+				delete $microend{$seqlen - $fields[$endcord] + 1};
+				my $invertseq = $sequence;
+				$invertseq =~ s/ //g;
+				push(@compoundlines, invert_microsat($microstart{$i}[0], $invertseq) );						
+				delete $microstart{$i};
+			}
+		}			
+	
+		if (scalar (@compoundlines) == 1){
+			push(@mergedSet,join("\t",@compoundlines) );
+		}
+		else {
+#			print "FROM POPOUTER\n" if $printer == 1;
+			push(@mergedSet, merge_microsats(@compoundlines, $sequence) );
+		}
+	} 
+	
+	foreach my $key (sort keys %microstart) {
+    	push(@nonmerged,$microstart{$key}[0]);
+	}
+	
+	return (join("_X0X_",@nonmerged). "=".join("_X0X_",@mergedSet) );
+}
+
+
+
+sub invert_justCoordinates{
+	my $microsat = $_[0];
+#	print "IN invert_justCoordinates ... @_\n" ; <STDIN>;
+	chomp $microsat;
+	my $seqLength = $_[1];
+	my @fields = split(/\t/,$microsat);
+	my $start = $seqLength - $fields[$endcord] + 1;
+	my $end = $seqLength - $fields[$startcord] + 1;
+	$fields[$startcord] = $start;
+	$fields[$endcord] = $end;
+	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
+#	print "RETURNIG: ", join("\t",@fields), "\n" if $printer == 1;
+	return join("\t",@fields); 	
+}
+
+sub largest_number{
+	my $counter = 0;
+	my($max) = shift(@_);
+    foreach my $temp (@_) {
+    	#print "finding largest array: $maxcounter \n";
+    	if($temp > $max){
+        	$max = $temp;
+        }
+    }
+    return($max);
+}
+sub smallest_number{
+	my $counter = 0;
+	my($min) = shift(@_);
+    foreach my $temp (@_) {
+    	#print "finding largest array: $maxcounter \n";
+    	if($temp < $min){
+        	$min = $temp;
+        }
+    }
+    return($min);
+}
+
+
+sub filterDuplicates_merged{
+	my @merged = @_;
+	my %revmerged = ();
+	my @fmerged = ();
+	foreach my $micro (@merged) {
+		my @fields = split(/\t/,$micro);
+		if ($fields[3] =~ /chr[A-Z0-9a-z]+r/){
+			my $key = join("_K0K_",$fields[1], $fields[$startcord], $fields[$endcord]);
+	#		print "adding  ... \n$key\n$micro\n";
+			push(@{$revmerged{$key}}, $micro);
+		}
+		else{
+		#	print "pushing.. $micro\n";
+			push(@fmerged, $micro);
+		}
+	}
+#	print "\n";
+	foreach my $micro (@fmerged) {
+			my @fields = split(/\t/,$micro);
+			my $key = join("_K0K_",$fields[1], $fields[$startcord], $fields[$endcord]);
+	#		print "searching for key $key\n";
+			if (exists $revmerged{$key}){
+		#		print "deleting $revmerged{$key}[0]\n";
+				delete $revmerged{$key};
+			}
+	}
+	foreach my $key (sort keys %revmerged) {
+    	push(@fmerged,$revmerged{$key}[0]);
+	}
+#	print "returning ", join("\n", @fmerged),"\n" ;
+	return join("__", @fmerged);
+}
+
+sub invert_microsat{
+	my $micro = $_[0];
+	chomp $micro;
+	if ($micro =~ /chr[A-Z0-9a-z]+r/) { $micro =~  s/chr([0-9a-b]+)r/chr$1/g ;}
+	else {  $micro =~  s/chr([0-9a-b]+)/chr$1r/g ; }
+	my $sequence = $_[1];
+	$sequence =~ s/ //g;
+	my $seqlen = length($sequence);
+	my @fields = split(/\t/,$micro);
+	my $start = $seqlen - $fields[$endcord] +1;
+	my $end = $seqlen - $fields[$startcord] +1;
+	$fields[$startcord]  = $start;
+	$fields[$endcord] = $end;
+	$fields[$motifcord] = reverse_micro($fields[$motifcord]);
+	$fields[$microsatcord] = reverse_micro($fields[$microsatcord]);
+	if ($fields[$typecord] ne "compound" && exists $fields[$no_of_interruptionscord] ){
+		my @intertypes = split(/,/,$fields[$interrtypecord]);
+		my @inters = split(/,/,$fields[$interrcord]);
+		my @interposes = split(/,/,$fields[$interr_poscord]);
+		$fields[$interrtypecord] = join(",",reverse(@intertypes));
+		$fields[$no_of_interruptionscord] = scalar(@interposes);
+		for my $i (0 ... $fields[$no_of_interruptionscord]-1){
+			if (exists $inters[$i] && $inters[$i] =~ /[a-zA-Z]/){
+				$inters[$i] = reverse($inters[$i]);
+				$interposes[$i] = $interposes[$i] + length($inters[$i]) - 1;
+			}	
+			else{
+				$inters[$i] = "";
+				$interposes[$i] = $interposes[$i] - 1;
+			}
+			$interposes[$i] = ($end - $start + 1) - $interposes[$i] + 1;
+		}
+		
+		$fields[$interrcord] = join(",",reverse(@inters));
+		$fields[$interr_poscord] = join(",",reverse(@interposes));
+	}
+	
+	my $finalmicrosat = join("\t", @fields);
+	return $finalmicrosat;			
+
+}
+sub reverse_micro{
+	my $micro = reverse($_[0]);
+	my @strand = split(/\s*/,$micro);
+	for my $i (0 ... $#strand){
+		if ($strand[$i] =~ /\[/i) {$strand[$i] = "]";next;}
+		if ($strand[$i] =~ /\]/i) {$strand[$i] = "[";next;}
+	}
+	return join("",@strand);
+}
+
+#xxxxxxxxxxxxxx merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx  merge_interruptedMicrosats xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 
+
+sub forward_reverse_sputoutput_comparer	{
+#	print "IN forward_reverse_sputoutput_comparer: @_\n";
+	my $input0 = $_[0];  ###### the *nogap_unrand_match file
+	my $input1 = $_[1];  ###### the real file, *sput* data
+	my $input2 = $_[2];  ###### the reverse file, *sput* data
+	my $output1 = $_[3]; ###### microsats different in real file
+	my $output2 = $_[4]; ###### microsats missing in real file
+	my $output3 = $_[5]; ###### microsats common among real and reverse file
+	my $no_of_species = $_[6];
+	
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	$interrtypecord = $motifcord + 1;
+	$interrcord = $motifcord + 2;
+	$interr_poscord = $motifcord + 3;
+	$no_of_interruptionscord = $motifcord + 4;
+	$mergestarts  = $no_of_interruptionscord+ 1;
+	$mergeends = $no_of_interruptionscord+ 2;
+	$mergemicros = $no_of_interruptionscord+ 3;
+	
+	
+	open(SEQ,"<$input0") or die "Cannot open file $input0 $!";
+	open(INF,"<$input1") or die "Cannot open file $input1 $!";
+	open(INR,"<$input2") or die "Cannot open file $input2 $!";
+	
+	open(DIFF,">$output1") or die "Cannot open file $output1 $!";
+	#open(MISS,">$output2") or die "Cannot open file $output2 $!";
+	open(SAME,">$output3") or die "Cannot open file $output3 $!";
+		
+		
+#	print "opened files \n";
+	my $linecounter = 0;
+	my $fcounter = 0;
+	my $rcounter = 0;
+	
+	$printer = 0;
+	#---------------------------------------------------------------------------------------------------
+	# NOW ADDING FORWARD MICROSATELLITES TO HASH
+	my %fmicros = ();
+	my $microcounter=0;
+	while (my $line = <INF>){
+		$linecounter++;
+		if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
+		#	print $key, "#-#-#-#-#-#-#-#\n";
+			push (@{$fmicros{$key}},$line);	
+			$microcounter++;
+		}
+		else {
+		#print $line;
+		}
+	}
+#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close INF;
+	my @deletedlines = ();
+#	print "done forward hash \n";
+	$linecounter = 0;
+	#---------------------------------------------------------------------------------------------------
+	# NOW ADDING REVERSE MICROSATELLITES TO HASH
+	my %rmicros = ();
+	$microcounter=0;
+	while (my $line = <INR>){
+		$linecounter++;
+		if ($line =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
+		#	print $key, "#-#-#-#-#-#-#-#\n";
+			push (@{$rmicros{$key}},$line);	
+			$microcounter++;
+		}
+		else {}
+	}
+#	print "number of microsatellites added to hash = $microcounter\nnumber of lines scanned = $linecounter\n";
+	close INR;
+#	print "done reverse hash \n";
+	$linecounter = 0;
+	#---------------------------------------------------------------------------------------------------
+	#---------------------------------------------------------------------------------------------------
+	# NOW READING THE SEQUENCE FILE
+	while(my $sine = <SEQ>){
+		my %microstart=();
+		my %microend=();
+		my @sields = split(/\t/,$sine);
+		my $key = ();
+		if ($sine =~ /([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s[\+|\-]\s([0-9a-zA-Z]+)\s([0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$key = join("\t",$1, $3, $4, $5, $7, $8, $9, $11, $12);
+		}
+		else {
+			next;
+		}
+		$printer = 0;
+		my $sequence = $sields[$sequencepos];
+		chomp $sequence;
+		$sequence =~ s/ //g;
+		my @localfs = ();
+		my @localrs = ();
+		
+		if (exists $fmicros{$key}){
+			@localfs = @{$fmicros{$key}};
+			delete $fmicros{$key};		
+		}
+	
+		my %forwardstarts = ();
+		my %forwardends = ();
+	
+		foreach my $f (@localfs){
+			my @fields = split(/\t/,$f);
+			push (@{$forwardstarts{$fields[$startcord]}},$f);
+			push (@{$forwardends{$fields[$endcord]}},$fields[$startcord]);
+		}
+	
+		if (exists $rmicros{$key}){
+			@localrs = @{$rmicros{$key}};
+			delete $rmicros{$key};		
+		}
+		else{
+		}
+	
+		foreach my $r (@localrs){
+			chomp $r;
+			my @rields = split(/\t/,$r);
+#			print "rields = @rields\n" if $printer == 1;
+			my $reciprocalstart = length($sequence) - $rields[$endcord] + 1;
+			my $reciprocalend = length($sequence) - $rields[$startcord] + 1;
+#			print "reciprocal start = $reciprocalstart = ",length($sequence)," - $rields[$endcord] + 1\n" if $printer == 1;
+			my $microsat = reverse_micro(all_caps($rields[$microsatcord]));
+			my @localcollection=();
+			for my $i ($reciprocalstart+1 ... $reciprocalend-1){
+				if (exists $forwardstarts{$i}){
+					push(@localcollection, $forwardstarts{$i}[0] );
+					delete $forwardstarts{$i}; 
+				}
+				if (exists $forwardends{$i}){
+					next if !exists $forwardstarts{$forwardends{$i}[0]};
+					push(@localcollection, $forwardstarts{$forwardends{$i}[0]}[0] );
+				}
+			}
+			if (exists $forwardstarts{$reciprocalstart} && exists $forwardends{$reciprocalend}) {push(@localcollection,$forwardstarts{$reciprocalstart}[0]);}
+			
+			if (scalar(@localcollection) == 0){
+				print SAME invert_microsat($r,($sequence) ), "\n";
+			}
+	
+			elsif (scalar(@localcollection) == 1){
+#				print "f microsat = $localcollection[0]\n"  if $printer == 1;
+				my @lields = split(/\t/,$localcollection[0]);
+				$lields[$microsatcord]=all_caps($lields[$microsatcord]);
+#				print "comparing: $microsat and $lields[$microsatcord]\n" if $printer == 1;
+#				print "coordinates are: $lields[$startcord]-$lields[$endcord] and $reciprocalstart-$reciprocalend\n" if $printer == 1;
+				if ($microsat eq $lields[$microsatcord]){
+					chomp $localcollection[0];
+					print SAME $localcollection[0], "\n";
+				}
+				if ($microsat ne $lields[$microsatcord]){
+					chomp $localcollection[0];
+					my $newmicro = microsatChooser(join("\t",@lields), join("\t",@rields), $sequence);
+#					print "newmicro = $newmicro\n"  if $printer == 1;
+					if ($newmicro =~ /[a-zA-Z]/){
+						print SAME $newmicro,"\n";
+					}
+					else{
+					print DIFF join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n";
+#					print join("\t",$localcollection[0],"-->",$rields[$typecord],$reciprocalstart,$reciprocalend, $rields[$microsatcord], reverse_micro($rields[$motifcord]), @rields[$motifcord+1 ... $#rields] ),"\n" if $printer == 1;
+#					print "@rields\n@lields\n" if $printer == 1;
+					}
+				}			
+			}
+			else{
+#				print "multiple found for $r --> ", join("\t",@localcollection),"\n" if $printer == 1;
+			}
+		}
+	}	
+		
+	close(SEQ);
+	close(INF);
+	close(INR);
+	close(DIFF);
+	close(SAME);
+	
+}
+sub all_caps{
+	my @strand = split(/\s*/,$_[0]);
+	for my $i (0 ... $#strand){
+		if ($strand[$i] =~ /c/) {$strand[$i] = "C";next;}
+		if ($strand[$i] =~ /a/) {$strand[$i] = "A";next;}
+		if ($strand[$i] =~ /t/) { $strand[$i] = "T";next;}
+		if ($strand[$i] =~ /g/) {$strand[$i] = "G";next;}
+	}
+	return join("",@strand);
+}
+sub microsatChooser{
+	my $forward = $_[0];
+	my $reverse = $_[1];
+	my $sequence = $_[2];
+	my $seqLength = length($sequence);
+	$sequence =~ s/ //g;
+	my @fields = split(/\t/,$forward);
+	my @rields = split(/\t/,$reverse);	
+	my $r_start = $seqLength - $rields[$endcord] + 1;
+	my $r_end = $seqLength - $rields[$startcord] + 1;
+	
+	
+	my $f_microsat = $fields[$microsatcord];
+	my $r_microsat = $rields[$microsatcord];
+	
+	if ($fields[$typecord] =~ /\./ && $rields[$typecord] =~ /\./) {
+		return $forward if length($f_microsat) >= length($r_microsat);
+		return invert_microsat($reverse, $sequence) if length($f_microsat) < length($r_microsat);
+	}
+	return $forward if all_caps($fields[$motifcord]) eq all_caps($rields[$motifcord]) && $fields[$startcord] == $rields[$startcord] && $fields[$endcord] == $rields[$endcord];	
+
+	my $f_microsat_copy = $f_microsat;
+	my $r_microsat_copy = $r_microsat;
+	$f_microsat_copy =~ s/^\[|\]$//g;
+	$r_microsat_copy =~ s/^\[|\]$//g;
+
+	my @f_microields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
+	my @r_microields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
+	my @f_intields = split(/\][a-zA-Z]*\[/,$f_microsat_copy);
+	my @r_intields = split(/\][a-zA-Z]*\[/,$r_microsat_copy);
+	
+	my $f_motif = $fields[$motifcord];
+	my $r_motif = $rields[$motifcord];
+	my $f_motif_copy = $f_motif;
+	my $r_motif_copy = $r_motif;
+	$f_motif_copy =~ s/^\[|\]$//g;
+	$r_motif_copy =~ s/^\[|\]$//g;
+
+	my @f_motields = split(/\]\[/,$f_motif_copy);
+	my @r_motields = split(/\]\[/,$r_motif_copy);
+
+	my $f_purestretch = join("",@f_microields);
+	my $r_purestretch = join("",@r_microields);
+
+	if ($fields[$typecord]=~/nucleotide/ && $rields[$typecord]=~/nucleotide/){
+#		print "now.. studying $forward\n$reverse\n" if $printer == 1;
+		if ($fields[$typecord] eq $rields[$typecord]){
+#			print "comparing motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
+
+			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
+				my $subset_answer = isSubset($forward, $reverse, $seqLength);
+#				print "subset answer = $subset_answer\n" if $printer == 1;
+				return $forward if $subset_answer == 1; 
+				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
+				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
+				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
+				return $forward if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) >= length($r_purestretch);
+				return invert_microsat($reverse, $sequence) if $subset_answer == 3 && slided_microsat($forward, $reverse, $seqLength) == 0 && length($f_purestretch) < length($r_purestretch);
+				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence) if $subset_answer == 3 ;			
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
+				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
+				return $forward;
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
+				return invert_microsat($reverse, $sequence);
+			}
+		}
+		else{
+			my $fmotlen = ();
+			my $rmotlen = ();
+	 		$fmotlen =1 if $fields[$typecord] eq "mononucleotide";
+	 		$fmotlen =2 if $fields[$typecord] eq "dinucleotide";
+	 		$fmotlen =3 if $fields[$typecord] eq "trinucleotide";
+	 		$fmotlen =4 if $fields[$typecord] eq "tetranucleotide";
+	 		$rmotlen =1 if $rields[$typecord] eq "mononucleotide";
+	 		$rmotlen =2 if $rields[$typecord] eq "dinucleotide";
+	 		$rmotlen =3 if $rields[$typecord] eq "trinucleotide";
+	 		$rmotlen =4 if $rields[$typecord] eq "tetranucleotide";
+			
+			if ($fmotlen < $rmotlen){
+				if (abs($fields[$startcord] -  $r_start) <= $fmotlen || abs($fields[$endcord] -  $r_end) <= $fmotlen ){
+					return $forward;
+				}
+				else{
+					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+				}
+			}
+			if ($fmotlen > $rmotlen){
+				if (abs($fields[$startcord] -  $r_start) <= $rmotlen || abs($fields[$endcord] -  $r_end) <= $rmotlen){
+					return invert_microsat($reverse, $sequence);
+				}
+				else{
+					return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+				}
+			}
+		}
+	}
+	if ($fields[$typecord] eq "compound" && $rields[$typecord] eq "compound"){
+#			print "comparing compound motifs::", all_caps($fields[$motifcord]) ," and ", all_caps(reverse_micro($rields[$motifcord])), "\n" if $printer == 1;
+			if(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 1){
+				my $subset_answer = isSubset($forward, $reverse, $seqLength);
+#				print "subset answer = $subset_answer\n" if $printer == 1;
+				return $forward if $subset_answer == 1; 
+				return invert_microsat($reverse, $sequence) if $subset_answer == 2; 
+#				print length($f_purestretch) ,">", length($r_purestretch)," \n" if $printer == 1;
+				return $forward if $subset_answer == 0 && length($f_purestretch) >= length($r_purestretch);
+				return invert_microsat($reverse, $sequence) if $subset_answer == 0 && length($f_purestretch) < length($r_purestretch);
+				if ($subset_answer == 3){
+					if ($fields[$startcord] < $r_start || $fields[$endcord] > $r_end){
+						if (abs($fields[$startcord] -  $r_start) < length($f_motields[0]) || abs($fields[$endcord] -  $r_end)  < length($f_motields[$#f_motields]) ){
+							return $forward;
+						}
+						else{
+							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+						}
+					}
+					if ($fields[$startcord] > $r_start || $fields[$endcord] < $r_end){
+						if (abs($fields[$startcord] -  $r_start) < length($r_motields[0]) || abs($fields[$endcord] -  $r_end) < length($r_motields[$#r_motields]) ){
+							return invert_microsat($reverse, $sequence);
+						}
+						else{
+							return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+						}
+					}
+				}
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 0){
+				return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 2){
+				return $forward;
+			}
+			elsif(motifBYmotif_match(all_caps($fields[$motifcord]), all_caps(reverse_micro($rields[$motifcord]))) == 3){
+				return invert_microsat($reverse, $sequence);
+			}
+		
+	}
+	
+	if ($fields[$typecord] eq "compound" && $rields[$typecord] =~ /nucleotide/){
+#		print "one compound, one nucleotide\n" if $printer == 1; 
+		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+	}
+	if ($fields[$typecord] =~ /nucleotide/ && $rields[$typecord]eq "compound"){
+#		print "one compound, one nucleotide\n" if $printer == 1; 
+		return merge_microsats($forward, invert_microsat($reverse, $sequence), $sequence);
+	}
+}
+
+sub isSubset{
+	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
+	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
+	my $seqLength = $_[2];
+	my $r_start = $seqLength - $rields[$endcord] + 1;
+	my $r_end = $seqLength - $rields[$startcord] + 1;
+#	print "we have $fields[$startcord] -> $fields[$endcord] && $r_start -> $r_end\n" if $printer == 1;
+	return "0" if $fields[$startcord] == $r_start && $fields[$endcord] == $r_end;
+	return "1" if $fields[$startcord] <= $r_start && $fields[$endcord] >= $r_end;
+	return "2" if $r_start <= $fields[$startcord] && $r_end >= $fields[$endcord];
+	return "3";
+}
+
+sub motifBYmotif_match{
+	my $forward = $_[0];
+	my $reverse = $_[1];
+	$forward =~ s/^\[|\]$//g;
+	$reverse =~ s/^\[|\]$//g;
+	my @f_motields=split(/\]\[/, $forward);
+	my @r_motields=split(/\]\[/, $reverse);
+	my $finalresult = 0;
+
+	if (scalar(@f_motields) != scalar(@r_motields)){
+		my $subresult = 0;
+		my @mega = (); my @sub = ();
+		@mega = @f_motields if scalar(@f_motields) > scalar(@r_motields);
+		@sub = @f_motields if scalar(@f_motields) > scalar(@r_motields);
+		@mega = @r_motields if scalar(@f_motields) < scalar(@r_motields);
+		@sub = @r_motields if scalar(@f_motields) < scalar(@r_motields);
+		
+		for my $i (0 ... $#sub){
+			my $probe = $sub[$i].$sub[$i];
+#			print "probing $probe and $mega[$i]\n" if $printer == 1;
+			if ($probe =~ /$mega[$i]/) {$subresult = 1; }
+			else {$subresult = 0; last; }
+		}
+		
+		return 0 if $subresult == 0;
+		return 2 if $subresult == 1 && scalar(@f_motields) > scalar(@r_motields); # r is subset of f
+		return 3 if $subresult == 1 && scalar(@f_motields) < scalar(@r_motields);  # ^reverse
+		
+	}
+	else{
+		for my $i (0 ... $#f_motields){
+			my $probe = $f_motields[$i].$f_motields[$i];
+			if ($probe =~ /$r_motields[$i]/) {$finalresult = 1 ;}
+			else {$finalresult = 0 ;last;}
+		}
+	}
+#	print "finalresult = $finalresult\n" if $printer == 1;
+	return $finalresult;
+}
+
+sub merge_microsats{
+	my @input = @_;
+	my $sequence = pop(@input);
+	$sequence =~ s/ //g;
+	my @seq_string = @input;
+#	print "IN: merge_microsats\n";
+#	print "recieved for merging: ", join("\n", @seq_string), "\nsequence = $sequence\n"; 
+	my $start;
+	my $end;
+	my @micros = map { $_->[0] } sort custom map { [$_, split /\t/ ] } @seq_string; 
+#	print "\nrearranged into @micros \n";
+	my (@motifs, @microsats, @interruptiontypes, @interruptions, @interrposes, @no_of_interruptions, @types, @starts, @ends, @mergestart, @mergeend, @mergemicro) = ();
+	my @fields = ();
+	for my $i (0 ... $#micros){
+		chomp $micros[$i];
+		@fields = split(/\t/,$micros[$i]);
+		push(@types, $fields[$typecord]);
+		push(@motifs, $fields[$motifcord]);		
+
+		if (exists $fields[$interrtypecord]){ push(@interruptiontypes, $fields[$interrtypecord]);}
+			else { push(@interruptiontypes, "NA"); }
+		if (exists $fields[$interrcord]) {push(@interruptions, $fields[$interrcord]);}
+			else { push(@interruptions, "NA"); }
+		if (exists $fields[$interr_poscord]) { push(@interrposes, $fields[$interr_poscord]);}	
+			else { push(@interrposes, "NA"); }
+		if (exists $fields[$no_of_interruptionscord]) {push(@no_of_interruptions, $fields[$no_of_interruptionscord]);}
+			else { push(@no_of_interruptions, "NA"); }
+		if(exists $fields[$mergestarts]) { @mergestart = (@mergestart, split(/\./,$fields[$mergestarts]));}
+			else { push(@mergestart, $fields[$startcord]); }
+		if(exists $fields[$mergeends]) { @mergeend = (@mergeend, split(/\./,$fields[$mergeends]));}
+			else { push(@mergeend, $fields[$endcord]); }
+		if(exists $fields[$mergemicros]) { push(@mergemicro, $fields[$mergemicros]);}
+			else { push(@mergemicro, $fields[$microsatcord]); }
+			
+
+	}
+	$start = smallest_number(@mergestart);
+	$end = largest_number(@mergeend);
+	my $microsat_entry = "[".substr( $sequence, $start-1, ($end - $start + 1) )."]";
+	my $microsat = join("\t", @fields[0 ... $infocord], join(".", @types), $start, $fields[$strandcord], $end, $microsat_entry , join(".", @motifs), join(".", @interruptiontypes),join(".", @interruptions),join(".", @interrposes),join(".", @no_of_interruptions), join(".", @mergestart), join(".", @mergeend) , join(".", @mergemicro));
+	return $microsat;
+}
+
+sub slided_microsat{
+	my $forward = $_[0]; 	my @fields = split(/\t/,$forward);
+	my $reverse = $_[1];	my @rields = split(/\t/,$reverse);
+	my $seqLength = $_[2];
+	my $r_start = $seqLength - $rields[$endcord] + 1;
+	my $r_end = $seqLength - $rields[$startcord] + 1;
+	my $motlen =();
+	 $motlen =1 if $fields[$typecord] eq "mononucleotide";
+	 $motlen =2 if $fields[$typecord] eq "dinucleotide";
+	 $motlen =3 if $fields[$typecord] eq "trinucleotide";
+	 $motlen =4 if $fields[$typecord] eq "tetranucleotide";
+	
+	if (abs($fields[$startcord] - $r_start) < $motlen || abs($fields[$endcord] - $r_end) < $motlen ) {
+		return 0;
+	}
+	else{
+		return 1;
+	}
+	
+}
+
+#xxxxxxxxxxxxxx forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx  forward_reverse_sputoutput_comparer xxxxxxxxxxxxxx 
+
+
+
+#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 
+sub new_multispecies_t10{
+	my $input1 = $_[0];  #gap_op_unrand_match
+	my $input2 = $_[1];  #sput
+	my $output = $_[2];  #output
+	my $bin = $output."_bin";
+	my $orgs = join("|",split(/\./,$_[3]));
+	my @organisms = split(/\./,$_[3]);
+	my $no_of_species = scalar(@organisms); #3
+	my $t10info = $output."_info";
+	$prinkter = 0;
+	
+	open (MATCH, "<$input1");
+	open (SPUT, "<$input2");
+	open (OUT, ">$output");
+	open (INFO, ">$t10info");
+
+
+	sub microsat_bracketer;
+	sub custom;
+	my %seen = ();
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$startcord = 2 + (4*$no_of_species) + 2 - 1;
+	$strandcord = 2 + (4*$no_of_species) + 3 - 1;
+	$endcord = 2 + (4*$no_of_species) + 4 - 1;
+	$microsatcord = 2 + (4*$no_of_species) + 5 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 6 - 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	#---------------------------------------------------------------------------------------------------------------#
+	#	MAKING A HASH FROM SPUT, WITH HASH KEYS GENERATED BELOW AND SEQUENCES STORED AS VALUES	#
+	#---------------------------------------------------------------------------------------------------------------#
+	my $linecounter = 0;
+	my $microcounter = 0;
+	while (my $line = <SPUT>){
+		chomp $line;
+	#	print "$org\t(chr[0-9]+)\t([0-9]+)\t([0-9])+\t \n";
+		next if $line !~ /[0-9a-z]+/;
+		$linecounter++;
+	#		my $key = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
+	#		print $key, "#-#-#-#-#-#-#-#\n";
+		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			my $key = join("\t",$1, $2, $3, $4, $5);
+#			print "key = $key\n" if $prinkter == 1;
+			push (@{$seen{$key}},$line);	
+			$microcounter++;
+		}
+		else {		
+			#print "could not make ker in SPUT : \n$line \n"; 
+		}
+	}
+#	print "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
+#	print INFO  "done hash.. linecounter = $linecounter, microcounter = $microcounter and total keys entered = ",scalar(keys %seen),"\n";
+	close SPUT;
+	
+	#----------------------------------------------------------------------------------------------------------------
+	
+	#-------------------------------------------------------------------------------------------------------#
+	#	THE ENTIRE CODE BELOW IS DEVOTED TO GENERATING HASH KEYS FROM MATCH FOLLOWED BY			#
+	#	USING THESE HASH KEYS TO CORRESPOND EACH SEQUENCE IN FIRST FILE TO ITS MICROSAT REPEATS IN			#
+	#   SECOND FILE FOLLOWED BY																				#
+	#	FINDING THE EXACT LOCATION OF EACH MICROSAT REPEAT WITHIN EACH SEQUENCE USING THE 'index' FUNCTION	#
+	#-------------------------------------------------------------------------------------------------------#		
+	my $ref = 0;
+	my $ref2 = 0;
+	my $ref3 = 0;
+	my $ref4 = 0;
+	my $deletes= 0;
+	my $duplicates = 0;
+	my $neighbors = 0;
+	my $tooshort = 0;
+	my $prevmicrol=();
+	my $startnotfound = 0;
+	my $matchkeysformed = 0;
+	my $keysused = 0;
+	
+	while (my $line = <MATCH>)	{
+#		print    colored ['magenta'], $line  if $prinkter == 1;
+		next if $line !~ /[a-zA-Z0-9]/;
+		chomp $line;	
+		my @fields2 = split(/\t/,$line);
+		my $key2 = ();
+	#		$key2 = join("\t",$1 , $2, $4, $5, $6, $8, $9, $10, $12, $13);
+		if ($line =~ /([0-9]+)\s+([0-9a-zA-Z]+)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)\s/ ) {
+			$matchkeysformed++;
+			$key2 = join("\t",$1, $2, $3, $4, $5);
+#			print "key = $key2 \n" if $prinkter == 1; 
+		}
+		else{
+#			print "could not make ker in SEQ : $line\n";
+			next;
+		}
+		my $sequence = $fields2[$sequencepos];
+		$sequence =~ s/\*/-/g;
+		my $count = 0;
+		if (exists $seen{$key2}){
+			$keysused++;
+			my @unsorted_raw = @{$seen{$key2}};
+			delete $seen{$key2};
+			my @sequencearr = split(/\s*/, $sequence);
+			
+#			print "sequencearr = @sequencearr\n" if $prinkter == 1;
+			
+			my $counter;
+	
+			my %start_database = ();
+			my %end_database = ();
+			foreach my $uns (@unsorted_raw){
+				my @uields = split(/\t/,$uns);
+				$start_database{$uields[$startcord]} = $uns;
+				$end_database{$uields[$endcord]} = $uns;
+			}
+			
+			my @unsorted = ();
+			my %starts = (); my %ends = ();
+#			print colored ['yellow'],"unsorted, unfiltered microats = \n" if $prinkter == 1; foreach (@unsorted_raw) {print colored ['blue'],$_,"\n" if $prinkter == 1;}
+			for my $u (0 ... $#unsorted_raw){
+				my @tields = split(/\t/,$unsorted_raw[$u]);
+				next if exists $starts{$tields[$startcord]} && exists $ends{$tields[$endcord]};
+				push(@unsorted, $unsorted_raw[$u]);
+				$starts{$tields[$startcord]} = $unsorted_raw[$u];
+#				print "in starts : $tields[$startcord] -> $unsorted_raw[$u]\n" if $prinkter == 1;
+			}
+			
+			my $basecounter= 0;
+			my $gapcounter = 0;
+			my $poscounter = 0;
+			
+			for my $s (@sequencearr){
+				
+				$poscounter++;
+				if ($s eq "-"){
+					$gapcounter++; next;
+				}
+				else{
+					$basecounter++;
+				}
+				
+				
+				#print "s = $s, poscounter = $poscounter, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
+				#print "s = $s, basecounter = $basecounter, gapcpunter = $gapcounter\n" if $prinkter == 1;
+				#print "s = $s, gapcpunter = $gapcounter\n" if $prinkter == 1;
+				
+				if (exists $starts{$basecounter}){
+					my $locus = $starts{$basecounter};
+#					print "locus identified = $locus\n" if $prinkter == 1;
+					my @fields3 = split(/\t/,$locus);
+					my $start = $fields3[$startcord];
+					my $end = $fields3[$endcord];
+					my $motif = $fields3[$motifcord];
+					my $microsat = $fields3[$microsatcord];				
+					my @leftbracketpos = ();			
+					my @rightbracketpos = (); 
+					my $bracket_picker = 'no';
+					my $leftbrackets=();
+					my $rightbrackets = ();
+					my $micro_cpy = $microsat;
+#					print "microsat = $microsat\n" if $prinkter == 1;
+					while($microsat =~ m/\[/g) {push(@leftbracketpos, (pos($microsat)));  $leftbrackets = join("__",@leftbracketpos);$bracket_picker='yes';}
+					while($microsat =~ m/\]/g) {push(@rightbracketpos, (pos($microsat))); $rightbrackets = join("__",@rightbracketpos);}
+					$microsat =~ s/[\[\]\-\*]//g;
+#					print "microsat = $microsat\n" if $prinkter == 1;
+					my $human_search = join '-*', split //, $microsat;
+					my $temp = substr($sequence, $poscounter-1);
+#					print "with poscounter = $poscounter\n" if $prinkter == 1;
+					my $search_result = ();
+					my $posnow  = ();
+					
+#					print "for $line, temp $temp or human_search $human_search not defined\n" if !defined $temp || !defined $human_search;
+#					<STDIN> if !defined $temp || !defined $human_search;
+					
+					while ($temp =~ /($human_search)/gi){
+						$search_result = $1;
+					#	$posnow  = pos($temp);
+						last;
+					}
+					
+					my @gapspos = ();
+					next if !defined $search_result;
+					
+					while($search_result =~ m/-/g) {push(@gapspos, (pos($search_result))); }
+					my $gaps  = join("__",@gapspos);
+	
+					my $final_microsat = $search_result;
+					if ($bracket_picker eq "yes"){
+						$final_microsat = microsat_bracketer($search_result, $gaps,$leftbrackets,$rightbrackets);
+					}
+					
+					my $outsentence = join("\t",join ("\t",@fields3[0 ... $infocord]),$fields3[$typecord],$fields3[$motifcord],$gapcounter,$poscounter,$fields3[$strandcord],$poscounter + length($search_result) -1 ,$final_microsat);
+					
+					if ($bracket_picker eq "yes") {
+						$outsentence = $outsentence."\t".join("\t",@fields3[($motifcord+1) ... $#fields3]);
+					}
+					print OUT $outsentence,"\n";	
+				}			
+			}
+		}
+	}
+	my $unusedkeys = scalar(keys %seen);
+#	print INFO "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
+#	print INFO "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
+#	print INFO "microsatellites too short = $tooshort\n";
+#	print INFO "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
+	
+	#print  "in hash = $ref, looped = $ref4, captured = $ref3\n REMOVED: \nmicrosats with too long gaps = $deletes\n";
+	#print  "exact duplicated removed = $duplicates \nmicrosats removed due to multiple microsats defined  in +-10 bp neighboring region: $neighbors \n";
+	#print  "microsatellites too short = $tooshort\n";
+	#print  "keysused = $keysused...starts not found = $startnotfound ... matchkeysformed=$matchkeysformed ... unusedkeys=$unusedkeys\n";
+	#print "unused keys = \n",join("\n", (keys %seen)),"\n";
+	close (MATCH);
+	close (SPUT);
+	close (OUT);
+	close (INFO);
+}	
+
+sub microsat_bracketer{
+#	print "in bracketer: @_\n";
+	my ($microsat, $gapspos, $leftbracketpos, $rightbracketpos) = @_;
+	my @gaps = split(/__/,$gapspos);
+	my @lefts = split(/__/,$leftbracketpos);
+	my @rights = split(/__/,$rightbracketpos);
+	my @new=();
+	my $pure = $microsat;
+	$pure =~ s/-//g;
+	my $off = 0;
+	my $finallength  = length($microsat) + scalar(@lefts)+scalar(@rights);
+	push(@gaps, 0);
+	push(@lefts,0);
+	push(@rights,0);
+	
+	for my $i (1 ... $finallength){
+#		print "1 current i = >$i<>, right = >$rights[0]<  gap = $gaps[0] left = >$lefts[0]< and $rights[0] == $i\n";
+		if($rights[0] == $i){
+	#		print "pushed a ]\n";
+			push(@new, "]");
+			shift(@rights);
+			push(@rights,0);
+			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
+			next;
+		}
+		if($gaps[0] == $i){
+	#		print "pushed a -\n";
+			push(@new, "-");
+			shift(@gaps);
+			push(@gaps, 0);
+			for my $j (0 ... scalar(@rights)-1) {$rights[$j]++;}	
+			for my $j (0 ... scalar(@lefts)-1) {$lefts[$j]++;}	
+
+			next;
+		}
+		if($lefts[0] == $i){
+#			print "pushed a [\n";
+			push(@new, "[");
+			shift(@lefts);
+			push(@lefts,0);
+			for my $j (0 ... scalar(@gaps)-1) {$gaps[$j]++;}	
+			next;
+		}
+		else{
+			my $pushed = substr($pure,$off,1);
+			$off++;
+			push(@new,$pushed );
+#			print "pushed an alphabet, now new = @new, pushed = $pushed\n";
+			next;
+		}
+	}
+	my $returnmicrosat = join("",@new);
+#	print "final microsat = $returnmicrosat \n";
+	return($returnmicrosat);
+}
+
+#xxxxxxxxxxxxxx new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx  new_multispecies_t10 xxxxxxxxxxxxxx 
+
+
+#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx 
+sub multiSpecies_orthFinder4{
+	#print "IN multiSpecies_orthFinder4: @_\n";
+	my @handles = ();
+	#1 SEPT 30TH 2008
+	#2 THIS CODE (multiSpecies_orthFinder4.pl)  IS BEING MADE SO THAT IN THE REMOVAL OF MICROSATELLITES THAT ARE CLOSER TO EACH OTHER
+	#3 THAN 50 BP (HE 50BP RADIUS OF EXCLUSION), WE ARE LOOKING ACCROSS ALIGNMENT BLOCKS.. AND NOT JUST LOOKING WITHIN THE ALIGNMENT BLOCKS. THIS WILL
+	#4 POTENTIALLY REMOVE EVEN MORE MICROSATELLITES THAN BEFORE, BUT THIS WILL RESCUE THOSE MICROSATELLITES THAT WERE LOST
+	#5 DUE TO OUR PREVIOUS REQUIREMENT FROM VERSION 3, THAT MICROSATELLITES THAT ARE CLOSER TO THE BOUNDARY THAN 25 BP NEED TO BE REMOVED
+	#6 SUCH A REQUIREMENT WAS A CRUDE WAY TO IMPOSE THE ABOVE 50 BP RADIUS OF EXCLUSION ACCROSS THE ALIGNMENT BLOCKS WITHOUT ACTUALLY
+	#7 CHECKING COORDINATES OF THE EXCLUDED MICROSATELLITES.
+	#8 IN ORDER TO TAKE CARE OF THE CASES WHERE MICROSATELLITES ARE PRELIOUSLY CLOSE TO ENDS OF THE ALIGNMENT BLOCKS, WE IMPOSE HERE
+	#9 A NEW REQUIREMENT THAT FOR A MICROSATELLITE TO BE CONSIDERED, ALL THE SPECIES NEED TO HAVE AT LEAST 10 BP OF NON-MICROSATELLITE SEQUENCE
+	#10 ON EITHER SIDE OF IT.. GAPLESS. THIS INFORMATION IS STORED IN THE VARIABLE: $FLANK_SUPPORT. THIS PART, INSTEAD OF BEING INCLUDED IN
+	#11 THIS CODE, WILL BE INCLUDED IN A NEW CODE THAT WE WILL BE WRITING AS PART OF THE PIPELINE: multiSpecies_microsatSetSelector.pl
+	
+	#1 trial run:
+	#2 perl ../../../codes/multiSpecies_orthFinder4.pl /gpfs/home/ydk104/work/rhesus_microsat/axtNet/hg18.panTro2.ponAbe2.rheMac2.calJac1/chr22.hg18.panTro2.ponAbe2.rheMac2.calJac1.net.axt H.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:C.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:O.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:R.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2:M.hg18-chr22.panTro2.ponAbe2.rheMac2.calJac1_allmicrosats_symmetrical_fin_hit_all_2 orth22 hg18:panTro2:ponAbe2:rheMac2:calJac1 50
+	
+	$prinkter=0;
+	
+	#############
+	my $CLUSTER_DIST = $_[4];
+	#############
+	
+	
+	my $aligns = $_[0];
+	my @micros = split(/:/, $_[1]);
+	my $orth = $_[2];
+	#my $not_orth = "notorth";
+	@tags = split(/:/, $_[3]);
+	
+	$no_of_species=scalar(@tags);
+	my $junkfile = $orth."_junk";
+	#open(JUNK,">$junkfile");
+	
+	#my $info = $output1."_info";
+	#print "inputs are : \n"; foreach(@micros){print $_,"\n";} 
+	#print "info = @_\n";
+	
+	
+	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
+	open (ORTH, ">$orth");
+	my $output=$orth."_out";
+	open (OUTP, ">$output");
+	
+	
+	#open (NORTH, ">$not_orth");
+	#open (INF, ">$info");
+	my $i = 0;
+	foreach my $path (@micros){
+		$handles[$i] = IO::Handle->new();
+		open ($handles[$i], "<$path") or die "Can't open microsat file $path : $!";
+		$i++;
+	}
+	
+	#print "Opened files\n";
+	
+	
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$motifcord = $typecord + 1;
+	$gapcord = $motifcord+1;
+	$startcord = $gapcord + 1;
+	$strandcord = $startcord + 1;
+	$endcord = $strandcord + 1;
+	$microsatcord = $endcord + 1;
+	$sequencepos = 2 + (4*$no_of_species) + 1 -1 ; 
+	#$sequencepos = 17;
+	#	GENERATING HASHES CONTAINING CHIMP AND HUMAN DATA FROM ABOVE FILES
+	#----------------------------------------------------------------------------------------------------------------
+	my @hasharr = ();
+	foreach my $path (@micros){
+		open(READ, "<$path") or die "Cannot open file $path :$!";
+		my %single_hash = ();
+		my $key = ();
+		my $counter = 0;
+		while (my $line = <READ>){
+			$counter++;
+		#	print $line;
+			chomp $line;
+			my @fields1 = split(/\t/,$line);
+			if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
+				$key = join("\t",$1, $2,  $4, $5);
+	
+#					print "key =  : $key\n" if $prinkter == 1;
+				
+#				print $line if $prinkter == 1;
+				push (@{$single_hash{$key}},$line);
+			}
+			else{
+			#	print "microsat line incompatible\n";
+			}
+		}
+		push @hasharr, {%single_hash};
+	#	print "@{$single_hash{$key}} \n";
+#		print "done $path: counter = $counter\n" if $prinkter == 1;
+		close READ;
+	}	
+#	print "Done hashes\n";
+	#----------------------------------------------------------------------------------------------------------------
+	my $question=();
+	#----------------------------------------------------------------------------------------------------------------
+	my @contigstarts = ();
+	my @contigends = ();
+	
+	my %contigclusters = ();
+	my %contigclustersFirstStartOnly=();
+	my %contigclustersLastEndOnly=();
+	my %contigclustersLastEndLengthOnly=();
+	my %contigclustersFirstStartLengthOnly=();
+	my %contigpath=();
+	my $dotcounter = 0;
+	while (my $line = <BO>){
+#		print "x" x 60, "\n" if $prinkter == 1;
+		$dotcounter++;
+#		print "." if $dotcounter % 100 ==0;
+#		print "\n" if $dotcounter % 5000 ==0;
+		next if $line !~ /^[0-9]+/;
+#		print $line if $prinkter == 1;
+		chomp $line;	
+		my @fields2 = split(/\t/,$line);
+		my $key2 = ();
+		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z]+)\s([0-9]+)\s([0-9]+)/ ) {
+			$key2 = join("\t",$1, $2,  $4, $5);
+		}
+		else {
+#			print "seq line $line incompatible\n" if $prinkter == 1; 
+			next;}
+		
+		
+		
+		
+		
+		
+		my @sequences = ();
+		for (0 ... $#tags){
+			my $seq = <BO>;
+	#		print $seq;
+			chomp $seq;
+			push(@sequences , " ".$seq);
+		}
+		my @origsequences = @sequences;
+		my $seqcopy = $sequences[0];
+		my @strings = ();
+		$seqcopy =~ s/[a-zA-Z]|-/x/g;
+		my @string = split(/\s*/,$seqcopy);
+	
+		for my $s (0 ... $#tags){
+			$sequences[$s] =~ s/-//g;
+			$sequences[$s] =~ s/[a-zA-Z]/x/g;		
+	#		print "length  of sequence = ",length($sequences[$s]),"\n";
+			my @tempstring = split(/\s*/,$sequences[$s]);
+			push(@strings, [@tempstring])
+			
+		}
+	
+		my @species_list = ();
+		my @micro_count = 0;
+		my @starthash = ();
+		my $stopper = 1;
+		my @endhash = ();
+		
+		my @currentcontigstarts=();
+		my @currentcontigends=();
+		my @currentcontigchrs=();
+		
+		for my $i (0 ... $#tags){
+#			print "searching for : if exists  hasharr: $i : $tags[$i] : $key2 \n" if $prinkter == 1;
+			my @temparr = (); 
+	
+			if (exists $hasharr[$i]{$key2}){
+				@temparr =  @{$hasharr[$i]{$key2}};
+				
+				$line =~ /$tags[$i]\s([a-zA-Z0-9_]+)\s([0-9]+)\s([0-9]+)/;
+##				print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9_])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
+#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
+				my $startkey = $1."_SK0SK_".$2; #print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
+				my $endkey = $1."_EK0EK_".$3; #print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
+				$contigstarts[$i]{$startkey}= $key2;
+				$contigends[$i]{$endkey}= $key2;
+#				print "confirming existance: \n" if $prinkter == 1;
+#				print "present \n" if exists $contigends[$i]{$endkey} && $prinkter == 1;
+#				print "absent \n" if !exists $contigends[$i]{$endkey} && $prinkter == 1;			
+				$currentcontigchrs[$i]=$1;
+				$currentcontigstarts[$i]=$2;
+				$currentcontigends[$i]=$3;
+				
+			} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
+			else {
+				push (@starthash, {0 => "0"});
+				push (@endhash, {0 => "0"});
+				$currentcontigchrs[$i] = 0;
+				next;
+			}
+			$stopper = 0;
+	#		print "exists: @temparr\n" if $prinkter == 1;
+			push(@micro_count, scalar(@temparr));
+			push(@species_list, [@temparr]);	
+			my @tempstart = (); my @tempend = ();
+			my %localends = ();
+			my %localhash = ();
+	#		print "---------------------------\n";
+	
+			foreach my $templine (@temparr){
+#				print "templine = $templine\n" if $prinkter == 1;
+				my @tields = split(/\t/,$templine);
+				my $start = $tields[$startcord]; # - $tields[$gapcord];
+				my $end = $tields[$endcord]; #- $tields[$gapcord];
+				my $realstart = $tields[$startcord]- $tields[$gapcord];
+				my $gapsinmicrosat = ($tields[$microsatcord] =~ s/-/-/g);
+				$gapsinmicrosat = 0 if $gapsinmicrosat !~ /[0-9]+/;
+	#			print "infocord = $infocord  typecord = $typecord  motifcord = $motifcord  gapcord = $gapcord  startcord = $startcord strandcord = $strandcord endcord = $endcord   microsatcord = $microsatcord  sequencepos = $sequencepos\n";
+				my $realend = $tields[$endcord]- $tields[$gapcord]- $gapsinmicrosat;
+		#		print "real start = $realstart, realend = $realend \n";
+				for my $pos ($realstart ... $realend){  $strings[$i][$pos] = $strings[$i][$pos].",".$i.":".$start."-".$end;}
+				push(@tempstart, $start);
+				push(@tempend, $end);
+				$localhash{$start."-".$end} = $templine;
+				}
+			push @starthash, {%localhash};
+			my $foundclusters  =findClusters(join("!",@{$strings[$i]}), $CLUSTER_DIST);
+	
+#			print "foundclusters = $foundclusters\n";
+	
+			my @clusters = split(/_/,$foundclusters);
+			
+			my $clustno = 0;
+			
+			foreach my $cluster (@clusters) {
+				my @constituenst = split(/,/,$cluster);
+#				print "clusters returned: @constituenst\n" if $prinkter == 1;
+			}
+	
+			@string = split("_S0S_",stringPainter(join("_C0C_",@string),$foundclusters));
+			
+			
+		}
+		next if $stopper == 1;
+	
+#		print colored ['blue'],"FINAL:\n" if $prinkter == 1;
+		my $finalclusters  =findClusters(join("!",@string), 1);
+#		print "finalclusters = $finalclusters\n";
+#		print colored ['blue'],"----------------------\n" if $prinkter == 1;
+		my @clusters = split(/,/,$finalclusters);
+#			print "@string\n" if $prinkter == 1;
+#			print "@clusters\n" if $prinkter == 1;
+#			print "------------------------------------------------------------------\n" if $prinkter == 1;
+		
+		my $clustno = 0;
+		
+	#	foreach my $cluster (@clusters) {
+	#		my @constituenst = split(/,/,$cluster);
+	#		print "clusters returned: @constituenst\n";
+	#	}
+	
+		next if (scalar @clusters == 0);
+	
+		my @contigcluster=();
+		my $clusterno=0;
+		my @contigClusterstarts=();
+		my @contigClusterends = ();
+		
+		foreach my $clust (@clusters){
+	 #		print "cluster: $clust\n";   		
+			$clusterno++;
+			my @localclust = split(/\./, $clust);
+			my @result = ();
+			my @starts = ();
+			my @ends = ();
+	 
+			for my $i (0 ... $#localclust){
+	 #			print "localclust[$i]: $localclust[$i]\n";   		
+				my @pattern = split(/:/, $localclust[$i]);
+				my @cords = split(/-/, $pattern[1]);
+				push (@starts, $cords[0]);
+				push (@ends, $cords[1]);
+			}
+	
+			my $extremestart = smallest_number(@starts);
+			my $extremeend = largest_number(@ends);
+			push(@contigClusterstarts, $extremestart);
+			push(@contigClusterends, $extremeend);
+#			print "cluster starts from $extremestart and ends at $extremeend \n" if $prinkter == 1 ;
+					
+			foreach my $clustparts (@localclust){
+				my @pattern = split(/:/, $clustparts);
+	# 			print "printing from pattern: $pattern[1]: $starthash[$pattern[0]]{$pattern[1]}\n";
+				push (@result, $starthash[$pattern[0]]{$pattern[1]});
+			}
+			push(@contigcluster, join("\t", @result));
+#			print join("\t", @result),"<-result \n" if $prinkter == 1 ;
+		}	
+		
+	
+		my $firstclusterstart = smallest_number(@contigClusterstarts);
+		my $lastclusterend = largest_number(@contigClusterends);
+		
+		
+		$contigclustersFirstStartOnly{$key2}=$firstclusterstart;	
+		$contigclustersLastEndOnly{$key2} = $lastclusterend;
+		$contigclusters{$key2}=[ @contigcluster ];
+#		print "currentcontigchr are @currentcontigchrs , firstclusterstart = $firstclusterstart, lastclusterend = $lastclusterend\n " if $prinkter == 1;
+		for my $i (0 ... $#tags){
+			#1 check if there exists adjacent alignment block wrt coordinates of this species.
+			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block.. 
+												 #2	no need to worry about proximity of anything in adjacent block!
+			
+			#1 BELOW, the following is really to calclate the distance between the end coordinate of the 
+			#2 cluster and the end of the gap-free sequence of each species. this is so that if an
+			#3 adjacent alignment block is found lateron, the exact distance between the potentially
+			#4 adjacent microsat clusters can be found here. the exact start coordinate will be used
+			#5 immediately below.
+	#		print "full sequence = $origsequences[$i] and its length = ",length($origsequences[$i])," \n" if $prinkter == 1;		
+			
+			my $species_startsubstring = substr($origsequences[$i], 0, $firstclusterstart);
+			my $species_endsubstring = ();
+			
+			if (length ($origsequences[$i]) <= $lastclusterend+1){ $species_endsubstring = "";}
+			else{  $species_endsubstring = substr($origsequences[$i], $lastclusterend+1);}
+			
+#			print "\nnot defined species_endsubstring...\n"  if !defined $species_endsubstring && $prinkter == 1;
+#			print "for species: $tags[$i]: \n" if $prinkter == 1;
+			
+			$species_startsubstring =~ s/-| //g;
+			$species_endsubstring =~ s/-| //g;
+			$contigclustersLastEndLengthOnly{$key2}[$i]=length($species_endsubstring);
+			$contigclustersFirstStartLengthOnly{$key2}[$i]=length($species_startsubstring);
+	
+	
+	
+#			print "species_startsubstring = $species_startsubstring, and its length =",length($species_startsubstring)," \n" if $prinkter == 1;
+#			print "species_endsubstring = $species_endsubstring, and its length =",length($species_endsubstring)," \n" if $prinkter == 1;
+#			print "attaching to contigclustersLastEndOnly: $key2: $i\n" if $prinkter == 1;	
+			
+#			print "just confirming: $contigclustersLastEndLengthOnly{$key2}[$i] \n" if $prinkter == 1;
+	
+		}
+	
+	
+	}
+#	print "\ndone the job of filling... \n";
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	$prinkter=0;
+	open (BO, "<$aligns") or die "Cannot open alignment file: $aligns: $!";
+	
+	my %clusteringpaths=();
+	my %clustersholder=();
+	my %foundkeys=();
+	my %clusteringpathsRev=();
+	
+	
+	my $totalcount=();
+	my $founkeys_enteredcount=();
+	my $transfered=0;
+	my $complete_transfered=0;
+	my $plain_transfered=0;
+	my $existing_removed=0;
+	
+	while (my $line = <BO>){
+#		print "x" x 60, "\n" if $prinkter == 1;
+		next if $line !~ /^[0-9]+/;
+		#print $line;
+		chomp $line;	
+		my @fields2 = split(/\t/,$line);
+		my $key2 = ();
+		if ($line =~ /([0-9]+)\s+($focalspec)\s(chr[0-9a-zA-Z_]+)\s([0-9]+)\s([0-9]+)/ ) {
+			$key2 = join("\t",$1, $2,  $4, $5);
+		}
+		
+		else {
+		#	print "seq line $line incompatible\n";
+			next;
+		}
+#		print "KEY =  : $key2\n" if $prinkter == 1;
+	
+	
+		my @currentcontigstarts=();
+		my @currentcontigends=();
+		my @currentcontigchrs=();
+		my @clusters = ();
+		my @clusterscopy=();
+		if (exists $contigclusters{$key2}){
+			@clusters =  @{$contigclusters{$key2}};
+			@clusterscopy=@clusters;
+			for my $i (0 ... $#tags){
+	#			print "in line $line, trying to hunt for: $tags[$i]\\s([a-zA-Z0-9])+\\s([0-9]+)\\s([0-9]+) \n" if $prinkter == 1;
+				if ($line =~ /$tags[$i]\s([a-zA-Z0-9_]+)\s([0-9]+)\s([0-9]+)/){
+	#				print "org = $tags[$i], and chr = $1, start = $2, end =$3 \n" if $prinkter == 1; 
+					my $startkey = $1."_S0E_".$2; #print "adding start key for this alignmebt block: $startkey to species $tags[$i]\n" if $prinkter == 1;
+					my $endkey = $1."_S0E_".$3; #print "adding end key for this alignmebt block: $endkey to species $tags[$i]\n" if $prinkter == 1;
+					$currentcontigchrs[$i]=$1;
+					$currentcontigstarts[$i]=$2;
+					$currentcontigends[$i]=$3;			
+				}
+				else {
+					$currentcontigchrs[$i] = 0;
+	#				print "no microsat clusters for $key2\n" if $prinkter == 1; next;	
+				}
+			}
+		} # print "exists: @{$hasharr[$i]{$key2}}[0]\n"}
+		
+		my @sequences = ();
+		for (0 ... $#tags){
+			my $seq = <BO>;
+	#		print $seq;
+			chomp $seq;
+			push(@sequences , " ".$seq);
+		}
+		
+		next if scalar @currentcontigchrs == 0;
+		
+	#	print "contigchrs= @currentcontigchrs \n" if $prinkter == 1;
+		my %visitedcontigs=();
+	
+		for my $i (0 ... $#tags){
+			#1 check if there exists adjacent alignment block wrt coordinates of this species.
+			next if $currentcontigchrs[$i] eq "0"; #1 this means that there are no microsats in this species in this alignment block.. 
+													#2	no need to worry about proximity of anything in adjacent block!
+			@clusters=@clusterscopy;
+			#1 BELOW, the following is really to calclate the distance between the end coordinate of the 
+			#2 cluster and the end of the gap-free sequence of each species. this is so that if an
+			#3 adjacent alignment block is found lateron, the exact distance between the potentially
+			#4 adjacent microsat clusters can be found here. the exact start coordinate will be used
+			#5 immediately below.
+			my $firstclusterstart = $contigclustersFirstStartOnly{$key2};
+			my $lastclusterend = $contigclustersLastEndOnly{$key2};
+					
+			my $key3 = $currentcontigchrs[$i]."_S0E_".($currentcontigstarts[$i]);
+#			print  "check if exists $key3 in  contigends for $i\n" if $prinkter == 1; 
+			
+			if (exists($contigends[$i]{$key3}) && !exists $visitedcontigs{$contigends[$i]{$key3}}){
+				$visitedcontigs{$contigends[$i]{$key3}} = $contigends[$i]{$key3}; #1 this array keeps track of adjacent contigs that we have already visited, thus saving computational time and potential redundancies#
+	#			print "just checking the hash visitedcontigs: ",$visitedcontigs{$contigends[$i]{$key3}} ,"\n" if $prinkter == 1;
+	
+				#1 extract coordinates of the last cluster of this found alignment block
+#				print "key of the found alignment block = ", $contigends[$i]{$key3},"\n" if $prinkter == 1;
+	#			print "we are trying to mine: contigclustersAllLastEndLengthOnly_raw: $contigends[$i]{$key3}: $i \n" if $prinkter == 1;
+	#			print "EXISTS\n" if exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
+	#			print "does NOT EXIST\n" if !exists $contigclusters{$contigends[$i]{$key3}} && $prinkter == 1;
+				my @contigclustersAllFirstStartLengthOnly_raw=@{$contigclustersFirstStartLengthOnly{$key2}};
+				my @contigclustersAllLastEndLengthOnly_raw=@{$contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}};
+				my @contigclustersAllFirstStartLengthOnly=(); my @contigclustersAllLastEndLengthOnly=();
+				
+				for my $val (0 ... $#contigclustersAllFirstStartLengthOnly_raw){
+	#				print "val = $val\n" if $prinkter == 1;
+					if (defined $contigclustersAllFirstStartLengthOnly_raw[$val]){
+						push(@contigclustersAllFirstStartLengthOnly, $contigclustersAllFirstStartLengthOnly_raw[$val]) if $contigclustersAllFirstStartLengthOnly_raw[$val] =~ /[0-9]+/;
+					}
+				}
+	#			print "-----\n" if $prinkter == 1;
+				for my $val (0 ... $#contigclustersAllLastEndLengthOnly_raw){
+	#				print "val = $val\n" if $prinkter == 1;
+					if (defined $contigclustersAllLastEndLengthOnly_raw[$val]){
+						push(@contigclustersAllLastEndLengthOnly, $contigclustersAllLastEndLengthOnly_raw[$val]) if  $contigclustersAllLastEndLengthOnly_raw[$val] =~ /[0-9]+/;
+					}
+				}
+				
+				
+	#			print "our two arrays are: starts = <@contigclustersAllFirstStartLengthOnly> ......... and ends = <@contigclustersAllLastEndLengthOnly>\n" if $prinkter == 1;
+	#			print "the last cluster's end in that one is: ",smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly)," = ", smallest_number(@contigclustersAllFirstStartLengthOnly)," + ",smallest_number(@contigclustersAllLastEndLengthOnly),"\n" if $prinkter == 1; 
+				
+	#			if ($contigclustersFirstStartLengthOnly{$key2}[$i] + $contigclustersLastEndLengthOnly{$contigends[$i]{$key3}}[$i] < 50){
+				if (smallest_number(@contigclustersAllFirstStartLengthOnly) + smallest_number(@contigclustersAllLastEndLengthOnly) < $CLUSTER_DIST){
+					my @regurgitate = @{$contigclusters{$contigends[$i]{$key3}}}; 
+					$regurgitate[$#regurgitate]=~s/\n//g;
+					$regurgitate[$#regurgitate] = $regurgitate[$#regurgitate]."\t".shift(@clusters);
+					delete $contigclusters{$contigends[$i]{$key3}};
+					$contigclusters{$contigends[$i]{$key3}}=[ @regurgitate ];
+					delete $contigclusters{$key2};
+					$contigclusters{$key2}= [ @clusters ] if scalar(@clusters) >0;
+					$contigclusters{$key2}= [ "" ] if scalar(@clusters) ==0;
+					
+					if (scalar(@clusters) < 1){
+				#		print "$key2-> $clusteringpaths{$key2} in the loners\n" if exists $foundkeys{$key2};
+						$clusteringpaths{$key2}=$contigends[$i]{$key3};	
+						$clusteringpathsRev{$contigends[$i]{$key3}}=$key2;				
+						print OUTP "$contigends[$i]{$key3} -> $clusteringpathsRev{$contigends[$i]{$key3}}\n";
+	#					print " clusteringpaths $key2 -> $contigends[$i]{$key3}\n";
+						$founkeys_enteredcount-- if exists $foundkeys{$key2};
+						$existing_removed++  if exists $foundkeys{$key2};
+#						print "$key2->",@{$contigclusters{$key2}},"->>$foundkeys{$key2}\n" if exists $foundkeys{$key2} && $prinkter == 1;
+						delete $foundkeys{$key2} if exists $foundkeys{$key2};
+						$complete_transfered++;
+					}
+					else{
+						print OUTP "$key2-> 0 not so lonely\n"  if !exists $clusteringpathsRev{$key2};
+						$clusteringpaths{$key2}=$key2  if !exists $clusteringpaths{$key2};
+						$clusteringpathsRev{$key2}=0 if !exists $clusteringpathsRev{$key2};
+						
+						$founkeys_enteredcount++ if !exists $foundkeys{$key2};
+						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2};
+	#					print "adding foundkeys entry $foundkeys{$key2}\n";
+						$transfered++;				
+					}
+					#$contigclusters{$key2}=[ @contigcluster ];
+				}
+			}
+			else{
+	#					print "adjacent block with species $tags[$i] does not exist\n" if $prinkter == 1; 
+						$plain_transfered++;
+						print OUTP "$key2-> 0 , going straight\n"  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
+						$clusteringpaths{$key2}=$key2 if exists $contigclusters{$key2} && !exists $clusteringpaths{$key2};
+						$clusteringpathsRev{$key2}=0  if exists $contigclusters{$key2} && !exists $clusteringpathsRev{$key2};
+						$founkeys_enteredcount++ if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
+						$foundkeys{$key2} = $key2 if !exists $foundkeys{$key2} && exists $contigclusters{$key2};
+	#					print "adding foundkeys entry $foundkeys{$key2}\n";
+				
+			}
+			$totalcount++;
+	
+		}
+		
+		
+	}
+	close BO;
+	#close (NORTH);
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	#///////////////////////////////////////////////////////////////////////////////////////
+	
+	my $founkeys_count=();
+	my $nopath_count=();
+	my $pathed_count=0; 
+	foreach my $key2 (keys %foundkeys){
+		#print "x" x 60, "\n";
+#		print "x" if $dotcounter % 100 ==0;
+#		print "\n" if $dotcounter % 5000 ==0;
+		$founkeys_count++;
+		my $key = $key2;
+#		print "$key2 -> $clusteringpaths{$key2}\n" if $prinkter == 1;
+		if ($clusteringpaths{$key} eq $key){
+#			print "printing hit the alignment block immediately... no path needed\n" if $prinkter == 1;
+			$nopath_count++;
+			delete $foundkeys{$key2};
+			print ORTH join ("\n",@{$contigclusters{$key2}}),"\n";
+		}
+		else{
+			my @pool=();
+			my $key3=();
+			$pathed_count++;
+#			print "going reverse... clusteringpathsRev, $key = $clusteringpathsRev{$key}\n" if exists $clusteringpathsRev{$key} && $prinkter == 1;
+#			print "going reverse... clusteringpathsRev  $key does not exist\n" if !exists $clusteringpathsRev{$key} && $prinkter == 1;
+			if ($clusteringpathsRev{$key} eq "0") {
+ 				next;
+			}
+			else{
+				my $yek3 = $clusteringpathsRev{$key};
+				my $yek = $key;
+#				print "caught in the middle of a path, now goin down from $yek to $yek3, which is $clusteringpathsRev{$key} \n" if $prinkter == 1;
+				while ($yek3 ne "0"){
+#					print "$yek->$yek3," if $prinkter == 1;
+					$yek = $yek3;
+					$yek3 = $clusteringpathsRev{$yek};
+				}
+#				print "\nfinally reached the end of path: $yek3, and the next in line is $yek, and its up-route is  $clusteringpaths{$yek}\n" if $prinkter == 1; 
+				$key3 = $clusteringpaths{$yek};
+				$key = $yek;
+			}
+		
+#			print "now that we are at bottom of the path, lets start climbing up again\n" if $prinkter == 1;
+			
+			while($key ne $key3){
+#				print "KEEY $key->$key3\n" if $prinkter == 1;
+#				print "our contigcluster = @{$contigclusters{$key}}\n----------\n" if $prinkter == 1;
+				
+				if (scalar(@{$contigclusters{$key}}) > 0) {push @pool, @{$contigclusters{$key}}; 
+				#	print "now pool = @pool\n" if $prinkter == 1;
+				}
+				delete $foundkeys{$key3};
+				$key=$key3;
+				$key3=$clusteringpaths{$key};
+			}
+#			print "\nfinally, adding the first element of path: @{$contigclusters{$key}}\n AND printing the contents:\n" if $prinkter == 1;
+			my @firstcontig= @{$contigclusters{$key}};
+			delete $foundkeys{$key2} if exists $foundkeys{$key2} ;
+			delete $foundkeys{$key} if exists $foundkeys{$key};
+	
+			unshift @pool, pop @firstcontig;
+#			print join("\t",@pool),"\n" if $prinkter == 1;
+			print ORTH join ("\n",@firstcontig),"\n" if scalar(@firstcontig) > 0;
+			print ORTH join ("\t",@pool),"\n";		
+		#	join();
+		}
+	
+	}
+	#close (NORTH);
+#	print "founkeys_entered =$founkeys_enteredcount, plain_transfered=$plain_transfered,existing_removed=$existing_removed,founkeys_count =$founkeys_count, nopath_count =$nopath_count, transfered = $transfered, complete_transfered = $complete_transfered, totalcount = $totalcount, pathed=$pathed_count\n" if $prinkter == 1;
+	close (BO);
+	close (ORTH);
+	close (OUTP);
+	return 1;
+	
+}
+sub stringPainter{
+	my @string  = split(/_C0C_/,$_[0]);
+#	print $_[0], " <- in stringPainter\n";
+#	print $_[1], " <- in clusters\n";
+	
+	my @clusters = split(/,/, $_[1]);
+	for my $i (0 ... $#clusters){
+		my $cluster = $clusters[$i];
+#		print "cluster = $cluster\n";
+		my @parts = split(/\./,$cluster);
+		my @cord = split(/:|-/,shift(@parts));
+		my $minstart = $cord[1];
+		my $maxend = $cord[2];
+#		print "minstart = $minstart , maxend = $maxend\n";
+		
+		for my $j (0 ... $#parts){
+#			print "oing thri $parts[$j]\n";
+			my @cord = split(/:|-/,$parts[$j]);
+			$minstart = $cord[1] if $cord[1] < $minstart;
+			$maxend = $cord[2] if $cord[2] > $maxend;
+		}
+#		print "minstart = $minstart , maxend = $maxend\n";
+		for my $pos ($minstart ... $maxend){ $string[$pos] = $string[$pos].",".$cluster;}
+				
+		
+	}
+#	print "@string <-done from function stringPainter\n";
+	return join("_S0S_",@string);
+}
+
+sub findClusters{
+	my $continue = 0;
+	my @mapped_clusters = ();	
+	my $clusterdist = $_[1];
+	my $previous = 'x';
+	my @localcluster = ();
+	my $cluster_starts = ();
+	my $cluster_ends = ();
+	my $localcluster_start = ();
+	my $localcluster_end = ();
+	my @record_cluster = ();
+	my @string = split(/\!/, $_[0]);
+	my $zerolength=0;
+	
+	for my $pos_pos (1 ... $#string){
+			my $pos = $string[$pos_pos];
+#			print $pos, "\n";
+			if ($continue == 0 && $pos eq "x") {next;}
+			
+			if ($continue == 1 && $pos eq "x" && $zerolength <= $clusterdist){ 
+				if ($zerolength == 0) {$localcluster_end = $pos_pos-1};
+				$zerolength++; 
+				$continue = 1; 
+			}
+
+			if ($continue == 1 && $pos eq "x" && $zerolength > $clusterdist) { 
+				$zerolength = 0; 
+				$continue = 0; 
+				my %seen;
+				my @uniqed = grep !$seen{$_}++, @localcluster;
+#				print "caught cluster : @uniqed \n";
+				push(@mapped_clusters, [@uniqed]);
+#				print "clustered:\n@uniqed\n";
+				@localcluster = ();
+				@record_cluster = ();
+				
+			}
+			
+			if ($pos ne "x"){
+				$zerolength = 0;
+				$continue = 1;
+				$pos =~ s/x,//g;
+				my @entries = split(/,/,$pos);
+				$localcluster_end = 0;
+				$localcluster_start = 0;
+				push(@record_cluster,$pos);
+			
+				if ($continue == 0){
+					@localcluster = ();
+					@localcluster = (@localcluster, @entries);
+					$localcluster_start = $pos_pos;
+				}
+			
+				if ($continue == 1 ) {
+					@localcluster = (@localcluster, @entries);
+				}
+			}
+	}
+	
+	if (scalar(@localcluster) > 0){
+		my %seen;
+		my @uniqed = grep !$seen{$_}++, @localcluster;
+	#	print "caught cluster : @uniqed \n";
+		push(@mapped_clusters, [@uniqed]);
+	#	print "clustered:\n@uniqed\n";
+		@localcluster = ();
+		@record_cluster = ();
+	}
+
+	my @returner = ();
+	
+	foreach my $clust (@mapped_clusters){
+		my @localclust = @$clust;
+		my @result = ();
+		foreach my $clustparts (@localclust){
+			push(@result,$clustparts);
+		}
+		push(@returner , join(".",@result));
+	}	
+#	print "returnig: ", join(",",@returner), "\n";
+	return join(",",@returner);
+}
+#xxxxxxxxxxxxxx multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx  multiSpecies_orthFinder4 xxxxxxxxxxxxxx 
+
+#xxxxxxxxxxxxxx MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx  MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx  MakeTrees xxxxxxxxxxxxxxxxxxxxxxxxxxxx 
+
+sub MakeTrees{
+	my $tree = $_[0];
+	my @parts=($tree);
+#	my @parts=();
+	
+	while (1){
+		$tree =~ s/^\(//g;
+		$tree =~ s/\)$//g;
+		my @arr = ();
+	
+		if ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)\)$/){
+			@arr = $tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_\(\),]+)$/;
+			$tree = $2;
+			push @parts, $tree;
+		}
+		elsif ($tree =~ /^\(([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/){
+			@arr = $tree =~ /^([a-zA-Z0-9_\(\),]+),([a-zA-Z0-9_]+)$/;
+			$tree = $1;
+			push @parts, $tree;
+		}
+		elsif ($tree =~ /^([a-zA-Z0-9_]+),([a-zA-Z0-9_]+)$/){
+			last;
+		}
+	}	
+	return @parts;
+}
+
+#xxxxxxxxxxxxxx qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx  qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx  qualityFilter xxxxxxxxxxxxxxxxxxxxxxxxxxxx 
+
+sub qualityFilter{
+	my $unmaskedorthfile = $_[0];
+	my $seqfile = $_[1];
+	my $maskedorthfile = $_[2];
+	my $filteredout = $maskedorthfile."_residue";
+	open (PMORTH, "<$unmaskedorthfile") or die "Cannot open unmaskedorthfile file: $unmaskedorthfile: $!";
+
+	my %keyhash = ();
+	
+	while (my $line = <PMORTH>){
+		my $key = join("\t", $1,$2,$3,$4) if $line =~ /($focalspec)\s+([a-zA-Z0-9\-_]+)\s+([0-9]+)\s+([0-9]+)/;
+		push @{$keyhash{$key}}, $line;
+	}
+	
+	open (SEQ, "<$seqfile") or die "Cannot open seqfile file: $seqfile: $!";
+	open (MORTH, ">$maskedorthfile") or die "Cannot open maskedorthfile file: $maskedorthfile: $!";
+        open (RES, ">$filteredout") or die "Cannot open filteredout file: $filteredout: $!";
+
+
+	
+	while (my $line = <SEQ>){
+		chomp $line;
+		if ($line =~ /($focalspec)\s+([a-zA-Z0-9\-_]+)\s+([0-9]+)\s+([0-9]+)/){
+			my $key = join("\t", $1,$2,$3,$4);
+			next if !exists $keyhash{$key};
+			my @orths = @{$keyhash{$key}} if exists $keyhash{$key};
+			delete $keyhash{$key};
+			
+			my $sine = <SEQ>;
+			
+			foreach my $orth (@orths){
+				#print "-----------------------------------------------------------------\n";
+				#print $orth;
+				my $orthcopy = $orth;
+				$orth =~ s/^>//;
+				my @parts = split(/>/,$orth);
+				
+				my @starts = ();
+				my @ends = ();
+				
+				foreach my $part (@parts){
+					my $no_of_species = adjustCoordinates($part);
+					my @pields = split(/\t/,$part);
+					
+				#	print "pields = @pields .. no_of_species = $no_of_species .. startcord = $pields[$startcord]\n";
+					
+					push @starts, $pields[$startcord];
+					push @ends, $pields[$endcord];
+				}
+				
+				#print "starts = @starts ... ends = @ends\n";
+				
+				my $leftend = smallest_number(@starts)-10;
+				my $rightend = largest_number(@ends)+10;
+
+				my $maskarea = substr($sine, $leftend, $rightend-$leftend+1);
+				print RES $orth if $maskarea =~ /#/;		
+				
+				
+				next if $maskarea =~ /#/;
+				
+				print MORTH $orthcopy;
+			}
+		}
+		else{
+			next;
+		}
+		
+		
+	}
+	
+#	print "UNDONE: ", scalar(keys %keyhash),"\n";
+#	print MORTH "UNDONE: ", scalar(keys %keyhash),"\n";
+	
+}
+
+sub adjustCoordinates{
+	my $line = $_[0];
+	my $no_of_species = $line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)|(scaffold[0-9a-zA-Z\._\-]+)|(supercontig[0-9a-zA-Z\._\-]+)/x/ig;
+	my @got = ($line =~ s/(chr[0-9a-zA-Z]+)|(Contig[0-9a-zA-Z\._\-]+)/x/g);
+# print "line = $line\n";
+	$infocord = 2 + (4*$no_of_species) - 1;
+	$typecord = 2 + (4*$no_of_species) + 1 - 1;
+	$motifcord = 2 + (4*$no_of_species) + 2 - 1;
+	$gapcord = $motifcord+1;
+	$startcord = $gapcord+1;
+	$strandcord = $startcord+1;
+	$endcord = $strandcord + 1;
+	$microsatcord = $endcord + 1;
+	$sequencepos = 2 + (5*$no_of_species) + 1 -1 ; 
+	$interr_poscord = $microsatcord + 3;
+	$no_of_interruptionscord = $microsatcord + 4;
+	$interrcord = $microsatcord + 2;
+#	print "$line\n startcord = $startcord, and endcord = $endcord and no_of_species = $no_of_species\n" if $line !~ /calJac/i;
+	return $no_of_species;
+}
+
+
+
+